Vaibhav Srivastav committed on
Commit
2bede7c
·
1 Parent(s): eecc2cb
Files changed (2) hide show
  1. Dockerfile +61 -0
  2. app.py +59 -0
Dockerfile ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# CUDA *devel* image: the CUDA toolchain is needed to compile llama.cpp below.
FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
ENV DEBIAN_FRONTEND=noninteractive

# System packages. The apt package lists are removed in the same layer so they
# do not get baked into the image.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
    git \
    git-lfs \
    wget \
    curl \
    # python build dependencies (pyenv compiles CPython from source)
    build-essential \
    libssl-dev \
    zlib1g-dev \
    libbz2-dev \
    libreadline-dev \
    libsqlite3-dev \
    libncursesw5-dev \
    xz-utils \
    tk-dev \
    libxml2-dev \
    libxmlsec1-dev \
    libffi-dev \
    liblzma-dev \
    # gradio dependencies
    ffmpeg && \
    rm -rf /var/lib/apt/lists/*

# Everything from here on runs as an unprivileged user with uid 1000.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:${PATH}
WORKDIR ${HOME}/app

# Install pyenv; -f makes curl fail on an HTTP error instead of piping an
# error page into bash.
RUN curl -fsSL https://pyenv.run | bash
ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
ARG PYTHON_VERSION=3.10.13
# gradio is installed explicitly because app.py imports it.
RUN pyenv install ${PYTHON_VERSION} && \
    pyenv global ${PYTHON_VERSION} && \
    pyenv rehash && \
    pip install --no-cache-dir -U pip setuptools wheel && \
    pip install --no-cache-dir "huggingface-hub" "hf-transfer" "gradio"

COPY --chown=1000 . ${HOME}/app

# Build llama.cpp with CUDA enabled. A shallow clone is enough for a build,
# and a fresh clone needs no `make clean`.
RUN git clone --depth 1 https://github.com/ggerganov/llama.cpp && \
    cd llama.cpp && \
    LLAMA_CUDA=1 make -j"$(nproc)"

# Python dependencies of llama.cpp's conversion scripts.
RUN pip install --no-cache-dir -r llama.cpp/requirements.txt

ENV PYTHONPATH=${HOME}/app \
    PYTHONUNBUFFERED=1 \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    TQDM_POSITION=-1 \
    TQDM_MININTERVAL=1 \
    SYSTEM=spaces

CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import subprocess
3
+
4
+ from huggingface_hub import create_repo, HfApi
5
+ from huggingface_hub import snapshot_download
6
+
7
+ api = HfApi()
8
+
9
def process_model(model_id, q_method, username, hf_token):
    """Download a Hub model, convert it to GGUF, quantise it and upload it.

    The model snapshot is downloaded into a local directory named after the
    last path component of ``model_id``. llama.cpp's ``convert.py`` writes an
    fp16 file into that directory, ``quantize`` produces the quantised
    ``.gguf`` next to it, and the ``.gguf`` / ``.md`` files from the directory
    are uploaded to ``{username}/{model name}-{q_method}-GGUF``.

    Args:
        model_id: Hub repo id of the source model (e.g. ``"org/name"``).
        q_method: Quantisation type, passed verbatim to llama.cpp ``quantize``.
        username: Namespace under which the destination repo is created.
        hf_token: Token used to create the destination repo and upload to it.

    Returns:
        The string ``"Processing complete."``.

    Raises:
        subprocess.CalledProcessError: If the conversion or the quantisation
            command exits with a non-zero status.
    """
    model_name = model_id.split('/')[-1]
    fp16 = f"{model_name}/{model_name.lower()}.fp16.bin"

    snapshot_download(repo_id=model_id, local_dir=f"{model_name}", local_dir_use_symlinks=False)
    print("Model downloaded successully!")

    # Arguments are passed as a list (no shell) so user-supplied text cannot be
    # interpreted as shell syntax; check=True stops the pipeline on failure
    # instead of reporting success and uploading nothing.
    subprocess.run(
        ["python", "llama.cpp/convert.py", model_name, "--outtype", "f16", "--outfile", fp16],
        check=True,
    )
    print("Model converted to fp16 successully!")

    qtype = f"{model_name}/{model_name.lower()}.{q_method.upper()}.gguf"
    subprocess.run(["./llama.cpp/quantize", fp16, qtype, q_method], check=True)
    print("Quantised successfully!")

    # Destination repo id, shared by the create and upload steps.
    repo_id = f"{username}/{model_name}-{q_method}-GGUF"

    # Create empty repo
    create_repo(
        repo_id=repo_id,
        repo_type="model",
        exist_ok=True,
        token=hf_token,
    )
    print("Empty repo created successfully!")

    # Upload gguf files (and any markdown such as the model card); the fp16
    # intermediate has a .bin suffix and is therefore not uploaded.
    api.upload_folder(
        folder_path=model_name,
        repo_id=repo_id,
        allow_patterns=["*.gguf", "*.md"],
        token=hf_token,
    )
    print("Uploaded successfully!")

    return "Processing complete."
45
+
46
# Create Gradio interface: the four text inputs are passed positionally to
# process_model(model_id, q_method, username, hf_token).
iface = gr.Interface(
    fn=process_model,
    inputs=[
        gr.Textbox(lines=1, label="Model ID"),
        gr.Textbox(lines=1, label="Quantization Methods"),
        gr.Textbox(lines=1, label="Username"),
        # Mask the access token so it is not displayed in clear text.
        gr.Textbox(lines=1, label="Token", type="password"),
    ],
    outputs="text",
)

# Launch the interface
iface.launch(debug=True)