Spaces:
Running on A10G
Update app.py
#26
by
reach-vb
HF staff
- opened
app.py
CHANGED
@@ -23,14 +23,30 @@ def script_to_use(model_id, api):
|
|
23 |
arch = arch[0]
|
24 |
return "convert.py" if arch in LLAMA_LIKE_ARCHS else "convert-hf-to-gguf.py"
|
25 |
|
26 |
-
def process_model(model_id, q_method, hf_token):
|
27 |
model_name = model_id.split('/')[-1]
|
28 |
fp16 = f"{model_name}/{model_name.lower()}.fp16.bin"
|
29 |
|
30 |
try:
|
31 |
api = HfApi(token=hf_token)
|
32 |
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
print("Model downloaded successully!")
|
35 |
|
36 |
conversion_script = script_to_use(model_id, api)
|
@@ -49,7 +65,7 @@ def process_model(model_id, q_method, hf_token):
|
|
49 |
print("Quantised successfully!")
|
50 |
|
51 |
# Create empty repo
|
52 |
-
new_repo_url = api.create_repo(repo_id=f"{model_name}-{q_method}-GGUF", exist_ok=True)
|
53 |
new_repo_id = new_repo_url.repo_id
|
54 |
print("Repo created successfully!", new_repo_url)
|
55 |
|
@@ -58,6 +74,7 @@ def process_model(model_id, q_method, hf_token):
|
|
58 |
except:
|
59 |
card = ModelCard("")
|
60 |
card.data.tags = ["llama-cpp"] if card.data.tags is None else card.data.tags + ["llama-cpp"]
|
|
|
61 |
card.text = dedent(
|
62 |
f"""
|
63 |
# {new_repo_id}
|
@@ -84,7 +101,7 @@ def process_model(model_id, q_method, hf_token):
|
|
84 |
llama-server --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -c 2048
|
85 |
```
|
86 |
|
87 |
-
Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the
|
88 |
|
89 |
```
|
90 |
git clone https://github.com/ggerganov/llama.cpp && cd llama.cpp && make && ./main -m {qtype.split("/")[-1]} -n 128
|
@@ -138,6 +155,11 @@ iface = gr.Interface(
|
|
138 |
label="HF Write Token",
|
139 |
info="https://hf.co/settings/token",
|
140 |
type="password",
|
|
|
|
|
|
|
|
|
|
|
141 |
)
|
142 |
],
|
143 |
outputs=[
|
@@ -145,7 +167,7 @@ iface = gr.Interface(
|
|
145 |
gr.Image(show_label=False),
|
146 |
],
|
147 |
title="Create your own GGUF Quants, blazingly fast ⚡!",
|
148 |
-
description="The space takes
|
149 |
article="<p>Find your write token at <a href='https://huggingface.co/settings/tokens' target='_blank'>token settings</a></p>",
|
150 |
|
151 |
)
|
|
|
23 |
arch = arch[0]
|
24 |
return "convert.py" if arch in LLAMA_LIKE_ARCHS else "convert-hf-to-gguf.py"
|
25 |
|
26 |
+
def process_model(model_id, q_method, hf_token, private_repo):
|
27 |
model_name = model_id.split('/')[-1]
|
28 |
fp16 = f"{model_name}/{model_name.lower()}.fp16.bin"
|
29 |
|
30 |
try:
|
31 |
api = HfApi(token=hf_token)
|
32 |
|
33 |
+
dl_pattern = ["*.md", "*.json", "*.model"]
|
34 |
+
|
35 |
+
pattern = (
|
36 |
+
"*.safetensors"
|
37 |
+
if any(
|
38 |
+
file.path.endswith(".safetensors")
|
39 |
+
for file in api.list_repo_tree(
|
40 |
+
repo_id=model_id,
|
41 |
+
recursive=True,
|
42 |
+
)
|
43 |
+
)
|
44 |
+
else "*.bin"
|
45 |
+
)
|
46 |
+
|
47 |
+
dl_pattern += pattern
|
48 |
+
|
49 |
+
snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, token=hf_token, allow_patterns=dl_pattern)
|
50 |
print("Model downloaded successully!")
|
51 |
|
52 |
conversion_script = script_to_use(model_id, api)
|
|
|
65 |
print("Quantised successfully!")
|
66 |
|
67 |
# Create empty repo
|
68 |
+
new_repo_url = api.create_repo(repo_id=f"{model_name}-{q_method}-GGUF", exist_ok=True, private=private_repo)
|
69 |
new_repo_id = new_repo_url.repo_id
|
70 |
print("Repo created successfully!", new_repo_url)
|
71 |
|
|
|
74 |
except:
|
75 |
card = ModelCard("")
|
76 |
card.data.tags = ["llama-cpp"] if card.data.tags is None else card.data.tags + ["llama-cpp"]
|
77 |
+
card.data.tags += ["gguf-my-repo"]
|
78 |
card.text = dedent(
|
79 |
f"""
|
80 |
# {new_repo_id}
|
|
|
101 |
llama-server --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -c 2048
|
102 |
```
|
103 |
|
104 |
+
Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
|
105 |
|
106 |
```
|
107 |
git clone https://github.com/ggerganov/llama.cpp && cd llama.cpp && make && ./main -m {qtype.split("/")[-1]} -n 128
|
|
|
155 |
label="HF Write Token",
|
156 |
info="https://hf.co/settings/token",
|
157 |
type="password",
|
158 |
+
),
|
159 |
+
gr.Checkbox(
|
160 |
+
value=False,
|
161 |
+
label="Private Repo",
|
162 |
+
info="Create a private repo under your username."
|
163 |
)
|
164 |
],
|
165 |
outputs=[
|
|
|
167 |
gr.Image(show_label=False),
|
168 |
],
|
169 |
title="Create your own GGUF Quants, blazingly fast ⚡!",
|
170 |
+
description="The space takes an HF repo as an input, quantises it and creates a Public repo containing the selected quant under your HF user namespace. You need to specify a write token obtained in https://hf.co/settings/tokens.",
|
171 |
article="<p>Find your write token at <a href='https://huggingface.co/settings/tokens' target='_blank'>token settings</a></p>",
|
172 |
|
173 |
)
|