Commit 713b2b5 ("might work")
testbot committed
1 parent: 9c6997e
Files changed:
- app.py (+40 −21)
- convert.py (+16 −9)
app.py CHANGED

@@ -1,4 +1,5 @@
 from pathlib import Path
+from tempfile import TemporaryDirectory
 
 import gradio as gr
 from huggingface_hub import HfApi, Repository
@@ -6,23 +7,25 @@ from huggingface_hub.utils import RepositoryNotFoundError
 
 from convert import convert
 
-REPO_PATH = Path("bloomz.cpp")
 
-
-
-
-def run(token: str, model_id: str, precision: str, quantization: bool) -> str:
+def run(
+    token: str, model_id: str, precision: str, quantization: bool, destination: str
+) -> str:
     if token == "" or model_id == "":
         return """
         ### Invalid input 🙈
 
         Please fill a token and model_id.
         """
+    if destination == "":
+        destination = model_id
 
     api = HfApi(token=token)
     try:
         # TODO: make a PR to bloomz.cpp to be able to pass a token
-        api.model_info(repo_id=model_id, token=False)  # only public repos are accessible
+        api.model_info(
+            repo_id=model_id, token=False
+        )  # only public repos are accessible
     except RepositoryNotFoundError:
         return f"""
         ### Error 😢😢😢
@@ -31,17 +34,29 @@ def run(token: str, model_id: str, precision: str, quantization: bool) -> str:
         """
 
     try:
-
-
+        with TemporaryDirectory() as cache_folder:
+            model_path = convert(
+                cache_folder=Path(cache_folder),
+                model_id=model_id,
+                precision=precision,
+                quantization=quantization,
+            )
+            print("[model_path]", model_path)
 
-
-
-
-
-
-
-
-
+            commit_info = api.upload_file(
+                repo_id=destination,
+                path_or_fileobj=model_path,
+                path_in_repo=model_path.name,
+                create_pr=True,
+                commit_message=f"Add {model_path.name} from bloomz.cpp converter.",
+            )
+
+            return f"""
+        ### Success 🔥
+        Yay! This model was successfully converted and a PR was opened using your token, here:
+
+        # [{commit_info.pr_url}]({commit_info.pr_url})
+        """
     except Exception as e:
         return f"""
         ### Error 😢😢😢
@@ -52,11 +67,14 @@ def run(token: str, model_id: str, precision: str, quantization: bool) -> str:
 
 DESCRIPTION = """
 The steps are the following:
-- Paste
-- Input a model id from the Hub
--
--
-
+- Paste your HF token. You can create one in your [settings page](https://huggingface.co/settings/tokens).
+- Input a model id from the Hub. This model must be public.
+- Choose which precision you want to use (defaults to FP16).
+- (optional) Opt-in for 4-bit quantization.
+- (optional) By default a PR to the initial repo will be created. You can choose a different destination repo if you want. The destination repo must exist.
+- Click Submit.
+
+That's it! You'll get feedback if it works or not, and if it worked, you'll get the URL of the opened PR 🔥
 """
 
 demo = gr.Interface(
@@ -69,6 +87,7 @@ demo = gr.Interface(
         gr.Text(max_lines=1, label="model_id (e.g.: bigscience/bloomz-7b1)"),
         gr.Radio(choices=["FP16", "FP32"], label="Precision", value="FP16"),
         gr.Checkbox(value=False, label="4-bits quantization"),
+        gr.Text(max_lines=1, label="destination (e.g.: my-username/bloomz-7b1.cpp)"),
     ],
     outputs=[gr.Markdown(label="output")],
     fn=run,
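The heart of the change is run()'s check-then-upload flow: an unauthenticated model_info call verifies the model is public, the weights are converted into a temporary folder, and the resulting file is pushed as a pull request. Here is a minimal sketch of that flow outside Gradio, assuming a recent huggingface_hub in which upload_file(..., create_pr=True) returns a CommitInfo carrying pr_url; the token, model id, file names, and destination repo are placeholders.

# sketch.py — the same check-then-upload flow as run(), without Gradio
from huggingface_hub import HfApi
from huggingface_hub.utils import RepositoryNotFoundError

api = HfApi(token="hf_xxx")  # placeholder user token

try:
    # token=False forces an unauthenticated request, so this succeeds
    # only for public repos.
    api.model_info(repo_id="bigscience/bloomz-560m", token=False)
except RepositoryNotFoundError:
    raise SystemExit("model is private or does not exist")

commit_info = api.upload_file(
    repo_id="my-username/bloomz-560m.cpp",  # destination repo (must exist)
    path_or_fileobj="ggml-model-bloomz-560m-f16.bin",
    path_in_repo="ggml-model-bloomz-560m-f16.bin",
    create_pr=True,  # open a PR instead of committing to main
    commit_message="Add ggml weights from bloomz.cpp converter.",
)
print(commit_info.pr_url)  # URL of the opened pull request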
convert.py CHANGED

@@ -1,14 +1,18 @@
 from pathlib import Path
 from subprocess import run
 
+BLOOMZ_FOLDER = Path(__file__).parent / "bloomz.cpp"
 
-def convert(model_id:str, precision:str, quantization:bool)->Path:
+
+def convert(
+    cache_folder: Path, model_id: str, precision: str, quantization: bool
+) -> Path:
     # Conversion
     cmd = [
         "python",
-        "
-        model_id,
-
+        str(BLOOMZ_FOLDER / "convert-hf-to-ggml.py"),
+        model_id,
+        str(cache_folder),
     ]
     if precision == "FP32":
         cmd.append("--use-fp32")
@@ -17,20 +21,23 @@ def convert(model_id:str, precision:str, quantization:bool)->Path:
     # Model file should exist
     f_suffix = "f32" if precision == "FP32" else "f16"
     _, model_name = model_id.split("/")
-    model_path =
+    model_path = cache_folder / f"ggml-model-{model_name}-{f_suffix}.bin"
     assert model_path.is_file()
 
     # Quantization
     if quantization:
+        q_model_path = (
+            cache_folder / f"ggml-model-{model_name}-{f_suffix}-q4_0.bin"
+        )
         cmd = [
             "./bloomz.cpp/quantize",
-
-
+            str(model_path),
+            str(q_model_path),
             "2",
         ]
         run(cmd, check=True)
-
-
+        assert q_model_path.is_file()
+        model_path = q_model_path
 
     # Return
     return model_path
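For reference, convert() shells out to the two bloomz.cpp tools: convert-hf-to-ggml.py writes ggml-model-<name>-<f16|f32>.bin into the cache folder, and quantize <in> <out> 2 derives the q4_0 variant from it. A small driver sketch follows, mirroring how app.py calls it; bigscience/bloomz-560m is an arbitrary public example, and the returned path lives inside the temporary directory, so it must be uploaded or copied out before the with block exits.

# driver sketch — calling convert() the way app.py does
from pathlib import Path
from tempfile import TemporaryDirectory

from convert import convert

with TemporaryDirectory() as tmp:
    model_path = convert(
        cache_folder=Path(tmp),
        model_id="bigscience/bloomz-560m",
        precision="FP16",
        quantization=True,
    )
    # e.g. <tmp>/ggml-model-bloomz-560m-f16-q4_0.bin; upload or copy it
    # here, before the temporary directory is cleaned up.
    print(model_path)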