add quantization parameters
app.py
CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
 from huggingface_hub import HfApi, whoami, ModelCard
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from textwrap import dedent
-
+from pathlib import Path
 
 from tempfile import TemporaryDirectory
 
@@ -35,11 +35,13 @@ from optimum.intel import (
 def process_model(
     model_id: str,
     dtype: str,
+    quant_method: str,
+    calibration_dataset: str,
+    ratio: str,
     private_repo: bool,
-
+    overwritte: bool,
     oauth_token: gr.OAuthToken,
 ):
-    task = "auto"
     if oauth_token.token is None:
         raise ValueError("You must be logged in to use this space")
 
@@ -47,17 +49,10 @@ def process_model(
     username = whoami(oauth_token.token)["name"]
     new_repo_id = f"{username}/{model_name}-openvino-{dtype}"
 
-
-
-    try:
-        task = TasksManager.infer_task_from_model(model_id)
-    except Exception as e:
-        raise ValueError(
-            "The task could not be automatically inferred. "
-            f"Please pass explicitely the task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. {e}"
-        )
+    if quant_method != "default":
+        new_repo_id += f"-{quant_method}"
 
-    task =
+    task = TasksManager.infer_task_from_model(model_id)
     if task not in _HEAD_TO_AUTOMODELS:
         raise ValueError(
             f"The task '{task}' is not supported, only {_HEAD_TO_AUTOMODELS.keys()} tasks are supported"
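
This hunk replaces the old try/except (and the truncated `task =` assignment it removes) with a direct call, so task-inference failures now surface as raw exceptions. For reference, a minimal sketch of what that call does, assuming optimum is installed (the model id is an arbitrary example, not taken from the commit):

    # Infer the optimum task name from a model's metadata on the Hub.
    from optimum.exporters.tasks import TasksManager

    task = TasksManager.infer_task_from_model("gpt2")
    print(task)  # e.g. "text-generation"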
@@ -73,9 +68,18 @@ def process_model(
         use_auth_token=oauth_token.token,
     )
     export = len(ov_files) == 0
-    quantization_config = OVWeightQuantizationConfig(
+    quantization_config = OVWeightQuantizationConfig(
+        bits=8 if dtype == "int8" else 4,
+        quant_method=quant_method,
+        dataset=calibration_dataset,
+        ratio=1.0 if dtype == "int8" else ratio,
+    )
+
     api = HfApi(token=oauth_token.token)
 
+    if api.repo_exists(new_repo_id) and not overwritte:
+        raise Exception(f"Model {new_repo_id} already exist, please set overwritte=True to push on an existing repo")
+
     with TemporaryDirectory() as d:
         folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
         os.makedirs(folder)
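
The new OVWeightQuantizationConfig block is the core of the commit: dtype selects the bit width, while the method, calibration dataset and ratio flow in from the new UI inputs. The same export can be reproduced outside the Space with optimum-intel; a minimal sketch, assuming optimum[openvino] is installed (model id and output directory are arbitrary examples):

    from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

    # Mirrors the config the Space builds when dtype == "int4".
    quantization_config = OVWeightQuantizationConfig(
        bits=4,
        quant_method="awq",   # one of the new dropdown choices
        dataset="wikitext2",  # calibration data for data-aware quantization
        ratio=0.8,            # 80% of weights in 4-bit, the rest kept in 8-bit
    )
    model = OVModelForCausalLM.from_pretrained(
        "gpt2", export=True, quantization_config=quantization_config
    )
    model.save_pretrained("gpt2-openvino-int4-awq")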
@@ -83,27 +87,28 @@
         api.snapshot_download(repo_id=model_id, local_dir=folder, allow_patterns=["*.json"])
 
         ov_model = eval(auto_model_class).from_pretrained(
-            model_id,
+            model_id,
+            export=export,
+            quantization_config=quantization_config,
         )
         ov_model.save_pretrained(folder)
 
-        new_repo_url = api.create_repo(
-            repo_id=new_repo_id, exist_ok=True, private=private_repo
-        )
+        new_repo_url = api.create_repo(repo_id=new_repo_id, exist_ok=True, private=private_repo)
         new_repo_id = new_repo_url.repo_id
         print("Repo created successfully!", new_repo_url)
 
-
-
-        for
-
+        folder = Path(folder)
+        folder_parts = len(folder.parts)
+        for file_path in folder.glob("**/*"):
+            name = Path(*file_path.parts[folder_parts:])
+            if not file_path.is_file() or any(part_name.startswith(".") for part_name in name.parts):
+                continue
             try:
                 api.upload_file(
                     path_or_fileobj=file_path,
-                    path_in_repo=
+                    path_in_repo=str(name),
                     repo_id=new_repo_id,
                 )
-
             except Exception as e:
                 raise Exception(f"Error uploading file {file_path}: {e}")
 
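
The rewritten upload loop globs the temporary folder, skips dot-files, and uploads each file under its path relative to the folder root. huggingface_hub's upload_folder would achieve much the same in one call; a sketch of that alternative, with `api`, `folder` and `new_repo_id` as in the commit (the difference being that the commit's loop raises a per-file error message):

    # One-call alternative to the manual glob-and-upload loop.
    api.upload_folder(
        folder_path=folder,
        repo_id=new_repo_id,
        ignore_patterns=[".*", "**/.*"],  # mirrors the loop's dot-file filter
    )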
@@ -161,23 +166,61 @@ dtype = gr.Dropdown(
     filterable=False,
     visible=True,
 )
+quant_method = gr.Dropdown(
+    ["default", "awq", "hybrid"],
+    value="default",
+    label="Quantization method",
+    filterable=False,
+    visible=True,
+)
+calibration_dataset = gr.Dropdown(
+    [
+        "wikitext2",
+        "c4",
+        "c4-new",
+        "conceptual_captions",
+        "laion/220k-GPT4Vision-captions-from-LIVIS",
+        "laion/filtered-wit",
+    ],
+    value="wikitext2",
+    label="Calibration dataset",
+    filterable=False,
+    visible=True,
+)
+ratio = gr.Slider(
+    label="Ratio",
+    info="Parameter used when applying 4-bit quantization to control the ratio between 4-bit and 8-bit quantization",
+    minimum=0.0,
+    maximum=1.0,
+    step=0.1,
+    value=1.0,
+)
 private_repo = gr.Checkbox(
     value=False,
     label="Private Repo",
     info="Create a private repo under your username",
 )
+overwritte = gr.Checkbox(
+    value=False,
+    label="Overwrite repo content",
+    info="Push files on existing repo potentially overwriting existing files",
+)
 interface = gr.Interface(
     fn=process_model,
     inputs=[
         model_id,
         dtype,
+        quant_method,
+        calibration_dataset,
+        ratio,
         private_repo,
+        overwritte,
     ],
     outputs=[
         gr.Markdown(label="output"),
     ],
     title="Quantize your model with NNCF",
-    description="
+    description="This space takes a model, converts it to the OpenVINO format and applies NNCF weight only quantization. The resulting model will then be pushed on the Hub under your HF user namespace",
     api_name=False,
 )
 
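
Since gr.Interface passes inputs positionally, the new controls are listed in the order of the new process_model parameters, and the trailing gr.OAuthToken argument is injected by the Space's login flow rather than listed as an input. Schematically, a submit triggers a call like the following (the values are hypothetical picks; "int4" assumes the existing dtype dropdown offers int8/int4):

    # Hypothetical call Gradio makes on submit; oauth_token comes from the
    # OAuth login flow, not from the inputs list.
    process_model(
        "microsoft/phi-2",  # model_id (example)
        "int4",             # dtype (assumed choice)
        "awq",              # quant_method
        "wikitext2",        # calibration_dataset
        0.8,                # ratio
        False,              # private_repo
        False,              # overwritte
        oauth_token,        # gr.OAuthToken
    )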