remove quant method
app.py CHANGED
@@ -35,7 +35,6 @@ from optimum.intel import (
 def process_model(
     model_id: str,
     dtype: str,
-    quant_method: str,
     calibration_dataset: str,
     ratio: str,
     private_repo: bool,
@@ -49,9 +48,6 @@ def process_model(
     username = whoami(oauth_token.token)["name"]
     new_repo_id = f"{username}/{model_name}-openvino-{dtype}"
 
-    if quant_method != "default":
-        new_repo_id += f"-{quant_method}"
-
     task = TasksManager.infer_task_from_model(model_id)
     if task not in _HEAD_TO_AUTOMODELS:
         raise ValueError(
@@ -68,11 +64,21 @@ def process_model(
|
|
68 |
use_auth_token=oauth_token.token,
|
69 |
)
|
70 |
export = len(ov_files) == 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
quantization_config = OVWeightQuantizationConfig(
|
72 |
-
bits=8 if
|
73 |
quant_method=quant_method,
|
74 |
-
dataset=calibration_dataset,
|
75 |
-
ratio=1.0 if
|
76 |
)
|
77 |
|
78 |
api = HfApi(token=oauth_token.token)
|
@@ -166,6 +172,7 @@ dtype = gr.Dropdown(
     filterable=False,
     visible=True,
 )
+"""
 quant_method = gr.Dropdown(
     ["default", "awq", "hybrid"],
     value="default",
@@ -173,6 +180,7 @@ quant_method = gr.Dropdown(
     filterable=False,
     visible=True,
 )
+"""
 calibration_dataset = gr.Dropdown(
     [
         "wikitext2",
@@ -210,7 +218,6 @@ interface = gr.Interface(
     inputs=[
         model_id,
         dtype,
-        quant_method,
         calibration_dataset,
         ratio,
         private_repo,
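Net effect of the change: the quant_method dropdown no longer drives quantization; process_model now picks the method from the requested dtype and the model's library. Below is a minimal standalone sketch of that selection logic, using the names from the diff (the build_quantization_config wrapper is a hypothetical helper added here for illustration, not part of app.py):

from optimum.exporters import TasksManager
from optimum.intel import OVWeightQuantizationConfig


def build_quantization_config(model_id: str, dtype: str, calibration_dataset: str, ratio: float):
    # int8 keeps the default (data-free) weight quantization; diffusers models
    # get hybrid quantization; other 4-bit exports use AWQ with a calibration set.
    is_int8 = dtype == "int8"
    library_name = TasksManager.infer_library_from_model(model_id)
    if library_name == "diffusers":
        quant_method = "hybrid"
    elif not is_int8:
        quant_method = "awq"
    else:
        quant_method = "default"

    return OVWeightQuantizationConfig(
        bits=8 if is_int8 else 4,
        quant_method=quant_method,
        # calibration data is only passed for the data-aware methods
        dataset=None if quant_method == "default" else calibration_dataset,
        ratio=1.0 if is_int8 else ratio,
    )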
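For context, a hedged sketch of how a config like this is typically consumed when exporting with optimum-intel (the model id and save path are placeholders, and OVModelForCausalLM stands in for whichever class _HEAD_TO_AUTOMODELS resolves for the detected task):

from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# Placeholder model id; in the Space this comes from the model_id input.
ov_model = OVModelForCausalLM.from_pretrained(
    "gpt2",
    export=True,  # convert the original checkpoint to OpenVINO IR
    quantization_config=OVWeightQuantizationConfig(bits=8),
)
ov_model.save_pretrained("gpt2-openvino-int8")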