remove quant method
app.py CHANGED
@@ -35,7 +35,6 @@ from optimum.intel import (
 def process_model(
     model_id: str,
     dtype: str,
-    quant_method: str,
     calibration_dataset: str,
     ratio: str,
     private_repo: bool,
@@ -49,9 +48,6 @@ def process_model(
     username = whoami(oauth_token.token)["name"]
     new_repo_id = f"{username}/{model_name}-openvino-{dtype}"
 
-    if quant_method != "default":
-        new_repo_id += f"-{quant_method}"
-
     task = TasksManager.infer_task_from_model(model_id)
     if task not in _HEAD_TO_AUTOMODELS:
         raise ValueError(
@@ -68,11 +64,21 @@ def process_model(
|
|
68 |
use_auth_token=oauth_token.token,
|
69 |
)
|
70 |
export = len(ov_files) == 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
quantization_config = OVWeightQuantizationConfig(
|
72 |
-
bits=8 if
|
73 |
quant_method=quant_method,
|
74 |
-
dataset=calibration_dataset,
|
75 |
-
ratio=1.0 if
|
76 |
)
|
77 |
|
78 |
api = HfApi(token=oauth_token.token)
|
@@ -166,6 +172,7 @@ dtype = gr.Dropdown(
     filterable=False,
     visible=True,
 )
+"""
 quant_method = gr.Dropdown(
     ["default", "awq", "hybrid"],
     value="default",
@@ -173,6 +180,7 @@ quant_method = gr.Dropdown(
     filterable=False,
     visible=True,
 )
+"""
 calibration_dataset = gr.Dropdown(
     [
         "wikitext2",
@@ -210,7 +218,6 @@ interface = gr.Interface(
     inputs=[
         model_id,
         dtype,
-        quant_method,
         calibration_dataset,
         ratio,
         private_repo,
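Net effect of the change: the quant_method dropdown no longer drives quantization; process_model now picks the method from the requested dtype and the model's library. Below is a minimal standalone sketch of that selection logic, using the names from the diff (the build_quantization_config wrapper is a hypothetical helper added here for illustration, not part of app.py):

from optimum.exporters import TasksManager
from optimum.intel import OVWeightQuantizationConfig


def build_quantization_config(model_id: str, dtype: str, calibration_dataset: str, ratio: float):
    # int8 keeps the default (data-free) weight quantization; diffusers models
    # get hybrid quantization; other 4-bit exports use AWQ with a calibration set.
    is_int8 = dtype == "int8"
    library_name = TasksManager.infer_library_from_model(model_id)
    if library_name == "diffusers":
        quant_method = "hybrid"
    elif not is_int8:
        quant_method = "awq"
    else:
        quant_method = "default"

    return OVWeightQuantizationConfig(
        bits=8 if is_int8 else 4,
        quant_method=quant_method,
        # calibration data is only passed for the data-aware methods
        dataset=None if quant_method == "default" else calibration_dataset,
        ratio=1.0 if is_int8 else ratio,
    )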
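For context, a hedged sketch of how a config like this is typically consumed when exporting with optimum-intel (the model id and save path are placeholders, and OVModelForCausalLM stands in for whichever class _HEAD_TO_AUTOMODELS resolves for the detected task):

from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# Placeholder model id; in the Space this comes from the model_id input.
ov_model = OVModelForCausalLM.from_pretrained(
    "gpt2",
    export=True,  # convert the original checkpoint to OpenVINO IR
    quantization_config=OVWeightQuantizationConfig(bits=8),
)
ov_model.save_pretrained("gpt2-openvino-int8")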