add quantization parameters
app.py
CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
 from huggingface_hub import HfApi, whoami, ModelCard
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from textwrap import dedent
-
+from pathlib import Path
 
 from tempfile import TemporaryDirectory
 
@@ -35,11 +35,13 @@ from optimum.intel import (
 def process_model(
     model_id: str,
     dtype: str,
+    quant_method: str,
+    calibration_dataset: str,
+    ratio: str,
     private_repo: bool,
-
+    overwritte: bool,
     oauth_token: gr.OAuthToken,
 ):
-    task = "auto"
     if oauth_token.token is None:
         raise ValueError("You must be logged in to use this space")
 
@@ -47,17 +49,10 @@ def process_model(
     username = whoami(oauth_token.token)["name"]
     new_repo_id = f"{username}/{model_name}-openvino-{dtype}"
 
-
-
-    try:
-        task = TasksManager.infer_task_from_model(model_id)
-    except Exception as e:
-        raise ValueError(
-            "The task could not be automatically inferred. "
-            f"Please pass explicitely the task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. {e}"
-        )
+    if quant_method != "default":
+        new_repo_id += f"-{quant_method}"
 
-    task =
+    task = TasksManager.infer_task_from_model(model_id)
     if task not in _HEAD_TO_AUTOMODELS:
         raise ValueError(
             f"The task '{task}' is not supported, only {_HEAD_TO_AUTOMODELS.keys()} tasks are supported"
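
This hunk replaces the old try/except (and the truncated `task =` assignment it removes) with a direct call, so task-inference failures now surface as raw exceptions. For reference, a minimal sketch of what that call does, assuming optimum is installed (the model id is an arbitrary example, not taken from the commit):

    # Infer the optimum task name from a model's metadata on the Hub.
    from optimum.exporters.tasks import TasksManager

    task = TasksManager.infer_task_from_model("gpt2")
    print(task)  # e.g. "text-generation"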
@@ -73,9 +68,18 @@ def process_model(
         use_auth_token=oauth_token.token,
     )
     export = len(ov_files) == 0
-    quantization_config = OVWeightQuantizationConfig(
+    quantization_config = OVWeightQuantizationConfig(
+        bits=8 if dtype == "int8" else 4,
+        quant_method=quant_method,
+        dataset=calibration_dataset,
+        ratio=1.0 if dtype == "int8" else ratio,
+    )
+
     api = HfApi(token=oauth_token.token)
 
+    if api.repo_exists(new_repo_id) and not overwritte:
+        raise Exception(f"Model {new_repo_id} already exist, please set overwritte=True to push on an existing repo")
+
     with TemporaryDirectory() as d:
         folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
         os.makedirs(folder)
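
The new OVWeightQuantizationConfig block is the core of the commit: dtype selects the bit width, while the method, calibration dataset and ratio flow in from the new UI inputs. The same export can be reproduced outside the Space with optimum-intel; a minimal sketch, assuming optimum[openvino] is installed (model id and output directory are arbitrary examples):

    from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

    # Mirrors the config the Space builds when dtype == "int4".
    quantization_config = OVWeightQuantizationConfig(
        bits=4,
        quant_method="awq",   # one of the new dropdown choices
        dataset="wikitext2",  # calibration data for data-aware quantization
        ratio=0.8,            # 80% of weights in 4-bit, the rest kept in 8-bit
    )
    model = OVModelForCausalLM.from_pretrained(
        "gpt2", export=True, quantization_config=quantization_config
    )
    model.save_pretrained("gpt2-openvino-int4-awq")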
@@ -83,27 +87,28 @@
         api.snapshot_download(repo_id=model_id, local_dir=folder, allow_patterns=["*.json"])
 
         ov_model = eval(auto_model_class).from_pretrained(
-            model_id,
+            model_id,
+            export=export,
+            quantization_config=quantization_config,
         )
         ov_model.save_pretrained(folder)
 
-        new_repo_url = api.create_repo(
-            repo_id=new_repo_id, exist_ok=True, private=private_repo
-        )
+        new_repo_url = api.create_repo(repo_id=new_repo_id, exist_ok=True, private=private_repo)
         new_repo_id = new_repo_url.repo_id
         print("Repo created successfully!", new_repo_url)
 
-
-
-        for
-
+        folder = Path(folder)
+        folder_parts = len(folder.parts)
+        for file_path in folder.glob("**/*"):
+            name = Path(*file_path.parts[folder_parts:])
+            if not file_path.is_file() or any(part_name.startswith(".") for part_name in name.parts):
+                continue
             try:
                 api.upload_file(
                     path_or_fileobj=file_path,
-                    path_in_repo=
+                    path_in_repo=str(name),
                     repo_id=new_repo_id,
                 )
-
             except Exception as e:
                 raise Exception(f"Error uploading file {file_path}: {e}")
 
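
The rewritten upload loop globs the temporary folder, skips dot-files, and uploads each file under its path relative to the folder root. huggingface_hub's upload_folder would achieve much the same in one call; a sketch of that alternative, with `api`, `folder` and `new_repo_id` as in the commit (the difference being that the commit's loop raises a per-file error message):

    # One-call alternative to the manual glob-and-upload loop.
    api.upload_folder(
        folder_path=folder,
        repo_id=new_repo_id,
        ignore_patterns=[".*", "**/.*"],  # mirrors the loop's dot-file filter
    )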
@@ -161,23 +166,61 @@ dtype = gr.Dropdown(
     filterable=False,
     visible=True,
 )
+quant_method = gr.Dropdown(
+    ["default", "awq", "hybrid"],
+    value="default",
+    label="Quantization method",
+    filterable=False,
+    visible=True,
+)
+calibration_dataset = gr.Dropdown(
+    [
+        "wikitext2",
+        "c4",
+        "c4-new",
+        "conceptual_captions",
+        "laion/220k-GPT4Vision-captions-from-LIVIS",
+        "laion/filtered-wit",
+    ],
+    value="wikitext2",
+    label="Calibration dataset",
+    filterable=False,
+    visible=True,
+)
+ratio = gr.Slider(
+    label="Ratio",
+    info="Parameter used when applying 4-bit quantization to control the ratio between 4-bit and 8-bit quantization",
+    minimum=0.0,
+    maximum=1.0,
+    step=0.1,
+    value=1.0,
+)
 private_repo = gr.Checkbox(
     value=False,
     label="Private Repo",
     info="Create a private repo under your username",
 )
+overwritte = gr.Checkbox(
+    value=False,
+    label="Overwrite repo content",
+    info="Push files on existing repo potentially overwriting existing files",
+)
 interface = gr.Interface(
     fn=process_model,
     inputs=[
         model_id,
         dtype,
+        quant_method,
+        calibration_dataset,
+        ratio,
         private_repo,
+        overwritte,
     ],
     outputs=[
         gr.Markdown(label="output"),
     ],
     title="Quantize your model with NNCF",
-    description="
+    description="This space takes a model, converts it to the OpenVINO format and applies NNCF weight only quantization. The resulting model will then be pushed on the Hub under your HF user namespace",
     api_name=False,
 )
 
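
Since gr.Interface passes inputs positionally, the new controls are listed in the order of the new process_model parameters, and the trailing gr.OAuthToken argument is injected by the Space's login flow rather than listed as an input. Schematically, a submit triggers a call like the following (the values are hypothetical picks; "int4" assumes the existing dtype dropdown offers int8/int4):

    # Hypothetical call Gradio makes on submit; oauth_token comes from the
    # OAuth login flow, not from the inputs list.
    process_model(
        "microsoft/phi-2",  # model_id (example)
        "int4",             # dtype (assumed choice)
        "awq",              # quant_method
        "wikitext2",        # calibration_dataset
        0.8,                # ratio
        False,              # private_repo
        False,              # overwritte
        oauth_token,        # gr.OAuthToken
    )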