Add support for private repos
- app.py +44 -18
- requirements.txt +1 -2
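This commit threads the user's OAuth token through model inspection so the Space can export models hosted on private repositories: `TasksManager.infer_library_from_model` and `TasksManager.infer_task_from_model` now receive `token=oauth_token.token`, and the task of diffusers pipelines is read from their `model_index.json` config. It also defines a local `_HEAD_TO_AUTOMODELS` mapping in place of the one previously imported from `optimum.intel.openvino.utils`, adds a "None" calibration-dataset option, and installs `optimum-intel` from its git repository instead of a pinned release.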
app.py
CHANGED
```diff
@@ -1,7 +1,7 @@
 import os
 import shutil
 import gradio as gr
-from huggingface_hub import HfApi, whoami, ModelCard
+from huggingface_hub import HfApi, whoami, ModelCard, model_info
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from textwrap import dedent
 from pathlib import Path
@@ -9,9 +9,6 @@ from pathlib import Path
 from tempfile import TemporaryDirectory
 
 from huggingface_hub.file_download import repo_folder_name
-from optimum.exporters.tasks import TasksManager
-from optimum.intel.utils.constant import _TASK_ALIASES
-from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS
 from optimum.exporters import TasksManager
 
 from optimum.intel.utils.modeling_utils import _find_files_matching_pattern
@@ -28,9 +25,23 @@ from optimum.intel import (
     OVStableDiffusionPipeline,
     OVStableDiffusionXLPipeline,
     OVLatentConsistencyModelPipeline,
-    OVModelForPix2Struct,
     OVWeightQuantizationConfig,
 )
+from diffusers import ConfigMixin
+
+_HEAD_TO_AUTOMODELS = {
+    "feature-extraction": OVModelForFeatureExtraction,
+    "fill-mask": OVModelForMaskedLM,
+    "text-generation": OVModelForCausalLM,
+    "text-classification": OVModelForSequenceClassification,
+    "token-classification": OVModelForTokenClassification,
+    "question-answering": OVModelForQuestionAnswering,
+    "image-classification": OVModelForImageClassification,
+    "audio-classification": OVModelForAudioClassification,
+    "stable-diffusion": OVStableDiffusionPipeline,
+    "stable-diffusion-xl": OVStableDiffusionXLPipeline,
+    "latent-consistency": OVLatentConsistencyModelPipeline,
+}
 
 def quantize_model(
     model_id: str,
@@ -42,23 +53,33 @@ def quantize_model(
     oauth_token: gr.OAuthToken,
 ):
     if oauth_token.token is None:
-
+        return "You must be logged in to use this space"
+
+    if not model_id:
+        return f"### Invalid input 🐞 Please specify a model name, got {model_id}"
 
     model_name = model_id.split("/")[-1]
     username = whoami(oauth_token.token)["name"]
     new_repo_id = f"{username}/{model_name}-openvino-{dtype}"
-
-    library_name = TasksManager.infer_library_from_model(model_id)
-    # task = TasksManager.infer_task_from_model(model_id, token=oauth_token.token)
-    # library_name = TasksManager.infer_library_from_model(model_id, token=oauth_token.token)
+    library_name = TasksManager.infer_library_from_model(model_id, token=oauth_token.token)
 
-    if
-
-
-
+    if library_name == "diffusers":
+        ConfigMixin.config_name = "model_index.json"
+        class_name = ConfigMixin.load_config(model_id, token=oauth_token.token)["_class_name"].lower()
+        if "xl" in class_name:
+            task = "stable-diffusion-xl"
+        elif "consistency" in class_name:
+            task = "latent-consistency"
+        else:
+            task = "stable-diffusion"
+    else:
+        task = TasksManager.infer_task_from_model(model_id, token=oauth_token.token)
 
     if task == "text2text-generation":
-
+        return "Export of Seq2Seq models is currently disabled."
+
+    if task not in _HEAD_TO_AUTOMODELS:
+        return f"The task '{task}' is not supported, only {_HEAD_TO_AUTOMODELS.keys()} tasks are supported"
 
     auto_model_class = _HEAD_TO_AUTOMODELS[task]
     ov_files = _find_files_matching_pattern(
@@ -85,7 +106,7 @@ def quantize_model(
 
     api = HfApi(token=oauth_token.token)
     if api.repo_exists(new_repo_id) and not overwritte:
-
+        return f"Model {new_repo_id} already exist, please set overwritte=True to push on an existing repo"
 
     with TemporaryDirectory() as d:
         folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
@@ -129,7 +150,7 @@ def quantize_model(
                 repo_id=new_repo_id,
             )
         except Exception as e:
-
+            return f"Error uploading file {file_path}: {e}"
 
         try:
             card = ModelCard.load(model_id, token=oauth_token.token)
@@ -169,6 +190,8 @@ def quantize_model(
                 repo_id=new_repo_id,
             )
             return f"This model was successfully quantized, find it under your repo {new_repo_url}'"
+        except Exception as e:
+            return f"### Error: {e}"
         finally:
             shutil.rmtree(folder, ignore_errors=True)
 
@@ -177,6 +200,8 @@ DESCRIPTION = """
 This Space uses [Optimum Intel](https://huggingface.co/docs/optimum/main/en/intel/openvino/optimization) to automatically apply NNCF weight only quantization on a model hosted on the [Hub](https://huggingface.co/models) and convert it to the [OpenVINO format](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) if not already.
 
 The resulting model will then be pushed under your HF user namespace. For now we only support conversion for models that are hosted on public repositories.
+
+The list of the supported architectures can be found in the [documentation](https://huggingface.co/docs/optimum/main/en/intel/openvino/models)
 """
 
 model_id = HuggingfaceHubSearch(
@@ -202,6 +227,7 @@ quant_method = gr.Dropdown(
 """
 calibration_dataset = gr.Dropdown(
     [
+        "None",
         "wikitext2",
         "c4",
         "c4-new",
@@ -209,7 +235,7 @@ calibration_dataset = gr.Dropdown(
         "laion/220k-GPT4Vision-captions-from-LIVIS",
         "laion/filtered-wit",
     ],
-    value="
+    value="None",
     label="Calibration dataset",
     filterable=False,
     visible=True,
```
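Read on its own, the new task-inference path amounts to the following standalone sketch. The `infer_task` wrapper is hypothetical; the calls and branching mirror the diff above, and passing `token` through is what enables reading gated or private repositories:

```python
from diffusers import ConfigMixin
from optimum.exporters import TasksManager


def infer_task(model_id: str, token=None) -> str:
    # Passing the token lets both inference calls read private repositories.
    library_name = TasksManager.infer_library_from_model(model_id, token=token)
    if library_name == "diffusers":
        # A diffusers pipeline keeps its top-level config in model_index.json;
        # overriding config_name makes the generic loader fetch that file, and
        # its "_class_name" field identifies the concrete pipeline class.
        ConfigMixin.config_name = "model_index.json"
        class_name = ConfigMixin.load_config(model_id, token=token)["_class_name"].lower()
        if "xl" in class_name:
            return "stable-diffusion-xl"
        if "consistency" in class_name:
            return "latent-consistency"
        return "stable-diffusion"
    # Non-diffusers models go through optimum's regular task inference.
    return TasksManager.infer_task_from_model(model_id, token=token)
```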
requirements.txt
CHANGED
```diff
@@ -3,7 +3,6 @@ gradio[oauth]>=4.37.2
 gradio_huggingfacehub_search==0.0.6
 transformers==4.42.4
 diffusers==0.29.1
-optimum==1.21.2
-optimum-intel==1.18.1
 openvino
 nncf
+git+https://github.com/huggingface/optimum-intel.git
```
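For context, a minimal sketch of the export-plus-quantization step this environment supports, using `gpt2` as a stand-in example model; `export=True` and `OVWeightQuantizationConfig` are the standard optimum-intel entry points for OpenVINO conversion and NNCF weight-only quantization:

```python
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# Weight-only quantization settings (4-bit weights via NNCF).
quantization_config = OVWeightQuantizationConfig(bits=4)

# export=True converts the original checkpoint to the OpenVINO IR format
# on the fly before the weight quantization is applied.
model = OVModelForCausalLM.from_pretrained(
    "gpt2",  # stand-in example model id
    export=True,
    quantization_config=quantization_config,
)

# The Space pushes the result to the Hub; locally, save the quantized IR.
model.save_pretrained("gpt2-openvino-4bit")
```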