echarlaix committed
Commit e23b1fe
Parent(s): 93d09b5

add quantization parameters

Files changed (1):
  1. app.py +68 -25
app.py CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
 from huggingface_hub import HfApi, whoami, ModelCard
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from textwrap import dedent
-
+from pathlib import Path
 
 from tempfile import TemporaryDirectory
 
@@ -35,11 +35,13 @@ from optimum.intel import (
 def process_model(
     model_id: str,
     dtype: str,
+    quant_method: str,
+    calibration_dataset: str,
+    ratio: str,
     private_repo: bool,
-    # task: str,
+    overwritte: bool,
     oauth_token: gr.OAuthToken,
 ):
-    task = "auto"
     if oauth_token.token is None:
         raise ValueError("You must be logged in to use this space")
 
@@ -47,17 +49,10 @@ def process_model(
     username = whoami(oauth_token.token)["name"]
     new_repo_id = f"{username}/{model_name}-openvino-{dtype}"
 
-    task = TasksManager.map_from_synonym(task)
-    if task == "auto":
-        try:
-            task = TasksManager.infer_task_from_model(model_id)
-        except Exception as e:
-            raise ValueError(
-                "The task could not be automatically inferred. "
-                f"Please pass explicitely the task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. {e}"
-            )
+    if quant_method != "default":
+        new_repo_id += f"-{quant_method}"
 
-    task = _TASK_ALIASES.get(task, task)
+    task = TasksManager.infer_task_from_model(model_id)
     if task not in _HEAD_TO_AUTOMODELS:
         raise ValueError(
             f"The task '{task}' is not supported, only {_HEAD_TO_AUTOMODELS.keys()} tasks are supported"
@@ -73,9 +68,18 @@ def process_model(
         use_auth_token=oauth_token.token,
     )
     export = len(ov_files) == 0
-    quantization_config = OVWeightQuantizationConfig(bits=8 if dtype == "int8" else 4)
+    quantization_config = OVWeightQuantizationConfig(
+        bits=8 if dtype == "int8" else 4,
+        quant_method=quant_method,
+        dataset=calibration_dataset,
+        ratio=1.0 if dtype == "int8" else ratio,
+    )
+
     api = HfApi(token=oauth_token.token)
 
+    if api.repo_exists(new_repo_id) and not overwritte:
+        raise Exception(f"Model {new_repo_id} already exist, please set overwritte=True to push on an existing repo")
+
     with TemporaryDirectory() as d:
         folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
         os.makedirs(folder)
@@ -83,27 +87,28 @@ def process_model(
         api.snapshot_download(repo_id=model_id, local_dir=folder, allow_patterns=["*.json"])
 
         ov_model = eval(auto_model_class).from_pretrained(
-            model_id, export=export, quantization_config=quantization_config
+            model_id,
+            export=export,
+            quantization_config=quantization_config,
         )
         ov_model.save_pretrained(folder)
 
-        new_repo_url = api.create_repo(
-            repo_id=new_repo_id, exist_ok=True, private=private_repo
-        )
+        new_repo_url = api.create_repo(repo_id=new_repo_id, exist_ok=True, private=private_repo)
         new_repo_id = new_repo_url.repo_id
         print("Repo created successfully!", new_repo_url)
 
-        file_names = (f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f)))
-
-        for file in file_names:
-            file_path = os.path.join(folder, file)
+        folder = Path(folder)
+        folder_parts = len(folder.parts)
+        for file_path in folder.glob("**/*"):
+            name = Path(*file_path.parts[folder_parts:])
+            if not file_path.is_file() or any(part_name.startswith(".") for part_name in name.parts):
+                continue
             try:
                 api.upload_file(
                     path_or_fileobj=file_path,
-                    path_in_repo=file,
+                    path_in_repo=str(name),
                     repo_id=new_repo_id,
                 )
-
             except Exception as e:
                 raise Exception(f"Error uploading file {file_path}: {e}")
 
@@ -161,23 +166,61 @@ dtype = gr.Dropdown(
     filterable=False,
     visible=True,
 )
+quant_method = gr.Dropdown(
+    ["default", "awq", "hybrid"],
+    value="default",
+    label="Quantization method",
+    filterable=False,
+    visible=True,
+)
+calibration_dataset = gr.Dropdown(
+    [
+        "wikitext2",
+        "c4",
+        "c4-new",
+        "conceptual_captions",
+        "laion/220k-GPT4Vision-captions-from-LIVIS",
+        "laion/filtered-wit",
+    ],
+    value="wikitext2",
+    label="Calibration dataset",
+    filterable=False,
+    visible=True,
+)
+ratio = gr.Slider(
+    label="Ratio",
+    info="Parameter used when applying 4-bit quantization to control the ratio between 4-bit and 8-bit quantization",
+    minimum=0.0,
+    maximum=1.0,
+    step=0.1,
+    value=1.0,
+)
 private_repo = gr.Checkbox(
     value=False,
     label="Private Repo",
     info="Create a private repo under your username",
 )
+overwritte = gr.Checkbox(
+    value=False,
+    label="Overwrite repo content",
+    info="Push files on existing repo potentially overwriting existing files",
+)
 interface = gr.Interface(
     fn=process_model,
     inputs=[
         model_id,
         dtype,
+        quant_method,
+        calibration_dataset,
+        ratio,
         private_repo,
+        overwritte,
     ],
     outputs=[
         gr.Markdown(label="output"),
     ],
     title="Quantize your model with NNCF",
-    description="The space takes a model, converts it to the OpenVINO format and applies NNCF weight only quantization. The resulting model will then be pushed on the Hub under your HF user namespace",
+    description="This space takes a model, converts it to the OpenVINO format and applies NNCF weight only quantization. The resulting model will then be pushed on the Hub under your HF user namespace",
     api_name=False,
 )
 
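For reference, a minimal sketch of what the new inputs translate to when exporting and quantizing a text-generation model with optimum-intel, assuming optimum[openvino] is installed. The model id, output directory, and parameter values below are illustrative placeholders, not taken from the Space:

from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# Mirrors the Space's new UI inputs: dtype="int4", quant_method="awq",
# calibration_dataset="wikitext2", ratio=0.8 (all illustrative values).
quantization_config = OVWeightQuantizationConfig(
    bits=4,               # 4 when dtype == "int4", 8 when dtype == "int8"
    quant_method="awq",   # one of "default", "awq", "hybrid"
    dataset="wikitext2",  # calibration dataset used for data-aware quantization
    ratio=0.8,            # ratio between 4-bit and 8-bit quantized weights
)

# export=True converts the checkpoint to the OpenVINO format before applying
# NNCF weight-only quantization, as the Space does when the source repo
# contains no OpenVINO files.
ov_model = OVModelForCausalLM.from_pretrained(
    "HuggingFaceH4/zephyr-7b-beta",  # placeholder model id
    export=True,
    quantization_config=quantization_config,
)
ov_model.save_pretrained("zephyr-7b-beta-openvino-int4-awq")  # placeholder output dir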