IlyasMoutawwakil (HF staff) committed
Commit a6d3fdf
1 Parent(s): 77d2cc4
Files changed (5)
  1. app.py +156 -140
  2. config_store.py +64 -338
  3. configs/base_config.yaml +0 -15
  4. requirements.txt +1 -1
  5. run.py +0 -189
app.py CHANGED
@@ -1,181 +1,197 @@
1
  import os
2
- import subprocess
3
- import importlib.util
4
-
5
- NVIDIA_AVAILABLE = (
6
- subprocess.run(
7
- "nvidia-smi",
8
- shell=True,
9
- stdout=subprocess.DEVNULL,
10
- stderr=subprocess.DEVNULL,
11
- ).returncode
12
- == 0
13
- )
14
-
15
- if NVIDIA_AVAILABLE:
16
- DEVICES = ["cpu", "cuda"]
17
- if importlib.util.find_spec("optimum_benchmark") is None:
18
- os.system(
19
- "pip install optimum-benchmark[onnxruntime-gpu,openvino,neural-compressor,diffusers,peft]@git+https://github.com/huggingface/optimum-benchmark.git"
20
- )
21
- os.system("pip uninstall onnxruntime onnxruntime-gpu -y")
22
- os.system("pip install onnxruntime-gpu")
23
- else:
24
- DEVICES = ["cpu"]
25
- if importlib.util.find_spec("optimum_benchmark") is None:
26
- os.system(
27
- "pip install optimum-benchmark[onnxruntime,openvino,neural-compressor,diffusers,peft]@git+https://github.com/huggingface/optimum-benchmark.git"
28
- )
29
-
30
-
31
- BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor"]
32
- BENCHMARKS = ["inference", "training"]
33
-
34
-
35
- import random
36
  import gradio as gr
37
- from optimum_benchmark.task_utils import (
38
- TASKS_TO_AUTOMODELS,
39
- infer_task_from_model_name_or_path,
40
- )
41
- from run import run_benchmark
42
  from config_store import (
43
- get_training_config,
44
  get_inference_config,
45
- get_neural_compressor_config,
46
  get_onnxruntime_config,
47
  get_openvino_config,
48
  get_pytorch_config,
49
  )
50
 
51
  with gr.Blocks() as demo:
52
  # add image
53
  gr.Markdown(
54
  """<img src="https://huggingface.co/spaces/optimum/optimum-benchmark-ui/resolve/main/huggy_bench.png" style="display: block; margin-left: auto; margin-right: auto; width: 30%;">"""
55
  )
 
56
  # title text
57
- gr.Markdown("<h1 style='text-align: center'>🤗 Optimum-Benchmark UI 🏋️</h1>")
58
 
59
  # explanation text
60
  gr.HTML(
61
  "<h3 style='text-align: center'>"
62
- "Zero code Gradio interface of <a href='https://github.com/huggingface/optimum-benchmark.git'>Optimum-Benchmark</a><br>"
63
  "</h3>"
64
- "<p style='text-align: center'>"
65
- "Note: <a href='https://huggingface.co/spaces/optimum/optimum-benchmark-ui?duplicate=true'>Duplicate this space</a> and change its hardware to enable CUDA device<br>"
66
- "or <a href='https://huggingface.co/spaces/optimum/optimum-benchmark-ui?docker=true'>Run with Docker</a> locally to target your own hardware."
67
- "</p>"
68
  )
69
 
70
- model = gr.Textbox(
71
  label="model",
72
- value="distilbert-base-uncased-finetuned-sst-2-english",
73
- info="Model to run the benchmark on. Press enter to infer the task automatically.",
 
74
  )
75
  task = gr.Dropdown(
76
  label="task",
77
- value="text-classification",
78
- choices=list(TASKS_TO_AUTOMODELS.keys()),
79
- info="Task to run the benchmark on. Can be infered automatically by submitting a model.",
80
- )
81
- device = gr.Dropdown(
82
- value="cpu",
83
- label="device",
84
- choices=DEVICES,
85
- info="Device to run the benchmark on. make sure to duplicate the space if you wanna run on CUDA devices.",
86
- )
87
- experiment = gr.Textbox(
88
- label="experiment_name",
89
- value=f"awesome-experiment-{random.randint(0, 100000)}",
90
- info="Name of the experiment. Will be used to create a folder where results are stored.",
91
  )
92
- model.submit(fn=infer_task_from_model_name_or_path, inputs=model, outputs=task)
93
 
94
  with gr.Row():
95
- with gr.Column():
96
- with gr.Row():
97
- backend = gr.Dropdown(
98
- label="backend",
99
- choices=BACKENDS,
100
- value=BACKENDS[0],
101
- info="Backend to run the benchmark on.",
102
- )
103
-
104
- with gr.Row() as backend_configs:
105
- with gr.Accordion(label="backend options", open=False, visible=True):
106
- pytorch_config = get_pytorch_config()
107
- with gr.Accordion(label="backend config", open=False, visible=False):
108
- onnxruntime_config = get_onnxruntime_config()
109
- with gr.Accordion(label="backend config", open=False, visible=False):
110
- openvino_config = get_openvino_config()
111
- with gr.Accordion(label="backend config", open=False, visible=False):
112
- neural_compressor_config = get_neural_compressor_config()
113
-
114
- # hide backend configs based on backend
115
- backend.change(
116
- inputs=backend,
117
- outputs=backend_configs.children,
118
- fn=lambda value: [gr.update(visible=value == key) for key in BACKENDS],
119
- )
120
 
121
- with gr.Column():
122
- with gr.Row():
123
- benchmark = gr.Dropdown(
124
- label="benchmark",
125
- choices=BENCHMARKS,
126
- value=BENCHMARKS[0],
127
- info="Type of benchmark to run.",
128
- )
129
-
130
- with gr.Row() as benchmark_configs:
131
- with gr.Accordion(label="benchmark Config", open=False, visible=True):
132
- inference_config = get_inference_config()
133
- with gr.Accordion(label="benchmark Config", open=False, visible=False):
134
- training_config = get_training_config()
135
-
136
- # hide benchmark configs based on benchmark
137
- benchmark.change(
138
- inputs=benchmark,
139
- outputs=benchmark_configs.children,
140
- fn=lambda value: [gr.update(visible=value == key) for key in BENCHMARKS],
141
- )
142
 
143
- baseline = gr.Checkbox(
144
- value=False,
145
- label="compare_to_baseline",
146
- info="Check this box to compare your chosen configuration to the baseline configuration.",
147
- )
148
 
149
  button = gr.Button(value="Run Benchmark", variant="primary")
150
- with gr.Accordion(label="", open=True):
151
- html_output = gr.HTML()
152
- table_output = gr.Dataframe(visible=False)
153
 
154
  button.click(
155
  fn=run_benchmark,
156
  inputs={
157
- experiment,
158
- baseline,
159
- model,
160
  task,
161
- device,
162
- backend,
163
- benchmark,
164
- *pytorch_config,
165
- *openvino_config,
166
- *onnxruntime_config,
167
- *neural_compressor_config,
168
- *inference_config,
169
- *training_config,
170
  },
171
- outputs=[html_output, button, table_output],
172
- queue=True,
173
- )
174
- button.click(
175
- fn=lambda: f"awesome-experiment-{random.randint(0, 100000)}",
176
- inputs=[],
177
- outputs=experiment,
178
- queue=True,
179
  )
180
 
181
- demo.queue().launch()
 
 
1
  import os
2
+ import time
3
+ from huggingface_hub import create_repo, whoami
4
  import gradio as gr
5
  from config_store import (
 
6
  get_inference_config,
 
7
  get_onnxruntime_config,
8
  get_openvino_config,
9
  get_pytorch_config,
10
+ get_process_config,
11
+ )
12
+ from optimum_benchmark.backends.openvino.utils import TASKS_TO_OVMODEL
13
+ from optimum_benchmark.backends.transformers_utils import TASKS_TO_MODEL_LOADERS
14
+ from optimum_benchmark.backends.onnxruntime.utils import TASKS_TO_ORTMODELS
15
+ from optimum_benchmark.backends.ipex.utils import TASKS_TO_IPEXMODEL
16
+ from optimum_benchmark import (
17
+ BenchmarkConfig,
18
+ PyTorchConfig,
19
+ OVConfig,
20
+ ORTConfig,
21
+ IPEXConfig,
22
+ ProcessConfig,
23
+ InferenceConfig,
24
+ Benchmark,
25
+ )
26
+ from optimum_benchmark.logging_utils import setup_logging
27
+
28
+ os.environ["LOG_TO_FILE"] = "0"
29
+ os.environ["LOG_LEVEL"] = "INFO"
30
+ setup_logging(level="INFO", prefix="MAIN-PROCESS")
31
+
32
+ DEVICE = "cpu"
33
+ BACKENDS = ["pytorch", "onnxruntime", "openvino", "ipex"]
34
+
35
+ CHOSEN_MODELS = ["bert-base-uncased", "gpt2"]
36
+ CHOSEN_TASKS = (
37
+ set(TASKS_TO_OVMODEL.keys())
38
+ & set(TASKS_TO_ORTMODELS.keys())
39
+ & set(TASKS_TO_IPEXMODEL.keys())
40
+ & set(TASKS_TO_MODEL_LOADERS.keys())
41
  )
42
 
43
+
44
+ def run_benchmark(kwargs, oauth_token: gr.OAuthToken):
45
+ if oauth_token.token is None:
46
+ return "You must be logged in to use this space"
47
+
48
+ username = whoami(oauth_token.token)["name"]
49
+ create_repo(
50
+ f"{username}/benchmarks",
51
+ token=oauth_token.token,
52
+ repo_type="dataset",
53
+ exist_ok=True,
54
+ )
55
+
56
+ configs = {
57
+ "process": {},
58
+ "inference": {},
59
+ "onnxruntime": {},
60
+ "openvino": {},
61
+ "pytorch": {},
62
+ "ipex": {},
63
+ }
64
+
65
+ for key, value in kwargs.items():
66
+ if key.label == "model":
67
+ model = value
68
+ elif key.label == "task":
69
+ task = value
70
+ elif "." in key.label:
71
+ backend, argument = key.label.split(".")
72
+ configs[backend][argument] = value
73
+ else:
74
+ continue
75
+
76
+ process_config = ProcessConfig(**configs.pop("process"))
77
+ inference_config = InferenceConfig(**configs.pop("inference"))
78
+
79
+ configs["onnxruntime"] = ORTConfig(
80
+ task=task,
81
+ model=model,
82
+ device=DEVICE,
83
+ **configs["onnxruntime"],
84
+ )
85
+ configs["openvino"] = OVConfig(
86
+ task=task,
87
+ model=model,
88
+ device=DEVICE,
89
+ **configs["openvino"],
90
+ )
91
+ configs["pytorch"] = PyTorchConfig(
92
+ task=task,
93
+ model=model,
94
+ device=DEVICE,
95
+ **configs["pytorch"],
96
+ )
97
+ configs["ipex"] = IPEXConfig(
98
+ task=task,
99
+ model=model,
100
+ device=DEVICE,
101
+ **configs["ipex"],
102
+ )
103
+
104
+ for backend in configs:
105
+ benchmark_name = (
106
+ f"{model}-{task}-{backend}-{time.strftime('%Y-%m-%d-%H-%M-%S')}"
107
+ )
108
+ benchmark_config = BenchmarkConfig(
109
+ name=benchmark_name,
110
+ launcher=process_config,
111
+ scenario=inference_config,
112
+ backend=configs[backend],
113
+ )
114
+ benchmark_report = Benchmark.run(benchmark_config)
115
+ benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
116
+ benchmark.push_to_hub(
117
+ repo_id=f"{username}/benchmarks",
118
+ subfolder=benchmark_name,
119
+ token=oauth_token.token,
120
+ )
121
+
122
+ return f"🚀 Benchmark {benchmark_name} has been pushed to {username}/benchmarks"
123
+
124
+
125
  with gr.Blocks() as demo:
126
+ # add login button
127
+ gr.LoginButton(min_width=250)
128
+
129
  # add image
130
  gr.Markdown(
131
  """<img src="https://huggingface.co/spaces/optimum/optimum-benchmark-ui/resolve/main/huggy_bench.png" style="display: block; margin-left: auto; margin-right: auto; width: 30%;">"""
132
  )
133
+
134
  # title text
135
+ gr.Markdown("<h1 style='text-align: center'>🤗 Optimum-Benchmark Interface 🏋️</h1>")
136
 
137
  # explanation text
138
  gr.HTML(
139
  "<h3 style='text-align: center'>"
140
+ "Zero code Gradio interface of "
141
+ "<a href='https://github.com/huggingface/optimum-benchmark.git'>"
142
+ "Optimum-Benchmark"
143
+ "</a>"
144
+ "<br>"
145
  "</h3>"
146
  )
147
 
148
+ model = gr.Dropdown(
149
  label="model",
150
+ choices=CHOSEN_MODELS,
151
+ value="bert-base-uncased",
152
+ info="Model to run the benchmark on.",
153
  )
154
  task = gr.Dropdown(
155
  label="task",
156
+ choices=CHOSEN_TASKS,
157
+ value="feature-extraction",
158
+ info="Task to run the benchmark on.",
159
  )
 
160
 
161
  with gr.Row():
162
+ with gr.Accordion(label="Process Config", open=False, visible=True):
163
+ process_config = get_process_config()
164
 
165
+ with gr.Row():
166
+ with gr.Accordion(label="PyTorch Config", open=True, visible=True):
167
+ pytorch_config = get_pytorch_config()
168
+ with gr.Accordion(label="OpenVINO Config", open=True, visible=True):
169
+ openvino_config = get_openvino_config()
170
+ with gr.Accordion(label="OnnxRuntime Config", open=True, visible=True):
171
+ onnxruntime_config = get_onnxruntime_config()
172
 
173
+ with gr.Row():
174
+ with gr.Accordion(label="Scenario Config", open=False, visible=True):
175
+ inference_config = get_inference_config()
176
 
177
  button = gr.Button(value="Run Benchmark", variant="primary")
178
+
179
+ html_output = gr.HTML()
 
180
 
181
  button.click(
182
  fn=run_benchmark,
183
  inputs={
184
  task,
185
+ model,
186
+ *process_config.values(),
187
+ *inference_config.values(),
188
+ *onnxruntime_config.values(),
189
+ *openvino_config.values(),
190
+ *pytorch_config.values(),
191
  },
192
+ outputs=[html_output],
193
+ concurrency_limit=1,
194
  )
195
 
196
+
197
+ demo.queue(max_size=10).launch()
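A note on the Gradio pattern the new app.py relies on: passing a Python set of components to button.click(inputs={...}) makes Gradio call the function with a single dict mapping each component object to its submitted value, which is why run_benchmark iterates kwargs.items() and reads key.label. A minimal self-contained sketch of that pattern follows; the component names and values here are illustrative, not taken from the Space.

import gradio as gr

def collect(kwargs):
    # kwargs maps each input component object to its submitted value,
    # so component labels such as "pytorch.torch_dtype" can be used as keys,
    # exactly as run_benchmark does when it splits "<backend>.<argument>".
    by_label = {component.label: value for component, value in kwargs.items()}
    return f"<pre>{by_label}</pre>"

with gr.Blocks() as demo:
    model = gr.Textbox(label="model", value="bert-base-uncased")
    dtype = gr.Dropdown(
        label="pytorch.torch_dtype",
        choices=["float32", "float16"],
        value="float32",
    )
    output = gr.HTML()
    button = gr.Button("Run")
    # Passing a *set* (not a list) of components makes Gradio hand the
    # function one dict instead of positional arguments.
    button.click(fn=collect, inputs={model, dtype}, outputs=[output])

demo.launch()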
config_store.py CHANGED
@@ -1,401 +1,127 @@
1
  import gradio as gr
2
 
3
 
4
- def get_base_backend_config(backend_name="pytorch"):
5
- return [
6
- # seed
7
- gr.Textbox(
8
- value=42,
9
- label=f"{backend_name}.seed",
10
- info="Sets seed for reproducibility",
11
- ),
12
- # inter_op_num_threads
13
- gr.Textbox(
14
- value="null",
15
- label=f"{backend_name}.inter_op_num_threads",
16
- info="Use null for default and -1 for cpu_count()",
17
- ),
18
- # intra_op_num_threads
19
- gr.Textbox(
20
- value="null",
21
- label=f"{backend_name}.intra_op_num_threads",
22
- info="Use null for default and -1 for cpu_count()",
23
- ),
24
- # initial_isolation_check
25
- gr.Checkbox(
26
- value=True,
27
- label=f"{backend_name}.initial_isolation_check",
28
- info="Makes sure that initially, no other process is running on the target device",
29
- ),
30
- # continous_isolation_check
31
- gr.Checkbox(
32
- value=True,
33
- label=f"{backend_name}.continous_isolation_check",
34
- info="Makes sure that throughout the benchmark, no other process is running on the target device",
35
- ),
36
- # delete_cache
37
- gr.Checkbox(
38
  value=False,
39
- label=f"{backend_name}.delete_cache",
40
- info="Deletes model cache (weights & configs) after benchmark is done",
41
  ),
42
- ]
43
 
44
 
45
  def get_pytorch_config():
46
- return get_base_backend_config(backend_name="pytorch") + [
47
- # no_weights
48
- gr.Checkbox(
49
- value=False,
50
- label="pytorch.no_weights",
51
- info="Generates random weights instead of downloading pretrained ones",
52
- ),
53
- # # device_map
54
- # gr.Dropdown(
55
- # value="null",
56
- #
57
- # label="pytorch.device_map",
58
- # choices=["null", "auto", "sequential"],
59
- # info="Use null for default and `auto` or `sequential` the same way as in `from_pretrained`",
60
- # ),
61
- # torch_dtype
62
- gr.Dropdown(
63
- value="null",
64
  label="pytorch.torch_dtype",
65
- choices=["null", "bfloat16", "float16", "float32", "auto"],
66
- info="Use null for default and `auto` for automatic dtype selection",
67
- ),
68
- # amp_autocast
69
- gr.Checkbox(
70
- value=False,
71
- label="pytorch.amp_autocast",
72
- info="Enables Pytorch's native Automatic Mixed Precision",
73
- ),
74
- # amp_dtype
75
- gr.Dropdown(
76
- value="null",
77
- label="pytorch.amp_dtype",
78
- info="Use null for default",
79
- choices=["null", "bfloat16", "float16"],
80
  ),
81
- # torch_compile
82
- gr.Checkbox(
83
  value=False,
84
  label="pytorch.torch_compile",
85
  info="Compiles the model with torch.compile",
86
  ),
87
- # bettertransformer
88
- gr.Checkbox(
89
- value=False,
90
- label="pytorch.bettertransformer",
91
- info="Applies optimum.BetterTransformer for fastpath anf optimized attention",
92
- ),
93
- # quantization_scheme
94
- gr.Dropdown(
95
- value="null",
96
- choices=["null", "gptq", "bnb"],
97
- label="pytorch.quantization_scheme",
98
- info="Use null for no quantization",
99
- ),
100
- # # use_ddp
101
- # gr.Checkbox(
102
- # value=False,
103
- #
104
- # label="pytorch.use_ddp",
105
- # info="Uses DistributedDataParallel for multi-gpu training",
106
- # ),
107
- # peft_strategy
108
- gr.Dropdown(
109
- value="null",
110
- choices=["null", "lora", "ada_lora", "prompt_tuning", "prefix_tuning", "p_tuning", "ia3"],
111
- label="pytorch.peft_strategy",
112
- info="Use null for no PEFT",
113
- ),
114
- ]
115
 
116
 
117
  def get_onnxruntime_config():
118
- return get_base_backend_config(backend_name="onnxruntime") + [
119
- # no_weights
120
- gr.Checkbox(
121
- value=False,
122
- label="pytorch.no_weights",
123
- info="Generates random weights instead of downloading pretrained ones",
124
- ),
125
- # export
126
- gr.Checkbox(
127
  value=True,
128
  label="onnxruntime.export",
129
  info="Exports the model to ONNX",
130
  ),
131
- # use_cache
132
- gr.Checkbox(
133
  value=True,
134
  label="onnxruntime.use_cache",
135
  info="Uses cached ONNX model if available",
136
  ),
137
- # use_merged
138
- gr.Checkbox(
139
- value=False,
140
  label="onnxruntime.use_merged",
141
  info="Uses merged ONNX model if available",
142
  ),
143
- # torch_dtype
144
- gr.Dropdown(
145
- value="null",
146
  label="onnxruntime.torch_dtype",
147
- choices=["null", "bfloat16", "float16", "float32", "auto"],
148
- info="Use null for default and `auto` for automatic dtype selection",
149
- ),
150
- # use_io_binding
151
- gr.Checkbox(
152
- value=True,
153
- label="onnxruntime.use_io_binding",
154
- info="Uses IO binding for inference",
155
- ),
156
- # auto_optimization
157
- gr.Dropdown(
158
- value="null",
159
- label="onnxruntime.auto_optimization",
160
- choices=["null", "O1", "O2", "O3", "O4"],
161
- info="Use null for default",
162
- ),
163
- # auto_quantization
164
- gr.Dropdown(
165
- value="null",
166
- label="onnxruntime.auto_quantization",
167
- choices=["null", "arm64", "avx2", "avx512", "avx512_vnni", "tensorrt"],
168
- info="Use null for default",
169
- ),
170
- # optimization
171
- gr.Checkbox(
172
- value=False,
173
- label="onnxruntime.optimization",
174
- info="Enables manual optimization",
175
- ),
176
- # optimization_config
177
- gr.Dataframe(
178
- type="array",
179
- value=[["optimization_level"]],
180
- headers=["1"],
181
- row_count=(1, "static"),
182
- col_count=(1, "dynamic"),
183
- label="onnxruntime.optimization_config",
184
- ),
185
- # quantization
186
- gr.Checkbox(
187
- value=False,
188
- label="onnxruntime.quantization",
189
- info="Enables manual quantization",
190
- ),
191
- # quantization_config
192
- gr.Dataframe(
193
- type="array",
194
- value=[["is_static"]],
195
- headers=[False],
196
- row_count=(1, "static"),
197
- col_count=(1, "dynamic"),
198
- label="onnxruntime.quantization_config",
199
- info="Use null for default",
200
- ),
201
- # calibration
202
- gr.Checkbox(
203
- value=False,
204
- label="onnxruntime.calibration",
205
- info="Enables calibration",
206
  ),
207
- # calibration_config
208
- gr.Dataframe(
209
- type="array",
210
- value=[["glue"]],
211
- headers=["dataset_name"],
212
- row_count=(1, "static"),
213
- col_count=(1, "dynamic"),
214
- label="onnxruntime.calibration_config",
215
- info="Use null for default",
216
- ),
217
- # peft_strategy
218
- gr.Dropdown(
219
- value="null",
220
- label="onnxruntime.peft_strategy",
221
- choices=["null", "lora", "ada_lora", "prompt_tuning", "prefix_tuning", "p_tuning", "ia3"],
222
- info="Use null for full parameters fine-tuning",
223
- ),
224
- ]
225
 
226
 
227
  def get_openvino_config():
228
- return get_base_backend_config(backend_name="openvino") + [
229
- # export
230
- gr.Checkbox(
231
  value=True,
232
  label="openvino.export",
233
  info="Exports the model to ONNX",
234
  ),
235
- # use_cache
236
- gr.Checkbox(
237
  value=True,
238
  label="openvino.use_cache",
239
  info="Uses cached ONNX model if available",
240
  ),
241
- # use_merged
242
- gr.Checkbox(
243
- value=False,
244
  label="openvino.use_merged",
245
  info="Uses merged ONNX model if available",
246
  ),
247
- # reshape
248
- gr.Checkbox(
249
  value=False,
250
  label="openvino.reshape",
251
  info="Reshapes the model to the input shape",
252
  ),
253
- # half
254
- gr.Checkbox(
255
  value=False,
256
  label="openvino.half",
257
  info="Converts model to half precision",
258
  ),
259
- # quantization
260
- gr.Checkbox(
261
- value=False,
262
- label="openvino.quantization",
263
- info="Enables quantization",
264
- ),
265
- # quantization_config
266
- gr.Dataframe(
267
- type="array",
268
- headers=["compression", "input_info", "save_onnx_model"],
269
- value=[[None, None, None]],
270
- row_count=(1, "static"),
271
- col_count=(3, "dynamic"),
272
- label="openvino.quantization_config",
273
- ),
274
- # calibration
275
- gr.Checkbox(
276
- value=False,
277
- label="openvino.calibration",
278
- info="Enables calibration",
279
- ),
280
- # calibration_config
281
- gr.Dataframe(
282
- type="array",
283
- headers=["dataset_name"],
284
- value=[["glue"]],
285
- row_count=(1, "static"),
286
- col_count=(1, "dynamic"),
287
- label="openvino.calibration_config",
288
- ),
289
- ]
290
-
291
-
292
- def get_neural_compressor_config():
293
- return get_base_backend_config(backend_name="neural-compressor") + [
294
- # ptq_quantization
295
- gr.Checkbox(
296
- value=False,
297
- label="neural-compressor.ptq_quantization",
298
- info="Enables post-training quantization",
299
- ),
300
- # ptq_quantization_config
301
- gr.Dataframe(
302
- type="array",
303
- headers=["device"],
304
- value=[["cpu"]],
305
- row_count=(1, "static"),
306
- col_count=(1, "dynamic"),
307
- label="neural-compressor.ptq_quantization_config",
308
- ),
309
- # calibration
310
- gr.Checkbox(
311
- value=False,
312
- label="neural-compressor.calibration",
313
- info="Enables calibration",
314
- ),
315
- # calibration_config
316
- gr.Dataframe(
317
- type="array",
318
- headers=["dataset_name"],
319
- value=[["glue"]],
320
- row_count=(1, "static"),
321
- col_count=(1, "dynamic"),
322
- label="neural-compressor.calibration_config",
323
- ),
324
- ]
325
 
326
 
327
  def get_inference_config():
328
- return [
329
- # duration
330
- gr.Textbox(
331
  value=10,
332
  label="inference.duration",
333
- info="Minimum duration of benchmark in seconds",
334
  ),
335
- # warmup runs
336
- gr.Textbox(
337
  value=10,
338
- label="inference.warmup_runs",
339
- info="Number of warmup runs before measurements",
340
  ),
341
- # memory
342
- gr.Checkbox(
343
- value=False,
344
- label="inference.memory",
345
- info="Measures the peak memory footprint",
346
  ),
347
- # energy
348
- gr.Checkbox(
349
  value=False,
350
- label="inference.energy",
351
- info="Measures energy consumption and carbon emissions",
352
- ),
353
- # input_shapes
354
- gr.Dataframe(
355
- type="array",
356
- value=[[2, 16]],
357
- row_count=(1, "static"),
358
- col_count=(2, "dynamic"),
359
- label="inference.input_shapes",
360
- headers=["batch_size", "sequence_length"],
361
- info="Controllable input shapes, add more columns for more inputs",
362
- ),
363
- # forward kwargs
364
- gr.Dataframe(
365
- type="array",
366
- value=[[False]],
367
- headers=["return_dict"],
368
- row_count=(1, "static"),
369
- col_count=(1, "dynamic"),
370
- label="inference.forward_kwargs",
371
- info="Keyword arguments for the forward pass, add more columns for more arguments",
372
- ),
373
- ]
374
-
375
-
376
- def get_training_config():
377
- return [
378
- # warmup steps
379
- gr.Textbox(
380
- value=40,
381
- label="training.warmup_steps",
382
- ),
383
- # dataset_shapes
384
- gr.Dataframe(
385
- type="array",
386
- value=[[500, 16]],
387
- headers=["dataset_size", "sequence_length"],
388
- row_count=(1, "static"),
389
- col_count=(2, "dynamic"),
390
- label="training.dataset_shapes",
391
- ),
392
- # training_arguments
393
- gr.Dataframe(
394
- value=[[2]],
395
- type="array",
396
- row_count=(1, "static"),
397
- col_count=(1, "dynamic"),
398
- label="training.training_arguments",
399
- headers=["per_device_train_batch_size"],
400
  ),
401
- ]
 
1
  import gradio as gr
2
 
3
 
4
+ def get_process_config():
5
+ return {
6
+ "process.numactl": gr.Checkbox(
7
  value=False,
8
+ label="process.numactl",
9
+ info="Runs the model with numactl",
10
  ),
11
+ "process.numactl_kwargs": gr.Textbox(
12
+ value="",
13
+ label="process.numactl_kwargs",
14
+ info="Additional python dict of kwargs to pass to numactl",
15
+ ),
16
+ }
17
 
18
 
19
  def get_pytorch_config():
20
+ return {
21
+ "pytorch.torch_dtype": gr.Dropdown(
22
+ value="float32",
23
  label="pytorch.torch_dtype",
24
+ choices=["bfloat16", "float16", "float32", "auto"],
25
+ info="The dtype to use for the model",
26
  ),
27
+ "pytorch.torch_compile": gr.Checkbox(
 
28
  value=False,
29
  label="pytorch.torch_compile",
30
  info="Compiles the model with torch.compile",
31
  ),
32
+ }
33
 
34
 
35
  def get_onnxruntime_config():
36
+ return {
37
+ "onnxruntime.export": gr.Checkbox(
38
  value=True,
39
  label="onnxruntime.export",
40
  info="Exports the model to ONNX",
41
  ),
42
+ "onnxruntime.use_cache": gr.Checkbox(
 
43
  value=True,
44
  label="onnxruntime.use_cache",
45
  info="Uses cached ONNX model if available",
46
  ),
47
+ "onnxruntime.use_merged": gr.Checkbox(
48
+ value=True,
 
49
  label="onnxruntime.use_merged",
50
  info="Uses merged ONNX model if available",
51
  ),
52
+ "onnxruntime.torch_dtype": gr.Dropdown(
53
+ value="float32",
 
54
  label="onnxruntime.torch_dtype",
55
+ choices=["bfloat16", "float16", "float32", "auto"],
56
+ info="The dtype to use for the model",
57
  ),
58
+ }
59
 
60
 
61
  def get_openvino_config():
62
+ return {
63
+ "openvino.export": gr.Checkbox(
 
64
  value=True,
65
  label="openvino.export",
66
  info="Exports the model to ONNX",
67
  ),
68
+ "openvino.use_cache": gr.Checkbox(
 
69
  value=True,
70
  label="openvino.use_cache",
71
  info="Uses cached ONNX model if available",
72
  ),
73
+ "openvino.use_merged": gr.Checkbox(
74
+ value=True,
 
75
  label="openvino.use_merged",
76
  info="Uses merged ONNX model if available",
77
  ),
78
+ "openvino.reshape": gr.Checkbox(
 
79
  value=False,
80
  label="openvino.reshape",
81
  info="Reshapes the model to the input shape",
82
  ),
83
+ "openvino.half": gr.Checkbox(
 
84
  value=False,
85
  label="openvino.half",
86
  info="Converts model to half precision",
87
  ),
88
+ }
89
 
90
 
91
  def get_inference_config():
92
+ return {
93
+ "inference.warmup_runs": gr.Slider(
94
+ step=1,
95
+ value=10,
96
+ minimum=0,
97
+ maximum=10,
98
+ label="inference.warmup_runs",
99
+ info="Number of warmup runs",
100
+ ),
101
+ "inference.duration": gr.Slider(
102
+ step=1,
103
  value=10,
104
+ minimum=0,
105
+ maximum=10,
106
  label="inference.duration",
107
+ info="Minimum duration of the benchmark in seconds",
108
  ),
109
+ "inference.iterations": gr.Slider(
110
+ step=1,
111
  value=10,
112
+ minimum=0,
113
+ maximum=10,
114
+ label="inference.iterations",
115
+ info="Minimum number of iterations of the benchmark",
116
  ),
117
+ "inference.latency": gr.Checkbox(
118
+ value=True,
119
+ label="inference.latency",
120
+ info="Measures the latency of the model",
 
121
  ),
122
+ "inference.memory": gr.Checkbox(
 
123
  value=False,
124
+ label="inference.memory",
125
+ info="Measures the peak memory consumption",
126
  ),
127
+ }
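The config helpers above return dicts keyed by the same "<section>.<argument>" strings they use as component labels, and run_benchmark in app.py rebuilds per-backend keyword arguments by splitting those labels on the dot. A small sketch of that round trip, with made-up values standing in for the component outputs:

# Made-up values standing in for what the Gradio components would submit.
values_by_label = {
    "process.numactl": False,
    "inference.memory": True,
    "pytorch.torch_dtype": "float32",
    "onnxruntime.export": True,
}

configs = {"process": {}, "inference": {}, "pytorch": {}, "onnxruntime": {}, "openvino": {}, "ipex": {}}
for label, value in values_by_label.items():
    section, argument = label.split(".")
    configs[section][argument] = value

# configs["pytorch"] is now {"torch_dtype": "float32"}, ready to be unpacked
# into a backend config, e.g. PyTorchConfig(task=task, model=model, **configs["pytorch"]).
print(configs)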
configs/base_config.yaml DELETED
@@ -1,15 +0,0 @@
1
- defaults:
2
- - backend: null # default backend
3
- - benchmark: null # default benchmark
4
- - experiment # inheriting experiment schema
5
- - _self_ # for hydra 1.1 compatibility
6
- - override hydra/job_logging: colorlog # colorful logging
7
- - override hydra/hydra_logging: colorlog # colorful logging
8
-
9
- hydra:
10
- run:
11
- dir: runs/${experiment_name}
12
- job:
13
- chdir: true
14
- env_set:
15
- CUDA_VISIBLE_DEVICES: 0
requirements.txt CHANGED
@@ -1 +1 @@
1
- ansi2html==1.8.0
 
1
+ optimum-benchmark[openvino,onnxruntime,ipex]@git+https://github.com/huggingface/optimum-benchmark.git
run.py DELETED
@@ -1,189 +0,0 @@
1
- import subprocess
2
- import gradio as gr
3
- import pandas as pd
4
- from ansi2html import Ansi2HTMLConverter
5
-
6
- ansi2html_converter = Ansi2HTMLConverter(inline=True)
7
-
8
-
9
- def run_benchmark(kwargs):
10
- for key, value in kwargs.copy().items():
11
- if key.label == "compare_to_baseline":
12
- baseline = value
13
- kwargs.pop(key)
14
- elif key.label == "experiment_name":
15
- experiment_name = value
16
- kwargs.pop(key)
17
- elif key.label == "model":
18
- model = value
19
- kwargs.pop(key)
20
- elif key.label == "task":
21
- task = value
22
- kwargs.pop(key)
23
- elif key.label == "device":
24
- device = value
25
- kwargs.pop(key)
26
- elif key.label == "backend":
27
- backend = value
28
- kwargs.pop(key)
29
- elif key.label == "benchmark":
30
- benchmark = value
31
- kwargs.pop(key)
32
- else:
33
- continue
34
-
35
- if baseline:
36
- baseline_arguments = [
37
- "optimum-benchmark",
38
- "--config-dir",
39
- "./configs",
40
- "--config-name",
41
- "base_config",
42
- f"backend=pytorch",
43
- f"task={task}",
44
- f"model={model}",
45
- f"device={device}",
46
- f"benchmark={benchmark}",
47
- f"experiment_name=baseline_{experiment_name}",
48
- ]
49
- for component, value in kwargs.items():
50
- if f"{benchmark}." in component.label:
51
- label = component.label.replace(f"{benchmark}.", "benchmark.")
52
- if isinstance(component, gr.Dataframe):
53
- for sub_key, sub_value in zip(component.headers, value[0]):
54
- baseline_arguments.append(f"++{label}.{sub_key}={sub_value}")
55
- else:
56
- baseline_arguments.append(f"{label}={value}")
57
-
58
- # yield from run_experiment(baseline_arguments) but get the return code
59
- baseline_return_code, html_text = yield from run_experiment(baseline_arguments, "")
60
- if baseline_return_code is not None and baseline_return_code != 0:
61
- yield gr.update(value=html_text), gr.update(interactive=True), gr.update(visible=False)
62
- return
63
- else:
64
- html_text = ""
65
-
66
- arguments = [
67
- "optimum-benchmark",
68
- "--config-dir",
69
- "./configs",
70
- "--config-name",
71
- "base_config",
72
- f"task={task}",
73
- f"model={model}",
74
- f"device={device}",
75
- f"backend={backend}",
76
- f"benchmark={benchmark}",
77
- f"experiment_name={experiment_name}",
78
- ]
79
- for component, value in kwargs.items():
80
- if f"{backend}." in component.label or f"{benchmark}." in component.label:
81
- label = component.label.replace(f"{backend}.", "backend.").replace(f"{benchmark}.", "benchmark.")
82
-
83
- if isinstance(component, gr.Dataframe):
84
- for sub_key, sub_value in zip(component.headers, value[0]):
85
- arguments.append(f"++{label}.{sub_key}={sub_value}")
86
- else:
87
- arguments.append(f"{label}={value}")
88
-
89
- return_code, html_text = yield from run_experiment(arguments, html_text)
90
- if return_code is not None and return_code != 0:
91
- yield gr.update(value=html_text), gr.update(interactive=True), gr.update(visible=False)
92
- return
93
-
94
- if baseline:
95
- baseline_table = pd.read_csv(f"runs/baseline_{experiment_name}/{benchmark}_results.csv", index_col=0)
96
- table = pd.read_csv(f"runs/{experiment_name}/{benchmark}_results.csv", index_col=0)
97
- # concat tables
98
- table = pd.concat([baseline_table, table], axis=0)
99
- table = postprocess_table(table, experiment_name)
100
- else:
101
- table = pd.read_csv(f"runs/{experiment_name}/{benchmark}_results.csv", index_col=0)
102
-
103
- table_update = gr.update(visible=True, value={"headers": list(table.columns), "data": table.values.tolist()})
104
- yield gr.update(value=html_text), gr.update(interactive=True), table_update
105
- return
106
-
107
-
108
- def run_experiment(args, html_text=""):
109
- command = "<br>".join(args)
110
- html_text += f"<h3>Running command:</h3>{command}"
111
- yield gr.update(value=html_text), gr.update(interactive=False), gr.update(visible=False)
112
-
113
- # stream subprocess output
114
- process = subprocess.Popen(
115
- args,
116
- stdout=subprocess.PIPE,
117
- stderr=subprocess.STDOUT,
118
- universal_newlines=True,
119
- )
120
-
121
- curr_ansi_text = ""
122
- for ansi_line in iter(process.stdout.readline, ""):
123
- if process.returncode is not None and process.returncode != 0:
124
- break
125
-
126
- # stream process output to stdout
127
- print(ansi_line, end="")
128
- # skip torch.distributed.nn.jit.instantiator messages
129
- if "torch.distributed.nn.jit.instantiator" in ansi_line:
130
- continue
131
- # process download messages
132
- if "Downloading " in curr_ansi_text and "Downloading " in ansi_line:
133
- curr_ansi_text = curr_ansi_text.split("\n")[:-2]
134
- print(curr_ansi_text)
135
- curr_ansi_text.append(ansi_line)
136
- curr_ansi_text = "\n".join(curr_ansi_text)
137
- else:
138
- # append line to ansi text
139
- curr_ansi_text += ansi_line
140
- # convert ansi to html
141
- curr_html_text = ansi2html_converter.convert(curr_ansi_text)
142
- # stream html output to gradio
143
- cumul_html_text = html_text + "<br><h3>Streaming logs:</h3>" + curr_html_text
144
- yield gr.update(value=cumul_html_text), gr.update(interactive=False), gr.update(visible=False)
145
-
146
- return process.returncode, cumul_html_text
147
-
148
-
149
- def postprocess_table(table, experiment_name):
150
- table["experiment_name"] = ["baseline", experiment_name]
151
- table = table.set_index("experiment_name")
152
- table.reset_index(inplace=True)
153
- if "forward.latency(s)" in table.columns:
154
- table["forward.latency.reduction(%)"] = (
155
- table["forward.latency(s)"] / table["forward.latency(s)"].iloc[0] - 1
156
- ) * 100
157
- table["forward.latency.reduction(%)"] = table["forward.latency.reduction(%)"].round(2)
158
-
159
- if "forward.throughput(samples/s)" in table.columns:
160
- table["forward.throughput.speedup(%)"] = (
161
- table["forward.throughput(samples/s)"] / table["forward.throughput(samples/s)"].iloc[0] - 1
162
- ) * 100
163
- table["forward.throughput.speedup(%)"] = table["forward.throughput.speedup(%)"].round(2)
164
-
165
- if "forward.peak_memory(MB)" in table.columns:
166
- table["forward.peak_memory.reduction(%)"] = (
167
- table["forward.peak_memory(MB)"] / table["forward.peak_memory(MB)"].iloc[0] - 1
168
- ) * 100
169
- table["forward.peak_memory.reduction(%)"] = table["forward.peak_memory.reduction(%)"].round(2)
170
-
171
- if "generate.latency(s)" in table.columns:
172
- table["generate.latency.reduction(%)"] = (
173
- table["generate.latency(s)"] / table["generate.latency(s)"].iloc[0] - 1
174
- ) * 100
175
- table["generate.latency.reduction(%)"] = table["generate.latency.reduction(%)"].round(2)
176
-
177
- if "generate.throughput(tokens/s)" in table.columns:
178
- table["generate.throughput.speedup(%)"] = (
179
- table["generate.throughput(tokens/s)"] / table["generate.throughput(tokens/s)"].iloc[0] - 1
180
- ) * 100
181
- table["generate.throughput.speedup(%)"] = table["generate.throughput.speedup(%)"].round(2)
182
-
183
- if "generate.peak_memory(MB)" in table.columns:
184
- table["generate.peak_memory.reduction(%)"] = (
185
- table["generate.peak_memory(MB)"] / table["generate.peak_memory(MB)"].iloc[0] - 1
186
- ) * 100
187
- table["generate.peak_memory.reduction(%)"] = table["generate.peak_memory.reduction(%)"].round(2)
188
-
189
- return table
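For reference, the deleted run.py streamed the benchmark logs by reading the subprocess output line by line inside a generator and yielding the accumulated text to a Gradio HTML component. A stripped-down sketch of that pattern, assuming a plain echo command as a stand-in for the optimum-benchmark CLI call:

import subprocess

import gradio as gr

def stream_command(args):
    # Run the command and yield the accumulated output after every line,
    # mirroring the readline loop of run_experiment in the deleted run.py.
    process = subprocess.Popen(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
    text = ""
    for line in iter(process.stdout.readline, ""):
        text += line
        yield f"<pre>{text}</pre>"
    process.wait()

def run_demo_command():
    # "echo" is only a placeholder for the real optimum-benchmark invocation.
    yield from stream_command(["echo", "hello from the benchmark"])

with gr.Blocks() as demo:
    logs = gr.HTML()
    button = gr.Button("Run")
    # Generator functions stream each yielded value to the output component.
    button.click(fn=run_demo_command, outputs=logs)

demo.queue().launch()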