import random
import subprocess

import gradio as gr
from ansi2html import Ansi2HTMLConverter
from optimum_benchmark.task_utils import (
    TASKS_TO_AUTOMODELS,
    infer_task_from_model_name_or_path,
)


def get_backend_config():
    return [
        # seed
        gr.Textbox(label="backend.seed", value=42),
        # inter_op_num_threads
        gr.Textbox(
            label="backend.inter_op_num_threads",
            value=None,
            placeholder=None,
        ),
        # intra_op_num_threads
        gr.Textbox(
            label="backend.intra_op_num_threads",
            value=None,
            placeholder=None,
        ),
        # initial_isolation_check
        gr.Checkbox(label="backend.initial_isolation_check", value=True),
        # continous_isolation_check
        gr.Checkbox(label="backend.continous_isolation_check", value=True),
        # delete_cache
        gr.Checkbox(label="backend.delete_cache", value=False),
    ]


def get_inference_config():
    return [
        # duration
        gr.Textbox(label="benchmark.duration", value=10),
        # warmup_runs
        gr.Textbox(label="benchmark.warmup_runs", value=1),
    ]


def get_pytorch_config():
    return [
        # no_weights
        gr.Checkbox(label="backend.no_weights"),
        # device_map
        gr.Dropdown(["auto", "sequential"], label="backend.device_map"),
        # torch_dtype
        gr.Dropdown(
            ["bfloat16", "float16", "float32", "auto"],
            label="backend.torch_dtype",
        ),
        # disable_grad
        gr.Checkbox(label="backend.disable_grad"),
        # eval_mode
        gr.Checkbox(label="backend.eval_mode"),
        # amp_autocast
        gr.Checkbox(label="backend.amp_autocast"),
        # amp_dtype
        gr.Dropdown(["bfloat16", "float16"], label="backend.amp_dtype"),
        # torch_compile
        gr.Checkbox(label="backend.torch_compile"),
        # bettertransformer
        gr.Checkbox(label="backend.bettertransformer"),
        # quantization_scheme
        gr.Dropdown(["gptq", "bnb"], label="backend.quantization_scheme"),
        # use_ddp
        gr.Checkbox(label="backend.use_ddp"),
        # peft_strategy
        gr.Textbox(label="backend.peft_strategy"),
    ]


conv = Ansi2HTMLConverter()


def run_experiment(kwargs):
    arguments = [
        "optimum-benchmark",
        "--config-dir",
        "./",
        "--config-name",
        "base_config",
    ]
    # build Hydra-style overrides from the UI components (label -> config key)
    for key, value in kwargs.items():
        arguments.append(f"{key.label}={value if value != '' else 'null'}")

    # stream subprocess output
    process = subprocess.Popen(
        arguments,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )

    ansi_text = ""
    html_text = ""
    for ansi_line in iter(process.stdout.readline, ""):
        # stream process output
        print(ansi_line, end="")
        # append line to ansi text
        ansi_text += ansi_line
        # convert ansi to html
        html_text = conv.convert(ansi_text)
        # extract style from html
        style = html_text.split('<style type="text/css">')[1].split("</style>")[0]
        # parse style into dict
        style_dict = {}
        for line in style.split("\n"):
            if line:
                key, value = line.split("{")
                key = key.replace(".", "").strip()
                value = value.split("}")[0].strip()
                style_dict[key] = value

        # replace style classes in html with inline styles
        for key, value in style_dict.items():
            html_text = html_text.replace(f'class="{key}"', f'style="{value}"')

        yield html_text

    return html_text


with gr.Blocks() as demo:
    # title text
    gr.HTML(
        "<h1 style='text-align: center'>🤗 Optimum Benchmark 🏋️</h1>"
    )
    # explanation text
    gr.Markdown(
        "This is a demo space of [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark.git)."
    )
    model = gr.Textbox(
        label="model",
        value="bert-base-uncased",
    )
    task = gr.Dropdown(
        label="task",
        value="text-classification",
        choices=list(TASKS_TO_AUTOMODELS.keys()),
    )
    device = gr.Dropdown(
        value="cpu",
        choices=["cpu", "cuda"],
        label="device",
    )
    experiment_name = gr.Textbox(
        label="experiment_name",
        value=f"experiment_{random.getrandbits(16)}",
    )
    model.submit(fn=infer_task_from_model_name_or_path, inputs=[model], outputs=[task])

    with gr.Row():
        with gr.Column(variant="panel"):
            backend = gr.Dropdown(
                ["pytorch", "onnxruntime", "openvino", "neural-compressor"],
                label="backend",
                value="pytorch",
                container=True,
            )

        with gr.Column(variant="panel"):
            with gr.Accordion(label="Backend Config", open=False):
                backend_config = get_backend_config() + get_pytorch_config()

    with gr.Row():
        with gr.Column(variant="panel"):
            benchmark = gr.Dropdown(
                choices=["inference", "training"],
                label="benchmark",
                value="inference",
                container=True,
            )

        with gr.Column(variant="panel"):
            with gr.Accordion(label="Benchmark Config", open=False):
                benchmark_config = get_inference_config()

    # run benchmark button
    run_benchmark = gr.Button(value="Run Benchmark", variant="primary")

    # accordion with output logs
    with gr.Accordion(label="Logs:", open=True):
        logs = gr.HTML()

    run_benchmark.click(
        fn=run_experiment,
        inputs={
            experiment_name,
            model,
            task,
            device,
            backend,
            benchmark,
            *backend_config,
            *benchmark_config,
        },
        outputs=[logs],
        queue=True,
    )


if __name__ == "__main__":
    demo.queue().launch()