"""Gradio application: per-hardware benchmark tabs, an About tab and a citation box.

Importing this module builds the ``demo`` Blocks layout; running it as a
script additionally enables the queue and launches the server.
"""

import os

import gradio as gr

from src.control_panel import create_control_panel, create_control_callback
from src.latency_score_memory import create_lat_score_mem_plot
from src.leaderboard import create_leaderboard_table
from src.bettertransformer import create_bt_plots
from src.flashattentionv2 import create_fa2_plots
from src.custom_kernels import create_custom_kernels_plots
from src.llm_perf import get_llm_perf_df
from src.assets import custom_css
from src.content import (
    LOGO,
    TITLE,
    ABOUT,
    INTRODUCTION,
    EXAMPLE_CONFIG,
    CITATION_BUTTON,
    CITATION_BUTTON_LABEL,
)

# Machine name (passed to get_llm_perf_df) -> label shown on its hardware tab.
MACHINE_TO_HARDWARE = {"hf-dgx-01": "A100-80GB 🖥️"}
# Optional token read from the environment; None when the variable is unset.
# NOTE(review): not referenced anywhere else in the visible part of this file.
HF_TOKEN = os.environ.get("HF_TOKEN", None)


demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(LOGO, elem_classes="logo")
    gr.HTML(TITLE, elem_classes="title")
    gr.Markdown(INTRODUCTION, elem_classes="descriptive-text")

    ####################### HARDWARE TABS #######################
    with gr.Tabs(elem_classes="tabs"):
        # `tab_id` (not `id`) so the builtin id() is not shadowed at module scope.
        for tab_id, (machine, hardware) in enumerate(MACHINE_TO_HARDWARE.items()):
            with gr.TabItem(hardware, id=tab_id):
                ####################### CONTROL PANEL #######################
                (
                    filter_button,
                    machine_textbox,
                    search_bar,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                ) = create_control_panel()

                ####################### HARDWARE SUBTABS #######################
                with gr.Tabs(elem_classes="subtabs"):
                    # One dataframe per machine feeds every subtab below.
                    llm_perf_df = get_llm_perf_df(machine=machine)

                    ####################### LEADERBOARD TAB #######################
                    with gr.TabItem("Leaderboard 🏅", id=0):
                        leaderboard_table = create_leaderboard_table(llm_perf_df)

                    ####################### LAT. vs. SCORE vs. MEM. TAB #######################
                    with gr.TabItem("Latency vs. Score vs. Memory 📊", id=1):
                        lat_score_mem_plot = create_lat_score_mem_plot(llm_perf_df)

                    ####################### BETTERTRANSFORMER SPEEDUP TAB #######################
                    with gr.TabItem("BetterTransformer Speedup 📈", id=2):
                        bt_prefill_plot, bt_decode_plot = create_bt_plots(llm_perf_df)

                    ####################### FLASHATTENTIONV2 SPEEDUP TAB #######################
                    with gr.TabItem("FlashAttentionV2 Speedup 📈", id=3):
                        fa2_prefill_plot, fa2_decode_plot = create_fa2_plots(llm_perf_df)

                    ####################### CUSTOM KERNELS TAB #######################
                    with gr.TabItem("Custom Quantization Kernels Comparison 🏆", id=4):
                        (
                            custom_kernels_prefill_plot,
                            custom_kernels_decode_plot,
                        ) = create_custom_kernels_plots(llm_perf_df)

                ####################### CONTROL CALLBACK #######################
                # Wires this tab's controls to this tab's table and plots.
                create_control_callback(
                    filter_button,
                    # inputs
                    machine_textbox,
                    search_bar,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                    # outputs
                    leaderboard_table,
                    lat_score_mem_plot,
                    bt_prefill_plot,
                    bt_decode_plot,
                    fa2_prefill_plot,
                    fa2_decode_plot,
                    custom_kernels_prefill_plot,
                    custom_kernels_decode_plot,
                )

        ####################### ABOUT TAB #######################
        # Hardware tabs use ids 0..N-1 (from enumerate), so N is the first id
        # that cannot collide with them, however many machines are listed.
        with gr.TabItem("About 📖", id=len(MACHINE_TO_HARDWARE)):
            gr.HTML(ABOUT, elem_classes="descriptive-text")
            gr.Markdown(EXAMPLE_CONFIG, elem_classes="descriptive-text")

    ####################### CITATION #######################
    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                show_copy_button=True,
            )


if __name__ == "__main__":
    # Launch demo
    demo.queue().launch()