Spaces:
Running
Running
File size: 4,089 Bytes
c8763bd 134a499 c8763bd 134a499 ab5f5f1 6f3a090 0f1bf97 ab5f5f1 0f1bf97 c8763bd efc3d5b 9a3f7b4 ab5f5f1 b3a1bf0 c8763bd 6f3a090 ab5f5f1 4b40065 ab5f5f1 d19e350 ab5f5f1 d19e350 ab5f5f1 d19e350 134a499 d19e350 ab5f5f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import os
import gradio as gr
from src.control_panel import create_control_panel, create_control_callback
from src.latency_score_memory import create_lat_score_mem_plot
from src.leaderboard import create_leaderboard_table
from src.flashattentionv2 import create_fa2_plots
from src.bettertransformer import create_bt_plots
from src.llm_perf import get_llm_perf_df
from src.assets import custom_css
from src.content import (
LOGO,
TITLE,
ABOUT,
INTRODUCTION,
EXAMPLE_CONFIG,
CITATION_BUTTON,
CITATION_BUTTON_LABEL,
)
# Maps each benchmark machine name to the label shown on its hardware tab.
# The label's emoji had been mis-decoded into mojibake; it is restored here.
MACHINE_TO_HARDWARE = {"hf-dgx-01": "A100-80GB 🖥️"}
# Hugging Face access token read from the environment; None when it is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Top-level Gradio Blocks container for the page, styled with the project's custom CSS.
demo = gr.Blocks(css=custom_css)
with demo:
    # Page header: logo, title and introduction text.
    gr.HTML(LOGO, elem_classes="logo")
    gr.HTML(TITLE, elem_classes="title")
    gr.Markdown(INTRODUCTION, elem_classes="descriptive-text")

    ####################### HARDWARE TABS #######################
    with gr.Tabs(elem_classes="tabs"):
        # One tab per entry of MACHINE_TO_HARDWARE. The loop index is named
        # `tab_id` so that the builtin `id` is not shadowed.
        for tab_id, (machine, hardware) in enumerate(MACHINE_TO_HARDWARE.items()):
            with gr.TabItem(hardware, id=tab_id):
                ####################### CONTROL PANEL #######################
                (
                    filter_button,
                    machine_textbox,
                    search_bar,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                ) = create_control_panel()

                ####################### HARDWARE SUBTABS #######################
                with gr.Tabs(elem_classes="subtabs"):
                    # Data for this machine; the same object feeds every subtab below.
                    llm_perf_df = get_llm_perf_df(machine=machine)

                    ####################### LEADERBOARD TAB #######################
                    with gr.TabItem("Leaderboard 🏅", id=0):
                        leaderboard_table = create_leaderboard_table(llm_perf_df)

                    ####################### LAT. vs. SCORE vs. MEM. TAB #######################
                    with gr.TabItem("Latency vs. Score vs. Memory 📊", id=1):
                        lat_score_mem_plot = create_lat_score_mem_plot(llm_perf_df)

                    ####################### BETTERTRANSFORMER SPEEDUP TAB #######################
                    with gr.TabItem("BetterTransformer Speedup 📈", id=2):
                        bt_prefill_plot, bt_decode_plot = create_bt_plots(llm_perf_df)

                    ####################### FLASHATTENTIONV2 SPEEDUP TAB #######################
                    with gr.TabItem("FlashAttentionV2 Speedup 📈", id=3):
                        fa2_prefill_plot, fa2_decode_plot = create_fa2_plots(llm_perf_df)

                ####################### CONTROL CALLBACK #######################
                # Called once per hardware tab, with that tab's own controls
                # (inputs) and its table/plots (outputs).
                create_control_callback(
                    filter_button,
                    # inputs
                    machine_textbox,
                    search_bar,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                    # outputs
                    leaderboard_table,
                    lat_score_mem_plot,
                    bt_prefill_plot,
                    bt_decode_plot,
                    fa2_prefill_plot,
                    fa2_decode_plot,
                )

        ####################### ABOUT TAB #######################
        # NOTE(review): this id is hard-coded to 3 while the hardware tabs use
        # ids 0..N-1, so it would clash once MACHINE_TO_HARDWARE holds four or
        # more machines -- confirm before adding hardware.
        with gr.TabItem("About 📖", id=3):
            gr.HTML(ABOUT, elem_classes="descriptive-text")
            gr.Markdown(EXAMPLE_CONFIG, elem_classes="descriptive-text")

    ####################### CITATION #######################
    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                show_copy_button=True,
            )
if __name__ == "__main__":
    # Script entry point: enable the request queue, then launch the demo.
    queued_demo = demo.queue()
    queued_demo.launch()
|