BenchmarkBot committed 4cfc121
1 Parent(s): d3abea5

updated plot

Files changed:
- app.py (+8, -8)
- src/utils.py (+3, -3)
app.py
CHANGED

@@ -1,7 +1,7 @@
-import plotly.express as px
 import os
 import gradio as gr
 import pandas as pd
+import plotly.express as px
 from apscheduler.schedulers.background import BackgroundScheduler
 
 from src.assets.text_content import TITLE, INTRODUCTION_TEXT, SINGLE_A100_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT
@@ -19,7 +19,7 @@ COLUMNS_MAPPING = {
     "backend.torch_dtype": "Datatype 📥",
     "forward.peak_memory(MB)": "Peak Memory (MB) ⬇️",
     "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",
-    "h4_score": "H4 Score ⬆️",
+    "h4_score": "Average H4 Score ⬆️",
 }
 COLUMNS_DATATYPES = ["markdown", "str", "str", "number", "number", "markdown"]
 SORTING_COLUMN = ["Throughput (tokens/s) ⬆️"]
@@ -66,7 +66,7 @@ def get_benchmark_plot(benchmark):
     scores_df = pd.read_csv(
         f"./llm-perf-dataset/reports/additional_data.csv")
     bench_df = bench_df.merge(scores_df, on="model", how="left")
-
+
     bench_df = bench_df[bench_df["generate.latency(s)"] < 100]
 
     fig = px.scatter(
@@ -85,11 +85,11 @@ def get_benchmark_plot(benchmark):
         },
         xaxis_title="Average H4 Score",
         yaxis_title="Latency per 1000 Tokens (s)",
-        legend_title="Model Type",
+        legend_title="Model Type, Backend",
         width=1200,
         height=600,
     )
-
+
     fig.update_traces(
         hovertemplate="<br>".join([
             "Model: %{customdata[0]}",
@@ -174,7 +174,7 @@ with demo:
                 max_rows=None,
                 visible=False,
             )
-
+
             submit_button.click(
                 submit_query,
                 [
@@ -187,14 +187,14 @@ with demo:
         with gr.TabItem("🖥️ A100-80GB Plot 📊", id=1):
             # Original leaderboard plot
             gr.HTML(SINGLE_A100_TEXT)
-
+
             # Original leaderboard plot
             single_A100_plotly = gr.components.Plot(
                 value=single_A100_plot,
                 elem_id="1xA100-plot",
                 show_label=False,
             )
-
+
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
             citation_button = gr.Textbox(
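For context on what the renamed legend means, here is a minimal, self-contained sketch of the scatter plot this commit tweaks. It is an assumption-heavy reconstruction, not the Space's actual code: the model_type and backend.name columns and the sample rows are hypothetical, inferred from the new legend_title="Model Type, Backend"; only the column names visible in the hunks above come from the diff.

# Sketch only: sample rows and the "model_type"/"backend.name" columns are
# hypothetical; column names that appear in the hunks are taken from the diff.
import pandas as pd
import plotly.express as px

bench_df = pd.DataFrame({
    "model": ["llama-7b", "llama-7b", "falcon-7b"],
    "model_type": ["llama", "llama", "falcon"],             # hypothetical
    "backend.name": ["pytorch", "onnxruntime", "pytorch"],  # hypothetical
    "h4_score": [0.52, 0.52, 0.47],
    "generate.latency(s)": [12.3, 9.8, 14.1],
})

# Same outlier filter as the hunk around line 70.
bench_df = bench_df[bench_df["generate.latency(s)"] < 100]

fig = px.scatter(
    bench_df,
    x="h4_score",
    y="generate.latency(s)",
    # One trace per (model type, backend) pair, which is what makes
    # "Model Type, Backend" the natural legend title after this commit.
    color=bench_df["model_type"] + ", " + bench_df["backend.name"],
    custom_data=["model"],
)
fig.update_layout(
    xaxis_title="Average H4 Score",
    yaxis_title="Latency per 1000 Tokens (s)",
    legend_title="Model Type, Backend",
    width=1200,
    height=600,
)
fig.update_traces(hovertemplate="<br>".join(["Model: %{customdata[0]}"]))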
src/utils.py
CHANGED

@@ -73,16 +73,16 @@ def extract_score_from_clickable(clickable_score) -> float:
 
 
 def submit_query(text, backends, datatypes, threshold, raw_df):
-    raw_df["H4 Score ⬆️"] = raw_df["H4 Score ⬆️"].apply(
+    raw_df["Average H4 Score ⬆️"] = raw_df["Average H4 Score ⬆️"].apply(
         extract_score_from_clickable)
 
     filtered_df = raw_df[
         raw_df["Model 🤗"].str.lower().str.contains(text.lower()) &
         raw_df["Backend 🏭"].isin(backends) &
         raw_df["Datatype 📥"].isin(datatypes) &
-        (raw_df["H4 Score ⬆️"] >= threshold)
+        (raw_df["Average H4 Score ⬆️"] >= threshold)
     ]
 
-    filtered_df["H4 Score ⬆️"] = filtered_df["H4 Score ⬆️"].apply(
+    filtered_df["Average H4 Score ⬆️"] = filtered_df["Average H4 Score ⬆️"].apply(
         make_clickable_score)
     return filtered_df
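submit_query round-trips the score column through two helpers that the diff references but does not show. Below is a plausible sketch of that round trip, assuming the clickable score is a markdown link whose visible text is the number; the link target and exact formatting are guesses, and only extract_score_from_clickable's signature (visible in the hunk header) comes from the source.

# Hypothetical implementations: the real helpers live elsewhere in
# src/utils.py and may differ in formatting.
LEADERBOARD_URL = "https://example.com/leaderboard"  # placeholder target

def make_clickable_score(score: float) -> str:
    # Assumed display format: a markdown link whose text is the score.
    return f"[{score}]({LEADERBOARD_URL})"

def extract_score_from_clickable(clickable_score) -> float:
    # Inverse of the above: recover the number from "[score](url)" so the
    # threshold comparison in submit_query can run on plain floats.
    return float(clickable_score.split("]")[0].lstrip("["))

# submit_query strips the markdown, filters numerically, then re-wraps
# the surviving rows for display:
assert extract_score_from_clickable(make_clickable_score(0.52)) == 0.52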