BenchmarkBot committed • commit 9e3eaf4 • 1 parent: df1a500

remove about

Changed files:
- app.py (+6 -14)
- src/assets/text_content.py (+5 -12)
app.py
CHANGED
@@ -9,7 +9,6 @@ from src.assets.text_content import (
     TITLE,
     INTRODUCTION_TEXT,
     A100_TEXT,
-    ABOUT_TEXT,
     CITATION_BUTTON_LABEL,
     CITATION_BUTTON_TEXT,
 )
@@ -29,16 +28,14 @@ OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN", None)
 
 
 ALL_COLUMNS_MAPPING = {
-    "best_scored_model": "Best Scored Model 🏆",
-    "model_type": "Type 🤗",
-    "weight_class": "Class 🏋️",
+    "best_scored_model": "Best Scored LLM 🏆",
+    "model_type": "LLM Type 🤗",
+    "weight_class": "Weight Class 🏋️",
     #
     "backend.name": "Backend 🏭",
-    "backend.torch_dtype": "Datatype 📥",
+    "backend.torch_dtype": "Load Datatype 📥",
     "optimizations": "Optimizations 🛠️",
     #
-    # "tradeoff": "Tradeoff* ⬇️",
-    #
     "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",
     "forward.peak_memory(MB)": "Peak Memory (MB) ⬇️",
     "best_score": "Score (%) ⬆️",
@@ -53,8 +50,6 @@ ALL_COLUMNS_DATATYPES = [
     "str",
     "str",
     #
-    # "number",
-    #
     "number",
     "number",
     "number",
@@ -112,8 +107,8 @@ def get_benchmark_table(bench_df):
     # rename
     bench_df.rename(columns=ALL_COLUMNS_MAPPING, inplace=True)
     # transform
-    bench_df["Type 🤗"] = bench_df["Type 🤗"].apply(process_model_type)
-    bench_df["Class 🏋️"] = bench_df["Class 🏋️"].apply(
+    bench_df["LLM Type 🤗"] = bench_df["LLM Type 🤗"].apply(process_model_type)
+    bench_df["Weight Class 🏋️"] = bench_df["Weight Class 🏋️"].apply(
         process_weight_class
     )
     bench_df["Best Scored Model 🏆"] = bench_df["Best Scored Model 🏆"].apply(
@@ -309,9 +304,6 @@ with demo:
                 elem_id="filter-button",
             )
 
-        with gr.TabItem("❔ About 📖", id=4):
-            gr.HTML(ABOUT_TEXT)
-
     demo.load(
         change_tab,
         A100_tabs,
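For context on the get_benchmark_table hunk above: the display columns come from ALL_COLUMNS_MAPPING via pandas' rename, then get post-processed with apply. Below is a minimal sketch of that rename/transform step; the sample data and the process_model_type stand-in are illustrative assumptions, since the real helper is not part of this diff.

```python
import pandas as pd

# Illustrative subset of the mapping introduced by this commit.
ALL_COLUMNS_MAPPING = {
    "model_type": "LLM Type 🤗",
    "weight_class": "Weight Class 🏋️",
}

# Hypothetical stand-in: the real process_model_type is defined elsewhere in app.py.
def process_model_type(model_type: str) -> str:
    return model_type.replace("_", " ").title()

bench_df = pd.DataFrame({"model_type": ["fine_tuned"], "weight_class": ["7B"]})

# rename: raw benchmark columns -> display names
bench_df.rename(columns=ALL_COLUMNS_MAPPING, inplace=True)

# transform: post-process a display column in place
bench_df["LLM Type 🤗"] = bench_df["LLM Type 🤗"].apply(process_model_type)

print(bench_df)  # LLM Type 🤗 = "Fine Tuned", Weight Class 🏋️ = "7B"
```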
src/assets/text_content.py
CHANGED
@@ -10,18 +10,11 @@ Anyone from the community can request a model or a hardware/backend/optimization
 
 A100_TEXT = """<h3>Single-GPU Benchmark (1xA100):</h3>
 <ul>
-    <li>
-    <li>
-</ul>
-"""
-
-ABOUT_TEXT = """<h3>About the benchmarks:</h3>
-<ul>
-    <li>The performance benchmarks were obtained using <a href="https://github.com/huggingface/optimum-benchmark">Optimum-Benchmark</a>.</li>
-    <li>Throughput is measured in tokens per second when generating 1000 tokens with a batch size of 1.</li>
-    <li>Peak memory is measured in MB during the first forward pass of the model (no warmup).</li>
-    <li>Open LLM Score is an average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">🤗 Open LLM Leaderboard</a>.</li>
-    <li>Open LLM Tradeoff is the euclidean distance between an LLM and the "perfect LLM" (i.e. 0 latency and 100% accuracy), translating the tradeoff between latency and accuracy.</li>
+    <li>LLMs are evaluated on a singleton batch, generating a thousand tokens.</li>
+    <li>Peak memory is measured in MB during the first forward pass of the LLM (no warmup).</li>
+    <li>Each pair of (LLM Type, Weight Class) is represented by the best scored LLM; this LLM is the one used for all the hardware/backend/optimization experiments.</li>
+    <li>Score is the average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">🤗 Open LLM Leaderboard</a>.</li>
+    <li>Ranking is based on the euclidean distance from the "perfect LLM" (i.e. 0 latency and 100% accuracy).</li>
 </ul>
 """
 
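The "Ranking" bullet above refers to the euclidean distance from a "perfect LLM" sitting at 0 latency and 100% accuracy. A minimal sketch of that computation, assuming latency is the time to generate the 1000 benchmark tokens; the leaderboard's exact units and any normalization are not shown in this diff.

```python
import math

def perfect_llm_distance(latency_s: float, score_pct: float) -> float:
    # The "perfect LLM" sits at (0 latency, 100% accuracy);
    # a smaller distance means a better latency/accuracy tradeoff.
    return math.sqrt(latency_s**2 + (100.0 - score_pct) ** 2)

# e.g. 1000 tokens at 40 tokens/s -> 25 s latency, with a 60% Open LLM score
latency_s = 1000 / 40
print(perfect_llm_distance(latency_s, 60.0))  # ≈ 47.2
```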