BenchmarkBot committed • Commit 3c37eb3
1 Parent(s): e2e1ee9

move things around

Files changed:
- app.py (+16 -21)
- src/assets/text_content.py (+5 -5)
app.py CHANGED
@@ -27,13 +27,13 @@ LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
 OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN", None)
 
 ALL_COLUMNS_MAPPING = {
-    "weight_class": "Class 🏋️",
-    "model_type": "Type 🤗",
-    #
     "backend.name": "Backend 🏭",
     "backend.torch_dtype": "Dtype 🔥",
-    "quantization": "Quantization 🗜️",
     "optimizations": "Optimizations 🛠️",
+    "quantization": "Quantization 🗜️",
+    #
+    "weight_class": "Class 🏋️",
+    "model_type": "Type 🤗",
     #
     "generate.peak_memory(MB)": "Memory (MB) ⬇️",
     "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",
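Note: ALL_COLUMNS_MAPPING maps raw dataframe columns to display labels, and its insertion order is exactly what this hunk shuffles. A minimal sketch of why the order matters, on toy data (it assumes the table is built by selecting and renaming with this dict, as the get_benchmark_table hunk further down suggests):

import pandas as pd

# Dicts preserve insertion order (Python 3.7+), so reordering the entries
# of ALL_COLUMNS_MAPPING is enough to reorder the leaderboard's columns.
ALL_COLUMNS_MAPPING = {
    "backend.name": "Backend 🏭",
    "backend.torch_dtype": "Dtype 🔥",
    "optimizations": "Optimizations 🛠️",
    "quantization": "Quantization 🗜️",
}

df = pd.DataFrame([{
    "quantization": "GPTQ.4bit",
    "backend.name": "pytorch",
    "backend.torch_dtype": "float16",
    "optimizations": "None",
}])

# Select the columns in mapping order, then rename them to display labels.
table = df[list(ALL_COLUMNS_MAPPING.keys())].rename(columns=ALL_COLUMNS_MAPPING)
print(list(table.columns))
# ['Backend 🏭', 'Dtype 🔥', 'Optimizations 🛠️', 'Quantization 🗜️']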
@@ -45,9 +45,9 @@ ALL_COLUMNS_MAPPING = {
 ALL_COLUMNS_DATATYPES = [
     "str",
     "str",
-    #
     "str",
     "str",
+    #
     "str",
     "str",
     #
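ALL_COLUMNS_DATATYPES pairs with ALL_COLUMNS_MAPPING by position (such a list is typically passed to something like gr.Dataframe(datatype=...)), which is why the placeholder comment moves in both lists in lockstep. A hypothetical guard, not in the commit, that would catch the two drifting apart:

# Hypothetical sanity check for app.py: the positional datatype list must
# stay aligned with the column mapping, since entries pair up by index only.
assert len(ALL_COLUMNS_DATATYPES) == len(ALL_COLUMNS_MAPPING), (
    f"{len(ALL_COLUMNS_DATATYPES)} datatypes declared for "
    f"{len(ALL_COLUMNS_MAPPING)} columns"
)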
@@ -85,21 +85,16 @@ def get_benchmark_df(benchmark="Succeeded-1xA100-80GB"):
     merged_df["quantization"] = merged_df["backend.quantization_strategy"].apply(
         lambda x: "BnB.4bit" if x == "bnb" else ("GPTQ.4bit" if x == "gptq" else "None")
     )
-    # distance to 100% score
-    score_distance = …
-    # distance to 0s latency
-    latency_distance = merged_df["generate.latency(s)"]
-    …
-    )
-    #
-    …
-    …
-    …
-    )
-    # add perf distance
-    merged_df["perf_distance"] = (
-        score_distance**2 + latency_distance**2 + memory_distance**2
-    ) ** 0.5
+    # # distance to 100% score
+    # score_distance = 100 - merged_df["best_score"]
+    # # distance to 0s latency
+    # latency_distance = merged_df["generate.latency(s)"]
+    # # distance to 0MB memory
+    # memory_distance = merged_df["forward.peak_memory(MB)"]
+    # # add perf distance
+    # merged_df["perf_distance"] = (
+    #     score_distance**2 + latency_distance**2 + memory_distance**2
+    # ) ** 0.5
 
     return merged_df
 
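The deleted block (several of its lines were lost in page extraction and are marked … above) computed a "perf distance": the Euclidean distance from an ideal point of 100% score, 0 s latency and 0 MB memory; the commit re-lands it as commented-out code. A self-contained sketch of that computation on toy data (column names are from the diff; treating best_score as a 0-100 percentage is an assumption):

import pandas as pd

merged_df = pd.DataFrame({
    "best_score": [62.5, 48.0],
    "generate.latency(s)": [12.3, 7.8],
    "forward.peak_memory(MB)": [15000.0, 8200.0],
    "backend.quantization_strategy": ["gptq", None],
})

# Human-readable quantization label, exactly as in the hunk above.
merged_df["quantization"] = merged_df["backend.quantization_strategy"].apply(
    lambda x: "BnB.4bit" if x == "bnb" else ("GPTQ.4bit" if x == "gptq" else "None")
)

# Straight-line (Euclidean) distance to the ideal point
# (score=100, latency=0, memory=0); smaller is better.
score_distance = 100 - merged_df["best_score"]
latency_distance = merged_df["generate.latency(s)"]
memory_distance = merged_df["forward.peak_memory(MB)"]
merged_df["perf_distance"] = (
    score_distance**2 + latency_distance**2 + memory_distance**2
) ** 0.5

print(merged_df[["quantization", "perf_distance"]])

Note that the three axes are on very different scales, so the memory term dominates the sum unless the distances are normalized first; that may be why the metric is disabled here, though the commit message gives no reason.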
@@ -121,7 +116,7 @@ def get_benchmark_table(bench_df):
     # rename
     copy_df.rename(columns=ALL_COLUMNS_MAPPING, inplace=True)
     # transform
-    copy_df["…
+    copy_df["Type 🤗"] = copy_df["Type 🤗"].apply(process_model_type)
     copy_df["Best Scored LLM 🏆"] = copy_df["Best Scored LLM 🏆"].apply(
         process_model_name
     )
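process_model_type and process_model_name are defined elsewhere in the Space and are not part of this diff; the stand-ins below are purely hypothetical illustrations of the kind of display transforms leaderboards apply at this step:

def process_model_name(model_name: str) -> str:
    # Hypothetical: render the model id as a markdown link to its Hub page.
    return f"[{model_name}](https://huggingface.co/{model_name})"


def process_model_type(model_type: str) -> str:
    # Hypothetical: collapse raw architecture names into short display labels.
    return model_type.replace("ForCausalLM", "").replace("LMHeadModel", "")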
src/assets/text_content.py CHANGED
@@ -1,14 +1,14 @@
-TITLE = """<h1 align="center" id="space-title">🤗 …
+TITLE = """<h1 align="center" id="space-title">🤗 LLM-Perf Leaderboard 🏋️</h1>"""
 
 INTRODUCTION_TEXT = f"""
-The 🤗 …
+The 🤗 LLM-Perf Leaderboard 🏋️ aims to benchmark the performance (latency, throughput & memory) of Large Language Models (LLMs) with different hardwares, backends and optimizations using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) and [Optimum](https://github.com/huggingface/optimum) flavors.
 
 Anyone from the community can request a model or a hardware/backend/optimization configuration for automated benchmarking:
-- Model evaluation requests should be made in the [🤗 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) and will be added to the 🤗 …
+- Model evaluation requests should be made in the [🤗 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) and will be added to the 🤗 LLM-Perf Leaderboard 🏋️ automatically.
 - Hardware/Backend/Optimization performance requests should be made in the [community discussions](https://huggingface.co/spaces/optimum/llm-perf-leaderboard/discussions) to assess their relevance and feasibility.
 """
 
-ABOUT_TEXT = """<h3>About the 🤗 …
+ABOUT_TEXT = """<h3>About the 🤗 LLM-Perf Leaderboard 🏋️</h3>
 <ul>
 <li>To avoid communication-dependent results, only one GPU is used.</li>
 <li>LLMs are evaluated on a singleton batch with a prompt size of 512 and generating 1000 tokens.</li>
@@ -63,7 +63,7 @@ benchmark:
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results."
 CITATION_BUTTON_TEXT = r"""@misc{open-llm-perf-leaderboard,
 author = {Ilyas Moutawwakil, Régis Pierrard},
-title = {…
+title = {LLM-Perf Leaderboard},
 year = {2023},
 publisher = {Hugging Face},
 howpublished = "\url{https://huggingface.co/spaces/optimum/llm-perf-leaderboard}",
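For context, strings like these are typically wired into the Space's Gradio UI roughly as follows. This is a sketch under assumptions: the import path matches this repo's layout, the widget choices are guesses (the actual app.py layout code is not in this diff), and Textbox's show_copy_button requires a reasonably recent Gradio:

import gradio as gr

# Assumed import path, matching this repo's layout (src/assets/text_content.py).
from src.assets.text_content import (
    TITLE,
    INTRODUCTION_TEXT,
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
)

with gr.Blocks() as demo:
    gr.HTML(TITLE)  # the <h1> title banner
    gr.Markdown(INTRODUCTION_TEXT)
    with gr.Accordion("Citation", open=False):
        gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            lines=7,
            show_copy_button=True,
        )

if __name__ == "__main__":
    demo.launch()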