Spaces:
Running
Running
Arnav Chavan
committed on
Commit
•
c5bc8e4
1
Parent(s):
c113723
alignment fix and description change
Browse files- README.md +16 -4
- app.py +8 -5
- dataset/llm-perf-leaderboard-Raspberry Pi 5(8GB).csv +127 -127
- hardware.yaml +0 -1
- src/assets.py +24 -3
- src/content.py +18 -3
- src/hardware.py +0 -1
- src/llm_perf.py +1 -1
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title: Edge
|
3 |
emoji: π
|
4 |
colorFrom: red
|
5 |
colorTo: blue
|
@@ -11,10 +11,10 @@ license: apache-2.0
|
|
11 |
tags: [edge llm leaderboard, llm edge leaderboard, llm, edge, leaderboard]
|
12 |
---
|
13 |
|
14 |
-
# LLM
|
15 |
|
16 |
## π About
|
17 |
-
The Edge
|
18 |
Its aim is to benchmark the performance (throughput and memory)
|
19 |
of Large Language Models (LLMs) on Edge hardware - starting with a Raspberry Pi 5 (8GB) based on the ARM Cortex A76 CPU.
|
20 |
|
@@ -32,9 +32,21 @@ All of our throughput benchmarks are run by this single tool
|
|
32 |
[llama-bench](https://github.com/ggerganov/llama.cpp/tree/master/examples/llama-bench)
|
33 |
using the power of [llama.cpp](https://github.com/ggerganov/llama.cpp) to guarantee reproducibility and consistency.
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
## π How to run locally
|
36 |
|
37 |
-
To run the Edge
|
38 |
|
39 |
### 1. Clone the Repository
|
40 |
|
|
|
1 |
---
|
2 |
+
title: Edge LLM Leaderboard
|
3 |
emoji: π
|
4 |
colorFrom: red
|
5 |
colorTo: blue
|
|
|
11 |
tags: [edge llm leaderboard, llm edge leaderboard, llm, edge, leaderboard]
|
12 |
---
|
13 |
|
14 |
+
# Edge LLM Leaderboard
|
15 |
|
16 |
## π About
|
17 |
+
The Edge LLM Leaderboard is a leaderboard to gauge practical performance and quality of edge LLMs.
|
18 |
Its aim is to benchmark the performance (throughput and memory)
|
19 |
of Large Language Models (LLMs) on Edge hardware - starting with a Raspberry Pi 5 (8GB) based on the ARM Cortex A76 CPU.
|
20 |
|
|
|
32 |
[llama-bench](https://github.com/ggerganov/llama.cpp/tree/master/examples/llama-bench)
|
33 |
using the power of [llama.cpp](https://github.com/ggerganov/llama.cpp) to guarantee reproducibility and consistency.
|
34 |
|
35 |
+
## π Ranking Models
|
36 |
+
|
37 |
+
We use MMLU (zero-shot) via [llama-perplexity](https://github.com/ggerganov/llama.cpp/tree/master/examples/perplexity) for performance evaluation, focusing on key metrics relevant for edge applications:
|
38 |
+
|
39 |
+
1. **Prefill Latency (Time to First Token - TTFT):** Measures the time to generate the first token. Low TTFT ensures a smooth user experience, especially for real-time interactions in edge use cases.
|
40 |
+
|
41 |
+
2. **Decode Latency (Generation Speed):** Indicates the speed of generating subsequent tokens, critical for real-time tasks like transcription or extended dialogue sessions.
|
42 |
+
|
43 |
+
3. **Model Size:** Smaller models are better suited for edge devices with limited secondary storage compared to cloud or GPU systems, making efficient deployment possible.
|
44 |
+
|
45 |
+
These metrics collectively address the unique challenges of deploying LLMs on edge devices, balancing performance, responsiveness, and memory constraints.
|
46 |
+
|
47 |
## π How to run locally
|
48 |
|
49 |
+
To run the Edge LLM Leaderboard locally on your machine, follow these steps:
|
50 |
|
51 |
### 1. Clone the Repository
|
52 |
|
app.py
CHANGED
@@ -4,7 +4,7 @@ import src.dependency # noqa
|
|
4 |
from src.assets import custom_css
|
5 |
|
6 |
# from src.attention import create_attn_plots
|
7 |
-
from src.content import ABOUT, CITATION_BUTTON, CITATION_BUTTON_LABEL, LOGO, TITLE
|
8 |
from src.hardware import load_hardware_configs
|
9 |
from src.leaderboard import create_leaderboard_table
|
10 |
from src.llm_perf import get_llm_perf_df
|
@@ -22,12 +22,14 @@ demo = gr.Blocks(
|
|
22 |
theme=gr.themes.Default(primary_hue="indigo", secondary_hue="indigo"),
|
23 |
)
|
24 |
with demo:
|
25 |
-
gr.
|
26 |
-
|
|
|
|
|
27 |
####################### HARDWARE TABS #######################
|
28 |
with gr.Tabs(elem_classes="tabs"):
|
29 |
for id, config in enumerate(configs):
|
30 |
-
with gr.TabItem(
|
31 |
####################### HARDWARE DETAILS #######################
|
32 |
if config.detail:
|
33 |
gr.Markdown(config.detail, elem_classes="descriptive-text")
|
@@ -57,7 +59,7 @@ with demo:
|
|
57 |
hardware_type=config.hardware_type,
|
58 |
)
|
59 |
####################### LEADERBOARD TAB #######################
|
60 |
-
with gr.TabItem("
|
61 |
search_bar, columns_checkboxes, leaderboard_table = (
|
62 |
create_leaderboard_table(open_llm_perf_df)
|
63 |
)
|
@@ -114,6 +116,7 @@ with demo:
|
|
114 |
label=CITATION_BUTTON_LABEL,
|
115 |
elem_id="citation-button",
|
116 |
show_copy_button=True,
|
|
|
117 |
)
|
118 |
|
119 |
if __name__ == "__main__":
|
|
|
4 |
from src.assets import custom_css
|
5 |
|
6 |
# from src.attention import create_attn_plots
|
7 |
+
from src.content import ABOUT, CITATION_BUTTON, CITATION_BUTTON_LABEL, LOGO, LOGO2, TITLE
|
8 |
from src.hardware import load_hardware_configs
|
9 |
from src.leaderboard import create_leaderboard_table
|
10 |
from src.llm_perf import get_llm_perf_df
|
|
|
22 |
theme=gr.themes.Default(primary_hue="indigo", secondary_hue="indigo"),
|
23 |
)
|
24 |
with demo:
|
25 |
+
with gr.Row():
|
26 |
+
gr.HTML(LOGO, elem_classes="logo")
|
27 |
+
gr.HTML(LOGO2, elem_classes="logo2")
|
28 |
+
gr.HTML(TITLE, elem_classes="title")
|
29 |
####################### HARDWARE TABS #######################
|
30 |
with gr.Tabs(elem_classes="tabs"):
|
31 |
for id, config in enumerate(configs):
|
32 |
+
with gr.TabItem("Leaderboard", id=id):
|
33 |
####################### HARDWARE DETAILS #######################
|
34 |
if config.detail:
|
35 |
gr.Markdown(config.detail, elem_classes="descriptive-text")
|
|
|
59 |
hardware_type=config.hardware_type,
|
60 |
)
|
61 |
####################### LEADERBOARD TAB #######################
|
62 |
+
with gr.TabItem("Raspberry Pi 5 - Cortex A76", id=0):
|
63 |
search_bar, columns_checkboxes, leaderboard_table = (
|
64 |
create_leaderboard_table(open_llm_perf_df)
|
65 |
)
|
|
|
116 |
label=CITATION_BUTTON_LABEL,
|
117 |
elem_id="citation-button",
|
118 |
show_copy_button=True,
|
119 |
+
lines=7
|
120 |
)
|
121 |
|
122 |
if __name__ == "__main__":
|
dataset/llm-perf-leaderboard-Raspberry Pi 5(8GB).csv
CHANGED
@@ -1,129 +1,129 @@
|
|
1 |
Model,Quantization,Params (B),Model Size (GB),Prefill (tokens/s),Decode (tokens/s),Backend,MMLU Accuracy
|
2 |
-
gemma-2-9b,Q8_0,10.159,10.796,2.169,0.012,llama_cpp,42.
|
3 |
-
DeepSeek-V2-Lite,Q4_K_M,15.706,10.36,4.304,1.764,llama_cpp,38.
|
4 |
-
aya-expanse-8b,Q8_0,9.077,9.644,3.1,0.027,llama_cpp,41.
|
5 |
-
aya-23-8B,Q8_0,9.077,9.644,3.174,0.027,llama_cpp,38.
|
6 |
-
Yi-1.5-9B,Q8_0,8.829,9.382,2.585,0.019,llama_cpp,41.
|
7 |
-
Qwen2.5-14B,Q4_K_M,14.77,8.982,1.916,0.018,llama_cpp,42.
|
8 |
-
DeepSeek-V2-Lite,Q4_0_4_4,15.706,8.901,7.788,3.867,llama_cpp,38.
|
9 |
-
Phi-3-medium-128k-instruct,Q4_K_M,13.96,8.566,1.819,0.02,llama_cpp,42.
|
10 |
-
Hermes-3-Llama-3.1-8B,Q8_0,8.03,8.533,3.286,0.922,llama_cpp,41.
|
11 |
-
Qwen2.5-14B,Q4_0_4_4,14.77,8.512,4.698,0.028,llama_cpp,42.
|
12 |
-
internlm2_5-7b-chat,Q8_0,7.738,8.222,3.258,1.238,llama_cpp,41.
|
13 |
-
dolphin-2.9.2-qwen2-7b,Q8_0,7.616,8.093,4.241,1.301,llama_cpp,38.
|
14 |
-
Qwen2.5-7B,Q8_0,7.616,8.093,4.253,1.302,llama_cpp,40.
|
15 |
-
Phi-3-medium-128k-instruct,Q4_0_4_4,13.96,7.896,4.715,0.038,llama_cpp,42.
|
16 |
-
NexusRaven-V2-13B,Q4_K_M,13.016,7.865,2.066,0.035,llama_cpp,32.
|
17 |
-
Mistral-7B-Instruct-v0.3,Q8_0,7.248,7.702,4.104,1.29,llama_cpp,43.
|
18 |
-
dolphin-2.9.3-mistral-7B-32k,Q8_0,7.248,7.702,4.135,1.294,llama_cpp,40.
|
19 |
-
Yarn-Mistral-7b-128k,Q8_0,7.242,7.695,4.082,1.292,llama_cpp,40.
|
20 |
-
Starling-LM-7B-beta,Q8_0,7.242,7.695,4.132,1.296,llama_cpp,41.
|
21 |
-
Mistral-Nemo-Base-2407,Q4_K_M,12.248,7.469,2.453,1.358,llama_cpp,41.
|
22 |
-
NexusRaven-V2-13B,Q4_0_4_4,13.016,7.365,4.979,1.348,llama_cpp,
|
23 |
-
OLMoE-1B-7B-0924,Q8_0,6.919,7.358,26.942,7.489,llama_cpp,38.
|
24 |
-
OLMo-7B-0724-hf,Q8_0,6.888,7.319,4.515,1.371,llama_cpp,36.
|
25 |
-
mpt-7b-instruct,Q8_0,6.856,7.285,4.287,1.367,llama_cpp,35.
|
26 |
-
Amber,Q8_0,6.738,7.16,4.442,1.373,llama_cpp,33.
|
27 |
-
Mistral-Nemo-Base-2407,Q4_0_4_4,12.248,7.064,9.103,1.48,llama_cpp,41.
|
28 |
-
gemma-2-9b,Q4_K_M,10.159,6.508,3.531,1.629,llama_cpp,41.
|
29 |
-
Yarn-Solar-10b-64k,Q4_K_M,10.732,6.461,2.905,1.503,llama_cpp,38.
|
30 |
-
SOLAR-10.7B-v1.0,Q4_K_M,10.732,6.461,2.925,1.505,llama_cpp,39.
|
31 |
-
SOLAR-10.7B-Instruct-v1.0,Q4_K_M,10.732,6.461,2.916,1.506,llama_cpp,40.
|
32 |
-
Yi-1.5-6B,Q8_0,6.061,6.441,5.269,1.584,llama_cpp,39.
|
33 |
-
gemma-2-9b,Q4_0_4_4,10.159,6.19,10.553,1.757,llama_cpp,42.
|
34 |
-
SOLAR-10.7B-v1.0,Q4_0_4_4,10.732,6.072,9.315,1.635,llama_cpp,39.
|
35 |
-
SOLAR-10.7B-Instruct-v1.0,Q4_0_4_4,10.732,6.072,9.332,1.635,llama_cpp,40.
|
36 |
-
Yarn-Solar-10b-64k,Q4_0_4_4,10.732,6.072,9.352,1.638,llama_cpp,39.
|
37 |
-
aya-expanse-8b,Q4_K_M,9.077,5.906,4.406,1.911,llama_cpp,41.
|
38 |
-
aya-23-8B,Q4_K_M,9.077,5.906,4.428,1.914,llama_cpp,37.
|
39 |
-
aya-expanse-8b,Q4_0_4_4,9.077,5.647,14.074,2.05,llama_cpp,41.
|
40 |
-
aya-23-8B,Q4_0_4_4,9.077,5.647,14.113,2.051,llama_cpp,38.
|
41 |
-
Yi-1.5-9B,Q4_K_M,8.829,5.327,3.681,1.85,llama_cpp,41.
|
42 |
-
Yi-1.5-9B,Q4_0_4_4,8.829,5.035,11.33,2.0,llama_cpp,40.
|
43 |
-
Hermes-3-Llama-3.1-8B,Q4_K_M,8.03,4.913,4.375,2.078,llama_cpp,41.
|
44 |
-
Llama-3.1-8B,Q4_K_M,8.03,4.913,4.403,2.086,llama_cpp,40.
|
45 |
-
internlm2_5-7b-chat,Q4_K_M,7.738,4.711,4.4,2.133,llama_cpp,41.
|
46 |
-
Qwen2.5-7B,Q4_K_M,7.616,4.677,4.769,2.201,llama_cpp,40.
|
47 |
-
dolphin-2.9.2-qwen2-7b,Q4_K_M,7.616,4.677,4.759,2.204,llama_cpp,38.
|
48 |
-
Llama-3.1-8B,Q4_0_4_4,8.03,4.653,13.99,2.245,llama_cpp,39.
|
49 |
-
Hermes-3-Llama-3.1-8B,Q4_0_4_4,8.03,4.653,14.006,2.245,llama_cpp,40.
|
50 |
-
internlm2_5-7b-chat,Q4_0_4_4,7.738,4.451,14.036,2.31,llama_cpp,41.
|
51 |
-
mpt-7b-instruct,Q4_K_M,6.856,4.442,4.162,2.213,llama_cpp,35.
|
52 |
-
Qwen2.5-7B,Q4_0_4_4,7.616,4.425,15.563,2.386,llama_cpp,40.
|
53 |
-
dolphin-2.9.2-qwen2-7b,Q4_0_4_4,7.616,4.425,15.58,2.387,llama_cpp,37.
|
54 |
-
dolphin-2.9.3-mistral-7B-32k,Q4_K_M,7.248,4.372,4.387,2.227,llama_cpp,39.
|
55 |
-
Mistral-7B-Instruct-v0.3,Q4_K_M,7.248,4.372,4.462,2.241,llama_cpp,42.
|
56 |
-
Starling-LM-7B-beta,Q4_K_M,7.242,4.368,4.406,2.234,llama_cpp,41.
|
57 |
-
Yarn-Mistral-7b-128k,Q4_K_M,7.242,4.368,4.434,2.245,llama_cpp,40.
|
58 |
-
OLMoE-1B-7B-0924,Q4_K_M,6.919,4.212,26.902,12.119,llama_cpp,38.
|
59 |
-
OLMo-7B-0724-hf,Q4_K_M,6.888,4.183,4.706,2.339,llama_cpp,36.
|
60 |
-
dolphin-2.9.3-mistral-7B-32k,Q4_0_4_4,7.248,4.113,14.053,2.427,llama_cpp,40.
|
61 |
-
Mistral-7B-Instruct-v0.3,Q4_0_4_4,7.248,4.113,14.177,2.43,llama_cpp,42.
|
62 |
-
Starling-LM-7B-beta,Q4_0_4_4,7.242,4.108,14.068,2.427,llama_cpp,41.
|
63 |
-
Yarn-Mistral-7b-128k,Q4_0_4_4,7.242,4.108,14.139,2.436,llama_cpp,40.
|
64 |
-
Amber,Q4_K_M,6.738,4.08,4.594,2.351,llama_cpp,32.
|
65 |
-
Phi-3.5-mini-instruct,Q8_0,3.821,4.06,7.951,2.423,llama_cpp,41.
|
66 |
-
Phi-3-mini-128k-instruct,Q8_0,3.821,4.06,7.947,2.426,llama_cpp,41.
|
67 |
-
mpt-7b-instruct,Q4_0_4_4,6.856,3.964,14.569,2.533,llama_cpp,34.
|
68 |
-
OLMoE-1B-7B-0924,Q4_0_4_4,6.919,3.926,50.413,12.989,llama_cpp,
|
69 |
-
Llama-3.2-3B,Q8_0,3.607,3.833,10.31,2.83,llama_cpp,37.
|
70 |
-
Amber,Q4_0_4_4,6.738,3.825,14.442,2.57,llama_cpp,33.
|
71 |
-
Yi-1.5-6B,Q4_K_M,6.061,3.672,5.58,2.72,llama_cpp,39.
|
72 |
-
Qwen2.5-3B,Q8_0,3.397,3.61,10.473,2.939,llama_cpp,38.
|
73 |
-
Yi-1.5-6B,Q4_0_4_4,6.061,3.478,17.017,2.945,llama_cpp,39.
|
74 |
-
dolphin-2.9.4-gemma2-2b,Q8_0,3.204,3.405,13.966,3.381,llama_cpp,37.
|
75 |
-
gemma-2-2b,Q8_0,3.204,3.405,13.996,3.385,llama_cpp,37.
|
76 |
-
stable-code-instruct-3b,Q8_0,2.795,2.971,10.668,3.316,llama_cpp,29.
|
77 |
-
Phi-3.5-mini-instruct,Q4_K_M,3.821,2.393,7.502,3.936,llama_cpp,41.
|
78 |
-
Phi-3-mini-128k-instruct,Q4_K_M,3.821,2.393,7.519,3.938,llama_cpp,40.
|
79 |
-
Llama-3.2-3B,Q4_K_M,3.607,2.335,10.691,4.674,llama_cpp,37.
|
80 |
-
Llama-3.2-3B,Q4_0_4_4,3.607,2.233,31.72,5.025,llama_cpp,36.
|
81 |
-
gemma-2-2b,Q4_K_M,3.204,2.186,14.202,5.253,llama_cpp,
|
82 |
-
dolphin-2.9.4-gemma2-2b,Q4_K_M,3.204,2.186,14.218,5.253,llama_cpp,37.
|
83 |
-
Qwen2.5-3B,Q4_K_M,3.397,2.179,10.638,4.808,llama_cpp,38.
|
84 |
-
Phi-3.5-mini-instruct,Q4_0_4_4,3.821,2.175,23.369,4.428,llama_cpp,41.
|
85 |
-
Phi-3-mini-128k-instruct,Q4_0_4_4,3.821,2.175,23.461,4.436,llama_cpp,40.
|
86 |
-
gemma-2-2b,Q4_0_4_4,3.204,2.107,40.616,5.552,llama_cpp,37.
|
87 |
-
dolphin-2.9.4-gemma2-2b,Q4_0_4_4,3.204,2.107,40.977,5.58,llama_cpp,37.
|
88 |
-
Qwen2.5-3B,Q4_0_4_4,3.397,2.072,32.434,5.239,llama_cpp,37.
|
89 |
-
internlm2_5-1_8b-chat,Q8_0,1.889,2.007,19.329,5.279,llama_cpp,
|
90 |
-
SmolLM2-1.7B-Instruct,Q8_0,1.812,1.926,17.524,5.177,llama_cpp,
|
91 |
-
Qwen2.5-1.5B,Q8_0,1.777,1.889,21.927,5.793,llama_cpp,35.
|
92 |
stable-code-instruct-3b,Q4_K_M,2.795,1.707,10.803,5.564,llama_cpp,29.8
|
93 |
-
stable-code-instruct-3b,Q4_0_4_4,2.795,1.607,28.926,5.957,llama_cpp,29.
|
94 |
-
Llama-3.2-1B,Q8_0,1.498,1.592,29.722,7.295,llama_cpp,
|
95 |
-
Yi-Coder-1.5B,Q8_0,1.476,1.569,23.894,6.596,llama_cpp,29.
|
96 |
-
OLMo-1B-0724-hf,Q8_0,1.28,1.36,27.787,7.591,llama_cpp,31.
|
97 |
-
Qwen2.5-1.5B,Q4_K_M,1.777,1.172,22.326,9.56,llama_cpp,35.
|
98 |
-
internlm2_5-1_8b-chat,Q4_K_M,1.889,1.17,19.453,8.56,llama_cpp,33.
|
99 |
-
TinyLlama-1.1B-Chat-v1.0,Q8_0,1.1,1.169,28.472,8.637,llama_cpp,30.
|
100 |
-
TinyLlama_v1.1,Q8_0,1.1,1.169,28.538,8.652,llama_cpp,28.
|
101 |
-
SmolLM2-1.7B-Instruct,Q4_K_M,1.812,1.136,17.72,8.497,llama_cpp,35.
|
102 |
-
Qwen2.5-1.5B,Q4_0_4_4,1.777,1.12,65.915,10.128,llama_cpp,35.
|
103 |
-
internlm2_5-1_8b-chat,Q4_0_4_4,1.889,1.112,57.736,9.243,llama_cpp,32.
|
104 |
-
SmolLM2-1.7B-Instruct,Q4_0_4_4,1.812,1.072,50.27,9.239,llama_cpp,35.
|
105 |
-
Llama-3.2-1B,Q4_K_M,1.498,1.015,30.451,11.51,llama_cpp,33.
|
106 |
-
Llama-3.2-1B,Q4_0_4_4,1.498,0.979,86.772,12.364,llama_cpp,33.
|
107 |
-
Yi-Coder-1.5B,Q4_K_M,1.476,0.962,23.267,10.03,llama_cpp,29.
|
108 |
-
Yi-Coder-1.5B,Q4_0_4_4,1.476,0.865,67.713,11.422,llama_cpp,29.
|
109 |
-
OLMo-1B-0724-hf,Q4_K_M,1.28,0.79,28.276,12.321,llama_cpp,31.
|
110 |
-
OLMo-1B-0724-hf,Q4_0_4_4,1.28,0.746,84.882,13.339,llama_cpp,31.
|
111 |
-
Qwen2.5-0.5B,Q8_0,0.63,0.67,75.456,18.06,llama_cpp,31.
|
112 |
-
TinyLlama-1.1B-Chat-v1.0,Q4_K_M,1.1,0.667,29.44,14.305,llama_cpp,30.
|
113 |
-
TinyLlama_v1.1,Q4_K_M,1.1,0.667,29.397,14.346,llama_cpp,28.
|
114 |
-
TinyLlama-1.1B-Chat-v1.0,Q4_0_4_4,1.1,0.636,77.823,15.509,llama_cpp,30.
|
115 |
-
TinyLlama_v1.1,Q4_0_4_4,1.1,0.636,77.943,15.543,llama_cpp,28.
|
116 |
-
Qwen2.5-0.5B,Q4_K_M,0.63,0.537,52.916,22.324,llama_cpp,31.
|
117 |
-
Qwen2.5-0.5B,Q4_0_4_4,0.63,0.491,189.874,26.738,llama_cpp,31.
|
118 |
-
gpt2-medium,Q8_0,0.406,0.436,83.423,23.016,llama_cpp,29.
|
119 |
-
SmolLM2-360M-Instruct,Q8_0,0.409,0.435,79.518,22.857,llama_cpp,32.
|
120 |
-
SmolLM2-360M-Instruct,Q4_K_M,0.409,0.319,55.774,30.718,llama_cpp,31.
|
121 |
-
SmolLM2-360M-Instruct,Q4_0_4_4,0.409,0.277,173.275,37.176,llama_cpp,32.
|
122 |
-
gpt2-medium,Q4_K_M,0.406,0.269,73.615,33.913,llama_cpp,28.
|
123 |
-
gpt2-medium,Q4_0_4_4,0.406,0.247,178.73,37.89,llama_cpp,28.
|
124 |
-
gpt2,Q8_0,0.163,0.176,302.932,68.191,llama_cpp,27.
|
125 |
-
SmolLM2-135M-Instruct,Q8_0,0.163,0.173,212.146,57.992,llama_cpp,29.
|
126 |
-
SmolLM2-135M-Instruct,Q4_K_M,0.163,0.134,153.439,73.272,llama_cpp,29.
|
127 |
-
SmolLM2-135M-Instruct,Q4_0_4_4,0.163,0.12,381.667,86.735,llama_cpp,29.
|
128 |
-
gpt2,Q4_K_M,0.163,0.111,269.906,92.707,llama_cpp,27.
|
129 |
-
gpt2,Q4_0_4_4,0.163,0.105,582.32,101.509,llama_cpp,27.
|
|
|
1 |
Model,Quantization,Params (B),Model Size (GB),Prefill (tokens/s),Decode (tokens/s),Backend,MMLU Accuracy
|
2 |
+
gemma-2-9b,Q8_0,10.159,10.796,2.169,0.012,llama_cpp,42.4
|
3 |
+
DeepSeek-V2-Lite,Q4_K_M,15.706,10.36,4.304,1.764,llama_cpp,38.9
|
4 |
+
aya-expanse-8b,Q8_0,9.077,9.644,3.1,0.027,llama_cpp,41.4
|
5 |
+
aya-23-8B,Q8_0,9.077,9.644,3.174,0.027,llama_cpp,38.3
|
6 |
+
Yi-1.5-9B,Q8_0,8.829,9.382,2.585,0.019,llama_cpp,41.8
|
7 |
+
Qwen2.5-14B,Q4_K_M,14.77,8.982,1.916,0.018,llama_cpp,42.5
|
8 |
+
DeepSeek-V2-Lite,Q4_0_4_4,15.706,8.901,7.788,3.867,llama_cpp,38.6
|
9 |
+
Phi-3-medium-128k-instruct,Q4_K_M,13.96,8.566,1.819,0.02,llama_cpp,42.7
|
10 |
+
Hermes-3-Llama-3.1-8B,Q8_0,8.03,8.533,3.286,0.922,llama_cpp,41.8
|
11 |
+
Qwen2.5-14B,Q4_0_4_4,14.77,8.512,4.698,0.028,llama_cpp,42.1
|
12 |
+
internlm2_5-7b-chat,Q8_0,7.738,8.222,3.258,1.238,llama_cpp,41.7
|
13 |
+
dolphin-2.9.2-qwen2-7b,Q8_0,7.616,8.093,4.241,1.301,llama_cpp,38.5
|
14 |
+
Qwen2.5-7B,Q8_0,7.616,8.093,4.253,1.302,llama_cpp,40.4
|
15 |
+
Phi-3-medium-128k-instruct,Q4_0_4_4,13.96,7.896,4.715,0.038,llama_cpp,42.1
|
16 |
+
NexusRaven-V2-13B,Q4_K_M,13.016,7.865,2.066,0.035,llama_cpp,32.9
|
17 |
+
Mistral-7B-Instruct-v0.3,Q8_0,7.248,7.702,4.104,1.29,llama_cpp,43.2
|
18 |
+
dolphin-2.9.3-mistral-7B-32k,Q8_0,7.248,7.702,4.135,1.294,llama_cpp,40.4
|
19 |
+
Yarn-Mistral-7b-128k,Q8_0,7.242,7.695,4.082,1.292,llama_cpp,40.2
|
20 |
+
Starling-LM-7B-beta,Q8_0,7.242,7.695,4.132,1.296,llama_cpp,41.3
|
21 |
+
Mistral-Nemo-Base-2407,Q4_K_M,12.248,7.469,2.453,1.358,llama_cpp,41.2
|
22 |
+
NexusRaven-V2-13B,Q4_0_4_4,13.016,7.365,4.979,1.348,llama_cpp,33.0
|
23 |
+
OLMoE-1B-7B-0924,Q8_0,6.919,7.358,26.942,7.489,llama_cpp,38.3
|
24 |
+
OLMo-7B-0724-hf,Q8_0,6.888,7.319,4.515,1.371,llama_cpp,36.2
|
25 |
+
mpt-7b-instruct,Q8_0,6.856,7.285,4.287,1.367,llama_cpp,35.3
|
26 |
+
Amber,Q8_0,6.738,7.16,4.442,1.373,llama_cpp,33.1
|
27 |
+
Mistral-Nemo-Base-2407,Q4_0_4_4,12.248,7.064,9.103,1.48,llama_cpp,41.9
|
28 |
+
gemma-2-9b,Q4_K_M,10.159,6.508,3.531,1.629,llama_cpp,41.8
|
29 |
+
Yarn-Solar-10b-64k,Q4_K_M,10.732,6.461,2.905,1.503,llama_cpp,38.8
|
30 |
+
SOLAR-10.7B-v1.0,Q4_K_M,10.732,6.461,2.925,1.505,llama_cpp,39.4
|
31 |
+
SOLAR-10.7B-Instruct-v1.0,Q4_K_M,10.732,6.461,2.916,1.506,llama_cpp,40.4
|
32 |
+
Yi-1.5-6B,Q8_0,6.061,6.441,5.269,1.584,llama_cpp,39.9
|
33 |
+
gemma-2-9b,Q4_0_4_4,10.159,6.19,10.553,1.757,llama_cpp,42.4
|
34 |
+
SOLAR-10.7B-v1.0,Q4_0_4_4,10.732,6.072,9.315,1.635,llama_cpp,39.5
|
35 |
+
SOLAR-10.7B-Instruct-v1.0,Q4_0_4_4,10.732,6.072,9.332,1.635,llama_cpp,40.7
|
36 |
+
Yarn-Solar-10b-64k,Q4_0_4_4,10.732,6.072,9.352,1.638,llama_cpp,39.4
|
37 |
+
aya-expanse-8b,Q4_K_M,9.077,5.906,4.406,1.911,llama_cpp,41.6
|
38 |
+
aya-23-8B,Q4_K_M,9.077,5.906,4.428,1.914,llama_cpp,37.8
|
39 |
+
aya-expanse-8b,Q4_0_4_4,9.077,5.647,14.074,2.05,llama_cpp,41.5
|
40 |
+
aya-23-8B,Q4_0_4_4,9.077,5.647,14.113,2.051,llama_cpp,38.3
|
41 |
+
Yi-1.5-9B,Q4_K_M,8.829,5.327,3.681,1.85,llama_cpp,41.2
|
42 |
+
Yi-1.5-9B,Q4_0_4_4,8.829,5.035,11.33,2.0,llama_cpp,40.5
|
43 |
+
Hermes-3-Llama-3.1-8B,Q4_K_M,8.03,4.913,4.375,2.078,llama_cpp,41.2
|
44 |
+
Llama-3.1-8B,Q4_K_M,8.03,4.913,4.403,2.086,llama_cpp,40.5
|
45 |
+
internlm2_5-7b-chat,Q4_K_M,7.738,4.711,4.4,2.133,llama_cpp,41.3
|
46 |
+
Qwen2.5-7B,Q4_K_M,7.616,4.677,4.769,2.201,llama_cpp,40.2
|
47 |
+
dolphin-2.9.2-qwen2-7b,Q4_K_M,7.616,4.677,4.759,2.204,llama_cpp,38.1
|
48 |
+
Llama-3.1-8B,Q4_0_4_4,8.03,4.653,13.99,2.245,llama_cpp,39.7
|
49 |
+
Hermes-3-Llama-3.1-8B,Q4_0_4_4,8.03,4.653,14.006,2.245,llama_cpp,40.7
|
50 |
+
internlm2_5-7b-chat,Q4_0_4_4,7.738,4.451,14.036,2.31,llama_cpp,41.7
|
51 |
+
mpt-7b-instruct,Q4_K_M,6.856,4.442,4.162,2.213,llama_cpp,35.3
|
52 |
+
Qwen2.5-7B,Q4_0_4_4,7.616,4.425,15.563,2.386,llama_cpp,40.1
|
53 |
+
dolphin-2.9.2-qwen2-7b,Q4_0_4_4,7.616,4.425,15.58,2.387,llama_cpp,37.7
|
54 |
+
dolphin-2.9.3-mistral-7B-32k,Q4_K_M,7.248,4.372,4.387,2.227,llama_cpp,39.7
|
55 |
+
Mistral-7B-Instruct-v0.3,Q4_K_M,7.248,4.372,4.462,2.241,llama_cpp,42.9
|
56 |
+
Starling-LM-7B-beta,Q4_K_M,7.242,4.368,4.406,2.234,llama_cpp,41.0
|
57 |
+
Yarn-Mistral-7b-128k,Q4_K_M,7.242,4.368,4.434,2.245,llama_cpp,40.1
|
58 |
+
OLMoE-1B-7B-0924,Q4_K_M,6.919,4.212,26.902,12.119,llama_cpp,38.3
|
59 |
+
OLMo-7B-0724-hf,Q4_K_M,6.888,4.183,4.706,2.339,llama_cpp,36.2
|
60 |
+
dolphin-2.9.3-mistral-7B-32k,Q4_0_4_4,7.248,4.113,14.053,2.427,llama_cpp,40.3
|
61 |
+
Mistral-7B-Instruct-v0.3,Q4_0_4_4,7.248,4.113,14.177,2.43,llama_cpp,42.9
|
62 |
+
Starling-LM-7B-beta,Q4_0_4_4,7.242,4.108,14.068,2.427,llama_cpp,41.3
|
63 |
+
Yarn-Mistral-7b-128k,Q4_0_4_4,7.242,4.108,14.139,2.436,llama_cpp,40.3
|
64 |
+
Amber,Q4_K_M,6.738,4.08,4.594,2.351,llama_cpp,32.7
|
65 |
+
Phi-3.5-mini-instruct,Q8_0,3.821,4.06,7.951,2.423,llama_cpp,41.8
|
66 |
+
Phi-3-mini-128k-instruct,Q8_0,3.821,4.06,7.947,2.426,llama_cpp,41.4
|
67 |
+
mpt-7b-instruct,Q4_0_4_4,6.856,3.964,14.569,2.533,llama_cpp,34.9
|
68 |
+
OLMoE-1B-7B-0924,Q4_0_4_4,6.919,3.926,50.413,12.989,llama_cpp,38.0
|
69 |
+
Llama-3.2-3B,Q8_0,3.607,3.833,10.31,2.83,llama_cpp,37.5
|
70 |
+
Amber,Q4_0_4_4,6.738,3.825,14.442,2.57,llama_cpp,33.1
|
71 |
+
Yi-1.5-6B,Q4_K_M,6.061,3.672,5.58,2.72,llama_cpp,39.3
|
72 |
+
Qwen2.5-3B,Q8_0,3.397,3.61,10.473,2.939,llama_cpp,38.6
|
73 |
+
Yi-1.5-6B,Q4_0_4_4,6.061,3.478,17.017,2.945,llama_cpp,39.2
|
74 |
+
dolphin-2.9.4-gemma2-2b,Q8_0,3.204,3.405,13.966,3.381,llama_cpp,37.2
|
75 |
+
gemma-2-2b,Q8_0,3.204,3.405,13.996,3.385,llama_cpp,37.3
|
76 |
+
stable-code-instruct-3b,Q8_0,2.795,2.971,10.668,3.316,llama_cpp,29.9
|
77 |
+
Phi-3.5-mini-instruct,Q4_K_M,3.821,2.393,7.502,3.936,llama_cpp,41.1
|
78 |
+
Phi-3-mini-128k-instruct,Q4_K_M,3.821,2.393,7.519,3.938,llama_cpp,40.9
|
79 |
+
Llama-3.2-3B,Q4_K_M,3.607,2.335,10.691,4.674,llama_cpp,37.3
|
80 |
+
Llama-3.2-3B,Q4_0_4_4,3.607,2.233,31.72,5.025,llama_cpp,36.8
|
81 |
+
gemma-2-2b,Q4_K_M,3.204,2.186,14.202,5.253,llama_cpp,37.0
|
82 |
+
dolphin-2.9.4-gemma2-2b,Q4_K_M,3.204,2.186,14.218,5.253,llama_cpp,37.3
|
83 |
+
Qwen2.5-3B,Q4_K_M,3.397,2.179,10.638,4.808,llama_cpp,38.2
|
84 |
+
Phi-3.5-mini-instruct,Q4_0_4_4,3.821,2.175,23.369,4.428,llama_cpp,41.4
|
85 |
+
Phi-3-mini-128k-instruct,Q4_0_4_4,3.821,2.175,23.461,4.436,llama_cpp,40.6
|
86 |
+
gemma-2-2b,Q4_0_4_4,3.204,2.107,40.616,5.552,llama_cpp,37.4
|
87 |
+
dolphin-2.9.4-gemma2-2b,Q4_0_4_4,3.204,2.107,40.977,5.58,llama_cpp,37.1
|
88 |
+
Qwen2.5-3B,Q4_0_4_4,3.397,2.072,32.434,5.239,llama_cpp,37.9
|
89 |
+
internlm2_5-1_8b-chat,Q8_0,1.889,2.007,19.329,5.279,llama_cpp,34.0
|
90 |
+
SmolLM2-1.7B-Instruct,Q8_0,1.812,1.926,17.524,5.177,llama_cpp,36.0
|
91 |
+
Qwen2.5-1.5B,Q8_0,1.777,1.889,21.927,5.793,llama_cpp,35.8
|
92 |
stable-code-instruct-3b,Q4_K_M,2.795,1.707,10.803,5.564,llama_cpp,29.8
|
93 |
+
stable-code-instruct-3b,Q4_0_4_4,2.795,1.607,28.926,5.957,llama_cpp,29.8
|
94 |
+
Llama-3.2-1B,Q8_0,1.498,1.592,29.722,7.295,llama_cpp,34.0
|
95 |
+
Yi-Coder-1.5B,Q8_0,1.476,1.569,23.894,6.596,llama_cpp,29.3
|
96 |
+
OLMo-1B-0724-hf,Q8_0,1.28,1.36,27.787,7.591,llama_cpp,31.7
|
97 |
+
Qwen2.5-1.5B,Q4_K_M,1.777,1.172,22.326,9.56,llama_cpp,35.8
|
98 |
+
internlm2_5-1_8b-chat,Q4_K_M,1.889,1.17,19.453,8.56,llama_cpp,33.7
|
99 |
+
TinyLlama-1.1B-Chat-v1.0,Q8_0,1.1,1.169,28.472,8.637,llama_cpp,30.9
|
100 |
+
TinyLlama_v1.1,Q8_0,1.1,1.169,28.538,8.652,llama_cpp,28.2
|
101 |
+
SmolLM2-1.7B-Instruct,Q4_K_M,1.812,1.136,17.72,8.497,llama_cpp,35.4
|
102 |
+
Qwen2.5-1.5B,Q4_0_4_4,1.777,1.12,65.915,10.128,llama_cpp,35.1
|
103 |
+
internlm2_5-1_8b-chat,Q4_0_4_4,1.889,1.112,57.736,9.243,llama_cpp,32.2
|
104 |
+
SmolLM2-1.7B-Instruct,Q4_0_4_4,1.812,1.072,50.27,9.239,llama_cpp,35.1
|
105 |
+
Llama-3.2-1B,Q4_K_M,1.498,1.015,30.451,11.51,llama_cpp,33.5
|
106 |
+
Llama-3.2-1B,Q4_0_4_4,1.498,0.979,86.772,12.364,llama_cpp,33.4
|
107 |
+
Yi-Coder-1.5B,Q4_K_M,1.476,0.962,23.267,10.03,llama_cpp,29.4
|
108 |
+
Yi-Coder-1.5B,Q4_0_4_4,1.476,0.865,67.713,11.422,llama_cpp,29.2
|
109 |
+
OLMo-1B-0724-hf,Q4_K_M,1.28,0.79,28.276,12.321,llama_cpp,31.9
|
110 |
+
OLMo-1B-0724-hf,Q4_0_4_4,1.28,0.746,84.882,13.339,llama_cpp,31.5
|
111 |
+
Qwen2.5-0.5B,Q8_0,0.63,0.67,75.456,18.06,llama_cpp,31.9
|
112 |
+
TinyLlama-1.1B-Chat-v1.0,Q4_K_M,1.1,0.667,29.44,14.305,llama_cpp,30.7
|
113 |
+
TinyLlama_v1.1,Q4_K_M,1.1,0.667,29.397,14.346,llama_cpp,28.0
|
114 |
+
TinyLlama-1.1B-Chat-v1.0,Q4_0_4_4,1.1,0.636,77.823,15.509,llama_cpp,30.9
|
115 |
+
TinyLlama_v1.1,Q4_0_4_4,1.1,0.636,77.943,15.543,llama_cpp,28.3
|
116 |
+
Qwen2.5-0.5B,Q4_K_M,0.63,0.537,52.916,22.324,llama_cpp,31.4
|
117 |
+
Qwen2.5-0.5B,Q4_0_4_4,0.63,0.491,189.874,26.738,llama_cpp,31.3
|
118 |
+
gpt2-medium,Q8_0,0.406,0.436,83.423,23.016,llama_cpp,29.0
|
119 |
+
SmolLM2-360M-Instruct,Q8_0,0.409,0.435,79.518,22.857,llama_cpp,32.3
|
120 |
+
SmolLM2-360M-Instruct,Q4_K_M,0.409,0.319,55.774,30.718,llama_cpp,31.9
|
121 |
+
SmolLM2-360M-Instruct,Q4_0_4_4,0.409,0.277,173.275,37.176,llama_cpp,32.0
|
122 |
+
gpt2-medium,Q4_K_M,0.406,0.269,73.615,33.913,llama_cpp,28.8
|
123 |
+
gpt2-medium,Q4_0_4_4,0.406,0.247,178.73,37.89,llama_cpp,28.8
|
124 |
+
gpt2,Q8_0,0.163,0.176,302.932,68.191,llama_cpp,27.2
|
125 |
+
SmolLM2-135M-Instruct,Q8_0,0.163,0.173,212.146,57.992,llama_cpp,29.9
|
126 |
+
SmolLM2-135M-Instruct,Q4_K_M,0.163,0.134,153.439,73.272,llama_cpp,29.5
|
127 |
+
SmolLM2-135M-Instruct,Q4_0_4_4,0.163,0.12,381.667,86.735,llama_cpp,29.8
|
128 |
+
gpt2,Q4_K_M,0.163,0.111,269.906,92.707,llama_cpp,27.6
|
129 |
+
gpt2,Q4_0_4_4,0.163,0.105,582.32,101.509,llama_cpp,27.9
|
hardware.yaml
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
- machine: Raspberry Pi 5(8GB)
|
2 |
-
description: Cortex A76
|
3 |
hardware_provider: ARM
|
4 |
hardware_type: arm cortex a76
|
5 |
subsets:
|
|
|
1 |
- machine: Raspberry Pi 5(8GB)
|
|
|
2 |
hardware_provider: ARM
|
3 |
hardware_type: arm cortex a76
|
4 |
subsets:
|
src/assets.py
CHANGED
@@ -1,12 +1,33 @@
|
|
1 |
custom_css = """
|
2 |
.logo {
|
3 |
-
width:
|
4 |
height: auto;
|
5 |
-
margin: 0
|
6 |
-
max-width:
|
7 |
object-fit: contain;
|
8 |
overflow: visible !important;
|
|
|
|
|
9 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
.text {
|
11 |
font-size: 16px !important;
|
12 |
}
|
|
|
1 |
custom_css = """
|
2 |
.logo {
|
3 |
+
width: 200px;
|
4 |
height: auto;
|
5 |
+
margin: 0;
|
6 |
+
max-width: 220px;
|
7 |
object-fit: contain;
|
8 |
overflow: visible !important;
|
9 |
+
display: flex;
|
10 |
+
align-items: center;
|
11 |
}
|
12 |
+
|
13 |
+
.logo2 {
|
14 |
+
width: 100px;
|
15 |
+
height: auto;
|
16 |
+
margin: 0;
|
17 |
+
max-width: 120px;
|
18 |
+
object-fit: contain;
|
19 |
+
overflow: visible !important;
|
20 |
+
display: flex;
|
21 |
+
align-items: center;
|
22 |
+
}
|
23 |
+
|
24 |
+
.title {
|
25 |
+
display: flex;
|
26 |
+
align-items: center;
|
27 |
+
font-size: 24px !important;
|
28 |
+
margin-top:6px;
|
29 |
+
}
|
30 |
+
|
31 |
.text {
|
32 |
font-size: 16px !important;
|
33 |
}
|
src/content.py
CHANGED
@@ -1,10 +1,12 @@
|
|
1 |
LOGO = '<img src="https://nyunai.com/assets/images/logo.png">'
|
2 |
|
3 |
-
|
|
|
|
|
4 |
|
5 |
ABOUT = """
|
6 |
## π About
|
7 |
-
The Edge
|
8 |
Its aim is to benchmark the performance (throughput and memory)
|
9 |
of Large Language Models (LLMs) on Edge hardware - starting with a Raspberry Pi 5 (8GB) based on the ARM Cortex A76 CPU.
|
10 |
|
@@ -21,13 +23,26 @@ configuration for automated benchmarking:
|
|
21 |
All of our throughput benchmarks are run by this single tool
|
22 |
[llama-bench](https://github.com/ggerganov/llama.cpp/tree/master/examples/llama-bench)
|
23 |
using the power of [llama.cpp](https://github.com/ggerganov/llama.cpp) to guarantee reproducibility and consistency.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
"""
|
25 |
|
26 |
|
27 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results."
|
28 |
CITATION_BUTTON = r"""@misc{edge-llm-leaderboard,
|
29 |
author = {},
|
30 |
-
title = {Edge
|
31 |
year = {2024},
|
32 |
publisher = {},
|
33 |
howpublished = "\url{https://huggingface.co/spaces/nyunai/edge-llm-leaderboard}",
|
|
|
1 |
LOGO = '<img src="https://nyunai.com/assets/images/logo.png">'
|
2 |
|
3 |
+
LOGO2 = '<img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg">'
|
4 |
+
|
5 |
+
TITLE = """<h1 align="left" id="space-title"> Edge LLM Leaderboard </h1>"""
|
6 |
|
7 |
ABOUT = """
|
8 |
## π About
|
9 |
+
The Edge LLM Leaderboard is a leaderboard to gauge practical performance and quality of edge LLMs.
|
10 |
Its aim is to benchmark the performance (throughput and memory)
|
11 |
of Large Language Models (LLMs) on Edge hardware - starting with a Raspberry Pi 5 (8GB) based on the ARM Cortex A76 CPU.
|
12 |
|
|
|
23 |
All of our throughput benchmarks are run by this single tool
|
24 |
[llama-bench](https://github.com/ggerganov/llama.cpp/tree/master/examples/llama-bench)
|
25 |
using the power of [llama.cpp](https://github.com/ggerganov/llama.cpp) to guarantee reproducibility and consistency.
|
26 |
+
|
27 |
+
## π Ranking Models
|
28 |
+
|
29 |
+
We use MMLU (zero-shot) via [llama-perplexity](https://github.com/ggerganov/llama.cpp/tree/master/examples/perplexity) for performance evaluation, focusing on key metrics relevant for edge applications:
|
30 |
+
|
31 |
+
1. **Prefill Latency (Time to First Token - TTFT):** Measures the time to generate the first token. Low TTFT ensures a smooth user experience, especially for real-time interactions in edge use cases.
|
32 |
+
|
33 |
+
2. **Decode Latency (Generation Speed):** Indicates the speed of generating subsequent tokens, critical for real-time tasks like transcription or extended dialogue sessions.
|
34 |
+
|
35 |
+
3. **Model Size:** Smaller models are better suited for edge devices with limited secondary storage compared to cloud or GPU systems, making efficient deployment possible.
|
36 |
+
|
37 |
+
These metrics collectively address the unique challenges of deploying LLMs on edge devices, balancing performance, responsiveness, and memory constraints.
|
38 |
+
|
39 |
"""
|
40 |
|
41 |
|
42 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results."
|
43 |
CITATION_BUTTON = r"""@misc{edge-llm-leaderboard,
|
44 |
author = {},
|
45 |
+
title = {Edge LLM Leaderboard},
|
46 |
year = {2024},
|
47 |
publisher = {},
|
48 |
howpublished = "\url{https://huggingface.co/spaces/nyunai/edge-llm-leaderboard}",
|
src/hardware.py
CHANGED
@@ -6,7 +6,6 @@ import yaml
|
|
6 |
class HardwareConfig:
|
7 |
def __init__(self, data: Dict[str, Any]):
|
8 |
self.machine: str = data["machine"]
|
9 |
-
self.description: str = data["description"]
|
10 |
self.hardware_provider: str = data["hardware_provider"]
|
11 |
self.hardware_type: str = data["hardware_type"]
|
12 |
self.subsets: List[str] = data["subsets"]
|
|
|
6 |
class HardwareConfig:
|
7 |
def __init__(self, data: Dict[str, Any]):
|
8 |
self.machine: str = data["machine"]
|
|
|
9 |
self.hardware_provider: str = data["hardware_provider"]
|
10 |
self.hardware_type: str = data["hardware_type"]
|
11 |
self.subsets: List[str] = data["subsets"]
|
src/llm_perf.py
CHANGED
@@ -68,7 +68,7 @@ def processed_llm_perf_df(llm_perf_df):
|
|
68 |
"Decode (tokens/s)": 3,
|
69 |
"Model Size (GB)": 3,
|
70 |
"#Params (B)": 3,
|
71 |
-
"MMLU Accuracy":
|
72 |
}
|
73 |
)
|
74 |
# sort by metric
|
|
|
68 |
"Decode (tokens/s)": 3,
|
69 |
"Model Size (GB)": 3,
|
70 |
"#Params (B)": 3,
|
71 |
+
"MMLU Accuracy": 1,
|
72 |
}
|
73 |
)
|
74 |
# sort by metric
|