Arnav Chavan committed on
Commit
abb9e2b
1 Parent(s): aa8b4d6

add llama3.2

Browse files
dataset/llm-perf-leaderboard-Raspberry Pi 5(8GB).csv CHANGED
@@ -2,6 +2,7 @@ Model,Quantization,Params (B),Model Size (GB),Prefill (tokens/s),Decode (tokens/
2
  gemma-2-9b,Q8_0,10.159,10.796,2.169,0.012,llama_cpp
3
  DeepSeek-V2-Lite,Q4_K_M,15.706,10.36,4.304,1.764,llama_cpp
4
  aya-expanse-8b,Q8_0,9.077,9.644,3.1,0.027,llama_cpp
 
5
  Yi-1.5-9B,Q8_0,8.829,9.382,2.585,0.019,llama_cpp
6
  Qwen2.5-14B,Q4_K_M,14.77,8.982,1.916,0.018,llama_cpp
7
  DeepSeek-V2-Lite,Q4_0_4_4,15.706,8.901,7.788,3.867,llama_cpp
@@ -65,6 +66,7 @@ Phi-3.5-mini-instruct,Q8_0,3.821,4.06,7.951,2.423,llama_cpp
65
  Phi-3-mini-128k-instruct,Q8_0,3.821,4.06,7.947,2.426,llama_cpp
66
  mpt-7b-instruct,Q4_0_4_4,6.856,3.964,14.569,2.533,llama_cpp
67
  OLMoE-1B-7B-0924,Q4_0_4_4,6.919,3.926,50.413,12.989,llama_cpp
 
68
  Amber,Q4_0_4_4,6.738,3.825,14.442,2.57,llama_cpp
69
  Yi-1.5-6B,Q4_K_M,6.061,3.672,5.58,2.72,llama_cpp
70
  Qwen2.5-3B,Q8_0,3.397,3.61,10.473,2.939,llama_cpp
@@ -89,6 +91,7 @@ SmolLM2-1.7B-Instruct,Q8_0,1.812,1.926,17.524,5.177,llama_cpp
89
  Qwen2.5-1.5B,Q8_0,1.777,1.889,21.927,5.793,llama_cpp
90
  stable-code-instruct-3b,Q4_K_M,2.795,1.707,10.803,5.564,llama_cpp
91
  stable-code-instruct-3b,Q4_0_4_4,2.795,1.607,28.926,5.957,llama_cpp
 
92
  Yi-Coder-1.5B,Q8_0,1.476,1.569,23.894,6.596,llama_cpp
93
  OLMo-1B-0724-hf,Q8_0,1.28,1.36,27.787,7.591,llama_cpp
94
  Qwen2.5-1.5B,Q4_K_M,1.777,1.172,22.326,9.56,llama_cpp
 
2
  gemma-2-9b,Q8_0,10.159,10.796,2.169,0.012,llama_cpp
3
  DeepSeek-V2-Lite,Q4_K_M,15.706,10.36,4.304,1.764,llama_cpp
4
  aya-expanse-8b,Q8_0,9.077,9.644,3.1,0.027,llama_cpp
5
+ aya-23-8b,Q8_0,9.077,9.644,3.174,0.027,llama_cpp
6
  Yi-1.5-9B,Q8_0,8.829,9.382,2.585,0.019,llama_cpp
7
  Qwen2.5-14B,Q4_K_M,14.77,8.982,1.916,0.018,llama_cpp
8
  DeepSeek-V2-Lite,Q4_0_4_4,15.706,8.901,7.788,3.867,llama_cpp
 
66
  Phi-3-mini-128k-instruct,Q8_0,3.821,4.06,7.947,2.426,llama_cpp
67
  mpt-7b-instruct,Q4_0_4_4,6.856,3.964,14.569,2.533,llama_cpp
68
  OLMoE-1B-7B-0924,Q4_0_4_4,6.919,3.926,50.413,12.989,llama_cpp
69
+ Llama-3.2-3B,Q8_0,3.607,3.833,10.31,2.83,llama_cpp
70
  Amber,Q4_0_4_4,6.738,3.825,14.442,2.57,llama_cpp
71
  Yi-1.5-6B,Q4_K_M,6.061,3.672,5.58,2.72,llama_cpp
72
  Qwen2.5-3B,Q8_0,3.397,3.61,10.473,2.939,llama_cpp
 
91
  Qwen2.5-1.5B,Q8_0,1.777,1.889,21.927,5.793,llama_cpp
92
  stable-code-instruct-3b,Q4_K_M,2.795,1.707,10.803,5.564,llama_cpp
93
  stable-code-instruct-3b,Q4_0_4_4,2.795,1.607,28.926,5.957,llama_cpp
94
+ Llama-3.2-1B,Q8_0,1.498,1.592,29.722,7.295,llama_cpp
95
  Yi-Coder-1.5B,Q8_0,1.476,1.569,23.894,6.596,llama_cpp
96
  OLMo-1B-0724-hf,Q8_0,1.28,1.36,27.787,7.591,llama_cpp
97
  Qwen2.5-1.5B,Q4_K_M,1.777,1.172,22.326,9.56,llama_cpp