Spaces:
Running
Running
Yotam-Perlitz
commited on
Commit
•
f7f5843
1
Parent(s):
e2be414
add example download option
Browse filesSigned-off-by: Yotam-Perlitz <y.perlitz@ibm.com>
- assets/mybench.csv +53 -0
assets/mybench.csv
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model,score,scenario
|
2 |
+
claude_3_5_sonnet_20240620,61.16,mybench_average
|
3 |
+
gpt_4o_2024_05_13,54.96,mybench_average
|
4 |
+
gpt_4_turbo_2024_04_09,53,mybench_average
|
5 |
+
gpt_4_1106_preview,52.17,mybench_average
|
6 |
+
claude_3_opus_20240229,50.75,mybench_average
|
7 |
+
gpt_4_0125_preview,49.39,mybench_average
|
8 |
+
deepseek_coder_v2,46.79,mybench_average
|
9 |
+
gemini_1.5_pro_api_0514,44.35,mybench_average
|
10 |
+
gemma_2_27b_it,41.22,mybench_average
|
11 |
+
gemini_1.5_flash_api_0514,40.89,mybench_average
|
12 |
+
qwen2_72b_instruct,40.16,mybench_average
|
13 |
+
acm_rewrite_qwen2_72b_chat,39.6,mybench_average
|
14 |
+
mistral_large_2402,38.92,mybench_average
|
15 |
+
deepseek_chat_v2,38.39,mybench_average
|
16 |
+
claude_3_sonnet_20240229,38.08,mybench_average
|
17 |
+
meta_llama_3_70b_instruct,37.38,mybench_average
|
18 |
+
claude_3_haiku_20240307,35.32,mybench_average
|
19 |
+
mixtral_8x22b_instruct_v0.1,34.84,mybench_average
|
20 |
+
gpt_3.5_turbo_0125,34.43,mybench_average
|
21 |
+
gpt_3.5_turbo_1106,34.14,mybench_average
|
22 |
+
command_r_plus,32.86,mybench_average
|
23 |
+
mistral_small_2402,32.8,mybench_average
|
24 |
+
gemma_2_9b_it,31.57,mybench_average
|
25 |
+
phi_3_medium_4k_instruct,30.33,mybench_average
|
26 |
+
phi_3_medium_128k_instruct,29.64,mybench_average
|
27 |
+
deepseek_coder_v2_lite_instruct,29.15,mybench_average
|
28 |
+
qwen1.5_110b_chat,28.96,mybench_average
|
29 |
+
qwen1.5_72b_chat,28.89,mybench_average
|
30 |
+
command_r,27.23,mybench_average
|
31 |
+
phi_3_small_128k_instruct,27.19,mybench_average
|
32 |
+
meta_llama_3_8b_instruct,26.67,mybench_average
|
33 |
+
qwen2_7b_instruct,26.45,mybench_average
|
34 |
+
phi_3_small_8k_instruct,26.24,mybench_average
|
35 |
+
openhermes_2.5_mistral_7b,23.3,mybench_average
|
36 |
+
mixtral_8x7b_instruct_v0.1,22.5,mybench_average
|
37 |
+
mistral_7b_instruct_v0.2,19.33,mybench_average
|
38 |
+
phi_3_mini_4k_instruct,19.27,mybench_average
|
39 |
+
zephyr_7b_alpha,19.22,mybench_average
|
40 |
+
phi_3_mini_128k_instruct,18.04,mybench_average
|
41 |
+
zephyr_7b_beta,17.32,mybench_average
|
42 |
+
deepseek_v2_lite_chat,17.14,mybench_average
|
43 |
+
qwen1.5_7b_chat,16.5,mybench_average
|
44 |
+
starling_lm_7b_beta,16.44,mybench_average
|
45 |
+
vicuna_7b_v1.5_16k,13.71,mybench_average
|
46 |
+
vicuna_7b_v1.5,11.73,mybench_average
|
47 |
+
qwen1.5_4b_chat,11.13,mybench_average
|
48 |
+
llama_2_7b_chat,10.25,mybench_average
|
49 |
+
qwen2_1.5b_instruct,9.96,mybench_average
|
50 |
+
yi_6b_chat,8.79,mybench_average
|
51 |
+
qwen2_0.5b_instruct,6.78,mybench_average
|
52 |
+
qwen1.5_1.8b_chat,6.09,mybench_average
|
53 |
+
qwen1.5_0.5b_chat,5.26,mybench_average
|