scottsuk0306 commited on
Commit
0c155df
·
1 Parent(s): b1b6ed6
data/bgb-leaderboard-gpt-4-turbo-2024-04-09.csv ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Grounding ⚡️,Instruction Following 📝,Planning 📅,Reasoning 💡,Refinement 🔩,Safety ⚠️,Theory of Mind 🤔,Tool Usage 🛠️,Multilingual 🇬🇫,Model 🤗,Model Params (B),Model Type,Average
2
+ 4.288,4.23,4.271,4.22,4.171,4.565,4.24,3.775,3.6,gpt-4-1106-preview,Proprietary,Proprietary,4.151
3
+ 4.3,4.2,4.357,4.16,4.145,4.174,4.26,3.925,3.543,gpt-4-0125-preview,Proprietary,Proprietary,4.118
4
+ 4.238,4.26,4.357,4.21,4.079,4.058,4.08,3.85,3.643,gpt-4o-2024-05-13,Proprietary,Proprietary,4.086
5
+ 4.312,4.13,4.3,4.2,4.105,4.087,4.12,3.8,3.471,gpt-4-turbo-2024-04-09,Proprietary,Proprietary,4.058
6
+ 4.288,4.06,4.186,3.97,3.908,4.536,4.09,3.788,3.571,claude-3-opus-20240229,Proprietary,Proprietary,4.044
7
+ 4.125,4.18,4.186,3.87,3.907,4.014,4.04,3.775,3.314,meta-llama/Meta-Llama-3-70B-Instruct,70.0,Chat,3.935
8
+ 4.25,3.92,4.171,3.91,3.724,4.362,4.0,3.75,3.186,claude-3-sonnet-20240229,Proprietary,Proprietary,3.919
9
+ 4.05,4.04,4.129,4.06,3.671,4.116,4.07,3.488,3.257,gemini-pro-1.5,Proprietary,Proprietary,3.876
10
+ 4.138,4.01,4.129,3.69,3.632,4.304,3.98,3.75,3.071,claude-3-haiku-20240307,Proprietary,Proprietary,3.856
11
+ 4.15,4.01,4.229,3.94,3.882,4.043,3.99,3.588,2.771,qwen/qwen-110b-chat,110.0,Chat,3.845
12
+ 3.962,3.94,4.029,3.95,3.776,4.058,3.9,3.862,2.929,mistral-medium,Proprietary,Proprietary,3.823
13
+ 4.025,3.99,4.029,3.93,3.776,3.913,3.93,3.825,2.886,mistral-large,Proprietary,Proprietary,3.812
14
+ 4.012,4.0,4.0,3.96,3.842,4.087,3.87,3.712,2.714,MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-AWQ,AWQ,Chat,3.8
15
+ 4.138,3.91,3.971,3.92,3.453,4.217,3.96,3.625,2.671,google/gemini-flash-1.5,Proprietary,Proprietary,3.763
16
+ 3.888,3.99,4.029,3.68,3.632,3.957,3.96,3.525,2.914,Qwen/Qwen1.5-72B-Chat,72.0,Chat,3.73
17
+ 3.988,4.0,4.186,3.64,3.461,3.971,3.94,3.525,2.757,alpindale/c4ai-command-r-plus-GPTQ,GPTQ,Chat,3.719
18
+ 3.788,3.85,4.029,3.62,3.395,4.217,3.87,3.738,2.714,Qwen/Qwen1.5-32B-Chat,32.0,Chat,3.691
19
+ 4.125,3.94,3.929,3.47,3.507,3.725,3.83,3.5,2.914,meta-llama/Meta-Llama-3-8B-Instruct,8.0,Chat,3.66
20
+ 3.725,3.88,3.8,3.81,3.974,4.145,3.9,3.338,1.914,microsoft/Phi-3-mini-4k-instruct,3.8,Chat,3.609
21
+ 3.688,3.7,3.743,3.5,3.539,4.0,3.49,3.188,,mistral-community/Mixtral-8x22B-v0.1-AWQ,AWQ,Base,3.606
22
+ 3.812,4.06,3.957,3.53,3.342,3.739,3.79,3.662,2.557,NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO,46.7,Chat,3.606
23
+ 3.8,3.84,4.0,3.56,3.547,3.87,3.87,3.562,2.271,Starling-LM-7B-beta,7.0,Chat,3.591
24
+ 3.6,3.84,3.871,3.62,3.373,3.942,3.75,3.125,3.186,gemini-1.0-pro,Proprietary,Proprietary,3.59
25
+ 3.9,3.88,3.6,3.71,3.434,3.812,3.81,3.412,2.714,mistralai/Mixtral-8x7B-Instruct-v0.1,46.7,Chat,3.586
26
+ 3.925,3.85,3.843,3.65,3.434,3.884,3.79,3.138,2.614,gpt-3.5-turbo-0125,Proprietary,Proprietary,3.57
27
+ 4.025,3.79,3.829,3.51,3.434,4.0,3.67,3.162,2.557,gpt-3.5-turbo-1106,Proprietary,Proprietary,3.553
28
+ 3.812,3.77,3.857,3.42,3.382,3.826,3.9,3.412,2.443,upstage/SOLAR-10.7B-Instruct-v1.0,10.7,Chat,3.536
29
+ 3.738,3.83,3.914,3.57,3.676,3.884,3.96,3.038,2.186,01-ai/Yi-34B-Chat,34.0,Chat,3.533
30
+ 3.7,3.89,3.9,3.36,3.421,3.754,3.83,3.612,2.314,allenai/tulu-2-dpo-70b,70.0,Chat,3.531
31
+ 3.662,3.88,3.929,3.22,3.36,4.377,3.73,3.188,2.386,meta-llama/Llama-2-70b-chat-hf,70.0,Chat,3.526
32
+ 3.812,3.88,3.9,3.39,3.447,3.899,3.9,3.188,2.186,CohereForAI/c4ai-command-r-v01,35.0,Chat,3.511
33
+ 3.712,3.8,3.7,3.82,3.513,3.957,3.83,3.1,1.829,microsoft/Phi-3-mini-128k-instruct,3.8,Chat,3.473
34
+ 3.7,3.87,3.8,3.18,3.447,3.826,3.77,3.362,2.286,mistralai/Mistral-7B-Instruct-v0.2,7.0,Chat,3.471
35
+ 3.55,3.62,3.957,3.52,3.618,3.449,3.58,3.288,2.586,MaziyarPanahi/zephyr-orpo-141b-A35b-v0.1-AWQ,AWQ,Chat,3.463
36
+ 3.65,3.78,3.714,3.39,3.461,3.609,3.63,3.538,2.4,NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT,46.7,Chat,3.463
37
+ 3.712,3.58,3.5,3.3,3.237,3.87,3.59,2.775,,mistralai/Mixtral-8x7B-v0.1,46.7,Base,3.445
38
+ 3.625,3.9,3.857,3.36,3.263,3.855,3.52,3.2,2.386,Qwen/Qwen1.5-14B-Chat,14.0,Chat,3.441
39
+ 3.638,3.84,3.757,3.34,3.566,3.725,3.66,3.125,2.157,openchat/openchat-3.5-0106,7.0,Chat,3.423
40
+ 3.488,3.6,3.5,3.25,3.227,3.942,3.38,2.988,,Qwen/Qwen1.5-72B,72.0,Base,3.422
41
+ 3.712,3.72,3.829,3.33,3.224,3.913,3.54,3.025,2.229,Starling-LM-7B-alpha,7.0,Chat,3.391
42
+ 3.588,3.88,3.714,3.3,3.395,3.725,3.7,3.15,2.057,Qwen/Qwen1.5-7B-Chat,7.0,Chat,3.39
43
+ 3.662,3.74,3.8,3.26,3.355,3.377,3.69,3.062,2.171,NousResearch/Nous-Hermes-2-Mistral-7B-DPO,7.0,Chat,3.347
44
+ 3.55,3.72,3.729,3.23,3.382,3.551,3.73,3.288,1.943,HuggingFaceH4/zephyr-7b-beta,7.0,Chat,3.347
45
+ 3.512,3.54,3.529,3.27,3.24,3.58,3.39,2.512,,01-ai/Yi-34B,34.0,Base,3.322
46
+ 3.338,3.65,3.643,3.53,3.373,3.536,3.56,3.175,2.071,NousResearch/Nous-Hermes-2-Yi-34B,34.0,Chat,3.32
47
+ 3.612,3.8,3.686,3.12,3.263,3.696,3.58,3.025,2.1,kaist-ai/mistral-orpo-beta,7.0,Chat,3.32
48
+ 3.425,3.56,3.386,3.06,3.133,3.87,3.48,2.625,,meta-llama/Llama-2-70b-hf,70.0,Base,3.317
49
+ 3.662,3.92,3.686,2.76,3.079,4.319,3.71,2.6,2.114,meta-llama/Llama-2-13b-chat-hf,13.0,Chat,3.317
50
+ 3.325,3.64,3.514,3.31,3.118,3.333,3.33,2.925,,Qwen/Qwen1.5-32B,32.0,Base,3.312
51
+ 3.688,3.66,3.729,3.28,3.276,3.435,3.57,3.062,2.1,teknium/OpenHermes-2.5-Mistral-7B,7.0,Chat,3.311
52
+ 3.525,3.7,3.6,3.11,3.171,3.971,3.5,2.95,2.086,kaist-ai/mistral-orpo-alpha,7.0,Chat,3.29
53
+ 3.45,3.77,3.6,2.9,3.184,3.841,3.59,3.05,2.143,allenai/tulu-2-dpo-13b,13.0,Chat,3.281
54
+ 3.45,3.51,3.686,3.01,3.211,3.652,3.5,3.35,2.0,allenai/codetulu-2-34b,34.0,Chat,3.263
55
+ 3.588,3.53,3.371,3.25,3.25,4.043,3.44,2.788,2.0,google/gemma-1.1-7b-it,7.0,Chat,3.251
56
+ 3.25,3.56,3.371,2.96,3.197,3.667,3.42,2.562,,upstage/SOLAR-10.7B-v1.0,10.7,Base,3.248
57
+ 3.525,3.66,3.8,3.28,3.28,3.232,3.45,2.925,1.914,teknium/OpenHermes-2-Mistral-7B,7.0,Chat,3.23
58
+ 3.5,3.5,3.457,3.04,3.079,4.13,3.46,2.738,2.114,codellama/CodeLlama-34b-Instruct-hf,34.0,Chat,3.224
59
+ 3.388,3.58,3.586,2.85,2.961,4.145,3.65,2.3,2.029,meta-llama/Llama-2-7b-chat-hf,7.0,Chat,3.165
60
+ 3.238,3.76,3.5,2.79,3.079,3.754,3.68,2.438,1.971,allenai/tulu-2-dpo-7b,7.0,Chat,3.134
61
+ 3.35,3.33,3.114,3.04,3.342,3.261,3.04,2.5,,meta-llama/Meta-Llama-3-70B,70.0,Base,3.122
62
+ 3.538,3.41,3.157,3.0,3.092,2.58,3.16,2.912,,Qwen/Qwen1.5-14B,14.0,Base,3.106
63
+ 3.225,3.5,3.4,2.8,3.197,3.29,3.38,3.238,1.886,allenai/codetulu-2-13b,13.0,Chat,3.102
64
+ 3.15,3.38,3.4,2.8,3.027,3.768,3.39,2.775,2.029,allenai/tulu-2-13b,13.0,Chat,3.08
65
+ 3.262,3.34,3.357,2.77,2.895,4.043,3.38,2.6,1.886,codellama/CodeLlama-13b-Instruct-hf,13.0,Chat,3.059
66
+ 3.15,3.33,3.1,2.78,2.892,3.377,3.29,2.275,,mistral-community/Mistral-7B-v0.2,7.0,Base,3.024
67
+ 3.275,3.52,3.414,2.85,3.08,3.478,3.677,2.338,1.457,01-ai/Yi-6B-Chat,6.0,Chat,3.01
68
+ 3.225,3.3,3.243,2.86,2.763,3.406,3.09,2.162,,mistralai/Mistral-7B-v0.1,7.0,Base,3.006
69
+ 3.212,3.36,3.286,2.75,2.961,3.754,3.22,2.575,1.771,codellama/CodeLlama-7b-Instruct-hf,7.0,Chat,2.988
70
+ 3.312,3.43,3.071,2.97,3.026,3.768,3.15,2.325,1.786,google/gemma-7b-it,7.0,Chat,2.982
71
+ 3.112,3.41,3.114,2.73,2.908,3.246,3.25,2.788,1.8,allenai/codetulu-2-7b,7.0,Chat,2.929
72
+ 2.9,3.34,3.229,2.74,3.053,3.971,3.37,1.975,1.471,google/gemma-1.1-2b-it,2.0,Chat,2.894
73
+ 2.862,3.34,3.229,2.81,2.974,3.638,3.26,2.212,1.714,allenai/tulu-2-7b,7.0,Chat,2.893
74
+ 2.988,3.14,3.014,2.65,2.827,3.101,2.77,2.488,,Qwen/Qwen1.5-7B,7.0,Base,2.872
75
+ 3.138,2.92,2.857,2.8,2.763,3.406,3.2,1.788,,microsoft/phi-2,2.7,Base,2.859
76
+ 2.9,3.19,3.086,2.83,3.0,3.333,3.07,2.4,1.471,Qwen/Qwen1.5-4B-Chat,4.0,Chat,2.809
77
+ 3.112,3.54,3.271,2.47,2.776,3.101,3.31,2.212,1.414,allenai/OLMo-7B-Instruct,7.0,Chat,2.801
78
+ 2.875,3.24,3.114,2.48,2.882,3.754,3.15,1.962,1.657,google/gemma-2b-it,2.0,Chat,2.79
79
+ 2.988,2.97,2.743,2.75,2.816,2.971,2.84,2.088,,EleutherAI/llemma_34b,34.0,Base,2.771
80
+ 3.262,2.94,2.657,2.39,3.039,2.899,2.82,1.938,,meta-llama/Meta-Llama-3-8B,8.0,Base,2.743
81
+ 2.888,2.94,2.729,2.45,2.697,3.333,2.73,1.9,,Qwen/Qwen1.5-4B,4.0,Base,2.708
82
+ 2.85,2.7,2.671,2.83,2.747,4.101,2.55,1.988,1.929,codellama/CodeLlama-70b-Instruct-hf,70.0,Chat,2.707
83
+ 2.85,3.09,2.786,2.28,2.579,3.348,2.88,1.812,,meta-llama/Llama-2-13b-hf,13.0,Base,2.703
84
+ 2.95,3.27,2.957,2.4,2.684,3.333,2.93,2.088,1.186,allenai/OLMo-7B-SFT,7.0,Chat,2.644
85
+ 2.938,2.97,2.657,2.36,2.487,3.232,2.89,1.55,,01-ai/Yi-6B,6.0,Base,2.635
86
+ 2.938,2.62,2.557,2.44,2.507,2.841,2.44,2.4,,codellama/CodeLlama-70b-hf,70.0,Base,2.593
87
+ 2.812,3.27,2.914,2.28,2.855,2.681,3.13,1.988,1.3,Qwen/Qwen1.5-1.8B-Chat,1.8,Chat,2.581
88
+ 2.812,2.66,2.486,2.17,2.566,2.725,2.59,2.062,,codellama/CodeLlama-34b-hf,34.0,Base,2.509
89
+ 2.475,2.89,2.5,2.24,2.526,2.87,2.95,1.525,,microsoft/phi-1_5,1.3,Base,2.497
90
+ 2.612,2.87,2.514,2.18,2.211,3.217,2.6,1.45,,meta-llama/Llama-2-7b-hf,7.0,Base,2.457
91
+ 2.938,2.49,1.786,2.24,2.487,2.812,2.8,2.362,2.043,microsoft/Orca-2-13b,13.0,Chat,2.44
92
+ 2.538,2.85,2.386,1.98,2.605,2.478,2.55,1.525,,Qwen/Qwen1.5-1.8B,1.8,Base,2.364
93
+ 2.412,2.57,2.086,2.24,2.303,2.522,2.19,1.838,,EleutherAI/llemma_7b,7.0,Base,2.27
94
+ 2.338,2.72,2.357,2.16,2.093,2.623,2.32,1.488,,google/gemma-2b,2.0,Base,2.262
95
+ 2.3,2.3,1.957,2.01,2.092,2.449,2.15,1.812,,codellama/CodeLlama-13b-hf,13.0,Base,2.134
96
+ 2.388,2.26,1.929,1.84,2.105,2.652,2.16,1.312,,allenai/OLMo-7B,7.0,Base,2.081
97
+ 2.425,2.27,1.371,1.85,2.316,2.594,2.24,1.6,1.729,microsoft/Orca-2-7b,7.0,Chat,2.044
98
+ 2.2,2.61,2.057,1.76,2.0,2.391,2.38,1.462,1.159,Qwen/Qwen1.5-0.5B-Chat,0.5,Chat,2.002
99
+ 1.962,2.25,1.771,1.72,2.118,2.348,1.9,1.562,,codellama/CodeLlama-7b-hf,7.0,Base,1.954
100
+ 2.025,2.12,1.7,1.58,2.158,2.014,1.8,1.275,,Qwen/Qwen1.5-0.5B,0.5,Base,1.834
101
+ 1.762,1.8,1.443,1.33,1.947,2.188,1.59,1.125,,allenai/OLMo-1B,1.0,Base,1.648
102
+ 1.288,1.45,1.471,1.25,1.908,1.667,1.38,1.162,1.129,CohereForAI/aya-101,13.0,Chat,1.412
103
+ 1.325,1.49,1.186,1.34,1.579,2.159,1.2,1.012,,google/gemma-7b,7.0,Base,1.411
104
+ 1.112,1.01,1.0,1.0,1.434,1.507,1.0,1.012,,microsoft/phi-1,1.3,Base,1.135
data/bgb-leaderboard-gpt-4-turbo-2024-04-09.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c98ef5b56cdbe53a5547698e1423354abdd0b04544db9efca43dc525c7b3abd8
3
+ size 13924
data/bgb-leaderboard-prometheus-bgb-8x7b-v2.0.csv ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Grounding ⚡️,Instruction Following 📝,Planning 📅,Reasoning 💡,Refinement 🔩,Safety ⚠️,Theory of Mind 🤔,Tool Usage 🛠️,Multilingual 🇬🇫,Model 🤗,Model Params (B),Model Type,Average
2
+ 4.012,4.21,4.029,4.01,4.034,4.449,4.09,3.6,3.429,gpt-4-1106-preview,Proprietary,Proprietary,3.985
3
+ 4.175,4.14,4.1,3.98,3.789,4.235,4.06,3.788,3.414,gpt-4o-2024-05-13,Proprietary,Proprietary,3.965
4
+ 4.112,4.13,3.929,4.15,4.0,4.145,4.15,3.725,3.329,gpt-4-0125-preview,Proprietary,Proprietary,3.963
5
+ 4.112,4.09,3.986,3.92,3.862,4.116,4.06,3.688,3.357,gpt-4-turbo-2024-04-09,Proprietary,Proprietary,3.91
6
+ 4.075,3.88,4.157,3.8,3.741,4.435,4.05,3.425,3.357,claude-3-opus-20240229,Proprietary,Proprietary,3.88
7
+ 4.175,3.92,3.971,3.76,3.741,4.029,3.97,3.625,3.114,meta-llama/Meta-Llama-3-70B-Instruct,70.0,Chat,3.812
8
+ 4.075,4.03,4.0,3.83,3.776,4.13,3.96,3.325,2.771,qwen/qwen-110b-chat,110.0,Chat,3.766
9
+ 3.862,3.83,3.943,3.84,3.69,4.29,3.86,3.5,3.043,claude-3-sonnet-20240229,Proprietary,Proprietary,3.762
10
+ 3.925,3.91,3.843,3.82,3.552,4.116,3.91,3.688,2.971,mistral-medium,Proprietary,Proprietary,3.748
11
+ 4.0,3.94,3.957,3.58,3.569,4.275,3.93,3.538,2.871,claude-3-haiku-20240307,Proprietary,Proprietary,3.74
12
+ 3.875,3.88,3.871,3.83,3.5,4.145,4.01,3.288,3.1,gemini-pro-1.5,Proprietary,Proprietary,3.722
13
+ 3.9,3.83,3.757,3.66,3.638,3.957,3.94,3.712,2.871,mistral-large,Proprietary,Proprietary,3.696
14
+ 4.05,3.81,3.743,3.81,3.31,4.145,3.97,3.45,2.729,google/gemini-flash-1.5,Proprietary,Proprietary,3.669
15
+ 3.925,4.02,3.857,3.46,3.517,3.928,3.91,3.425,2.829,alpindale/c4ai-command-r-plus-GPTQ,GPTQ,Chat,3.652
16
+ 3.812,3.96,3.771,3.6,3.379,4.043,3.84,3.45,2.757,MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-AWQ,AWQ,Chat,3.624
17
+ 3.712,3.92,3.771,3.53,3.586,4.101,3.92,3.425,2.629,Qwen/Qwen1.5-72B-Chat,72.0,Chat,3.622
18
+ 3.85,3.75,3.814,3.3,3.345,3.928,3.71,3.362,3.043,meta-llama/Meta-Llama-3-8B-Instruct,8.0,Chat,3.567
19
+ 3.775,3.86,3.8,3.44,3.534,3.986,3.91,3.325,2.429,Starling-LM-7B-beta,7.0,Chat,3.562
20
+ 3.65,3.85,3.643,3.55,3.121,4.246,3.8,3.488,2.671,Qwen/Qwen1.5-32B-Chat,32.0,Chat,3.558
21
+ 3.9,3.85,3.486,3.54,3.776,4.232,3.81,3.062,1.971,microsoft/Phi-3-mini-4k-instruct,3.8,Chat,3.514
22
+ 3.65,3.89,3.571,3.45,3.138,4.014,3.78,3.2,2.743,mistralai/Mixtral-8x7B-Instruct-v0.1,46.7,Chat,3.493
23
+ 3.8,3.86,3.757,3.43,3.259,3.957,3.64,2.988,2.586,gpt-3.5-turbo-0125,Proprietary,Proprietary,3.475
24
+ 3.812,3.75,3.714,3.41,3.241,4.087,3.65,3.0,2.586,gpt-3.5-turbo-1106,Proprietary,Proprietary,3.472
25
+ 3.562,3.65,3.629,3.48,3.069,3.884,3.74,3.062,2.986,gemini-1.0-pro,Proprietary,Proprietary,3.451
26
+ 3.638,3.8,3.8,3.17,3.155,3.826,3.7,3.5,2.4,allenai/tulu-2-dpo-70b,70.0,Chat,3.443
27
+ 3.7,3.8,3.586,3.21,3.034,3.826,3.7,3.488,2.586,upstage/SOLAR-10.7B-Instruct-v1.0,10.7,Chat,3.437
28
+ 3.662,3.84,3.671,3.24,3.155,3.783,3.71,3.338,2.529,NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO,46.7,Chat,3.436
29
+ 3.525,3.59,3.5,3.44,3.207,3.942,3.37,2.762,,mistral-community/Mixtral-8x22B-v0.1-AWQ,AWQ,Base,3.417
30
+ 3.612,3.72,3.657,2.98,3.155,4.464,3.79,2.888,2.429,meta-llama/Llama-2-70b-chat-hf,70.0,Chat,3.411
31
+ 3.462,3.74,3.714,3.27,3.414,4.087,3.81,2.812,2.014,01-ai/Yi-34B-Chat,34.0,Chat,3.369
32
+ 3.588,3.77,3.614,3.26,3.121,3.884,3.5,3.062,2.486,Qwen/Qwen1.5-14B-Chat,14.0,Chat,3.365
33
+ 3.688,3.74,3.6,3.01,3.103,3.957,3.49,3.012,2.6,mistralai/Mistral-7B-Instruct-v0.2,7.0,Chat,3.356
34
+ 3.712,3.72,3.643,3.14,3.19,4.014,3.88,2.95,1.957,CohereForAI/c4ai-command-r-v01,35.0,Chat,3.356
35
+ 3.688,3.69,3.629,3.16,3.103,3.652,3.59,3.225,2.414,NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT,46.7,Chat,3.35
36
+ 3.588,3.66,3.471,3.66,3.345,3.942,3.7,2.912,1.814,microsoft/Phi-3-mini-128k-instruct,3.8,Chat,3.344
37
+ 3.525,3.76,3.514,3.26,3.31,3.841,3.61,2.888,2.314,openchat/openchat-3.5-0106,7.0,Chat,3.336
38
+ 3.288,3.62,3.686,3.25,3.345,3.551,3.45,3.062,2.543,MaziyarPanahi/zephyr-orpo-141b-A35b-v0.1-AWQ,AWQ,Chat,3.31
39
+ 3.712,3.74,3.5,3.2,2.948,3.942,3.53,2.838,2.129,Starling-LM-7B-alpha,7.0,Chat,3.282
40
+ 3.4,3.74,3.4,3.04,3.0,3.754,3.71,2.975,2.043,Qwen/Qwen1.5-7B-Chat,7.0,Chat,3.229
41
+ 3.588,3.7,3.343,2.71,2.862,4.319,3.66,2.512,2.343,meta-llama/Llama-2-13b-chat-hf,13.0,Chat,3.226
42
+ 3.55,3.45,3.186,3.14,2.759,3.812,3.33,2.538,,mistralai/Mixtral-8x7B-v0.1,46.7,Base,3.22
43
+ 3.462,3.66,3.429,2.97,2.931,3.899,3.54,2.812,2.129,kaist-ai/mistral-orpo-beta,7.0,Chat,3.204
44
+ 3.375,3.41,3.114,2.97,2.914,3.899,3.17,2.762,,Qwen/Qwen1.5-72B,72.0,Base,3.202
45
+ 3.438,3.58,3.629,3.05,3.172,3.319,3.46,2.925,2.214,NousResearch/Nous-Hermes-2-Mistral-7B-DPO,7.0,Chat,3.199
46
+ 3.388,3.56,3.443,2.86,3.103,4.029,3.45,2.825,2.114,kaist-ai/mistral-orpo-alpha,7.0,Chat,3.197
47
+ 3.575,3.53,3.557,3.07,3.172,3.304,3.42,2.875,2.243,teknium/OpenHermes-2.5-Mistral-7B,7.0,Chat,3.194
48
+ 3.488,3.56,3.314,3.12,3.052,4.072,3.44,2.675,2.029,google/gemma-1.1-7b-it,7.0,Chat,3.194
49
+ 3.2,3.63,3.557,3.24,3.207,3.609,3.55,2.85,1.9,NousResearch/Nous-Hermes-2-Yi-34B,34.0,Chat,3.194
50
+ 3.412,3.58,3.457,2.71,3.034,3.884,3.55,2.775,2.229,allenai/tulu-2-dpo-13b,13.0,Chat,3.181
51
+ 3.388,3.4,3.414,3.01,3.138,3.725,3.43,3.075,2.014,allenai/codetulu-2-34b,34.0,Chat,3.177
52
+ 3.375,3.56,3.5,3.0,2.897,3.522,3.5,3.05,1.957,HuggingFaceH4/zephyr-7b-beta,7.0,Chat,3.151
53
+ 3.488,3.37,3.186,3.05,2.879,3.681,3.21,2.162,,01-ai/Yi-34B,34.0,Base,3.128
54
+ 3.288,3.49,3.1,2.78,2.759,3.855,3.17,2.45,,meta-llama/Llama-2-70b-hf,70.0,Base,3.111
55
+ 3.125,3.52,3.143,2.99,2.81,3.536,3.07,2.638,,Qwen/Qwen1.5-32B,32.0,Base,3.104
56
+ 3.438,3.62,3.371,2.64,2.741,4.261,3.58,2.175,2.086,meta-llama/Llama-2-7b-chat-hf,7.0,Chat,3.101
57
+ 3.35,3.39,3.286,2.85,2.724,4.101,3.37,2.5,2.186,codellama/CodeLlama-34b-Instruct-hf,34.0,Chat,3.084
58
+ 3.25,3.55,3.643,2.89,2.845,3.493,3.32,2.638,1.971,teknium/OpenHermes-2-Mistral-7B,7.0,Chat,3.067
59
+ 3.088,3.37,3.114,2.75,2.759,3.565,3.25,2.225,,upstage/SOLAR-10.7B-v1.0,10.7,Base,3.015
60
+ 3.25,3.67,3.243,2.68,2.707,3.768,3.51,2.325,1.986,allenai/tulu-2-dpo-7b,7.0,Chat,3.015
61
+ 3.012,3.31,3.271,2.68,2.707,3.841,3.2,2.325,2.057,allenai/tulu-2-13b,13.0,Chat,2.934
62
+ 3.088,3.37,3.057,2.62,2.793,3.42,3.22,2.988,1.8,allenai/codetulu-2-13b,13.0,Chat,2.928
63
+ 3.388,3.3,2.914,2.72,2.862,2.623,3.06,2.55,,Qwen/Qwen1.5-14B,14.0,Base,2.927
64
+ 3.038,3.2,3.157,2.59,2.483,3.971,3.21,2.312,2.157,codellama/CodeLlama-13b-Instruct-hf,13.0,Chat,2.902
65
+ 3.25,3.22,2.786,2.76,2.69,3.261,2.92,2.312,,meta-llama/Meta-Llama-3-70B,70.0,Base,2.9
66
+ 3.15,3.34,2.814,2.91,2.828,3.652,3.17,2.2,1.657,google/gemma-7b-it,7.0,Chat,2.858
67
+ 3.138,3.18,3.029,2.58,2.586,3.826,3.19,2.212,1.7,codellama/CodeLlama-7b-Instruct-hf,7.0,Chat,2.827
68
+ 2.938,3.23,2.914,2.68,2.466,3.406,2.9,1.975,,mistralai/Mistral-7B-v0.1,7.0,Base,2.814
69
+ 2.912,3.29,3.029,2.55,2.707,4.13,3.25,1.675,1.657,google/gemma-1.1-2b-it,2.0,Chat,2.8
70
+ 3.0,3.45,3.129,2.49,2.603,3.507,3.56,1.888,1.529,01-ai/Yi-6B-Chat,6.0,Chat,2.795
71
+ 3.025,3.24,2.786,2.58,2.483,3.203,3.07,1.862,,mistral-community/Mistral-7B-v0.2,7.0,Base,2.781
72
+ 2.8,3.18,3.0,2.49,2.724,3.348,3.12,2.525,1.829,allenai/codetulu-2-7b,7.0,Chat,2.78
73
+ 2.85,3.21,3.1,2.56,2.517,3.681,3.12,2.0,1.729,allenai/tulu-2-7b,7.0,Chat,2.752
74
+ 2.962,2.75,2.714,2.69,2.569,3.435,2.98,1.65,,microsoft/phi-2,2.7,Base,2.719
75
+ 2.8,3.09,2.971,2.36,2.638,4.043,3.12,1.75,1.686,google/gemma-2b-it,2.0,Chat,2.718
76
+ 2.938,3.0,2.843,2.37,2.414,3.072,2.58,2.175,,Qwen/Qwen1.5-7B,7.0,Base,2.674
77
+ 2.8,3.1,2.871,2.53,2.862,3.348,3.0,1.938,1.471,Qwen/Qwen1.5-4B-Chat,4.0,Chat,2.658
78
+ 2.95,3.44,2.971,2.33,2.414,3.072,3.19,1.988,1.4,allenai/OLMo-7B-Instruct,7.0,Chat,2.639
79
+ 2.925,2.51,2.386,2.62,2.448,4.217,2.56,1.738,1.757,codellama/CodeLlama-70b-Instruct-hf,70.0,Chat,2.573
80
+ 2.862,3.13,2.886,2.33,2.259,3.507,2.95,1.725,1.229,allenai/OLMo-7B-SFT,7.0,Chat,2.542
81
+ 2.838,2.8,2.5,2.53,2.276,2.884,2.61,1.775,,EleutherAI/llemma_34b,34.0,Base,2.527
82
+ 2.762,3.01,2.6,2.15,2.138,3.217,2.65,1.512,,meta-llama/Llama-2-13b-hf,13.0,Base,2.505
83
+ 2.788,2.89,2.443,2.23,2.155,3.275,2.51,1.675,,Qwen/Qwen1.5-4B,4.0,Base,2.496
84
+ 2.975,2.81,2.314,2.27,2.362,2.913,2.64,1.65,,meta-llama/Meta-Llama-3-8B,8.0,Base,2.492
85
+ 2.85,3.11,2.643,2.24,2.517,2.725,3.11,1.662,1.329,Qwen/Qwen1.5-1.8B-Chat,1.8,Chat,2.465
86
+ 2.775,2.76,2.557,2.3,2.052,3.043,2.74,1.412,,01-ai/Yi-6B,6.0,Base,2.455
87
+ 2.75,2.42,2.329,2.32,1.966,2.696,2.23,2.025,,codellama/CodeLlama-70b-hf,70.0,Base,2.342
88
+ 2.45,2.84,2.257,2.12,2.172,2.913,2.62,1.275,,microsoft/phi-1_5,1.3,Base,2.331
89
+ 2.888,2.47,1.629,2.13,2.017,2.826,2.8,2.05,1.971,microsoft/Orca-2-13b,13.0,Chat,2.309
90
+ 2.462,2.87,2.257,2.05,1.793,3.159,2.4,1.262,,meta-llama/Llama-2-7b-hf,7.0,Base,2.282
91
+ 2.675,2.41,2.129,1.98,2.069,2.594,2.45,1.8,,codellama/CodeLlama-34b-hf,34.0,Base,2.263
92
+ 2.425,2.7,2.229,1.81,2.086,2.449,2.38,1.35,,Qwen/Qwen1.5-1.8B,1.8,Base,2.179
93
+ 2.25,2.65,2.086,1.94,1.862,2.638,2.31,1.288,,google/gemma-2b,2.0,Base,2.128
94
+ 2.238,2.46,1.829,1.97,1.897,2.522,2.03,1.612,,EleutherAI/llemma_7b,7.0,Base,2.07
95
+ 2.288,2.26,1.314,1.72,1.81,2.623,2.25,1.338,1.843,microsoft/Orca-2-7b,7.0,Chat,1.938
96
+ 2.125,2.19,1.743,1.76,1.828,2.667,2.02,1.15,,allenai/OLMo-7B,7.0,Base,1.935
97
+ 2.075,2.44,1.914,1.64,1.69,2.42,2.26,1.25,1.186,Qwen/Qwen1.5-0.5B-Chat,0.5,Chat,1.875
98
+ 2.1,2.06,1.757,1.71,1.621,2.275,1.89,1.588,,codellama/CodeLlama-13b-hf,13.0,Base,1.875
99
+ 1.75,2.05,1.471,1.59,1.534,2.261,1.79,1.375,,codellama/CodeLlama-7b-hf,7.0,Base,1.728
100
+ 1.925,2.04,1.6,1.51,1.5,1.957,1.72,1.188,,Qwen/Qwen1.5-0.5B,0.5,Base,1.68
101
+ 1.675,1.64,1.357,1.31,1.31,2.087,1.44,1.062,,allenai/OLMo-1B,1.0,Base,1.485
102
+ 1.25,1.4,1.357,1.34,1.362,1.667,1.4,1.15,1.157,CohereForAI/aya-101,13.0,Chat,1.343
103
+ 1.375,1.46,1.214,1.22,1.034,1.928,1.19,1.012,,google/gemma-7b,7.0,Base,1.304
104
+ 1.038,1.01,1.0,1.0,1.017,1.377,1.0,1.012,,microsoft/phi-1,1.3,Base,1.057
data/bgb-leaderboard-prometheus-bgb-8x7b-v2.0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e542c1e3dbe78dfd4bcfaac73c0a8b27e1a8f117a323b9a7b08600bb4c292034
3
+ size 13924
data/eval_by_gpt-4-turbo-2024-04-09.csv ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name,grounding,instruction_following,planning,reasoning,refinement,safety,theory_of_mind,tool_usage,multilingual
2
+ phi-1,1.1125,1.01,1.0,1.0,1.4342105263157894,1.5072463768115942,1.0,1.0125,
3
+ phi-1_5,2.475,2.89,2.5,2.24,2.526315789473684,2.869565217391304,2.95,1.525,
4
+ phi-2,3.1375,2.92,2.857142857142857,2.8,2.763157894736842,3.4057971014492754,3.2,1.7875,
5
+ Qwen1.5-0.5B,2.025,2.12,1.7,1.58,2.1578947368421053,2.0144927536231885,1.8,1.275,
6
+ Qwen1.5-1.8B,2.5375,2.85,2.3857142857142857,1.98,2.6052631578947367,2.4782608695652173,2.55,1.525,
7
+ Qwen1.5-4B,2.8875,2.94,2.7285714285714286,2.45,2.6973684210526314,3.3333333333333335,2.73,1.9,
8
+ gemma-2b,2.3375,2.72,2.357142857142857,2.16,2.0933333333333333,2.6231884057971016,2.32,1.4875,
9
+ OLMo-1B,1.7625,1.8,1.4428571428571428,1.33,1.9473684210526316,2.1884057971014492,1.59,1.125,
10
+ Qwen1.5-0.5B-Chat,2.2,2.61,2.057142857142857,1.76,2.0,2.391304347826087,2.38,1.4625,1.1594202898550725
11
+ Qwen1.5-1.8B-Chat,2.8125,3.27,2.914285714285714,2.28,2.8552631578947367,2.681159420289855,3.13,1.9875,1.3
12
+ Qwen1.5-4B-Chat,2.9,3.19,3.085714285714286,2.83,3.0,3.3333333333333335,3.07,2.4,1.4714285714285715
13
+ Phi-3-mini-4k-instruct,3.725,3.88,3.8,3.81,3.973684210526316,4.144927536231884,3.9,3.3375,1.9142857142857144
14
+ Phi-3-mini-128k-instruct,3.7125,3.8,3.7,3.82,3.513157894736842,3.9565217391304346,3.83,3.1,1.8285714285714285
15
+ gemma-2b-it,2.875,3.24,3.1142857142857143,2.48,2.8815789473684212,3.753623188405797,3.15,1.9625,1.6571428571428573
16
+ gemma-1.1-2b-it,2.9,3.34,3.2285714285714286,2.74,3.0526315789473686,3.971014492753623,3.37,1.975,1.4714285714285715
17
+ gemma-7b,1.325,1.49,1.1857142857142857,1.34,1.5789473684210527,2.1594202898550723,1.2,1.0125,
18
+ Mistral-7B-v0.1,3.225,3.3,3.242857142857143,2.86,2.763157894736842,3.4057971014492754,3.09,2.1625,
19
+ Mistral-7B-v0.2,3.15,3.33,3.1,2.78,2.891891891891892,3.3768115942028984,3.29,2.275,
20
+ Qwen1.5-7B,2.9875,3.14,3.0142857142857142,2.65,2.8266666666666667,3.101449275362319,2.77,2.4875,
21
+ Yi-6B,2.9375,2.97,2.657142857142857,2.36,2.486842105263158,3.2318840579710146,2.89,1.55,
22
+ Llama-2-7b-hf,2.6125,2.87,2.5142857142857142,2.18,2.210526315789474,3.217391304347826,2.6,1.45,
23
+ CodeLlama-7b-hf,1.9625,2.25,1.7714285714285714,1.72,2.1184210526315788,2.347826086956522,1.9,1.5625,
24
+ Meta-Llama-3-8B,3.2625,2.94,2.657142857142857,2.39,3.039473684210526,2.898550724637681,2.82,1.9375,
25
+ llemma_7b,2.4125,2.57,2.085714285714286,2.24,2.3026315789473686,2.5217391304347827,2.19,1.8375,
26
+ OLMo-7B,2.3875,2.26,1.9285714285714286,1.84,2.1052631578947367,2.652173913043478,2.16,1.3125,
27
+ gemma-7b-it,3.3125,3.43,3.0714285714285716,2.97,3.026315789473684,3.7681159420289854,3.15,2.325,1.7857142857142858
28
+ gemma-1.1-7b-it,3.5875,3.53,3.3714285714285714,3.25,3.25,4.043478260869565,3.44,2.7875,2.0
29
+ Mistral-7B-Instruct-v0.2,3.7,3.87,3.8,3.18,3.4473684210526314,3.8260869565217392,3.77,3.3625,2.2857142857142856
30
+ Qwen1.5-7B-Chat,3.5875,3.88,3.7142857142857144,3.3,3.3947368421052633,3.7246376811594204,3.7,3.15,2.057142857142857
31
+ Yi-6B-Chat,3.275,3.52,3.414285714285714,2.85,3.08,3.4782608695652173,3.676767676767677,2.3375,1.457142857142857
32
+ Llama-2-7b-chat-hf,3.3875,3.58,3.585714285714286,2.85,2.960526315789474,4.144927536231884,3.65,2.3,2.0285714285714285
33
+ CodeLlama-7b-Instruct-hf,3.2125,3.36,3.2857142857142856,2.75,2.960526315789474,3.753623188405797,3.22,2.575,1.7714285714285714
34
+ Meta-Llama-3-8B-Instruct,4.125,3.94,3.9285714285714284,3.47,3.506666666666667,3.7246376811594204,3.83,3.5,2.914285714285714
35
+ OLMo-7B-SFT,2.95,3.27,2.9571428571428573,2.4,2.6842105263157894,3.3333333333333335,2.93,2.0875,1.1857142857142857
36
+ OLMo-7B-Instruct,3.1125,3.54,3.2714285714285714,2.47,2.776315789473684,3.101449275362319,3.31,2.2125,1.4142857142857144
37
+ tulu-2-7b,2.8625,3.34,3.2285714285714286,2.81,2.973684210526316,3.63768115942029,3.26,2.2125,1.7142857142857142
38
+ tulu-2-dpo-7b,3.2375,3.76,3.5,2.79,3.0789473684210527,3.753623188405797,3.68,2.4375,1.9714285714285715
39
+ codetulu-2-7b,3.1125,3.41,3.1142857142857143,2.73,2.9078947368421053,3.246376811594203,3.25,2.7875,1.8
40
+ Orca-2-7b,2.425,2.27,1.3714285714285714,1.85,2.3157894736842106,2.5942028985507246,2.24,1.6,1.7285714285714286
41
+ openchat-3.5-0106,3.6375,3.84,3.757142857142857,3.34,3.5657894736842106,3.7246376811594204,3.66,3.125,2.157142857142857
42
+ OpenHermes-2-Mistral-7B,3.525,3.66,3.8,3.28,3.28,3.2318840579710146,3.45,2.925,1.9142857142857144
43
+ OpenHermes-2.5-Mistral-7B,3.6875,3.66,3.7285714285714286,3.28,3.276315789473684,3.4347826086956523,3.57,3.0625,2.1
44
+ Nous-Hermes-2-Mistral-7B-DPO,3.6625,3.74,3.8,3.26,3.3552631578947367,3.3768115942028984,3.69,3.0625,2.1714285714285713
45
+ Starling-LM-7B-alpha,3.7125,3.72,3.8285714285714287,3.33,3.223684210526316,3.9130434782608696,3.54,3.025,2.2285714285714286
46
+ Starling-LM-7B-beta,3.8,3.84,4.0,3.56,3.546666666666667,3.869565217391304,3.87,3.5625,2.2714285714285714
47
+ mistral-orpo-alpha,3.525,3.7,3.6,3.11,3.1710526315789473,3.971014492753623,3.5,2.95,2.085714285714286
48
+ mistral-orpo-beta,3.6125,3.8,3.6857142857142855,3.12,3.263157894736842,3.6956521739130435,3.58,3.025,2.1
49
+ zephyr-7b-beta,3.55,3.72,3.7285714285714286,3.23,3.3815789473684212,3.550724637681159,3.73,3.2875,1.9428571428571428
50
+ Qwen1.5-14B,3.5375,3.41,3.157142857142857,3.0,3.0921052631578947,2.579710144927536,3.16,2.9125,
51
+ Llama-2-13b-hf,2.85,3.09,2.7857142857142856,2.28,2.5789473684210527,3.347826086956522,2.88,1.8125,
52
+ CodeLlama-13b-hf,2.3,2.3,1.957142857142857,2.01,2.0921052631578947,2.449275362318841,2.15,1.8125,
53
+ SOLAR-10.7B-v1.0,3.25,3.56,3.3714285714285714,2.96,3.1973684210526314,3.6666666666666665,3.42,2.5625,
54
+ Qwen1.5-14B-Chat,3.625,3.9,3.857142857142857,3.36,3.263157894736842,3.8550724637681157,3.52,3.2,2.3857142857142857
55
+ SOLAR-10.7B-Instruct-v1.0,3.8125,3.77,3.857142857142857,3.42,3.3815789473684212,3.8260869565217392,3.9,3.4125,2.442857142857143
56
+ aya-101,1.2875,1.45,1.4714285714285715,1.25,1.9078947368421053,1.6666666666666667,1.38,1.1625,1.1285714285714286
57
+ Llama-2-13b-chat-hf,3.6625,3.92,3.6857142857142855,2.76,3.0789473684210527,4.318840579710145,3.71,2.6,2.1142857142857143
58
+ CodeLlama-13b-Instruct-hf,3.2625,3.34,3.357142857142857,2.77,2.8947368421052633,4.043478260869565,3.38,2.6,1.8857142857142857
59
+ tulu-2-13b,3.15,3.38,3.4,2.8,3.026666666666667,3.7681159420289854,3.39,2.775,2.0285714285714285
60
+ tulu-2-dpo-13b,3.45,3.77,3.6,2.9,3.1842105263157894,3.8405797101449277,3.59,3.05,2.142857142857143
61
+ codetulu-2-13b,3.225,3.5,3.4,2.8,3.1973684210526314,3.289855072463768,3.38,3.2375,1.8857142857142857
62
+ Orca-2-13b,2.9375,2.49,1.7857142857142858,2.24,2.486842105263158,2.8115942028985508,2.8,2.3625,2.0428571428571427
63
+ Yi-34B,3.5125,3.54,3.5285714285714285,3.27,3.24,3.579710144927536,3.39,2.5125,
64
+ llemma_34b,2.9875,2.97,2.742857142857143,2.75,2.8157894736842106,2.971014492753623,2.84,2.0875,
65
+ Qwen1.5-32B,3.325,3.64,3.5142857142857142,3.31,3.1184210526315788,3.3333333333333335,3.33,2.925,
66
+ CodeLlama-34b-hf,2.8125,2.66,2.4857142857142858,2.17,2.5657894736842106,2.7246376811594204,2.59,2.0625,
67
+ Mixtral-8x7B-v0.1,3.7125,3.58,3.5,3.3,3.236842105263158,3.869565217391304,3.59,2.775,
68
+ Yi-34B-Chat,3.7375,3.83,3.914285714285714,3.57,3.675675675675676,3.8840579710144927,3.96,3.0375,2.1857142857142855
69
+ Nous-Hermes-2-Yi-34B,3.3375,3.65,3.642857142857143,3.53,3.3733333333333335,3.536231884057971,3.56,3.175,2.0714285714285716
70
+ CodeLlama-34b-Instruct-hf,3.5,3.5,3.4571428571428573,3.04,3.0789473684210527,4.130434782608695,3.46,2.7375,2.1142857142857143
71
+ codetulu-2-34b,3.45,3.51,3.6857142857142855,3.01,3.210526315789474,3.652173913043478,3.5,3.35,2.0
72
+ Qwen1.5-32B-Chat,3.7875,3.85,4.0285714285714285,3.62,3.3947368421052633,4.217391304347826,3.87,3.7375,2.7142857142857144
73
+ Mixtral-8x7B-Instruct-v0.1,3.9,3.88,3.6,3.71,3.4342105263157894,3.8115942028985508,3.81,3.4125,2.7142857142857144
74
+ Nous-Hermes-2-Mixtral-8x7B-SFT,3.65,3.78,3.7142857142857144,3.39,3.460526315789474,3.608695652173913,3.63,3.5375,2.4
75
+ Nous-Hermes-2-Mixtral-8x7B-DPO,3.8125,4.06,3.9571428571428573,3.53,3.3421052631578947,3.739130434782609,3.79,3.6625,2.557142857142857
76
+ c4ai-command-r-v01,3.8125,3.88,3.9,3.39,3.4473684210526314,3.898550724637681,3.9,3.1875,2.1857142857142855
77
+ Llama-2-70b-hf,3.425,3.56,3.3857142857142857,3.06,3.1333333333333333,3.869565217391304,3.48,2.625,
78
+ CodeLlama-70b-hf,2.9375,2.62,2.557142857142857,2.44,2.506666666666667,2.8405797101449277,2.44,2.4,
79
+ Mixtral-8x22B-v0.1-AWQ,3.6875,3.7,3.742857142857143,3.5,3.539473684210526,4.0,3.49,3.1875,
80
+ Meta-Llama-3-70B,3.35,3.33,3.1142857142857143,3.04,3.3421052631578947,3.260869565217391,3.04,2.5,
81
+ Qwen1.5-72B,3.4875,3.6,3.5,3.25,3.2266666666666666,3.9420289855072466,3.38,2.9875,
82
+ Llama-2-70b-chat-hf,3.6625,3.88,3.9285714285714284,3.22,3.36,4.3768115942028984,3.73,3.1875,2.3857142857142857
83
+ CodeLlama-70b-Instruct-hf,2.85,2.7,2.6714285714285713,2.83,2.7466666666666666,4.101449275362318,2.55,1.9875,1.9285714285714286
84
+ tulu-2-dpo-70b,3.7,3.89,3.9,3.36,3.4210526315789473,3.753623188405797,3.83,3.6125,2.3142857142857145
85
+ c4ai-command-r-plus-GPTQ,3.9875,4.0,4.185714285714286,3.64,3.460526315789474,3.971014492753623,3.94,3.525,2.757142857142857
86
+ Meta-Llama-3-70B-Instruct,4.125,4.18,4.185714285714286,3.87,3.9066666666666667,4.0144927536231885,4.04,3.775,3.3142857142857145
87
+ Mixtral-8x22B-Instruct-v0.1-AWQ,4.0125,4.0,4.0,3.96,3.8421052631578947,4.086956521739131,3.87,3.7125,2.7142857142857144
88
+ zephyr-orpo-141b-A35b-v0.1-AWQ,3.55,3.62,3.9571428571428573,3.52,3.6184210526315788,3.449275362318841,3.58,3.2875,2.585714285714286
89
+ Qwen1.5-72B-Chat,3.8875,3.99,4.0285714285714285,3.68,3.6315789473684212,3.9565217391304346,3.96,3.525,2.914285714285714
90
+ qwen-110b-chat,4.15,4.01,4.228571428571429,3.94,3.8815789473684212,4.043478260869565,3.99,3.5875,2.7714285714285714
91
+ gpt-3.5-turbo-1106,4.025,3.79,3.8285714285714287,3.51,3.4342105263157894,4.0,3.67,3.1625,2.557142857142857
92
+ gpt-3.5-turbo-0125,3.925,3.85,3.842857142857143,3.65,3.4342105263157894,3.8840579710144927,3.79,3.1375,2.6142857142857143
93
+ gpt-4-1106-preview,4.2875,4.23,4.271428571428571,4.22,4.171052631578948,4.565217391304348,4.24,3.775,3.6
94
+ gpt-4-0125-preview,4.3,4.2,4.357142857142857,4.16,4.144736842105263,4.173913043478261,4.26,3.925,3.5428571428571427
95
+ gpt-4-turbo-2024-04-09,4.3125,4.13,4.3,4.2,4.105263157894737,4.086956521739131,4.12,3.8,3.4714285714285715
96
+ gpt-4o-2024-05-13,4.2375,4.26,4.357142857142857,4.21,4.078947368421052,4.057971014492754,4.08,3.85,3.642857142857143
97
+ mistral-medium,3.9625,3.94,4.0285714285714285,3.95,3.776315789473684,4.057971014492754,3.9,3.8625,2.9285714285714284
98
+ mistral-large,4.025,3.99,4.0285714285714285,3.93,3.776315789473684,3.9130434782608696,3.93,3.825,2.8857142857142857
99
+ gemini-1.0-pro,3.6,3.84,3.8714285714285714,3.62,3.3733333333333335,3.9420289855072466,3.75,3.125,3.1857142857142855
100
+ gemini-pro-1.5,4.05,4.04,4.128571428571429,4.06,3.6710526315789473,4.115942028985507,4.07,3.4875,3.257142857142857
101
+ gemini-flash-1.5,4.1375,3.91,3.9714285714285715,3.92,3.453333333333333,4.217391304347826,3.96,3.625,2.6714285714285713
102
+ claude-3-haiku-20240307,4.1375,4.01,4.128571428571429,3.69,3.6315789473684212,4.304347826086956,3.98,3.75,3.0714285714285716
103
+ claude-3-sonnet-20240229,4.25,3.92,4.171428571428572,3.91,3.723684210526316,4.36231884057971,4.0,3.75,3.1857142857142855
104
+ claude-3-opus-20240229,4.2875,4.06,4.185714285714286,3.97,3.9078947368421053,4.536231884057971,4.09,3.7875,3.5714285714285716
data/eval_by_prometheus-bgb-8x7b-v2.0.csv ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name,grounding,instruction_following,planning,reasoning,refinement,safety,theory_of_mind,tool_usage,multilingual
2
+ phi-1,1.0375,1.01,1.0,1.0,1.0172413793103448,1.3768115942028984,1.0,1.0125,
3
+ phi-1_5,2.45,2.84,2.257142857142857,2.12,2.1724137931034484,2.9130434782608696,2.62,1.275,
4
+ phi-2,2.9625,2.75,2.7142857142857144,2.69,2.5689655172413794,3.4347826086956523,2.98,1.65,
5
+ Qwen1.5-0.5B,1.925,2.04,1.6,1.51,1.5,1.9565217391304348,1.72,1.1875,
6
+ Qwen1.5-1.8B,2.425,2.7,2.2285714285714286,1.81,2.086206896551724,2.449275362318841,2.38,1.35,
7
+ Qwen1.5-4B,2.7875,2.89,2.442857142857143,2.23,2.1551724137931036,3.2753623188405796,2.51,1.675,
8
+ gemma-2b,2.25,2.65,2.085714285714286,1.94,1.8620689655172413,2.63768115942029,2.31,1.2875,
9
+ OLMo-1B,1.675,1.64,1.3571428571428572,1.31,1.3103448275862069,2.0869565217391304,1.44,1.0625,
10
+ Qwen1.5-0.5B-Chat,2.075,2.44,1.9142857142857144,1.64,1.6896551724137931,2.420289855072464,2.26,1.25,1.1857142857142857
11
+ Qwen1.5-1.8B-Chat,2.85,3.11,2.642857142857143,2.24,2.5172413793103448,2.7246376811594204,3.11,1.6625,1.3285714285714285
12
+ Qwen1.5-4B-Chat,2.8,3.1,2.8714285714285714,2.53,2.8620689655172415,3.347826086956522,3.0,1.9375,1.4714285714285715
13
+ Phi-3-mini-4k-instruct,3.9,3.85,3.4857142857142858,3.54,3.7758620689655173,4.231884057971015,3.81,3.0625,1.9714285714285715
14
+ Phi-3-mini-128k-instruct,3.5875,3.66,3.4714285714285715,3.66,3.3448275862068964,3.9420289855072466,3.7,2.9125,1.8142857142857143
15
+ gemma-2b-it,2.8,3.09,2.9714285714285715,2.36,2.6379310344827585,4.043478260869565,3.12,1.75,1.6857142857142857
16
+ gemma-1.1-2b-it,2.9125,3.29,3.0285714285714285,2.55,2.706896551724138,4.130434782608695,3.25,1.675,1.6571428571428573
17
+ gemma-7b,1.375,1.46,1.2142857142857142,1.22,1.0344827586206897,1.9275362318840579,1.19,1.0125,
18
+ Mistral-7B-v0.1,2.9375,3.23,2.914285714285714,2.68,2.4655172413793105,3.4057971014492754,2.9,1.975,
19
+ Mistral-7B-v0.2,3.025,3.24,2.7857142857142856,2.58,2.4827586206896552,3.2028985507246377,3.07,1.8625,
20
+ Qwen1.5-7B,2.9375,3.0,2.842857142857143,2.37,2.413793103448276,3.072463768115942,2.58,2.175,
21
+ Yi-6B,2.775,2.76,2.557142857142857,2.3,2.0517241379310347,3.0434782608695654,2.74,1.4125,
22
+ Llama-2-7b-hf,2.4625,2.87,2.257142857142857,2.05,1.793103448275862,3.1594202898550723,2.4,1.2625,
23
+ CodeLlama-7b-hf,1.75,2.05,1.4714285714285715,1.59,1.5344827586206897,2.260869565217391,1.79,1.375,
24
+ Meta-Llama-3-8B,2.975,2.81,2.3142857142857145,2.27,2.3620689655172415,2.9130434782608696,2.64,1.65,
25
+ llemma_7b,2.2375,2.46,1.8285714285714285,1.97,1.896551724137931,2.5217391304347827,2.03,1.6125,
26
+ OLMo-7B,2.125,2.19,1.7428571428571429,1.76,1.8275862068965518,2.6666666666666665,2.02,1.15,
27
+ gemma-7b-it,3.15,3.34,2.8142857142857145,2.91,2.8275862068965516,3.652173913043478,3.17,2.2,1.6571428571428573
28
+ gemma-1.1-7b-it,3.4875,3.56,3.3142857142857145,3.12,3.0517241379310347,4.072463768115942,3.44,2.675,2.0285714285714285
29
+ Mistral-7B-Instruct-v0.2,3.6875,3.74,3.6,3.01,3.103448275862069,3.9565217391304346,3.49,3.0125,2.6
30
+ Qwen1.5-7B-Chat,3.4,3.74,3.4,3.04,3.0,3.753623188405797,3.71,2.975,2.0428571428571427
31
+ Yi-6B-Chat,3.0,3.45,3.1285714285714286,2.49,2.603448275862069,3.5072463768115942,3.56,1.8875,1.5285714285714285
32
+ Llama-2-7b-chat-hf,3.4375,3.62,3.3714285714285714,2.64,2.7413793103448274,4.260869565217392,3.58,2.175,2.085714285714286
33
+ CodeLlama-7b-Instruct-hf,3.1375,3.18,3.0285714285714285,2.58,2.586206896551724,3.8260869565217392,3.19,2.2125,1.7
34
+ Meta-Llama-3-8B-Instruct,3.85,3.75,3.8142857142857145,3.3,3.3448275862068964,3.927536231884058,3.71,3.3625,3.0428571428571427
35
+ OLMo-7B-SFT,2.8625,3.13,2.8857142857142857,2.33,2.2586206896551726,3.5072463768115942,2.95,1.725,1.2285714285714286
36
+ OLMo-7B-Instruct,2.95,3.44,2.9714285714285715,2.33,2.413793103448276,3.072463768115942,3.19,1.9875,1.4
37
+ tulu-2-7b,2.85,3.21,3.1,2.56,2.5172413793103448,3.681159420289855,3.12,2.0,1.7285714285714286
38
+ tulu-2-dpo-7b,3.25,3.67,3.242857142857143,2.68,2.706896551724138,3.7681159420289854,3.51,2.325,1.9857142857142858
39
+ codetulu-2-7b,2.8,3.18,3.0,2.49,2.7241379310344827,3.347826086956522,3.12,2.525,1.8285714285714285
40
+ Orca-2-7b,2.2875,2.26,1.3142857142857143,1.72,1.8103448275862069,2.6231884057971016,2.25,1.3375,1.8428571428571427
41
+ openchat-3.5-0106,3.525,3.76,3.5142857142857142,3.26,3.310344827586207,3.8405797101449277,3.61,2.8875,2.3142857142857145
42
+ OpenHermes-2-Mistral-7B,3.25,3.55,3.642857142857143,2.89,2.8448275862068964,3.4927536231884058,3.32,2.6375,1.9714285714285715
43
+ OpenHermes-2.5-Mistral-7B,3.575,3.53,3.557142857142857,3.07,3.1724137931034484,3.3043478260869565,3.42,2.875,2.242857142857143
44
+ Nous-Hermes-2-Mistral-7B-DPO,3.4375,3.58,3.6285714285714286,3.05,3.1724137931034484,3.318840579710145,3.46,2.925,2.2142857142857144
45
+ Starling-LM-7B-alpha,3.7125,3.74,3.5,3.2,2.9482758620689653,3.9420289855072466,3.53,2.8375,2.1285714285714286
46
+ Starling-LM-7B-beta,3.775,3.86,3.8,3.44,3.5344827586206895,3.9855072463768115,3.91,3.325,2.4285714285714284
47
+ mistral-orpo-alpha,3.3875,3.56,3.442857142857143,2.86,3.103448275862069,4.028985507246377,3.45,2.825,2.1142857142857143
48
+ mistral-orpo-beta,3.4625,3.66,3.4285714285714284,2.97,2.9310344827586206,3.898550724637681,3.54,2.8125,2.1285714285714286
49
+ zephyr-7b-beta,3.375,3.56,3.5,3.0,2.896551724137931,3.5217391304347827,3.5,3.05,1.957142857142857
50
+ Qwen1.5-14B,3.3875,3.3,2.914285714285714,2.72,2.8620689655172415,2.6231884057971016,3.06,2.55,
51
+ Llama-2-13b-hf,2.7625,3.01,2.6,2.15,2.1379310344827585,3.217391304347826,2.65,1.5125,
52
+ CodeLlama-13b-hf,2.1,2.06,1.7571428571428571,1.71,1.6206896551724137,2.2753623188405796,1.89,1.5875,
53
+ SOLAR-10.7B-v1.0,3.0875,3.37,3.1142857142857143,2.75,2.7586206896551726,3.5652173913043477,3.25,2.225,
54
+ Qwen1.5-14B-Chat,3.5875,3.77,3.6142857142857143,3.26,3.1206896551724137,3.8840579710144927,3.5,3.0625,2.4857142857142858
55
+ SOLAR-10.7B-Instruct-v1.0,3.7,3.8,3.585714285714286,3.21,3.0344827586206895,3.8260869565217392,3.7,3.4875,2.585714285714286
56
+ aya-101,1.25,1.4,1.3571428571428572,1.34,1.3620689655172413,1.6666666666666667,1.4,1.15,1.1571428571428573
57
+ Llama-2-13b-chat-hf,3.5875,3.7,3.342857142857143,2.71,2.8620689655172415,4.318840579710145,3.66,2.5125,2.342857142857143
58
+ CodeLlama-13b-Instruct-hf,3.0375,3.2,3.157142857142857,2.59,2.4827586206896552,3.971014492753623,3.21,2.3125,2.157142857142857
59
+ tulu-2-13b,3.0125,3.31,3.2714285714285714,2.68,2.706896551724138,3.8405797101449277,3.2,2.325,2.057142857142857
60
+ tulu-2-dpo-13b,3.4125,3.58,3.4571428571428573,2.71,3.0344827586206895,3.8840579710144927,3.55,2.775,2.2285714285714286
61
+ codetulu-2-13b,3.0875,3.37,3.057142857142857,2.62,2.793103448275862,3.420289855072464,3.22,2.9875,1.8
62
+ Orca-2-13b,2.8875,2.47,1.6285714285714286,2.13,2.0172413793103448,2.8260869565217392,2.8,2.05,1.9714285714285715
63
+ Yi-34B,3.4875,3.37,3.1857142857142855,3.05,2.8793103448275863,3.681159420289855,3.21,2.1625,
64
+ llemma_34b,2.8375,2.8,2.5,2.53,2.2758620689655173,2.8840579710144927,2.61,1.775,
65
+ Qwen1.5-32B,3.125,3.52,3.142857142857143,2.99,2.810344827586207,3.536231884057971,3.07,2.6375,
66
+ CodeLlama-34b-hf,2.675,2.41,2.1285714285714286,1.98,2.0689655172413794,2.5942028985507246,2.45,1.8,
67
+ Mixtral-8x7B-v0.1,3.55,3.45,3.1857142857142855,3.14,2.7586206896551726,3.8115942028985508,3.33,2.5375,
68
+ Yi-34B-Chat,3.4625,3.74,3.7142857142857144,3.27,3.413793103448276,4.086956521739131,3.81,2.8125,2.0142857142857142
69
+ Nous-Hermes-2-Yi-34B,3.2,3.63,3.557142857142857,3.24,3.206896551724138,3.608695652173913,3.55,2.85,1.9
70
+ CodeLlama-34b-Instruct-hf,3.35,3.39,3.2857142857142856,2.85,2.7241379310344827,4.101449275362318,3.37,2.5,2.1857142857142855
71
+ codetulu-2-34b,3.3875,3.4,3.414285714285714,3.01,3.1379310344827585,3.7246376811594204,3.43,3.075,2.0142857142857142
72
+ Qwen1.5-32B-Chat,3.65,3.85,3.642857142857143,3.55,3.1206896551724137,4.246376811594203,3.8,3.4875,2.6714285714285713
73
+ Mixtral-8x7B-Instruct-v0.1,3.65,3.89,3.5714285714285716,3.45,3.1379310344827585,4.0144927536231885,3.78,3.2,2.742857142857143
74
+ Nous-Hermes-2-Mixtral-8x7B-SFT,3.6875,3.69,3.6285714285714286,3.16,3.103448275862069,3.652173913043478,3.59,3.225,2.414285714285714
75
+ Nous-Hermes-2-Mixtral-8x7B-DPO,3.6625,3.84,3.6714285714285713,3.24,3.1551724137931036,3.782608695652174,3.71,3.3375,2.5285714285714285
76
+ c4ai-command-r-v01,3.7125,3.72,3.642857142857143,3.14,3.189655172413793,4.0144927536231885,3.88,2.95,1.957142857142857
77
+ Llama-2-70b-hf,3.2875,3.49,3.1,2.78,2.7586206896551726,3.8550724637681157,3.17,2.45,
78
+ CodeLlama-70b-hf,2.75,2.42,2.3285714285714287,2.32,1.9655172413793103,2.6956521739130435,2.23,2.025,
79
+ Mixtral-8x22B-v0.1-AWQ,3.525,3.59,3.5,3.44,3.206896551724138,3.9420289855072466,3.37,2.7625,
80
+ Meta-Llama-3-70B,3.25,3.22,2.7857142857142856,2.76,2.689655172413793,3.260869565217391,2.92,2.3125,
81
+ Qwen1.5-72B,3.375,3.41,3.1142857142857143,2.97,2.913793103448276,3.898550724637681,3.17,2.7625,
82
+ Llama-2-70b-chat-hf,3.6125,3.72,3.657142857142857,2.98,3.1551724137931036,4.463768115942029,3.79,2.8875,2.4285714285714284
83
+ CodeLlama-70b-Instruct-hf,2.925,2.51,2.3857142857142857,2.62,2.4482758620689653,4.217391304347826,2.56,1.7375,1.7571428571428571
84
+ tulu-2-dpo-70b,3.6375,3.8,3.8,3.17,3.1551724137931036,3.8260869565217392,3.7,3.5,2.4
85
+ c4ai-command-r-plus-GPTQ,3.925,4.02,3.857142857142857,3.46,3.5172413793103448,3.927536231884058,3.91,3.425,2.8285714285714287
86
+ Meta-Llama-3-70B-Instruct,4.175,3.92,3.9714285714285715,3.76,3.7413793103448274,4.028985507246377,3.97,3.625,3.1142857142857143
87
+ Mixtral-8x22B-Instruct-v0.1-AWQ,3.8125,3.96,3.7714285714285714,3.6,3.3793103448275863,4.043478260869565,3.84,3.45,2.757142857142857
88
+ zephyr-orpo-141b-A35b-v0.1-AWQ,3.2875,3.62,3.6857142857142855,3.25,3.3448275862068964,3.550724637681159,3.45,3.0625,2.5428571428571427
89
+ Qwen1.5-72B-Chat,3.7125,3.92,3.7714285714285714,3.53,3.586206896551724,4.101449275362318,3.92,3.425,2.6285714285714286
90
+ qwen-110b-chat,4.075,4.03,4.0,3.83,3.7758620689655173,4.130434782608695,3.96,3.325,2.7714285714285714
91
+ gpt-3.5-turbo-1106,3.8125,3.75,3.7142857142857144,3.41,3.2413793103448274,4.086956521739131,3.65,3.0,2.585714285714286
92
+ gpt-3.5-turbo-0125,3.8,3.86,3.757142857142857,3.43,3.2586206896551726,3.9565217391304346,3.64,2.9875,2.585714285714286
93
+ gpt-4-1106-preview,4.0125,4.21,4.0285714285714285,4.01,4.0344827586206895,4.449275362318841,4.09,3.6,3.4285714285714284
94
+ gpt-4-0125-preview,4.1125,4.13,3.9285714285714284,4.15,4.0,4.144927536231884,4.15,3.725,3.3285714285714287
95
+ gpt-4-turbo-2024-04-09,4.1125,4.09,3.9857142857142858,3.92,3.8620689655172415,4.115942028985507,4.06,3.6875,3.357142857142857
96
+ gpt-4o-2024-05-13,4.175,4.14,4.1,3.98,3.789473684210526,4.235294117647059,4.06,3.7875,3.414285714285714
97
+ mistral-medium,3.925,3.91,3.842857142857143,3.82,3.5517241379310347,4.115942028985507,3.91,3.6875,2.9714285714285715
98
+ mistral-large,3.9,3.83,3.757142857142857,3.66,3.6379310344827585,3.9565217391304346,3.94,3.7125,2.8714285714285714
99
+ gemini-1.0-pro,3.5625,3.65,3.6285714285714286,3.48,3.0689655172413794,3.8840579710144927,3.74,3.0625,2.9857142857142858
100
+ gemini-pro-1.5,3.875,3.88,3.8714285714285714,3.83,3.5,4.144927536231884,4.01,3.2875,3.1
101
+ gemini-flash-1.5,4.05,3.81,3.742857142857143,3.81,3.310344827586207,4.144927536231884,3.97,3.45,2.7285714285714286
102
+ claude-3-haiku-20240307,4.0,3.94,3.9571428571428573,3.58,3.5689655172413794,4.27536231884058,3.93,3.5375,2.8714285714285714
103
+ claude-3-sonnet-20240229,3.8625,3.83,3.942857142857143,3.84,3.689655172413793,4.2898550724637685,3.86,3.5,3.0428571428571427
104
+ claude-3-opus-20240229,4.075,3.88,4.1571428571428575,3.8,3.7413793103448274,4.434782608695652,4.05,3.425,3.357142857142857
data/llm-perf-leaderboard-1xA10.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/llm-perf-leaderboard-1xA100.csv ADDED
@@ -0,0 +1,489 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Experiment 🧪,Model 🤗,Prefill (s),Per Token (s),Decode (tokens/s),Energy (tokens/kWh),Memory (MB),Backend 🏭,Precision 📥,Quantization 🗜️,Attention 👁️,Kernel ⚛️,Architecture 🏛️,End-to-End (s),Open LLM Score (%),Params (B)
2
+ 4bit-gptq-exllama-v2-eager,Qwen/Qwen1.5-110B,2.486,2.3411865234375,0.398,2661.495,65311.037,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,Qwen2ForCausalLM,149.921,75.42,110
3
+ 4bit-gptq-exllama-v1-fa2,Qwen/Qwen1.5-110B,2.513,2.368027587890625,0.421,2633.025,65311.036,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,Qwen2ForCausalLM,151.799,75.42,110
4
+ 4bit-gptq-exllama-v1-eager,Qwen/Qwen1.5-110B,2.515,2.3592529296875,0.424,2662.679,65311.037,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,Qwen2ForCausalLM,151.175,75.42,110
5
+ 4bit-gptq-exllama-v2-sdpa,Qwen/Qwen1.5-110B,2.499,2.349084716796875,0.425,2666.191,65311.036,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV2,Qwen2ForCausalLM,150.5,75.42,110
6
+ 4bit-gptq-exllama-v1-sdpa,Qwen/Qwen1.5-110B,2.48,2.33512646484375,0.428,2664.976,65311.036,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV1,Qwen2ForCausalLM,149.561,75.42,110
7
+ 4bit-bnb-fa2,Qwen/Qwen1.5-110B,4.467,0.2968944702148438,3.363,23268.535,65013.93,pytorch,float16,BnB.4bit,FAv2,No Kernel,Qwen2ForCausalLM,23.206,75.42,110
8
+ 4bit-bnb-eager,Qwen/Qwen1.5-110B,4.446,0.2606571655273437,3.835,25487.07,65014.062,pytorch,float16,BnB.4bit,Eager,No Kernel,Qwen2ForCausalLM,20.873,75.42,110
9
+ 4bit-bnb-sdpa,Qwen/Qwen1.5-110B,4.436,0.2596505737304687,3.848,26017.077,65013.93,pytorch,float16,BnB.4bit,SDPA,No Kernel,Qwen2ForCausalLM,20.818,75.42,110
10
+ 4bit-gptq-exllama-v1-fa2,Qwen/Qwen1.5-72B,1.659,1.5589693603515624,0.64,3969.019,45374.151,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,Qwen2ForCausalLM,100.136,72.91,72
11
+ 4bit-gptq-exllama-v2-fa2,Qwen/Qwen1.5-72B,1.64,1.544901611328125,0.647,4022.375,45374.151,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,Qwen2ForCausalLM,98.929,72.91,72
12
+ 4bit-gptq-exllama-v2-sdpa,Qwen/Qwen1.5-72B,1.645,1.5385220947265623,0.65,4074.884,45374.151,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV2,Qwen2ForCausalLM,98.609,72.91,72
13
+ 4bit-gptq-exllama-v1-sdpa,Qwen/Qwen1.5-72B,1.64,1.5355289306640625,0.651,4083.113,45374.151,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV1,Qwen2ForCausalLM,98.385,72.91,72
14
+ 4bit-gptq-exllama-v1-eager,Qwen/Qwen1.5-72B,1.641,1.5341466064453124,0.652,4086.555,45374.152,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,Qwen2ForCausalLM,98.294,72.91,72
15
+ 4bit-gptq-exllama-v2-eager,Qwen/Qwen1.5-72B,1.641,1.5337093505859376,0.652,4073.93,45374.152,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,Qwen2ForCausalLM,98.238,72.91,72
16
+ 8bit-bnb-eager,Qwen/Qwen1.5-72B,0.266,0.2631034851074219,3.788,30333.894,77840.722,pytorch,float16,BnB.8bit,Eager,No Kernel,Qwen2ForCausalLM,16.875,72.91,72
17
+ 8bit-bnb-fa2,Qwen/Qwen1.5-72B,0.263,0.2624276428222656,3.795,30225.887,77841.345,pytorch,float16,BnB.8bit,FAv2,No Kernel,Qwen2ForCausalLM,16.843,72.91,72
18
+ 8bit-bnb-sdpa,Qwen/Qwen1.5-72B,0.259,0.2576486511230468,3.847,31114.81,77841.345,pytorch,float16,BnB.8bit,SDPA,No Kernel,Qwen2ForCausalLM,16.578,72.91,72
19
+ 4bit-bnb-fa2,Qwen/Qwen1.5-72B,2.914,0.209786880493164,4.759,34328.499,44278.471,pytorch,float16,BnB.4bit,FAv2,No Kernel,Qwen2ForCausalLM,16.154,72.91,72
20
+ 4bit-bnb-eager,Qwen/Qwen1.5-72B,2.907,0.1775267791748047,5.625,38909.536,44278.602,pytorch,float16,BnB.4bit,Eager,No Kernel,Qwen2ForCausalLM,14.101,72.91,72
21
+ 4bit-bnb-sdpa,Qwen/Qwen1.5-72B,2.884,0.1753354187011718,5.697,39707.214,44278.471,pytorch,float16,BnB.4bit,SDPA,No Kernel,Qwen2ForCausalLM,13.939,72.91,72
22
+ 4bit-gptq-exllama-v1-fa2,Qwen/Qwen1.5-32B,0.759,0.7142164306640625,1.4,8742.68,21326.311,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,Unknown,45.777,70.47,32
23
+ 4bit-gptq-exllama-v2-fa2,Qwen/Qwen1.5-32B,0.758,0.7138324584960938,1.4,8749.008,21326.311,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,Unknown,45.745,70.47,32
24
+ 4bit-gptq-exllama-v1-eager,Qwen/Qwen1.5-32B,0.749,0.697112548828125,1.435,8958.627,21326.312,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,Unknown,44.666,70.47,32
25
+ 4bit-gptq-exllama-v2-eager,Qwen/Qwen1.5-32B,0.748,0.69574755859375,1.437,8980.23,21326.312,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,Unknown,44.571,70.47,32
26
+ 4bit-gptq-exllama-v1-sdpa,Qwen/Qwen1.5-32B,0.744,0.6952366333007812,1.438,8995.519,21326.311,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV1,Unknown,44.558,70.47,32
27
+ 4bit-gptq-exllama-v2-sdpa,Qwen/Qwen1.5-32B,0.742,0.6944102172851563,1.44,9019.264,21326.311,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV2,Unknown,44.505,70.47,32
28
+ 8bit-bnb-eager,Qwen/Qwen1.5-32B,0.215,0.214687744140625,4.64,37808.933,35661.209,pytorch,float16,BnB.8bit,Eager,No Kernel,Unknown,13.77,70.47,32
29
+ 8bit-bnb-fa2,Qwen/Qwen1.5-32B,0.212,0.212853759765625,4.679,38609.39,35661.209,pytorch,float16,BnB.8bit,FAv2,No Kernel,Unknown,13.657,70.47,32
30
+ 8bit-bnb-sdpa,Qwen/Qwen1.5-32B,0.207,0.2074357757568359,4.803,39412.893,35661.209,pytorch,float16,BnB.8bit,SDPA,No Kernel,Unknown,13.313,70.47,32
31
+ 4bit-bnb-fa2,Qwen/Qwen1.5-32B,1.231,0.1302108154296875,7.65,57873.387,21184.84,pytorch,float16,BnB.4bit,FAv2,No Kernel,Unknown,9.455,70.47,32
32
+ 4bit-bnb-eager,Qwen/Qwen1.5-32B,1.221,0.1254154205322265,7.962,58872.682,21184.971,pytorch,float16,BnB.4bit,Eager,No Kernel,Unknown,9.122,70.47,32
33
+ 4bit-bnb-sdpa,Qwen/Qwen1.5-32B,1.216,0.1207705612182617,8.237,61166.327,21184.84,pytorch,float16,BnB.4bit,SDPA,No Kernel,Unknown,8.858,70.47,32
34
+ bfloat16-sdpa,Qwen/Qwen1.5-32B,0.113,0.0539330558776855,18.422,114101.162,66512.805,pytorch,bfloat16,Unquantized,SDPA,No Kernel,Unknown,3.525,70.47,32
35
+ 4bit-gptq-exllama-v1-fa2,Qwen/Qwen1.5-32B,0.759,0.7142164306640625,1.4,8742.68,21326.311,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,Qwen2ForCausalLM,45.777,70.39,32
36
+ 4bit-gptq-exllama-v2-fa2,Qwen/Qwen1.5-32B,0.758,0.7138324584960938,1.4,8749.008,21326.311,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,Qwen2ForCausalLM,45.745,70.39,32
37
+ 4bit-gptq-exllama-v1-eager,Qwen/Qwen1.5-32B,0.749,0.697112548828125,1.435,8958.627,21326.312,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,Qwen2ForCausalLM,44.666,70.39,32
38
+ 4bit-gptq-exllama-v2-eager,Qwen/Qwen1.5-32B,0.748,0.69574755859375,1.437,8980.23,21326.312,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,Qwen2ForCausalLM,44.571,70.39,32
39
+ 4bit-gptq-exllama-v1-sdpa,Qwen/Qwen1.5-32B,0.744,0.6952366333007812,1.438,8995.519,21326.311,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV1,Qwen2ForCausalLM,44.558,70.39,32
40
+ 4bit-gptq-exllama-v2-sdpa,Qwen/Qwen1.5-32B,0.742,0.6944102172851563,1.44,9019.264,21326.311,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV2,Qwen2ForCausalLM,44.505,70.39,32
41
+ 8bit-bnb-eager,Qwen/Qwen1.5-32B,0.215,0.214687744140625,4.64,37808.933,35661.209,pytorch,float16,BnB.8bit,Eager,No Kernel,Qwen2ForCausalLM,13.77,70.39,32
42
+ 8bit-bnb-fa2,Qwen/Qwen1.5-32B,0.212,0.212853759765625,4.679,38609.39,35661.209,pytorch,float16,BnB.8bit,FAv2,No Kernel,Qwen2ForCausalLM,13.657,70.39,32
43
+ 8bit-bnb-sdpa,Qwen/Qwen1.5-32B,0.207,0.2074357757568359,4.803,39412.893,35661.209,pytorch,float16,BnB.8bit,SDPA,No Kernel,Qwen2ForCausalLM,13.313,70.39,32
44
+ 4bit-bnb-fa2,Qwen/Qwen1.5-32B,1.231,0.1302108154296875,7.65,57873.387,21184.84,pytorch,float16,BnB.4bit,FAv2,No Kernel,Qwen2ForCausalLM,9.455,70.39,32
45
+ 4bit-bnb-eager,Qwen/Qwen1.5-32B,1.221,0.1254154205322265,7.962,58872.682,21184.971,pytorch,float16,BnB.4bit,Eager,No Kernel,Qwen2ForCausalLM,9.122,70.39,32
46
+ 4bit-bnb-sdpa,Qwen/Qwen1.5-32B,1.216,0.1207705612182617,8.237,61166.327,21184.84,pytorch,float16,BnB.4bit,SDPA,No Kernel,Qwen2ForCausalLM,8.858,70.39,32
47
+ bfloat16-sdpa,Qwen/Qwen1.5-32B,0.113,0.0539330558776855,18.422,114101.162,66512.805,pytorch,bfloat16,Unquantized,SDPA,No Kernel,Qwen2ForCausalLM,3.525,70.39,32
48
+ float32-eager,internlm/internlm2-20b,0.673,0.0580843505859375,17.204,107334.978,81737.513,pytorch,float32,Unquantized,Eager,No Kernel,Unknown,4.333,69.75,20
49
+ bfloat16-eager,internlm/internlm2-20b,0.085,0.0520796165466308,18.595,131877.402,40915.737,pytorch,bfloat16,Unquantized,Eager,No Kernel,Unknown,3.48,69.75,20
50
+ float16-eager,internlm/internlm2-20b,0.083,0.0429731826782226,22.885,147138.442,40915.713,pytorch,float16,Unquantized,Eager,No Kernel,Unknown,2.843,69.75,20
51
+ float16-fa2,internlm/internlm2-20b,0.08,0.0387727355957031,25.159,141982.091,40909.85,pytorch,float16,Unquantized,FAv2,No Kernel,Unknown,2.554,69.75,20
52
+ bfloat16-fa2,internlm/internlm2-20b,0.08,0.0382894096374511,25.999,164064.681,40909.85,pytorch,bfloat16,Unquantized,FAv2,No Kernel,Unknown,2.496,69.75,20
53
+ 4bit-gptq-exllama-v1-fa2,01-ai/Yi-34B,0.806,0.7479408569335938,1.337,8398.815,20339.706,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,LlamaForCausalLM,47.925,69.42,34
54
+ 4bit-gptq-exllama-v1-eager,01-ai/Yi-34B,0.802,0.7413575439453125,1.349,8446.395,20339.707,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,LlamaForCausalLM,47.518,69.42,34
55
+ 4bit-gptq-exllama-v2-fa2,01-ai/Yi-34B,0.797,0.7391539306640625,1.353,8502.111,20339.706,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,LlamaForCausalLM,47.378,69.42,34
56
+ 4bit-gptq-exllama-v1-sdpa,01-ai/Yi-34B,0.793,0.7362805786132812,1.358,8507.675,20339.706,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV1,LlamaForCausalLM,47.181,69.42,34
57
+ 4bit-gptq-exllama-v2-sdpa,01-ai/Yi-34B,0.791,0.7357869873046875,1.359,8497.601,20339.706,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV2,LlamaForCausalLM,47.141,69.42,34
58
+ 4bit-gptq-exllama-v2-eager,01-ai/Yi-34B,0.795,0.7347783813476563,1.361,8496.697,20339.707,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,LlamaForCausalLM,47.088,69.42,34
59
+ 8bit-bnb-fa2,01-ai/Yi-34B,0.222,0.2120785980224609,4.63,40260.489,35777.361,pytorch,float16,BnB.8bit,FAv2,No Kernel,LlamaForCausalLM,13.934,69.42,34
60
+ 8bit-bnb-sdpa,01-ai/Yi-34B,0.207,0.2068643798828125,4.797,39300.981,35784.527,pytorch,float16,BnB.8bit,SDPA,No Kernel,LlamaForCausalLM,13.35,69.42,34
61
+ 8bit-bnb-eager,01-ai/Yi-34B,0.208,0.2074746856689453,4.803,39386.631,35784.558,pytorch,float16,BnB.8bit,Eager,No Kernel,LlamaForCausalLM,13.307,69.42,34
62
+ 4bit-bnb-eager,01-ai/Yi-34B,1.265,0.1208412170410156,8.253,60268.352,20257.332,pytorch,float16,BnB.4bit,Eager,No Kernel,LlamaForCausalLM,8.88,69.42,34
63
+ 4bit-bnb-fa2,01-ai/Yi-34B,1.263,0.1186662368774414,8.336,61121.736,20257.201,pytorch,float16,BnB.4bit,FAv2,No Kernel,LlamaForCausalLM,8.823,69.42,34
64
+ 4bit-bnb-sdpa,01-ai/Yi-34B,1.259,0.1156628494262695,8.549,61089.017,20257.201,pytorch,float16,BnB.4bit,SDPA,No Kernel,LlamaForCausalLM,8.539,69.42,34
65
+ bfloat16-eager,01-ai/Yi-34B,0.139,0.0625111045837402,15.957,98579.367,69113.77,pytorch,bfloat16,Unquantized,Eager,No Kernel,LlamaForCausalLM,4.082,69.42,34
66
+ float16-eager,01-ai/Yi-34B,0.139,0.0617041931152343,16.108,100686.062,69113.741,pytorch,float16,Unquantized,Eager,No Kernel,LlamaForCausalLM,4.031,69.42,34
67
+ float16-sdpa,01-ai/Yi-34B,0.135,0.0574320640563964,17.306,108446.829,69113.726,pytorch,float16,Unquantized,SDPA,No Kernel,LlamaForCausalLM,3.771,69.42,34
68
+ bfloat16-sdpa,01-ai/Yi-34B,0.131,0.0570808334350585,17.41,109196.143,69113.726,pytorch,bfloat16,Unquantized,SDPA,No Kernel,LlamaForCausalLM,3.737,69.42,34
69
+ bfloat16-fa2,01-ai/Yi-34B,0.129,0.0554455032348632,17.872,110149.156,69106.595,pytorch,bfloat16,Unquantized,FAv2,No Kernel,LlamaForCausalLM,3.642,69.42,34
70
+ float16-fa2,01-ai/Yi-34B,0.131,0.0554065933227539,17.94,111925.191,69106.595,pytorch,float16,Unquantized,FAv2,No Kernel,LlamaForCausalLM,3.637,69.42,34
71
+ 4bit-gptq-exllama-v2-fa2,Qwen/Qwen1.5-14B,0.322,0.302266357421875,3.307,20680.95,11417.443,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,Qwen2ForCausalLM,19.376,66.7,14
72
+ 4bit-gptq-exllama-v1-fa2,Qwen/Qwen1.5-14B,0.319,0.298787841796875,3.343,20946.25,11417.443,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,Qwen2ForCausalLM,19.169,66.7,14
73
+ 4bit-gptq-exllama-v2-eager,Qwen/Qwen1.5-14B,0.318,0.2930442199707031,3.411,21276.966,11417.444,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,Qwen2ForCausalLM,18.787,66.7,14
74
+ 4bit-gptq-exllama-v1-eager,Qwen/Qwen1.5-14B,0.317,0.29292236328125,3.412,21318.34,11417.444,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,Qwen2ForCausalLM,18.781,66.7,14
75
+ 4bit-gptq-exllama-v2-sdpa,Qwen/Qwen1.5-14B,0.315,0.292917236328125,3.413,21325.519,11417.443,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV2,Qwen2ForCausalLM,18.772,66.7,14
76
+ 4bit-gptq-exllama-v1-sdpa,Qwen/Qwen1.5-14B,0.316,0.2926960754394531,3.415,21284.671,11417.443,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV1,Qwen2ForCausalLM,18.757,66.7,14
77
+ 8bit-bnb-eager,Qwen/Qwen2-beta-14B,0.134,0.1311662139892578,7.571,63129.015,17162.983,pytorch,float16,BnB.8bit,Eager,No Kernel,Unknown,8.407,66.7,14
78
+ 8bit-bnb-fa2,Qwen/Qwen2-beta-14B,0.132,0.1310904388427734,7.616,63784.45,17162.139,pytorch,float16,BnB.8bit,FAv2,No Kernel,Unknown,8.392,66.7,14
79
+ 8bit-bnb-eager,Qwen/Qwen1.5-14B,0.133,0.1306890258789062,7.632,63327.692,17162.983,pytorch,float16,BnB.8bit,Eager,No Kernel,Qwen2ForCausalLM,8.375,66.7,14
80
+ 8bit-bnb-fa2,Qwen/Qwen1.5-14B,0.131,0.13016064453125,7.672,62918.239,17162.139,pytorch,float16,BnB.8bit,FAv2,No Kernel,Qwen2ForCausalLM,8.334,66.7,14
81
+ 8bit-bnb-sdpa,Qwen/Qwen2-beta-14B,0.128,0.1272565765380859,7.839,63963.619,17162.139,pytorch,float16,BnB.8bit,SDPA,No Kernel,Unknown,8.167,66.7,14
82
+ 8bit-bnb-sdpa,Qwen/Qwen1.5-14B,0.127,0.1261967391967773,7.885,65452.264,17162.139,pytorch,float16,BnB.8bit,SDPA,No Kernel,Qwen2ForCausalLM,8.115,66.7,14
83
+ 4bit-bnb-fa2,Qwen/Qwen2-beta-14B,0.511,0.0763965454101562,12.979,102068.536,11094.619,pytorch,float16,BnB.4bit,FAv2,No Kernel,Unknown,5.359,66.7,14
84
+ 4bit-bnb-eager,Qwen/Qwen1.5-14B,0.502,0.0766648330688476,12.995,101028.853,11093.767,pytorch,float16,BnB.4bit,Eager,No Kernel,Qwen2ForCausalLM,5.355,66.7,14
85
+ 4bit-bnb-fa2,Qwen/Qwen1.5-14B,0.511,0.0764078063964843,13.067,100869.91,11094.619,pytorch,float16,BnB.4bit,FAv2,No Kernel,Qwen2ForCausalLM,5.319,66.7,14
86
+ 4bit-bnb-eager,Qwen/Qwen2-beta-14B,0.504,0.0760985565185546,13.091,101519.305,11093.767,pytorch,float16,BnB.4bit,Eager,No Kernel,Unknown,5.303,66.7,14
87
+ 4bit-bnb-sdpa,Qwen/Qwen1.5-14B,0.501,0.0729886703491211,13.606,106419.525,11094.619,pytorch,float16,BnB.4bit,SDPA,No Kernel,Qwen2ForCausalLM,5.134,66.7,14
88
+ 4bit-bnb-sdpa,Qwen/Qwen2-beta-14B,0.5,0.0723947525024414,13.785,105456.899,11094.619,pytorch,float16,BnB.4bit,SDPA,No Kernel,Unknown,5.064,66.7,14
89
+ float32-sdpa,Qwen/Qwen2-beta-14B,0.472,0.0421468162536621,23.705,148566.494,59131.042,pytorch,float32,Unquantized,SDPA,No Kernel,Unknown,3.128,66.7,14
90
+ float32-eager,Qwen/Qwen2-beta-14B,0.473,0.0414248962402343,24.118,150604.059,59131.042,pytorch,float32,Unquantized,Eager,No Kernel,Unknown,3.083,66.7,14
91
+ float16-fa2,Qwen/Qwen2-beta-14B,0.057,0.03829248046875,25.962,167208.938,29628.641,pytorch,float16,Unquantized,FAv2,No Kernel,Unknown,2.477,66.7,14
92
+ bfloat16-fa2,Qwen/Qwen2-beta-14B,0.056,0.0382750701904296,26.024,171028.502,29628.641,pytorch,bfloat16,Unquantized,FAv2,No Kernel,Unknown,2.474,66.7,14
93
+ bfloat16-eager,Qwen/Qwen2-beta-14B,0.053,0.033952766418457,29.407,186517.422,29627.777,pytorch,bfloat16,Unquantized,Eager,No Kernel,Unknown,2.193,66.7,14
94
+ float16-eager,Qwen/Qwen2-beta-14B,0.054,0.0333055992126464,29.785,187842.717,29628.641,pytorch,float16,Unquantized,Eager,No Kernel,Unknown,2.161,66.7,14
95
+ bfloat16-sdpa,Qwen/Qwen2-beta-14B,0.05,0.0308305912017822,32.262,199491.508,29628.641,pytorch,bfloat16,Unquantized,SDPA,No Kernel,Unknown,1.996,66.7,14
96
+ float16-sdpa,Qwen/Qwen2-beta-14B,0.052,0.0307169284820556,32.425,202153.056,29628.641,pytorch,float16,Unquantized,SDPA,No Kernel,Unknown,1.988,66.7,14
97
+ 4bit-gptq-exllama-v2-fa2,Qwen/Qwen1.5-7B,0.178,0.1636229095458984,6.104,38185.725,7110.584,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,Qwen2ForCausalLM,10.495,61.76,7
98
+ 4bit-gptq-exllama-v1-fa2,Qwen/Qwen1.5-7B,0.176,0.1622589416503906,6.156,38429.543,7110.584,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,Qwen2ForCausalLM,10.402,61.76,7
99
+ 4bit-gptq-exllama-v2-sdpa,Qwen/Qwen1.5-7B,0.169,0.1540648956298828,6.488,40418.403,7110.584,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV2,Qwen2ForCausalLM,9.882,61.76,7
100
+ 4bit-gptq-exllama-v2-eager,Qwen/Qwen1.5-7B,0.169,0.1534781494140625,6.515,40703.915,7110.585,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,Qwen2ForCausalLM,9.84,61.76,7
101
+ 4bit-gptq-exllama-v1-eager,Qwen/Qwen1.5-7B,0.17,0.153154556274414,6.523,40746.829,7110.585,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,Qwen2ForCausalLM,9.829,61.76,7
102
+ 4bit-gptq-exllama-v1-sdpa,Qwen/Qwen1.5-7B,0.168,0.1531156463623046,6.53,40810.499,7110.584,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV1,Qwen2ForCausalLM,9.817,61.76,7
103
+ 8bit-bnb-fa2,Qwen/Qwen1.5-7B,0.106,0.1043712005615234,9.529,81397.173,10046.34,pytorch,float16,BnB.8bit,FAv2,No Kernel,Qwen2ForCausalLM,6.703,61.76,7
104
+ 8bit-bnb-eager,Qwen/Qwen1.5-7B,0.105,0.1032509460449218,9.667,81417.862,10046.34,pytorch,float16,BnB.8bit,Eager,No Kernel,Qwen2ForCausalLM,6.618,61.76,7
105
+ 8bit-bnb-sdpa,Qwen/Qwen1.5-7B,0.102,0.1015767059326171,9.782,83354.464,10046.34,pytorch,float16,BnB.8bit,SDPA,No Kernel,Qwen2ForCausalLM,6.528,61.76,7
106
+ 4bit-bnb-fa2,Qwen/Qwen1.5-7B,0.291,0.0622233581542968,16.001,129775.678,6859.561,pytorch,float16,BnB.4bit,FAv2,No Kernel,Qwen2ForCausalLM,4.223,61.76,7
107
+ 4bit-bnb-eager,Qwen/Qwen1.5-7B,0.283,0.0611809272766113,16.29,133802.283,6859.693,pytorch,float16,BnB.4bit,Eager,No Kernel,Qwen2ForCausalLM,4.153,61.76,7
108
+ 4bit-bnb-sdpa,Qwen/Qwen1.5-7B,0.281,0.0579102706909179,17.241,140087.9,6859.561,pytorch,float16,BnB.4bit,SDPA,No Kernel,Qwen2ForCausalLM,3.923,61.76,7
109
+ bfloat16-eager,Qwen/Qwen1.5-7B,0.033,0.027060224533081,36.685,256436.742,16416.242,pytorch,bfloat16,Unquantized,Eager,No Kernel,Qwen2ForCausalLM,1.749,61.76,7
110
+ float16-eager,Qwen/Qwen1.5-7B,0.035,0.0266495990753173,37.386,254931.518,16416.242,pytorch,float16,Unquantized,Eager,No Kernel,Qwen2ForCausalLM,1.717,61.76,7
111
+ float32-sdpa,Qwen/Qwen1.5-7B,0.269,0.0251545600891113,39.712,248273.588,32662.329,pytorch,float32,Unquantized,SDPA,No Kernel,Qwen2ForCausalLM,1.856,61.76,7
112
+ bfloat16-sdpa,Qwen/Qwen1.5-7B,0.031,0.024498176574707,40.266,273241.258,16416.242,pytorch,bfloat16,Unquantized,SDPA,No Kernel,Qwen2ForCausalLM,1.582,61.76,7
113
+ float16-sdpa,Qwen/Qwen1.5-7B,0.033,0.0246415367126464,40.4,271307.112,16416.242,pytorch,float16,Unquantized,SDPA,No Kernel,Qwen2ForCausalLM,1.59,61.76,7
114
+ 4bit-gptq-exllama-v1-fa2,Deci/DeciLM-7B,0.177,0.1591111755371093,6.242,39234.618,4542.986,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,DeciLMForCausalLM,10.212,61.55,7
115
+ 4bit-gptq-exllama-v1-eager,Deci/DeciLM-7B,0.176,0.1585008697509765,6.28,39403.846,4542.986,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,DeciLMForCausalLM,10.168,61.55,7
116
+ 4bit-gptq-exllama-v2-eager,Deci/DeciLM-7B,0.176,0.1583861694335937,6.301,39448.689,4542.986,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,DeciLMForCausalLM,10.16,61.55,7
117
+ 4bit-gptq-exllama-v2-fa2,Deci/DeciLM-7B,0.177,0.1581578216552734,6.323,39550.391,4542.986,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,DeciLMForCausalLM,10.141,61.55,7
118
+ 8bit-bnb-eager,Deci/DeciLM-7B,0.112,0.1085685729980468,9.145,77902.18,7514.465,pytorch,float16,BnB.8bit,Eager,No Kernel,DeciLMForCausalLM,6.994,61.55,7
119
+ 8bit-bnb-fa2,Deci/DeciLM-7B,0.111,0.1073848342895507,9.272,79804.194,7514.465,pytorch,float16,BnB.8bit,FAv2,No Kernel,DeciLMForCausalLM,6.905,61.55,7
120
+ 4bit-bnb-eager,Deci/DeciLM-7B,0.285,0.0612147216796875,16.165,128174.485,4557.528,pytorch,float16,BnB.4bit,Eager,No Kernel,DeciLMForCausalLM,4.181,61.55,7
121
+ 4bit-bnb-fa2,Deci/DeciLM-7B,0.283,0.0611655693054199,16.223,129606.147,4557.528,pytorch,float16,BnB.4bit,FAv2,No Kernel,DeciLMForCausalLM,4.156,61.55,7
122
+ float16-fa2,Deci/DeciLM-7B,0.035,0.0276981754302978,35.979,246453.59,14290.687,pytorch,float16,Unquantized,FAv2,No Kernel,DeciLMForCausalLM,1.781,61.55,7
123
+ bfloat16-fa2,Deci/DeciLM-7B,0.036,0.0275711994171142,36.273,254203.902,14290.687,pytorch,bfloat16,Unquantized,FAv2,No Kernel,DeciLMForCausalLM,1.772,61.55,7
124
+ bfloat16-eager,Deci/DeciLM-7B,0.036,0.0274544639587402,36.278,252890.35,14290.687,pytorch,bfloat16,Unquantized,Eager,No Kernel,DeciLMForCausalLM,1.769,61.55,7
125
+ float16-eager,Deci/DeciLM-7B,0.035,0.0274513912200927,36.347,254214.391,14290.687,pytorch,float16,Unquantized,Eager,No Kernel,DeciLMForCausalLM,1.765,61.55,7
126
+ float32-eager,Deci/DeciLM-7B,0.26,0.0250234870910644,39.901,248383.54,28529.571,pytorch,float32,Unquantized,Eager,No Kernel,DeciLMForCausalLM,1.838,61.55,7
127
+ 8bit-bnb-eager,TencentARC/Mistral_Pro_8B_v0.1,0.133,0.1332080688476562,7.477,63386.861,10056.919,pytorch,float16,BnB.8bit,Eager,No Kernel,MistralForCausalLM,8.529,61.06,8
128
+ 8bit-bnb-fa2,TencentARC/Mistral_Pro_8B_v0.1,0.133,0.1326766052246093,7.52,63638.018,10056.901,pytorch,float16,BnB.8bit,FAv2,No Kernel,MistralForCausalLM,8.504,61.06,8
129
+ 8bit-bnb-sdpa,TencentARC/Mistral_Pro_8B_v0.1,0.129,0.1305528259277343,7.583,63655.197,10056.901,pytorch,float16,BnB.8bit,SDPA,No Kernel,MistralForCausalLM,8.447,61.06,8
130
+ 4bit-bnb-fa2,TencentARC/Mistral_Pro_8B_v0.1,0.378,0.0774328308105468,12.921,105176.391,6130.076,pytorch,float16,BnB.4bit,FAv2,No Kernel,MistralForCausalLM,5.245,61.06,8
131
+ 4bit-bnb-eager,TencentARC/Mistral_Pro_8B_v0.1,0.368,0.0762798080444336,13.049,105807.92,6130.207,pytorch,float16,BnB.4bit,Eager,No Kernel,MistralForCausalLM,5.189,61.06,8
132
+ 4bit-bnb-sdpa,TencentARC/Mistral_Pro_8B_v0.1,0.364,0.0726702117919921,13.705,108685.174,6130.076,pytorch,float16,BnB.4bit,SDPA,No Kernel,MistralForCausalLM,4.942,61.06,8
133
+ bfloat16-fa2,TencentARC/Mistral_Pro_8B_v0.1,0.064,0.0494766082763671,18.898,192652.736,18774.938,pytorch,bfloat16,Unquantized,FAv2,No Kernel,MistralForCausalLM,3.182,61.06,8
134
+ float16-fa2,TencentARC/Mistral_Pro_8B_v0.1,0.052,0.0361553916931152,27.636,160155.483,18774.938,pytorch,float16,Unquantized,FAv2,No Kernel,MistralForCausalLM,2.33,61.06,8
135
+ float16-eager,TencentARC/Mistral_Pro_8B_v0.1,0.045,0.0354969596862793,28.055,199077.816,18774.948,pytorch,float16,Unquantized,Eager,No Kernel,MistralForCausalLM,2.282,61.06,8
136
+ bfloat16-eager,TencentARC/Mistral_Pro_8B_v0.1,0.047,0.03498291015625,28.49,151470.549,18774.964,pytorch,bfloat16,Unquantized,Eager,No Kernel,MistralForCausalLM,2.25,61.06,8
137
+ bfloat16-sdpa,TencentARC/Mistral_Pro_8B_v0.1,0.044,0.0321003532409668,28.582,214674.752,18774.938,pytorch,bfloat16,Unquantized,SDPA,No Kernel,MistralForCausalLM,2.086,61.06,8
138
+ float16-sdpa,TencentARC/Mistral_Pro_8B_v0.1,0.042,0.0322201614379882,30.637,214042.402,18774.938,pytorch,float16,Unquantized,SDPA,No Kernel,MistralForCausalLM,2.078,61.06,8
139
+ float32-eager,TencentARC/Mistral_Pro_8B_v0.1,0.335,0.0316262397766113,31.508,197831.613,37534.53,pytorch,float32,Unquantized,Eager,No Kernel,MistralForCausalLM,2.333,61.06,8
140
+ float32-sdpa,TencentARC/Mistral_Pro_8B_v0.1,0.33,0.0313978881835937,31.77,198934.234,37534.494,pytorch,float32,Unquantized,SDPA,No Kernel,MistralForCausalLM,2.31,61.06,8
141
+ float32-eager,internlm/internlm-20b,0.705,0.0793333740234375,12.56,78971.296,82203.53,pytorch,float32,Unquantized,Eager,No Kernel,InternLMForCausalLM,5.712,59.55,20
142
+ float16-eager,internlm/internlm-20b,0.081,0.0755814437866211,13.025,99846.325,41420.788,pytorch,float16,Unquantized,Eager,No Kernel,InternLMForCausalLM,4.957,59.55,20
143
+ bfloat16-fa2,internlm/internlm-20b,0.075,0.0668876800537109,15.024,103362.431,41420.787,pytorch,bfloat16,Unquantized,FAv2,No Kernel,InternLMForCausalLM,4.286,59.55,20
144
+ bfloat16-eager,internlm/internlm-20b,0.081,0.0650844192504882,15.33,99111.479,41442.261,pytorch,bfloat16,Unquantized,Eager,No Kernel,InternLMForCausalLM,4.184,59.55,20
145
+ float16-fa2,internlm/internlm-20b,0.078,0.0617318382263183,16.104,103591.136,41420.787,pytorch,float16,Unquantized,FAv2,No Kernel,InternLMForCausalLM,3.98,59.55,20
146
+ 8bit-bnb-eager,Qwen/Qwen1.5-4B,0.139,0.1358602294921875,7.319,64939.622,5789.886,pytorch,float16,BnB.8bit,Eager,No Kernel,Qwen2ForCausalLM,8.747,57.05,3
147
+ 8bit-bnb-fa2,Qwen/Qwen1.5-4B,0.13,0.1299681243896484,7.674,67641.747,5789.886,pytorch,float16,BnB.8bit,FAv2,No Kernel,Qwen2ForCausalLM,8.333,57.05,3
148
+ 8bit-bnb-sdpa,Qwen/Qwen1.5-4B,0.128,0.1278269424438476,7.793,68389.147,5789.886,pytorch,float16,BnB.8bit,SDPA,No Kernel,Qwen2ForCausalLM,8.209,57.05,3
149
+ 4bit-gptq-exllama-v2-fa2,Qwen/Qwen1.5-4B,0.112,0.1010135040283203,9.871,62441.263,4389.693,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,Qwen2ForCausalLM,6.483,57.05,3
150
+ 4bit-gptq-exllama-v1-fa2,Qwen/Qwen1.5-4B,0.111,0.1008404464721679,9.899,63499.758,4389.693,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,Qwen2ForCausalLM,6.467,57.05,3
151
+ 4bit-gptq-exllama-v2-eager,Qwen/Qwen1.5-4B,0.099,0.0861163482666015,11.597,72553.687,4389.694,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,Qwen2ForCausalLM,5.528,57.05,3
152
+ 4bit-gptq-exllama-v1-eager,Qwen/Qwen1.5-4B,0.098,0.0855541763305664,11.664,73434.956,4389.694,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,Qwen2ForCausalLM,5.492,57.05,3
153
+ 4bit-gptq-exllama-v2-sdpa,Qwen/Qwen1.5-4B,0.097,0.0848998413085937,11.755,73710.179,4389.693,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV2,Qwen2ForCausalLM,5.451,57.05,3
154
+ 4bit-gptq-exllama-v1-sdpa,Qwen/Qwen1.5-4B,0.097,0.0848783340454101,11.758,73660.092,4389.693,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV1,Qwen2ForCausalLM,5.45,57.05,3
155
+ 4bit-bnb-fa2,Qwen/Qwen1.5-4B,0.157,0.0778475494384765,12.779,108045.366,4291.035,pytorch,float16,BnB.4bit,FAv2,No Kernel,Qwen2ForCausalLM,5.097,57.05,3
156
+ 4bit-bnb-eager,Qwen/Qwen1.5-4B,0.142,0.075971580505371,13.076,114089.801,4291.293,pytorch,float16,BnB.4bit,Eager,No Kernel,Qwen2ForCausalLM,4.95,57.05,3
157
+ 4bit-bnb-sdpa,Qwen/Qwen1.5-4B,0.141,0.0751001586914062,13.151,114564.921,4291.035,pytorch,float16,BnB.4bit,SDPA,No Kernel,Qwen2ForCausalLM,4.913,57.05,3
158
+ 8bit-bnb-eager,Qwen/Qwen1.5-MoE-A2.7B,0.741,0.6937733154296875,1.399,12842.708,15921.993,pytorch,float16,BnB.8bit,Eager,No Kernel,Unknown,44.485,56.03,14
159
+ 8bit-bnb-fa2,Qwen/Qwen1.5-MoE-A2.7B,0.683,0.6790184936523438,1.471,12733.116,15921.207,pytorch,float16,BnB.8bit,FAv2,No Kernel,Unknown,43.541,56.03,14
160
+ 8bit-bnb-sdpa,Qwen/Qwen1.5-MoE-A2.7B,0.668,0.6668635864257813,1.493,13064.707,15921.207,pytorch,float16,BnB.8bit,SDPA,No Kernel,Unknown,42.798,56.03,14
161
+ 4bit-bnb-eager,Qwen/Qwen1.5-MoE-A2.7B,0.665,0.6048717041015625,1.625,14351.577,8963.124,pytorch,float16,BnB.4bit,Eager,No Kernel,Unknown,39.454,56.03,14
162
+ 4bit-bnb-fa2,Qwen/Qwen1.5-MoE-A2.7B,0.634,0.5769584350585938,1.728,14927.261,8963.124,pytorch,float16,BnB.4bit,FAv2,No Kernel,Unknown,37.038,56.03,14
163
+ 4bit-bnb-sdpa,Qwen/Qwen1.5-MoE-A2.7B,0.618,0.57547265625,1.732,15221.615,8963.124,pytorch,float16,BnB.4bit,SDPA,No Kernel,Unknown,36.985,56.03,14
164
+ float16-fa2,Qwen/Qwen1.5-MoE-A2.7B,0.319,0.2504622039794922,3.939,33652.697,29029.726,pytorch,float16,Unquantized,FAv2,No Kernel,Unknown,16.154,56.03,14
165
+ bfloat16-fa2,Qwen/Qwen1.5-MoE-A2.7B,0.318,0.2515455932617187,3.953,33955.497,29029.726,pytorch,bfloat16,Unquantized,FAv2,No Kernel,Unknown,16.261,56.03,14
166
+ float16-sdpa,Qwen/Qwen1.5-MoE-A2.7B,0.319,0.2492241973876953,3.969,33500.653,29029.726,pytorch,float16,Unquantized,SDPA,No Kernel,Unknown,16.187,56.03,14
167
+ bfloat16-sdpa,Qwen/Qwen1.5-MoE-A2.7B,0.317,0.2492610626220703,3.987,34518.439,29029.726,pytorch,bfloat16,Unquantized,SDPA,No Kernel,Unknown,16.058,56.03,14
168
+ 4bit-gptq-exllama-v1-eager,01-ai/Yi-6B,0.148,0.1335818176269531,7.48,46668.673,4383.673,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,LlamaForCausalLM,8.571,54.08,6
169
+ 4bit-gptq-exllama-v2-eager,01-ai/Yi-6B,0.148,0.1333135375976562,7.5,47031.732,4383.673,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,LlamaForCausalLM,8.549,54.08,6
170
+ 4bit-gptq-exllama-v2-sdpa,01-ai/Yi-6B,0.147,0.1325373382568359,7.54,47165.737,4383.672,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV2,LlamaForCausalLM,8.502,54.08,6
171
+ 4bit-gptq-exllama-v1-sdpa,01-ai/Yi-6B,0.145,0.1321912384033203,7.56,47302.247,4383.672,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV1,LlamaForCausalLM,8.479,54.08,6
172
+ 4bit-gptq-exllama-v1-fa2,01-ai/Yi-6B,0.146,0.1322506256103515,7.565,47209.478,4383.672,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,LlamaForCausalLM,8.474,54.08,6
173
+ 4bit-gptq-exllama-v2-fa2,01-ai/Yi-6B,0.145,0.1320038452148437,7.576,47258.355,4383.672,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,LlamaForCausalLM,8.46,54.08,6
174
+ 8bit-bnb-fa2,01-ai/Yi-6B,0.122,0.1202708511352539,8.294,71936.758,6883.612,pytorch,float16,BnB.8bit,FAv2,No Kernel,LlamaForCausalLM,7.695,54.08,6
175
+ 8bit-bnb-sdpa,01-ai/Yi-6B,0.121,0.1159710693359375,8.476,72438.094,6883.612,pytorch,float16,BnB.8bit,SDPA,No Kernel,LlamaForCausalLM,7.517,54.08,6
176
+ 8bit-bnb-eager,01-ai/Yi-6B,0.117,0.1180047378540039,8.531,74501.828,6883.612,pytorch,float16,BnB.8bit,Eager,No Kernel,LlamaForCausalLM,7.498,54.08,6
177
+ 4bit-bnb-eager,01-ai/Yi-6B,0.239,0.0685793304443359,14.205,117514.54,4344.191,pytorch,float16,BnB.4bit,Eager,No Kernel,LlamaForCausalLM,4.593,54.08,6
178
+ 4bit-bnb-fa2,01-ai/Yi-6B,0.237,0.0677509155273437,14.478,116945.05,4344.06,pytorch,float16,BnB.4bit,FAv2,No Kernel,LlamaForCausalLM,4.521,54.08,6
179
+ 4bit-bnb-sdpa,01-ai/Yi-6B,0.237,0.0656271362304687,14.777,119850.176,4344.06,pytorch,float16,BnB.4bit,SDPA,No Kernel,LlamaForCausalLM,4.468,54.08,6
180
+ float16-eager,01-ai/Yi-6B,0.037,0.0342794227600097,29.199,225420.285,12315.695,pytorch,float16,Unquantized,Eager,No Kernel,LlamaForCausalLM,2.196,54.08,6
181
+ bfloat16-eager,01-ai/Yi-6B,0.035,0.0330751991271972,30.201,222509.193,12315.695,pytorch,bfloat16,Unquantized,Eager,No Kernel,LlamaForCausalLM,2.113,54.08,6
182
+ float16-sdpa,01-ai/Yi-6B,0.032,0.0304189434051513,32.575,238157.461,12315.695,pytorch,float16,Unquantized,SDPA,No Kernel,LlamaForCausalLM,1.964,54.08,6
183
+ float32-eager,01-ai/Yi-6B,0.216,0.0301455364227294,33.033,218482.721,24528.503,pytorch,float32,Unquantized,Eager,No Kernel,LlamaForCausalLM,2.115,54.08,6
184
+ bfloat16-sdpa,01-ai/Yi-6B,0.031,0.0300677127838134,33.055,241680.777,12315.695,pytorch,bfloat16,Unquantized,SDPA,No Kernel,LlamaForCausalLM,1.923,54.08,6
185
+ float16-fa2,01-ai/Yi-6B,0.03,0.0293744640350341,33.598,242398.98,12315.695,pytorch,float16,Unquantized,FAv2,No Kernel,LlamaForCausalLM,1.91,54.08,6
186
+ bfloat16-fa2,01-ai/Yi-6B,0.03,0.0291368961334228,34.172,253843.644,12315.695,pytorch,bfloat16,Unquantized,FAv2,No Kernel,LlamaForCausalLM,1.867,54.08,6
187
+ float32-sdpa,01-ai/Yi-6B,0.213,0.0278220806121826,35.817,231139.557,24528.467,pytorch,float32,Unquantized,SDPA,No Kernel,LlamaForCausalLM,1.968,54.08,6
188
+ 4bit-gptq-exllama-v1-eager,01-ai/Yi-6B,0.148,0.1335818176269531,7.48,46668.673,4383.673,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,LlamaForCausalLM,8.571,54.02,6
189
+ 4bit-gptq-exllama-v2-eager,01-ai/Yi-6B,0.148,0.1333135375976562,7.5,47031.732,4383.673,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,LlamaForCausalLM,8.549,54.02,6
190
+ 4bit-gptq-exllama-v2-sdpa,01-ai/Yi-6B,0.147,0.1325373382568359,7.54,47165.737,4383.672,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV2,LlamaForCausalLM,8.502,54.02,6
191
+ 4bit-gptq-exllama-v1-sdpa,01-ai/Yi-6B,0.145,0.1321912384033203,7.56,47302.247,4383.672,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV1,LlamaForCausalLM,8.479,54.02,6
192
+ 4bit-gptq-exllama-v1-fa2,01-ai/Yi-6B,0.146,0.1322506256103515,7.565,47209.478,4383.672,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,LlamaForCausalLM,8.474,54.02,6
193
+ 4bit-gptq-exllama-v2-fa2,01-ai/Yi-6B,0.145,0.1320038452148437,7.576,47258.355,4383.672,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,LlamaForCausalLM,8.46,54.02,6
194
+ 8bit-bnb-fa2,01-ai/Yi-6B,0.122,0.1202708511352539,8.294,71936.758,6883.612,pytorch,float16,BnB.8bit,FAv2,No Kernel,LlamaForCausalLM,7.695,54.02,6
195
+ 8bit-bnb-sdpa,01-ai/Yi-6B,0.121,0.1159710693359375,8.476,72438.094,6883.612,pytorch,float16,BnB.8bit,SDPA,No Kernel,LlamaForCausalLM,7.517,54.02,6
196
+ 8bit-bnb-eager,01-ai/Yi-6B,0.117,0.1180047378540039,8.531,74501.828,6883.612,pytorch,float16,BnB.8bit,Eager,No Kernel,LlamaForCausalLM,7.498,54.02,6
197
+ 4bit-bnb-eager,01-ai/Yi-6B,0.239,0.0685793304443359,14.205,117514.54,4344.191,pytorch,float16,BnB.4bit,Eager,No Kernel,LlamaForCausalLM,4.593,54.02,6
198
+ 4bit-bnb-fa2,01-ai/Yi-6B,0.237,0.0677509155273437,14.478,116945.05,4344.06,pytorch,float16,BnB.4bit,FAv2,No Kernel,LlamaForCausalLM,4.521,54.02,6
199
+ 4bit-bnb-sdpa,01-ai/Yi-6B,0.237,0.0656271362304687,14.777,119850.176,4344.06,pytorch,float16,BnB.4bit,SDPA,No Kernel,LlamaForCausalLM,4.468,54.02,6
200
+ float16-eager,01-ai/Yi-6B,0.037,0.0342794227600097,29.199,225420.285,12315.695,pytorch,float16,Unquantized,Eager,No Kernel,LlamaForCausalLM,2.196,54.02,6
201
+ bfloat16-eager,01-ai/Yi-6B,0.035,0.0330751991271972,30.201,222509.193,12315.695,pytorch,bfloat16,Unquantized,Eager,No Kernel,LlamaForCausalLM,2.113,54.02,6
202
+ float16-sdpa,01-ai/Yi-6B,0.032,0.0304189434051513,32.575,238157.461,12315.695,pytorch,float16,Unquantized,SDPA,No Kernel,LlamaForCausalLM,1.964,54.02,6
203
+ float32-eager,01-ai/Yi-6B,0.216,0.0301455364227294,33.033,218482.721,24528.503,pytorch,float32,Unquantized,Eager,No Kernel,LlamaForCausalLM,2.115,54.02,6
204
+ bfloat16-sdpa,01-ai/Yi-6B,0.031,0.0300677127838134,33.055,241680.777,12315.695,pytorch,bfloat16,Unquantized,SDPA,No Kernel,LlamaForCausalLM,1.923,54.02,6
205
+ float16-fa2,01-ai/Yi-6B,0.03,0.0293744640350341,33.598,242398.98,12315.695,pytorch,float16,Unquantized,FAv2,No Kernel,LlamaForCausalLM,1.91,54.02,6
206
+ bfloat16-fa2,01-ai/Yi-6B,0.03,0.0291368961334228,34.172,253843.644,12315.695,pytorch,bfloat16,Unquantized,FAv2,No Kernel,LlamaForCausalLM,1.867,54.02,6
207
+ float32-sdpa,01-ai/Yi-6B,0.213,0.0278220806121826,35.817,231139.557,24528.467,pytorch,float32,Unquantized,SDPA,No Kernel,LlamaForCausalLM,1.968,54.02,6
208
+ float32-eager,microsoft/phi-1_5,0.059,0.0195164165496826,49.575,345539.322,5949.832,pytorch,float32,Unquantized,Eager,No Kernel,PhiForCausalLM,1.332,47.69,1
209
+ bfloat16-eager,microsoft/phi-1_5,0.02,0.0190894088745117,52.267,378590.349,3023.634,pytorch,bfloat16,Unquantized,Eager,No Kernel,PhiForCausalLM,1.224,47.69,1
210
+ float16-eager,microsoft/phi-1_5,0.02,0.0188282871246337,52.677,402076.824,3023.634,pytorch,float16,Unquantized,Eager,No Kernel,PhiForCausalLM,1.213,47.69,1
211
+ float16-fa2,microsoft/phi-1_5,0.018,0.0177858562469482,55.95,438770.278,3022.613,pytorch,float16,Unquantized,FAv2,No Kernel,PhiForCausalLM,1.142,47.69,1
212
+ bfloat16-fa2,microsoft/phi-1_5,0.018,0.0170956802368164,57.77,450439.226,3022.613,pytorch,bfloat16,Unquantized,FAv2,No Kernel,PhiForCausalLM,1.103,47.69,1
213
+ float16-sdpa,microsoft/phi-1_5,0.018,0.0170618877410888,58.596,470189.603,3022.613,pytorch,float16,Unquantized,SDPA,No Kernel,PhiForCausalLM,1.092,47.69,1
214
+ bfloat16-sdpa,microsoft/phi-1_5,0.017,0.0164577274322509,59.988,455983.243,3022.613,pytorch,bfloat16,Unquantized,SDPA,No Kernel,PhiForCausalLM,1.062,47.69,1
215
+ float32-sdpa,microsoft/phi-1_5,0.056,0.0159713277816772,62.342,450982.812,5949.832,pytorch,float32,Unquantized,SDPA,No Kernel,PhiForCausalLM,1.059,47.69,1
216
+ 8bit-bnb-eager,Qwen/Qwen1.5-1.8B,0.081,0.0797747192382812,12.476,111433.433,3158.448,pytorch,float16,BnB.8bit,Eager,No Kernel,Qwen2ForCausalLM,5.13,46.55,1
217
+ 8bit-bnb-fa2,Qwen/Qwen1.5-1.8B,0.08,0.0789882888793945,12.631,113806.724,3158.448,pytorch,float16,BnB.8bit,FAv2,No Kernel,Qwen2ForCausalLM,5.073,46.55,1
218
+ 8bit-bnb-sdpa,Qwen/Qwen1.5-1.8B,0.078,0.0767518692016601,12.941,114968.89,3158.448,pytorch,float16,BnB.8bit,SDPA,No Kernel,Qwen2ForCausalLM,4.943,46.55,1
219
+ 4bit-gptq-exllama-v1-eager,Qwen/Qwen1.5-1.8B,0.05,0.0484075508117675,20.584,143671.29,2628.77,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,Qwen2ForCausalLM,3.103,46.55,1
220
+ 4bit-gptq-exllama-v1-fa2,Qwen/Qwen1.5-1.8B,0.051,0.048449535369873,20.594,144025.889,2628.769,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,Qwen2ForCausalLM,3.104,46.55,1
221
+ 4bit-gptq-exllama-v2-fa2,Qwen/Qwen1.5-1.8B,0.051,0.0480430068969726,20.76,145713.086,2628.769,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,Qwen2ForCausalLM,3.074,46.55,1
222
+ 4bit-gptq-exllama-v2-eager,Qwen/Qwen1.5-1.8B,0.049,0.0479569931030273,20.776,146534.596,2628.77,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,Qwen2ForCausalLM,3.074,46.55,1
223
+ 4bit-bnb-eager,Qwen/Qwen1.5-1.8B,0.064,0.0466513938903808,21.322,184823.08,2585.787,pytorch,float16,BnB.4bit,Eager,No Kernel,Qwen2ForCausalLM,3.01,46.55,1
224
+ 4bit-gptq-exllama-v2-sdpa,Qwen/Qwen1.5-1.8B,0.049,0.0467271690368652,21.353,145872.178,2628.769,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV2,Qwen2ForCausalLM,2.992,46.55,1
225
+ 4bit-bnb-fa2,Qwen/Qwen1.5-1.8B,0.073,0.0465735664367675,21.391,187922.909,2585.787,pytorch,float16,BnB.4bit,FAv2,No Kernel,Qwen2ForCausalLM,3.008,46.55,1
226
+ 4bit-gptq-exllama-v1-sdpa,Qwen/Qwen1.5-1.8B,0.048,0.0465264625549316,21.416,147962.283,2628.769,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV1,Qwen2ForCausalLM,2.984,46.55,1
227
+ 4bit-bnb-sdpa,Qwen/Qwen1.5-1.8B,0.062,0.0441620483398437,22.452,196584.268,2585.787,pytorch,float16,BnB.4bit,SDPA,No Kernel,Qwen2ForCausalLM,2.862,46.55,1
228
+ bfloat16-fa2,Qwen/Qwen1.5-1.8B,0.022,0.0207626247406005,47.657,376911.509,4408.408,pytorch,bfloat16,Unquantized,FAv2,No Kernel,Qwen2ForCausalLM,1.335,46.55,1
229
+ float16-fa2,Qwen/Qwen1.5-1.8B,0.021,0.0204472312927246,48.812,383317.645,4408.408,pytorch,float16,Unquantized,FAv2,No Kernel,Qwen2ForCausalLM,1.31,46.55,1
230
+ bfloat16-eager,Qwen/Qwen1.5-1.8B,0.022,0.0204482555389404,48.834,384089.46,4408.408,pytorch,bfloat16,Unquantized,Eager,No Kernel,Qwen2ForCausalLM,1.312,46.55,1
231
+ float16-eager,Qwen/Qwen1.5-1.8B,0.022,0.020242431640625,49.096,380746.463,4408.408,pytorch,float16,Unquantized,Eager,No Kernel,Qwen2ForCausalLM,1.299,46.55,1
232
+ float32-eager,Qwen/Qwen1.5-1.8B,0.06,0.0188979206085205,52.458,402115.515,8597.293,pytorch,float32,Unquantized,Eager,No Kernel,Qwen2ForCausalLM,1.254,46.55,1
233
+ float16-sdpa,Qwen/Qwen1.5-1.8B,0.02,0.0187688961029052,52.789,414137.925,4408.408,pytorch,float16,Unquantized,SDPA,No Kernel,Qwen2ForCausalLM,1.209,46.55,1
234
+ bfloat16-sdpa,Qwen/Qwen1.5-1.8B,0.019,0.0185108470916748,53.864,426916.746,4408.408,pytorch,bfloat16,Unquantized,SDPA,No Kernel,Qwen2ForCausalLM,1.188,46.55,1
235
+ float32-sdpa,Qwen/Qwen1.5-1.8B,0.058,0.01723801612854,57.76,432794.785,8597.293,pytorch,float32,Unquantized,SDPA,No Kernel,Qwen2ForCausalLM,1.147,46.55,1
236
+ 4bit-bnb-eager,facebook/opt-66b,2.647,0.1709598693847656,5.837,40640.181,37434.811,pytorch,float16,BnB.4bit,Eager,No Kernel,OPTForCausalLM,13.443,42.78,66
237
+ 8bit-bnb-eager,facebook/opt-66b,0.173,0.1674977264404296,5.937,43032.007,68003.561,pytorch,float16,BnB.8bit,Eager,No Kernel,OPTForCausalLM,10.795,42.78,66
238
+ 4bit-bnb-fa2,facebook/opt-66b,2.647,0.1575536651611328,6.33,43499.097,37434.68,pytorch,float16,BnB.4bit,FAv2,No Kernel,OPTForCausalLM,12.585,42.78,66
239
+ 8bit-bnb-eager,Salesforce/codegen-16B-nl,0.086,0.085394432067871,11.686,94229.855,17381.431,pytorch,float16,BnB.8bit,Eager,No Kernel,CodeGenForCausalLM,5.467,42.59,16
240
+ float32-eager,Salesforce/codegen-16B-nl,0.576,0.0487710723876953,20.452,128257.079,65363.832,pytorch,float32,Unquantized,Eager,No Kernel,CodeGenForCausalLM,3.649,42.59,16
241
+ bfloat16-eager,Salesforce/codegen-16B-nl,0.064,0.0363489265441894,27.48,175303.81,32792.184,pytorch,bfloat16,Unquantized,Eager,No Kernel,CodeGenForCausalLM,2.36,42.59,16
242
+ float16-eager,Salesforce/codegen-16B-nl,0.063,0.0355614738464355,28.047,178300.329,32792.184,pytorch,float16,Unquantized,Eager,No Kernel,CodeGenForCausalLM,2.306,42.59,16
243
+ 8bit-bnb-eager,facebook/opt-30b,0.127,0.1246095352172851,7.971,64435.961,31446.286,pytorch,float16,BnB.8bit,Eager,No Kernel,OPTForCausalLM,8.002,41.99,30
244
+ 8bit-bnb-fa2,facebook/opt-30b,0.117,0.1175275497436523,8.492,67604.764,31450.479,pytorch,float16,BnB.8bit,FAv2,No Kernel,OPTForCausalLM,7.521,41.99,30
245
+ 4bit-bnb-eager,facebook/opt-30b,1.321,0.0912087020874023,10.938,78225.591,17680.925,pytorch,float16,BnB.4bit,Eager,No Kernel,OPTForCausalLM,7.075,41.99,30
246
+ 4bit-bnb-fa2,facebook/opt-30b,1.307,0.0774225921630859,12.891,90273.361,17680.794,pytorch,float16,BnB.4bit,FAv2,No Kernel,OPTForCausalLM,6.191,41.99,30
247
+ float16-eager,facebook/opt-30b,0.12,0.048503807067871,20.586,128680.81,60836.515,pytorch,float16,Unquantized,Eager,No Kernel,OPTForCausalLM,3.179,41.99,30
248
+ bfloat16-eager,facebook/opt-30b,0.11,0.04767232131958,20.935,130625.548,60836.515,pytorch,bfloat16,Unquantized,Eager,No Kernel,OPTForCausalLM,3.116,41.99,30
249
+ float16-fa2,facebook/opt-30b,0.114,0.0442951698303222,22.529,140361.802,60837.496,pytorch,float16,Unquantized,FAv2,No Kernel,OPTForCausalLM,2.909,41.99,30
250
+ bfloat16-fa2,facebook/opt-30b,0.103,0.0439941101074218,22.715,141619.806,60837.496,pytorch,bfloat16,Unquantized,FAv2,No Kernel,OPTForCausalLM,2.876,41.99,30
251
+ 4bit-gptq-exllama-v2-eager,EleutherAI/gpt-neox-20b,0.479,0.4443187255859375,2.25,14044.206,13715.589,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,GPTNeoXForCausalLM,28.483,41.69,20
252
+ 4bit-gptq-exllama-v1-eager,EleutherAI/gpt-neox-20b,0.478,0.4436869201660156,2.253,14084.383,13715.589,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,GPTNeoXForCausalLM,28.438,41.69,20
253
+ 4bit-gptq-exllama-v1-fa2,EleutherAI/gpt-neox-20b,0.469,0.437781494140625,2.284,14277.95,13715.588,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,GPTNeoXForCausalLM,28.05,41.69,20
254
+ 4bit-gptq-exllama-v2-fa2,EleutherAI/gpt-neox-20b,0.468,0.4377108459472656,2.285,14296.128,13715.588,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,GPTNeoXForCausalLM,28.048,41.69,20
255
+ 8bit-bnb-eager,EleutherAI/gpt-neox-20b,0.122,0.1186959381103515,8.298,69928.382,22536.283,pytorch,float16,BnB.8bit,Eager,No Kernel,GPTNeoXForCausalLM,7.672,41.69,20
256
+ 8bit-bnb-fa2,EleutherAI/gpt-neox-20b,0.101,0.1027123184204101,9.496,78557.68,22540.222,pytorch,float16,BnB.8bit,FAv2,No Kernel,GPTNeoXForCausalLM,6.733,41.69,20
257
+ 4bit-bnb-eager,EleutherAI/gpt-neox-20b,0.82,0.0790169601440429,12.42,94346.195,13411.544,pytorch,float16,BnB.4bit,Eager,No Kernel,GPTNeoXForCausalLM,5.881,41.69,20
258
+ 4bit-bnb-fa2,EleutherAI/gpt-neox-20b,0.805,0.0695562210083007,14.07,103611.797,13411.544,pytorch,float16,BnB.4bit,FAv2,No Kernel,GPTNeoXForCausalLM,5.292,41.69,20
259
+ float32-eager,EleutherAI/gpt-neox-20b,0.755,0.0624803848266601,15.975,100223.669,84145.78,pytorch,float32,Unquantized,Eager,No Kernel,GPTNeoXForCausalLM,4.692,41.69,20
260
+ float16-eager,EleutherAI/gpt-neox-20b,0.086,0.0409323501586914,24.356,153164.052,42460.724,pytorch,float16,Unquantized,Eager,No Kernel,GPTNeoXForCausalLM,2.667,41.69,20
261
+ bfloat16-eager,EleutherAI/gpt-neox-20b,0.085,0.0407807998657226,24.449,153893.023,42460.724,pytorch,bfloat16,Unquantized,Eager,No Kernel,GPTNeoXForCausalLM,2.658,41.69,20
262
+ float16-fa2,EleutherAI/gpt-neox-20b,0.079,0.0363407363891601,27.409,171626.76,42461.861,pytorch,float16,Unquantized,FAv2,No Kernel,GPTNeoXForCausalLM,2.374,41.69,20
263
+ bfloat16-fa2,EleutherAI/gpt-neox-20b,0.076,0.0360693778991699,27.643,173550.304,42461.861,pytorch,bfloat16,Unquantized,FAv2,No Kernel,GPTNeoXForCausalLM,2.352,41.69,20
264
+ 4bit-gptq-exllama-v2-fa2,EleutherAI/gpt-j-6b,0.154,0.1440143432617187,6.939,43376.583,4531.242,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,GPTJForCausalLM,9.23,40.1,6
265
+ 4bit-gptq-exllama-v1-fa2,EleutherAI/gpt-j-6b,0.155,0.1437634582519531,6.946,43391.488,4531.684,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,GPTJForCausalLM,9.224,40.1,6
266
+ 4bit-gptq-exllama-v1-eager,EleutherAI/gpt-j-6b,0.149,0.1367132110595703,7.305,45621.944,4531.243,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,GPTJForCausalLM,8.768,40.1,6
267
+ 4bit-gptq-exllama-v2-eager,EleutherAI/gpt-j-6b,0.149,0.1364398040771484,7.32,45648.538,4531.243,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,GPTJForCausalLM,8.75,40.1,6
268
+ 8bit-bnb-fa2,EleutherAI/gpt-j-6b,0.099,0.0991825942993164,9.947,85196.754,6915.153,pytorch,float16,BnB.8bit,FAv2,No Kernel,GPTJForCausalLM,6.422,40.1,6
269
+ 8bit-bnb-eager,EleutherAI/gpt-j-6b,0.099,0.09643212890625,10.218,97333.755,6910.556,pytorch,float16,BnB.8bit,Eager,No Kernel,GPTJForCausalLM,6.33,40.1,6
270
+ 4bit-bnb-fa2,EleutherAI/gpt-j-6b,0.247,0.0656670684814453,15.046,122819.256,4430.536,pytorch,float16,BnB.4bit,FAv2,No Kernel,GPTJForCausalLM,4.42,40.1,6
271
+ 4bit-bnb-eager,EleutherAI/gpt-j-6b,0.243,0.0626647033691406,15.756,132340.74,4430.536,pytorch,float16,BnB.4bit,Eager,No Kernel,GPTJForCausalLM,4.18,40.1,6
272
+ bfloat16-fa2,EleutherAI/gpt-j-6b,0.039,0.0384942092895507,26.187,198075.458,12548.118,pytorch,bfloat16,Unquantized,FAv2,No Kernel,GPTJForCausalLM,2.455,40.1,6
273
+ float16-fa2,EleutherAI/gpt-j-6b,0.038,0.0371865615844726,26.834,198370.271,12548.118,pytorch,float16,Unquantized,FAv2,No Kernel,GPTJForCausalLM,2.377,40.1,6
274
+ float16-eager,EleutherAI/gpt-j-6b,0.033,0.0316887035369873,31.36,244059.2,12543.514,pytorch,float16,Unquantized,Eager,No Kernel,GPTJForCausalLM,2.041,40.1,6
275
+ bfloat16-eager,EleutherAI/gpt-j-6b,0.033,0.0299622402191162,33.289,253350.114,12543.514,pytorch,bfloat16,Unquantized,Eager,No Kernel,GPTJForCausalLM,1.925,40.1,6
276
+ float32-eager,EleutherAI/gpt-j-6b,0.218,0.028654592514038,34.538,221958.982,24932.502,pytorch,float32,Unquantized,Eager,No Kernel,GPTJForCausalLM,2.042,40.1,6
277
+ 8bit-bnb-eager,facebook/opt-13b,0.107,0.1061560287475586,8.819,71610.738,13822.812,pytorch,float16,BnB.8bit,Eager,No Kernel,OPTForCausalLM,6.823,40.06,13
278
+ 8bit-bnb-fa2,facebook/opt-13b,0.099,0.0993361892700195,10.049,82083.427,13833.288,pytorch,float16,BnB.8bit,FAv2,No Kernel,OPTForCausalLM,6.355,40.06,13
279
+ 4bit-bnb-eager,facebook/opt-13b,0.507,0.0630169601440429,15.494,124413.509,7922.799,pytorch,float16,BnB.4bit,Eager,No Kernel,OPTForCausalLM,4.493,40.06,13
280
+ 4bit-bnb-fa2,facebook/opt-13b,0.498,0.0582451210021972,16.996,132053.433,7922.668,pytorch,float16,BnB.4bit,FAv2,No Kernel,OPTForCausalLM,4.197,40.06,13
281
+ float32-eager,facebook/opt-13b,0.449,0.0397056007385253,25.116,157631.732,52468.032,pytorch,float32,Unquantized,Eager,No Kernel,OPTForCausalLM,2.953,40.06,13
282
+ float16-eager,facebook/opt-13b,0.048,0.026040319442749,38.242,238581.578,26239.663,pytorch,float16,Unquantized,Eager,No Kernel,OPTForCausalLM,1.691,40.06,13
283
+ bfloat16-eager,facebook/opt-13b,0.047,0.0253767681121826,39.2,244893.072,26239.663,pytorch,bfloat16,Unquantized,Eager,No Kernel,OPTForCausalLM,1.647,40.06,13
284
+ float16-fa2,facebook/opt-13b,0.043,0.0213923835754394,46.507,288914.942,26238.909,pytorch,float16,Unquantized,FAv2,No Kernel,OPTForCausalLM,1.395,40.06,13
285
+ bfloat16-fa2,facebook/opt-13b,0.042,0.0213329925537109,46.743,290583.447,26238.909,pytorch,bfloat16,Unquantized,FAv2,No Kernel,OPTForCausalLM,1.389,40.06,13
286
+ 4bit-bnb-eager,Salesforce/codegen-6B-nl,0.288,0.056576000213623,17.621,140262.324,5007.212,pytorch,float16,BnB.4bit,Eager,No Kernel,CodeGenForCausalLM,3.862,40.0,6
287
+ float16-eager,Salesforce/codegen-6B-nl,0.038,0.0347740173339843,28.681,209899.419,14645.241,pytorch,float16,Unquantized,Eager,No Kernel,CodeGenForCausalLM,2.244,40.0,6
288
+ bfloat16-eager,Salesforce/codegen-6B-nl,0.037,0.0343040008544921,29.055,214394.519,14645.241,pytorch,bfloat16,Unquantized,Eager,No Kernel,CodeGenForCausalLM,2.203,40.0,6
289
+ float32-eager,Salesforce/codegen-6B-nl,0.259,0.0322273292541503,30.885,196412.307,29113.257,pytorch,float32,Unquantized,Eager,No Kernel,CodeGenForCausalLM,2.298,40.0,6
290
+ 8bit-bnb-eager,facebook/opt-6.7b,0.086,0.0839935989379882,10.948,100489.959,7223.648,pytorch,float16,BnB.8bit,Eager,No Kernel,OPTForCausalLM,5.439,39.08,6
291
+ 8bit-bnb-fa2,facebook/opt-6.7b,0.082,0.0793855972290039,11.169,104262.084,7223.73,pytorch,float16,BnB.8bit,FAv2,No Kernel,OPTForCausalLM,5.084,39.08,6
292
+ 4bit-bnb-eager,facebook/opt-6.7b,0.288,0.0505968627929687,19.8,159028.281,4334.81,pytorch,float16,BnB.4bit,Eager,No Kernel,OPTForCausalLM,3.487,39.08,6
293
+ 4bit-bnb-fa2,facebook/opt-6.7b,0.279,0.046334976196289,21.382,168441.405,4334.679,pytorch,float16,BnB.4bit,FAv2,No Kernel,OPTForCausalLM,3.215,39.08,6
294
+ float32-eager,facebook/opt-6.7b,0.257,0.0233758716583251,42.745,267623.607,27312.573,pytorch,float32,Unquantized,Eager,No Kernel,OPTForCausalLM,1.73,39.08,6
295
+ float16-eager,facebook/opt-6.7b,0.03,0.0173230075836181,57.528,367011.15,13661.26,pytorch,float16,Unquantized,Eager,No Kernel,OPTForCausalLM,1.124,39.08,6
296
+ bfloat16-eager,facebook/opt-6.7b,0.029,0.0168355846405029,58.993,379198.767,13661.26,pytorch,bfloat16,Unquantized,Eager,No Kernel,OPTForCausalLM,1.095,39.08,6
297
+ float16-fa2,facebook/opt-6.7b,0.026,0.0138690557479858,72.47,463073.715,13661.255,pytorch,float16,Unquantized,FAv2,No Kernel,OPTForCausalLM,0.903,39.08,6
298
+ bfloat16-fa2,facebook/opt-6.7b,0.025,0.0133150720596313,74.635,470586.38,13661.255,pytorch,bfloat16,Unquantized,FAv2,No Kernel,OPTForCausalLM,0.866,39.08,6
299
+ 4bit-gptq-exllama-v2-eager,EleutherAI/pythia-12b,0.277,0.2572042236328125,3.887,24331.385,8459.203,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,GPTNeoXForCausalLM,16.485,38.82,12
300
+ 4bit-gptq-exllama-v1-eager,EleutherAI/pythia-12b,0.276,0.2561628112792968,3.9,24371.271,8459.203,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,GPTNeoXForCausalLM,16.43,38.82,12
301
+ 4bit-gptq-exllama-v1-fa2,EleutherAI/pythia-12b,0.269,0.251293701171875,3.98,24888.757,8459.212,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,GPTNeoXForCausalLM,16.099,38.82,12
302
+ 4bit-gptq-exllama-v2-fa2,EleutherAI/pythia-12b,0.268,0.2509199371337891,3.987,24897.149,8459.212,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,GPTNeoXForCausalLM,16.073,38.82,12
303
+ 8bit-bnb-eager,EleutherAI/pythia-12b,0.098,0.0914063339233398,10.762,89600.155,13413.403,pytorch,float16,BnB.8bit,Eager,No Kernel,GPTNeoXForCausalLM,5.945,38.82,12
304
+ 8bit-bnb-fa2,EleutherAI/pythia-12b,0.09,0.0880455703735351,11.187,97378.632,13415.798,pytorch,float16,BnB.8bit,FAv2,No Kernel,GPTNeoXForCausalLM,5.702,38.82,12
305
+ 4bit-bnb-eager,EleutherAI/pythia-12b,0.46,0.0602019844055175,16.443,130730.615,8235.73,pytorch,float16,BnB.4bit,Eager,No Kernel,GPTNeoXForCausalLM,4.264,38.82,12
306
+ 4bit-bnb-fa2,EleutherAI/pythia-12b,0.443,0.0583598098754882,16.87,133771.859,8236.778,pytorch,float16,BnB.4bit,FAv2,No Kernel,GPTNeoXForCausalLM,4.18,38.82,12
307
+ float32-eager,EleutherAI/pythia-12b,0.414,0.0398981132507324,24.858,157882.54,48751.627,pytorch,float32,Unquantized,Eager,No Kernel,GPTNeoXForCausalLM,2.947,38.82,12
308
+ float16-eager,EleutherAI/pythia-12b,0.05,0.0286873607635498,34.549,196233.439,24655.994,pytorch,float16,Unquantized,Eager,No Kernel,GPTNeoXForCausalLM,1.863,38.82,12
309
+ bfloat16-eager,EleutherAI/pythia-12b,0.051,0.0285890560150146,34.83,198155.967,24655.994,pytorch,bfloat16,Unquantized,Eager,No Kernel,GPTNeoXForCausalLM,1.855,38.82,12
310
+ 8bit-bnb-eager,Qwen/Qwen1.5-0.5B,0.08,0.0787793884277343,12.605,114219.704,1096.74,pytorch,float16,BnB.8bit,Eager,No Kernel,Qwen2ForCausalLM,5.081,38.62,0
311
+ 8bit-bnb-fa2,Qwen/Qwen1.5-0.5B,0.079,0.0788326416015625,12.617,114250.058,1096.74,pytorch,float16,BnB.8bit,FAv2,No Kernel,Qwen2ForCausalLM,5.072,38.62,0
312
+ 8bit-bnb-sdpa,Qwen/Qwen1.5-0.5B,0.078,0.0770662384033203,12.909,115418.922,1096.74,pytorch,float16,BnB.8bit,SDPA,No Kernel,Qwen2ForCausalLM,4.96,38.62,0
313
+ 4bit-bnb-eager,Qwen/Qwen1.5-0.5B,0.061,0.0485150718688964,20.232,183674.306,943.535,pytorch,float16,BnB.4bit,Eager,No Kernel,Qwen2ForCausalLM,3.137,38.62,0
314
+ 4bit-gptq-exllama-v2-fa2,Qwen/Qwen1.5-0.5B,0.05,0.0493352966308593,20.483,176558.827,943.923,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,Qwen2ForCausalLM,3.111,38.62,0
315
+ 4bit-bnb-fa2,Qwen/Qwen1.5-0.5B,0.057,0.0466257934570312,20.996,196043.867,943.535,pytorch,float16,BnB.4bit,FAv2,No Kernel,Qwen2ForCausalLM,2.992,38.62,0
316
+ 4bit-gptq-exllama-v1-sdpa,Qwen/Qwen1.5-0.5B,0.048,0.0474982414245605,21.121,181895.806,943.923,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV1,Qwen2ForCausalLM,3.037,38.62,0
317
+ 4bit-gptq-exllama-v1-fa2,Qwen/Qwen1.5-0.5B,0.048,0.0471377906799316,21.164,177193.053,943.923,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,Qwen2ForCausalLM,3.016,38.62,0
318
+ 4bit-gptq-exllama-v1-eager,Qwen/Qwen1.5-0.5B,0.048,0.0470497283935546,21.184,177138.897,943.923,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,Qwen2ForCausalLM,3.017,38.62,0
319
+ 4bit-gptq-exllama-v2-eager,Qwen/Qwen1.5-0.5B,0.048,0.0469975051879882,21.241,176911.57,943.923,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,Qwen2ForCausalLM,3.006,38.62,0
320
+ 4bit-gptq-exllama-v2-sdpa,Qwen/Qwen1.5-0.5B,0.046,0.0457164802551269,21.693,183335.303,943.923,pytorch,float16,GPTQ.4bit,SDPA,GPTQ.ExllamaV2,Qwen2ForCausalLM,2.93,38.62,0
321
+ 4bit-bnb-sdpa,Qwen/Qwen1.5-0.5B,0.056,0.0447283210754394,22.236,200195.425,943.535,pytorch,float16,BnB.4bit,SDPA,No Kernel,Qwen2ForCausalLM,2.892,38.62,0
322
+ bfloat16-eager,Qwen/Qwen1.5-0.5B,0.021,0.0204687366485595,46.827,409237.923,1426.272,pytorch,bfloat16,Unquantized,Eager,No Kernel,Qwen2ForCausalLM,1.313,38.62,0
323
+ bfloat16-fa2,Qwen/Qwen1.5-0.5B,0.021,0.0206673927307128,48.387,410697.889,1426.272,pytorch,bfloat16,Unquantized,FAv2,No Kernel,Qwen2ForCausalLM,1.325,38.62,0
324
+ float16-fa2,Qwen/Qwen1.5-0.5B,0.021,0.0202301445007324,48.979,408205.709,1426.272,pytorch,float16,Unquantized,FAv2,No Kernel,Qwen2ForCausalLM,1.297,38.62,0
325
+ float16-eager,Qwen/Qwen1.5-0.5B,0.021,0.0199096317291259,49.469,416151.969,1426.272,pytorch,float16,Unquantized,Eager,No Kernel,Qwen2ForCausalLM,1.276,38.62,0
326
+ float32-eager,Qwen/Qwen1.5-0.5B,0.026,0.0194457607269287,51.953,445261.413,2600.839,pytorch,float32,Unquantized,Eager,No Kernel,Qwen2ForCausalLM,1.251,38.62,0
327
+ float16-sdpa,Qwen/Qwen1.5-0.5B,0.019,0.0184360961914062,53.969,444305.757,1426.272,pytorch,float16,Unquantized,SDPA,No Kernel,Qwen2ForCausalLM,1.181,38.62,0
328
+ bfloat16-sdpa,Qwen/Qwen1.5-0.5B,0.018,0.0182169609069824,54.749,452141.343,1426.272,pytorch,bfloat16,Unquantized,SDPA,No Kernel,Qwen2ForCausalLM,1.167,38.62,0
329
+ float32-sdpa,Qwen/Qwen1.5-0.5B,0.024,0.0169492473602294,58.642,474112.7,2600.839,pytorch,float32,Unquantized,SDPA,No Kernel,Qwen2ForCausalLM,1.092,38.62,0
330
+ 4bit-gptq-exllama-v2-eager,EleutherAI/pythia-6.7b,0.164,0.1514475555419921,6.599,41202.814,5239.773,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,Unknown,9.704,38.06,6
331
+ 4bit-gptq-exllama-v1-eager,EleutherAI/pythia-6.7b,0.163,0.1503180847167968,6.64,41349.32,5239.773,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,Unknown,9.642,38.06,6
332
+ 4bit-gptq-exllama-v2-fa2,EleutherAI/pythia-6.7b,0.157,0.1464412231445312,6.835,42738.233,5239.772,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,Unknown,9.376,38.06,6
333
+ 4bit-gptq-exllama-v1-fa2,EleutherAI/pythia-6.7b,0.157,0.1463849029541015,6.838,42772.663,5239.772,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,Unknown,9.372,38.06,6
334
+ 8bit-bnb-eager,EleutherAI/pythia-6.7b,0.073,0.0713328628540039,13.956,118077.572,8000.245,pytorch,float16,BnB.8bit,Eager,No Kernel,Unknown,4.584,38.06,6
335
+ 8bit-bnb-fa2,EleutherAI/pythia-6.7b,0.068,0.0687493133544921,14.493,125061.836,8002.259,pytorch,float16,BnB.8bit,FAv2,No Kernel,Unknown,4.413,38.06,6
336
+ 4bit-bnb-eager,EleutherAI/pythia-6.7b,0.28,0.0504422416687011,19.438,160030.209,5084.626,pytorch,float16,BnB.4bit,Eager,No Kernel,Unknown,3.493,38.06,6
337
+ 4bit-bnb-fa2,EleutherAI/pythia-6.7b,0.269,0.0449669113159179,22.172,178628.43,5084.625,pytorch,float16,BnB.4bit,FAv2,No Kernel,Unknown,3.106,38.06,6
338
+ 8bit-bnb-eager,EleutherAI/pythia-2.7b,0.082,0.0814673919677734,12.123,109438.11,3631.826,pytorch,float16,BnB.8bit,Eager,No Kernel,Unknown,5.259,37.09,2
339
+ 8bit-bnb-fa2,EleutherAI/pythia-2.7b,0.074,0.073444351196289,13.383,117673.854,3632.818,pytorch,float16,BnB.8bit,FAv2,No Kernel,Unknown,4.743,37.09,2
340
+ 4bit-gptq-exllama-v2-eager,EleutherAI/pythia-2.7b,0.079,0.0713359375,13.983,88336.712,2494.102,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,Unknown,4.577,37.09,2
341
+ 4bit-gptq-exllama-v1-eager,EleutherAI/pythia-2.7b,0.078,0.0711086044311523,14.034,88575.102,2494.102,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,Unknown,4.561,37.09,2
342
+ 4bit-gptq-exllama-v1-fa2,EleutherAI/pythia-2.7b,0.07,0.0636241912841796,15.69,98174.172,2494.1,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,Unknown,4.085,37.09,2
343
+ 4bit-gptq-exllama-v2-fa2,EleutherAI/pythia-2.7b,0.07,0.0634490890502929,15.714,98487.865,2494.1,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,Unknown,4.076,37.09,2
344
+ 4bit-bnb-eager,EleutherAI/pythia-2.7b,0.125,0.0529807357788085,18.687,157320.029,2358.103,pytorch,float16,BnB.4bit,Eager,No Kernel,Unknown,3.507,37.09,2
345
+ 4bit-bnb-fa2,EleutherAI/pythia-2.7b,0.114,0.0483450889587402,20.16,174315.707,2358.103,pytorch,float16,BnB.4bit,FAv2,No Kernel,Unknown,3.218,37.09,2
346
+ 8bit-bnb-fa2,facebook/opt-2.7b,0.079,0.0791275482177734,10.881,63573.38,3080.719,pytorch,float16,BnB.8bit,FAv2,No Kernel,OPTForCausalLM,5.062,36.74,2
347
+ 8bit-bnb-eager,facebook/opt-2.7b,0.085,0.0831027221679687,11.997,102075.651,3079.772,pytorch,float16,BnB.8bit,Eager,No Kernel,OPTForCausalLM,5.338,36.74,2
348
+ 4bit-bnb-eager,facebook/opt-2.7b,0.122,0.0488171501159667,20.457,170561.982,1840.677,pytorch,float16,BnB.4bit,Eager,No Kernel,OPTForCausalLM,3.203,36.74,2
349
+ 4bit-bnb-fa2,facebook/opt-2.7b,0.114,0.0462039031982421,21.547,154257.192,1840.546,pytorch,float16,BnB.4bit,FAv2,No Kernel,OPTForCausalLM,3.029,36.74,2
350
+ float16-eager,facebook/opt-2.7b,0.019,0.0166922245025634,57.574,314407.304,5540.556,pytorch,float16,Unquantized,Eager,No Kernel,OPTForCausalLM,1.088,36.74,2
351
+ bfloat16-eager,facebook/opt-2.7b,0.019,0.0163471355438232,61.368,451310.36,5540.556,pytorch,bfloat16,Unquantized,Eager,No Kernel,OPTForCausalLM,1.052,36.74,2
352
+ float32-eager,facebook/opt-2.7b,0.104,0.0163532791137695,61.369,419443.59,11168.211,pytorch,float32,Unquantized,Eager,No Kernel,OPTForCausalLM,1.131,36.74,2
353
+ float16-fa2,facebook/opt-2.7b,0.014,0.0134553604125976,74.002,526915.438,5540.548,pytorch,float16,Unquantized,FAv2,No Kernel,OPTForCausalLM,0.864,36.74,2
354
+ bfloat16-fa2,facebook/opt-2.7b,0.014,0.0131655683517456,75.814,526560.825,5540.548,pytorch,bfloat16,Unquantized,FAv2,No Kernel,OPTForCausalLM,0.844,36.74,2
355
+ 4bit-bnb-eager,facebook/xglm-7.5B,0.289,0.0512839698791503,19.234,147550.761,6018.104,pytorch,float16,BnB.4bit,Eager,No Kernel,XGLMForCausalLM,3.531,36.38,7
356
+ float32-eager,facebook/xglm-7.5B,0.283,0.0253132801055908,39.342,241439.149,30815.491,pytorch,float32,Unquantized,Eager,No Kernel,XGLMForCausalLM,1.88,36.38,7
357
+ float16-eager,facebook/xglm-7.5B,0.033,0.0182353916168212,54.67,346520.776,15412.54,pytorch,float16,Unquantized,Eager,No Kernel,XGLMForCausalLM,1.185,36.38,7
358
+ bfloat16-eager,facebook/xglm-7.5B,0.032,0.0177520637512207,55.846,351184.551,15412.54,pytorch,bfloat16,Unquantized,Eager,No Kernel,XGLMForCausalLM,1.159,36.38,7
359
+ 8bit-bnb-fa2,EleutherAI/gpt-neo-2.7B,0.095,0.0941107177734375,10.445,92094.012,3211.978,pytorch,float16,BnB.8bit,FAv2,No Kernel,GPTNeoForCausalLM,6.123,36.2,2
360
+ 8bit-bnb-eager,EleutherAI/gpt-neo-2.7B,0.093,0.0917739486694336,10.858,94364.895,3216.625,pytorch,float16,BnB.8bit,Eager,No Kernel,GPTNeoForCausalLM,5.891,36.2,2
361
+ 4bit-gptq-exllama-v1-eager,EleutherAI/gpt-neo-2.7B,0.08,0.0716933135986328,13.899,88974.602,2079.903,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,GPTNeoForCausalLM,4.603,36.2,2
362
+ 4bit-gptq-exllama-v2-eager,EleutherAI/gpt-neo-2.7B,0.079,0.071201789855957,14.009,88628.153,2079.903,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,GPTNeoForCausalLM,4.568,36.2,2
363
+ 4bit-gptq-exllama-v1-fa2,EleutherAI/gpt-neo-2.7B,0.07,0.0631685104370117,15.815,98451.236,2079.897,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,GPTNeoForCausalLM,4.051,36.2,2
364
+ 4bit-gptq-exllama-v2-fa2,EleutherAI/gpt-neo-2.7B,0.07,0.0629032974243164,15.881,98943.3,2079.897,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,GPTNeoForCausalLM,4.034,36.2,2
365
+ 4bit-bnb-eager,EleutherAI/gpt-neo-2.7B,0.122,0.0542955513000488,18.227,156511.963,1986.218,pytorch,float16,BnB.4bit,Eager,No Kernel,GPTNeoForCausalLM,3.557,36.2,2
366
+ 4bit-bnb-fa2,EleutherAI/gpt-neo-2.7B,0.112,0.0513587188720703,19.02,159217.192,1986.087,pytorch,float16,BnB.4bit,FAv2,No Kernel,GPTNeoForCausalLM,3.44,36.2,2
367
+ bfloat16-eager,EleutherAI/gpt-neo-2.7B,0.022,0.0210319366455078,47.132,355023.389,5677.722,pytorch,bfloat16,Unquantized,Eager,No Kernel,GPTNeoForCausalLM,1.351,36.2,2
368
+ float16-eager,EleutherAI/gpt-neo-2.7B,0.022,0.020853759765625,47.851,360554.821,5677.722,pytorch,float16,Unquantized,Eager,No Kernel,GPTNeoForCausalLM,1.335,36.2,2
369
+ float32-eager,EleutherAI/gpt-neo-2.7B,0.103,0.0190218238830566,52.437,359668.741,11304.033,pytorch,float32,Unquantized,Eager,No Kernel,GPTNeoForCausalLM,1.301,36.2,2
370
+ float16-fa2,EleutherAI/gpt-neo-2.7B,0.017,0.0169615364074707,58.707,429652.396,5675.077,pytorch,float16,Unquantized,FAv2,No Kernel,GPTNeoForCausalLM,1.089,36.2,2
371
+ bfloat16-fa2,EleutherAI/gpt-neo-2.7B,0.017,0.0167864322662353,58.838,441408.61,5675.077,pytorch,bfloat16,Unquantized,FAv2,No Kernel,GPTNeoForCausalLM,1.075,36.2,2
372
+ bfloat16-eager,microsoft/rho-math-1b-v0.1,0.024,0.0233349113464355,42.896,352872.568,2279.027,pytorch,bfloat16,Unquantized,Eager,No Kernel,LlamaForCausalLM,1.495,34.99,1
373
+ float16-eager,microsoft/rho-math-1b-v0.1,0.024,0.0220405769348144,45.158,355023.905,2279.42,pytorch,float16,Unquantized,Eager,No Kernel,LlamaForCausalLM,1.413,34.99,1
374
+ float32-eager,microsoft/rho-math-1b-v0.1,0.044,0.0200570888519287,49.071,366733.618,4492.921,pytorch,float32,Unquantized,Eager,No Kernel,LlamaForCausalLM,1.308,34.99,1
375
+ float32-sdpa,microsoft/rho-math-1b-v0.1,0.042,0.0181954555511474,54.752,408059.01,4492.869,pytorch,float32,Unquantized,SDPA,No Kernel,LlamaForCausalLM,1.19,34.99,1
376
+ 8bit-bnb-eager,EleutherAI/pythia-1.4b,0.061,0.0587837448120117,16.777,146210.57,2004.071,pytorch,float16,BnB.8bit,Eager,No Kernel,GPTNeoXForCausalLM,3.79,34.75,1
377
+ 8bit-bnb-fa2,EleutherAI/pythia-1.4b,0.055,0.055150592803955,17.844,154626.702,1999.766,pytorch,float16,BnB.8bit,FAv2,No Kernel,GPTNeoXForCausalLM,3.59,34.75,1
378
+ 4bit-bnb-eager,EleutherAI/pythia-1.4b,0.067,0.0400046081542968,24.431,215851.465,1406.95,pytorch,float16,BnB.4bit,Eager,No Kernel,GPTNeoXForCausalLM,2.655,34.75,1
379
+ 4bit-gptq-exllama-v2-eager,EleutherAI/pythia-1.4b,0.045,0.0405729293823242,24.598,161068.566,1491.577,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,GPTNeoXForCausalLM,2.601,34.75,1
380
+ 4bit-gptq-exllama-v1-eager,EleutherAI/pythia-1.4b,0.044,0.0403845138549804,24.708,163614.962,1491.577,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,GPTNeoXForCausalLM,2.589,34.75,1
381
+ 4bit-bnb-fa2,EleutherAI/pythia-1.4b,0.058,0.0371742706298828,26.269,230222.307,1406.95,pytorch,float16,BnB.4bit,FAv2,No Kernel,GPTNeoXForCausalLM,2.461,34.75,1
382
+ 4bit-gptq-exllama-v2-fa2,EleutherAI/pythia-1.4b,0.037,0.033570816040039,29.737,188311.256,1491.576,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,GPTNeoXForCausalLM,2.153,34.75,1
383
+ 4bit-gptq-exllama-v1-fa2,EleutherAI/pythia-1.4b,0.037,0.03340185546875,29.916,188381.362,1491.576,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,GPTNeoXForCausalLM,2.142,34.75,1
384
+ bfloat16-eager,EleutherAI/pythia-1.4b,0.022,0.02065305519104,46.501,420375.655,3188.153,pytorch,bfloat16,Unquantized,Eager,No Kernel,GPTNeoXForCausalLM,1.372,34.75,1
385
+ float16-eager,EleutherAI/pythia-1.4b,0.02,0.0190382080078125,52.361,401349.858,3188.153,pytorch,float16,Unquantized,Eager,No Kernel,GPTNeoXForCausalLM,1.22,34.75,1
386
+ bfloat16-fa2,EleutherAI/pythia-1.4b,0.018,0.0176005115509033,54.601,479466.127,3189.192,pytorch,bfloat16,Unquantized,FAv2,No Kernel,GPTNeoXForCausalLM,1.166,34.75,1
387
+ float32-eager,EleutherAI/pythia-1.4b,0.053,0.0180305919647216,55.097,427059.397,6138.652,pytorch,float32,Unquantized,Eager,No Kernel,GPTNeoXForCausalLM,1.195,34.75,1
388
+ float16-fa2,EleutherAI/pythia-1.4b,0.017,0.0165365753173828,59.578,441552.856,3189.192,pytorch,float16,Unquantized,FAv2,No Kernel,GPTNeoXForCausalLM,1.064,34.75,1
389
+ 8bit-bnb-eager,EleutherAI/pythia-1.3b,0.058,0.0571002883911132,17.3,153607.944,2004.071,pytorch,float16,BnB.8bit,Eager,No Kernel,Unknown,3.668,34.46,1
390
+ 8bit-bnb-fa2,EleutherAI/pythia-1.3b,0.056,0.0556001281738281,17.781,160446.358,1999.766,pytorch,float16,BnB.8bit,FAv2,No Kernel,Unknown,3.615,34.46,1
391
+ 4bit-gptq-exllama-v1-eager,EleutherAI/pythia-1.3b,0.045,0.0406988792419433,24.471,162898.334,1491.577,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,Unknown,2.619,34.46,1
392
+ 4bit-gptq-exllama-v2-eager,EleutherAI/pythia-1.3b,0.045,0.0406333427429199,24.504,162882.971,1491.577,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,Unknown,2.609,34.46,1
393
+ 4bit-bnb-eager,EleutherAI/pythia-1.3b,0.067,0.0396871681213378,24.768,217022.229,1406.95,pytorch,float16,BnB.4bit,Eager,No Kernel,Unknown,2.59,34.46,1
394
+ 4bit-bnb-fa2,EleutherAI/pythia-1.3b,0.057,0.0347484169006347,28.298,237695.398,1406.95,pytorch,float16,BnB.4bit,FAv2,No Kernel,Unknown,2.277,34.46,1
395
+ 4bit-gptq-exllama-v2-fa2,EleutherAI/pythia-1.3b,0.037,0.0334622726440429,29.676,187372.614,1491.576,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,Unknown,2.152,34.46,1
396
+ 4bit-gptq-exllama-v1-fa2,EleutherAI/pythia-1.3b,0.037,0.0334131202697753,29.839,187598.604,1491.576,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,Unknown,2.144,34.46,1
397
+ bfloat16-eager,EleutherAI/pythia-1.3b,0.019,0.0182722568511962,54.429,438010.527,3188.153,pytorch,bfloat16,Unquantized,Eager,No Kernel,Unknown,1.174,34.46,1
398
+ float16-eager,EleutherAI/pythia-1.3b,0.019,0.0182794246673584,54.462,436015.522,3188.153,pytorch,float16,Unquantized,Eager,No Kernel,Unknown,1.171,34.46,1
399
+ float32-eager,EleutherAI/pythia-1.3b,0.053,0.0171397113800048,58.014,435230.688,6138.652,pytorch,float32,Unquantized,Eager,No Kernel,Unknown,1.133,34.46,1
400
+ bfloat16-fa2,EleutherAI/pythia-1.3b,0.017,0.016449535369873,59.87,481869.771,3189.192,pytorch,bfloat16,Unquantized,FAv2,No Kernel,Unknown,1.069,34.46,1
401
+ float16-fa2,EleutherAI/pythia-1.3b,0.016,0.0157440004348754,63.345,487602.529,3189.192,pytorch,float16,Unquantized,FAv2,No Kernel,Unknown,1.011,34.46,1
402
+ float32-eager,facebook/xglm-4.5B,0.168,0.0249886722564697,39.254,273440.39,18903.143,pytorch,float32,Unquantized,Eager,No Kernel,XGLMForCausalLM,1.763,34.31,5
403
+ float16-eager,facebook/xglm-4.5B,0.03,0.0255989761352539,39.38,294573.831,9490.407,pytorch,float16,Unquantized,Eager,No Kernel,XGLMForCausalLM,1.644,34.31,5
404
+ bfloat16-eager,facebook/xglm-4.5B,0.028,0.0238663673400878,41.593,282201.582,9490.407,pytorch,bfloat16,Unquantized,Eager,No Kernel,XGLMForCausalLM,1.533,34.31,5
405
+ 8bit-bnb-eager,EleutherAI/gpt-neo-1.3B,0.07,0.0691435546875,14.374,124044.621,1668.145,pytorch,float16,BnB.8bit,Eager,No Kernel,GPTNeoForCausalLM,4.455,33.58,1
406
+ 8bit-bnb-fa2,EleutherAI/gpt-neo-1.3B,0.066,0.0653578262329101,15.18,131650.44,1666.097,pytorch,float16,BnB.8bit,FAv2,No Kernel,GPTNeoForCausalLM,4.196,33.58,1
407
+ 4bit-gptq-exllama-v2-eager,EleutherAI/gpt-neo-1.3B,0.047,0.0414115829467773,23.973,157244.726,1168.485,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,GPTNeoForCausalLM,2.66,33.58,1
408
+ 4bit-gptq-exllama-v1-eager,EleutherAI/gpt-neo-1.3B,0.047,0.0414136314392089,24.035,161898.336,1168.485,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,GPTNeoForCausalLM,2.662,33.58,1
409
+ 4bit-bnb-eager,EleutherAI/gpt-neo-1.3B,0.067,0.0408975372314453,24.039,206590.25,1117.748,pytorch,float16,BnB.4bit,Eager,No Kernel,GPTNeoForCausalLM,2.674,33.58,1
410
+ 4bit-gptq-exllama-v2-fa2,EleutherAI/gpt-neo-1.3B,0.038,0.0374609909057617,26.4,174804.699,1168.484,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,GPTNeoForCausalLM,2.42,33.58,1
411
+ 4bit-bnb-fa2,EleutherAI/gpt-neo-1.3B,0.057,0.0374231033325195,26.5,227909.248,1117.617,pytorch,float16,BnB.4bit,FAv2,No Kernel,GPTNeoForCausalLM,2.42,33.58,1
412
+ 4bit-gptq-exllama-v1-fa2,EleutherAI/gpt-neo-1.3B,0.038,0.0371292152404785,26.834,175035.933,1168.484,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,GPTNeoForCausalLM,2.378,33.58,1
413
+ float16-eager,EleutherAI/gpt-neo-1.3B,0.017,0.0154593276977539,64.567,482897.311,2885.485,pytorch,float16,Unquantized,Eager,No Kernel,GPTNeoForCausalLM,0.992,33.58,1
414
+ bfloat16-eager,EleutherAI/gpt-neo-1.3B,0.017,0.015388671875,64.702,492113.382,2885.485,pytorch,bfloat16,Unquantized,Eager,No Kernel,GPTNeoForCausalLM,0.985,33.58,1
415
+ float32-eager,EleutherAI/gpt-neo-1.3B,0.053,0.0148541440963745,67.233,506216.396,5626.042,pytorch,float32,Unquantized,Eager,No Kernel,GPTNeoForCausalLM,0.986,33.58,1
416
+ float16-fa2,EleutherAI/gpt-neo-1.3B,0.013,0.0125967359542846,78.565,607014.857,2884.409,pytorch,float16,Unquantized,FAv2,No Kernel,GPTNeoForCausalLM,0.808,33.58,1
417
+ bfloat16-fa2,EleutherAI/gpt-neo-1.3B,0.013,0.0125788164138793,79.193,602191.64,2884.409,pytorch,bfloat16,Unquantized,FAv2,No Kernel,GPTNeoForCausalLM,0.806,33.58,1
418
+ 4bit-gptq-exllama-v1-eager,EleutherAI/polyglot-ko-12.8b,0.308,0.2859171752929687,3.496,21859.014,8808.924,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,GPTNeoXForCausalLM,18.322,33.33,13
419
+ 4bit-gptq-exllama-v2-eager,EleutherAI/polyglot-ko-12.8b,0.308,0.2855116882324219,3.501,21895.97,8809.118,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,GPTNeoXForCausalLM,18.299,33.33,13
420
+ 4bit-gptq-exllama-v2-fa2,EleutherAI/polyglot-ko-12.8b,0.3,0.279900146484375,3.575,22348.191,8808.933,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,GPTNeoXForCausalLM,17.932,33.33,13
421
+ 4bit-gptq-exllama-v1-fa2,EleutherAI/polyglot-ko-12.8b,0.299,0.2790911865234375,3.581,22362.209,8808.933,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,GPTNeoXForCausalLM,17.886,33.33,13
422
+ 8bit-bnb-fa2,EleutherAI/polyglot-ko-12.8b,0.094,0.0925911026000976,10.652,89780.254,14373.646,pytorch,float16,BnB.8bit,FAv2,No Kernel,GPTNeoXForCausalLM,5.998,33.33,13
423
+ 8bit-bnb-eager,EleutherAI/polyglot-ko-12.8b,0.094,0.0915855331420898,10.911,93144.898,14370.872,pytorch,float16,BnB.8bit,Eager,No Kernel,GPTNeoXForCausalLM,5.878,33.33,13
424
+ 4bit-bnb-eager,EleutherAI/polyglot-ko-12.8b,0.505,0.0626718711853027,15.64,120462.207,8556.998,pytorch,float16,BnB.4bit,Eager,No Kernel,GPTNeoXForCausalLM,4.526,33.33,13
425
+ 4bit-bnb-fa2,EleutherAI/polyglot-ko-12.8b,0.491,0.0563199996948242,17.643,131995.089,8556.997,pytorch,float16,BnB.4bit,FAv2,No Kernel,GPTNeoXForCausalLM,4.06,33.33,13
426
+ float32-eager,EleutherAI/polyglot-ko-12.8b,0.454,0.0427304954528808,23.352,145631.002,53088.639,pytorch,float32,Unquantized,Eager,No Kernel,GPTNeoXForCausalLM,3.147,33.33,13
427
+ bfloat16-eager,EleutherAI/polyglot-ko-12.8b,0.057,0.0305438728332519,32.533,205417.545,26864.106,pytorch,bfloat16,Unquantized,Eager,No Kernel,GPTNeoXForCausalLM,1.984,33.33,13
428
+ float16-eager,EleutherAI/polyglot-ko-12.8b,0.056,0.0305868797302246,32.546,203009.187,26864.106,pytorch,float16,Unquantized,Eager,No Kernel,GPTNeoXForCausalLM,1.986,33.33,13
429
+ bfloat16-fa2,EleutherAI/polyglot-ko-12.8b,0.05,0.0256614398956298,38.823,243811.333,26866.388,pytorch,bfloat16,Unquantized,FAv2,No Kernel,GPTNeoXForCausalLM,1.671,33.33,13
430
+ float16-fa2,EleutherAI/polyglot-ko-12.8b,0.049,0.0256194553375244,38.83,241693.597,26866.388,pytorch,float16,Unquantized,FAv2,No Kernel,GPTNeoXForCausalLM,1.668,33.33,13
431
+ 8bit-bnb-eager,EleutherAI/pythia-410m,0.058,0.0563548164367675,17.422,154115.627,771.487,pytorch,float16,BnB.8bit,Eager,No Kernel,GPTNeoXForCausalLM,3.689,31.55,0
432
+ 8bit-bnb-fa2,EleutherAI/pythia-410m,0.053,0.0530503692626953,18.696,166297.453,775.103,pytorch,float16,BnB.8bit,FAv2,No Kernel,GPTNeoXForCausalLM,3.428,31.55,0
433
+ 4bit-bnb-eager,EleutherAI/pythia-410m,0.045,0.0379965438842773,25.81,231240.067,622.611,pytorch,float16,BnB.4bit,Eager,No Kernel,GPTNeoXForCausalLM,2.453,31.55,0
434
+ 4bit-bnb-fa2,EleutherAI/pythia-410m,0.042,0.0362158088684082,27.379,247916.895,622.924,pytorch,float16,BnB.4bit,FAv2,No Kernel,GPTNeoXForCausalLM,2.345,31.55,0
435
+ 4bit-gptq-exllama-v2-eager,EleutherAI/pythia-410m,0.035,0.0342548484802246,29.117,238539.315,644.02,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,GPTNeoXForCausalLM,2.193,31.55,0
436
+ 4bit-gptq-exllama-v1-eager,EleutherAI/pythia-410m,0.035,0.0340756492614746,29.258,235878.211,644.02,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,GPTNeoXForCausalLM,2.18,31.55,0
437
+ 4bit-gptq-exllama-v1-fa2,EleutherAI/pythia-410m,0.032,0.031797248840332,31.358,253074.129,644.019,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,GPTNeoXForCausalLM,2.035,31.55,0
438
+ 4bit-gptq-exllama-v2-fa2,EleutherAI/pythia-410m,0.032,0.0313794555664062,31.724,256472.321,644.019,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,GPTNeoXForCausalLM,2.01,31.55,0
439
+ 8bit-bnb-eager,facebook/opt-350m,0.065,0.063287296295166,15.599,136832.94,446.104,pytorch,float16,BnB.8bit,Eager,No Kernel,OPTForCausalLM,4.065,30.01,0
440
+ 8bit-bnb-fa2,facebook/opt-350m,0.061,0.0607703056335449,16.393,141474.474,446.104,pytorch,float16,BnB.8bit,FAv2,No Kernel,OPTForCausalLM,3.909,30.01,0
441
+ 4bit-bnb-eager,facebook/opt-350m,0.048,0.0381931533813476,26.358,228702.72,298.885,pytorch,float16,BnB.4bit,Eager,No Kernel,OPTForCausalLM,2.433,30.01,0
442
+ 4bit-bnb-fa2,facebook/opt-350m,0.044,0.0354037742614746,28.14,240528.176,298.884,pytorch,float16,BnB.4bit,FAv2,No Kernel,OPTForCausalLM,2.284,30.01,0
443
+ float16-eager,facebook/opt-350m,0.014,0.0123540477752685,80.154,654730.729,749.22,pytorch,float16,Unquantized,Eager,No Kernel,OPTForCausalLM,0.796,30.01,0
444
+ bfloat16-eager,facebook/opt-350m,0.013,0.0120238075256347,82.913,673691.11,749.22,pytorch,bfloat16,Unquantized,Eager,No Kernel,OPTForCausalLM,0.772,30.01,0
445
+ float32-eager,facebook/opt-350m,0.021,0.0118620157241821,84.195,507037.826,1491.807,pytorch,float32,Unquantized,Eager,No Kernel,OPTForCausalLM,0.77,30.01,0
446
+ bfloat16-fa2,facebook/opt-350m,0.011,0.0102732801437377,95.412,794640.402,749.216,pytorch,bfloat16,Unquantized,FAv2,No Kernel,OPTForCausalLM,0.669,30.01,0
447
+ float16-fa2,facebook/opt-350m,0.011,0.0100710401535034,98.814,821606.22,749.216,pytorch,float16,Unquantized,FAv2,No Kernel,OPTForCausalLM,0.647,30.01,0
448
+ float16-eager,facebook/xglm-564M,0.018,0.0141875195503234,68.478,624894.821,1324.762,pytorch,float16,Unquantized,Eager,No Kernel,XGLMForCausalLM,0.935,29.55,0
449
+ float32-eager,facebook/xglm-564M,0.028,0.0125404157638549,79.162,559873.28,2642.978,pytorch,float32,Unquantized,Eager,No Kernel,XGLMForCausalLM,0.82,29.55,0
450
+ bfloat16-eager,facebook/xglm-564M,0.014,0.0123084802627563,80.473,667864.13,1324.762,pytorch,bfloat16,Unquantized,Eager,No Kernel,XGLMForCausalLM,0.79,29.55,0
451
+ 8bit-bnb-eager,EleutherAI/gpt-neo-125m,0.037,0.036387840270996,26.831,241510.078,271.428,pytorch,float16,BnB.8bit,Eager,No Kernel,GPTNeoForCausalLM,2.386,29.47,0
452
+ 8bit-bnb-fa2,EleutherAI/gpt-neo-125m,0.034,0.0346511344909667,28.303,251476.298,270.703,pytorch,float16,BnB.8bit,FAv2,No Kernel,GPTNeoForCausalLM,2.265,29.47,0
453
+ 4bit-bnb-eager,EleutherAI/gpt-neo-125m,0.027,0.0216524791717529,44.66,398142.414,229.949,pytorch,float16,BnB.4bit,Eager,No Kernel,GPTNeoForCausalLM,1.419,29.47,0
454
+ 4bit-gptq-exllama-v2-eager,EleutherAI/gpt-neo-125m,0.023,0.0218941440582275,46.168,411474.529,242.383,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,GPTNeoForCausalLM,1.407,29.47,0
455
+ 4bit-gptq-exllama-v1-eager,EleutherAI/gpt-neo-125m,0.022,0.0210493431091308,46.909,403596.387,242.383,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,GPTNeoForCausalLM,1.359,29.47,0
456
+ 4bit-bnb-fa2,EleutherAI/gpt-neo-125m,0.025,0.0198512649536132,48.346,422284.363,229.306,pytorch,float16,BnB.4bit,FAv2,No Kernel,GPTNeoForCausalLM,1.31,29.47,0
457
+ 4bit-gptq-exllama-v2-fa2,EleutherAI/gpt-neo-125m,0.02,0.0192552967071533,51.475,433415.328,242.382,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,GPTNeoForCausalLM,1.235,29.47,0
458
+ 4bit-gptq-exllama-v1-fa2,EleutherAI/gpt-neo-125m,0.02,0.019095552444458,52.071,424968.084,242.382,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,GPTNeoForCausalLM,1.226,29.47,0
459
+ float16-eager,EleutherAI/gpt-neo-125m,0.009,0.0082959361076354,119.461,1006877.327,363.873,pytorch,float16,Unquantized,Eager,No Kernel,GPTNeoForCausalLM,0.533,29.47,0
460
+ bfloat16-eager,EleutherAI/gpt-neo-125m,0.009,0.008196096420288,121.822,1003006.109,363.873,pytorch,bfloat16,Unquantized,Eager,No Kernel,GPTNeoForCausalLM,0.526,29.47,0
461
+ float32-eager,EleutherAI/gpt-neo-125m,0.01,0.0079923200607299,124.599,1064993.47,657.858,pytorch,float32,Unquantized,Eager,No Kernel,GPTNeoForCausalLM,0.514,29.47,0
462
+ float16-fa2,EleutherAI/gpt-neo-125m,0.007,0.0067799038887023,145.789,1233644.886,363.871,pytorch,float16,Unquantized,FAv2,No Kernel,GPTNeoForCausalLM,0.436,29.47,0
463
+ bfloat16-fa2,EleutherAI/gpt-neo-125m,0.007,0.0066672639846801,149.464,1249611.184,363.871,pytorch,bfloat16,Unquantized,FAv2,No Kernel,GPTNeoForCausalLM,0.427,29.47,0
464
+ 8bit-bnb-eager,facebook/opt-125m,0.032,0.0316375045776367,31.45,271342.806,220.298,pytorch,float16,BnB.8bit,Eager,No Kernel,OPTForCausalLM,2.028,29.15,0
465
+ 8bit-bnb-fa2,facebook/opt-125m,0.03,0.0306268157958984,32.544,284216.679,219.642,pytorch,float16,BnB.8bit,FAv2,No Kernel,OPTForCausalLM,1.962,29.15,0
466
+ 4bit-bnb-eager,facebook/opt-125m,0.025,0.0195266551971435,50.783,444506.087,178.504,pytorch,float16,BnB.4bit,Eager,No Kernel,OPTForCausalLM,1.27,29.15,0
467
+ 4bit-bnb-fa2,facebook/opt-125m,0.023,0.0185784320831298,53.029,481425.024,178.503,pytorch,float16,BnB.4bit,FAv2,No Kernel,OPTForCausalLM,1.199,29.15,0
468
+ bfloat16-fa2,facebook/opt-125m,0.012,0.0114432001113891,87.225,1088969.19,312.411,pytorch,bfloat16,Unquantized,FAv2,No Kernel,OPTForCausalLM,0.74,29.15,0
469
+ float16-eager,facebook/opt-125m,0.008,0.0068095998764038,144.454,892300.781,312.416,pytorch,float16,Unquantized,Eager,No Kernel,OPTForCausalLM,0.437,29.15,0
470
+ float32-eager,facebook/opt-125m,0.009,0.006565887928009,151.239,838505.545,601.352,pytorch,float32,Unquantized,Eager,No Kernel,OPTForCausalLM,0.423,29.15,0
471
+ bfloat16-eager,facebook/opt-125m,0.007,0.0065126399993896,153.138,1254978.241,312.416,pytorch,bfloat16,Unquantized,Eager,No Kernel,OPTForCausalLM,0.418,29.15,0
472
+ float16-fa2,facebook/opt-125m,0.006,0.0058081278800964,169.275,1492424.358,312.411,pytorch,float16,Unquantized,FAv2,No Kernel,OPTForCausalLM,0.372,29.15,0
473
+ 8bit-bnb-eager,EleutherAI/pythia-160m,0.031,0.0315013122558593,31.118,269934.929,376.32,pytorch,float16,BnB.8bit,Eager,No Kernel,GPTNeoXForCausalLM,2.031,29.02,0
474
+ 8bit-bnb-fa2,EleutherAI/pythia-160m,0.031,0.0284067840576171,34.413,304622.847,375.329,pytorch,float16,BnB.8bit,FAv2,No Kernel,GPTNeoXForCausalLM,1.842,29.02,0
475
+ 4bit-bnb-eager,EleutherAI/pythia-160m,0.024,0.0205936641693115,47.497,416727.952,327.544,pytorch,float16,BnB.4bit,Eager,No Kernel,GPTNeoXForCausalLM,1.355,29.02,0
476
+ 4bit-gptq-exllama-v2-eager,EleutherAI/pythia-160m,0.02,0.0202199039459228,48.236,414443.1,340.903,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,GPTNeoXForCausalLM,1.299,29.02,0
477
+ 4bit-gptq-exllama-v1-eager,EleutherAI/pythia-160m,0.021,0.0198952960968017,50.093,432007.015,340.903,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,GPTNeoXForCausalLM,1.265,29.02,0
478
+ 4bit-gptq-exllama-v1-fa2,EleutherAI/pythia-160m,0.021,0.0189532165527343,51.969,456213.069,340.902,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,GPTNeoXForCausalLM,1.237,29.02,0
479
+ 4bit-bnb-fa2,EleutherAI/pythia-160m,0.022,0.0183429126739501,53.354,469893.169,327.815,pytorch,float16,BnB.4bit,FAv2,No Kernel,GPTNeoXForCausalLM,1.191,29.02,0
480
+ 4bit-gptq-exllama-v2-fa2,EleutherAI/pythia-160m,0.017,0.0163215351104736,60.986,506696.229,340.902,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,GPTNeoXForCausalLM,1.046,29.02,0
481
+ 8bit-bnb-fa2,EleutherAI/pythia-70m,0.015,0.0151193599700927,63.768,607650.688,197.947,pytorch,float16,BnB.8bit,FAv2,No Kernel,Unknown,0.999,28.93,0
482
+ 8bit-bnb-eager,EleutherAI/pythia-70m,0.015,0.0149831676483154,65.775,618526.295,197.768,pytorch,float16,BnB.8bit,Eager,No Kernel,Unknown,0.976,28.93,0
483
+ 4bit-bnb-eager,EleutherAI/pythia-70m,0.012,0.0100229120254516,99.119,862794.177,188.553,pytorch,float16,BnB.4bit,Eager,No Kernel,Unknown,0.644,28.93,0
484
+ 4bit-bnb-fa2,EleutherAI/pythia-70m,0.012,0.0097966079711914,99.976,851225.469,188.745,pytorch,float16,BnB.4bit,FAv2,No Kernel,Unknown,0.637,28.93,0
485
+ 4bit-gptq-exllama-v1-eager,EleutherAI/pythia-70m,0.01,0.0094412803649902,104.628,876577.936,193.845,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV1,Unknown,0.609,28.93,0
486
+ 4bit-gptq-exllama-v2-eager,EleutherAI/pythia-70m,0.01,0.0093061122894287,107.004,914642.949,193.845,pytorch,float16,GPTQ.4bit,Eager,GPTQ.ExllamaV2,Unknown,0.595,28.93,0
487
+ 4bit-gptq-exllama-v2-fa2,EleutherAI/pythia-70m,0.009,0.0086067199707031,115.952,991460.772,193.844,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV2,Unknown,0.551,28.93,0
488
+ 4bit-gptq-exllama-v1-fa2,EleutherAI/pythia-70m,0.009,0.0085657596588134,116.298,998412.513,193.844,pytorch,float16,GPTQ.4bit,FAv2,GPTQ.ExllamaV1,Unknown,0.549,28.93,0
489
+ float16-fa2,openai-community/gpt2,0.007,0.0067194881439208,147.371,1212590.352,328.799,pytorch,float16,Unquantized,FAv2,No Kernel,GPT2LMHeadModel,0.432,28.53,0