Spaces:
Runtime error
Runtime error
File size: 1,950 Bytes
b782462 a153b62 b782462 a153b62 e01b14a a3733f0 e01b14a a153b62 05bf560 a153b62 e01b14a a153b62 b782462 e01b14a a153b62 e01b14a a153b62 e01b14a a153b62 b782462 a153b62 7a01f89 a153b62 e01b14a a153b62 b782462 e01b14a a153b62 e01b14a a153b62 b782462 a153b62 e01b14a a153b62 e01b14a a153b62 e01b14a a153b62 b782462 a153b62 b782462 e01b14a b782462 a153b62 b782462 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
[
{
"model": "GPT-4o",
"Average": 70.15,
"MMLU": 70.09,
"ARC": 86.31,
"WinoGrande": 72.22,
"PIQA": 60.34,
"CommonsenseQA": 70.28,
"Race": 67.87,
"MedMCQA": 57.85,
"OpenkookQA": 67.21
},
{
"model": "GPT-4-1106-preview",
"Average": 65.93,
"MMLU": 74.77,
"ARC": 82.68,
"WinoGrande": 66.22,
"PIQA": 61.64,
"CommonsenseQA": 62.96,
"Race": 67.05,
"MedMCQA": 51.81,
"OpenkookQA": 60.29
},
{
"model": "Claude-3 Opus",
"Average": 62.53,
"MMLU": 70.23,
"ARC": 75.47,
"WinoGrande": 63.54,
"PIQA": 59.05,
"CommonsenseQA": 63.66,
"Race": 66.22,
"MedMCQA": 49.14,
"OpenkookQA": 52.95
},
{
"model": "Mistral Large",
"Average": 60.48,
"MMLU": 68.76,
"ARC": 72.32,
"WinoGrande": 56.83,
"PIQA": 61.21,
"CommonsenseQA": 55.35,
"Race": 70.17,
"MedMCQA": 43.44,
"OpenkookQA": 58.66
},
{
"model": "GPT-3.5",
"Average": 60.32,
"MMLU": 65.38,
"ARC": 78.24,
"WinoGrande": 64.56,
"PIQA": 54.89,
"CommonsenseQA": 67.89,
"Race": 60.11,
"MedMCQA": 41.42,
"OpenkookQA": 49.90
},
{
"model": "Gemini 1.0 Pro",
"Average": 54.06,
"MMLU": 56.04,
"ARC": 72.35,
"WinoGrande": 56.35,
"PIQA": 47.70,
"CommonsenseQA": 50.56,
"Race": 61.02,
"MedMCQA": 35.89,
"OpenkookQA": 52.55
},
{
"model": "Llama3-70b-instruct",
"Average": 52.92,
"MMLU": 59.67,
"ARC": 67.09,
"WinoGrande": 57.14,
"PIQA": 43.10,
"CommonsenseQA": 55.49,
"Race": 58.21,
"MedMCQA": 41.67,
"OpenkookQA": 40.94
}
]