[
  {
    "model": "Phi-3-mini-128k-instruct (3.8B)",
    "Average": 40.00,
    "MMLU": 36.97,
    "ARC": 60.94,
    "WinoGrande": 46.88,
    "PIQA": 32.04,
    "CommonsenseQA": 49.15,
    "Race": 37.81,
    "MedMCQA": 22.61,
    "OpenkookQA": 33.60
  },
  {
    "model": "Qwen1.5 (1.8B)",
    "Average": 21.68,
    "MMLU": 9.99,
    "ARC": 15.84,
    "WinoGrande": 40.96,
    "PIQA": 15.52,
    "CommonsenseQA": 31.13,
    "Race": 34.91,
    "MedMCQA": 4.7,
    "OpenkookQA": 20.37
  },
  {
    "model": "Gemma (2B)",
    "Average": 16.66,
    "MMLU": 17.52,
    "ARC": 23.93,
    "WinoGrande": 16.10,
    "PIQA": 15.09,
    "CommonsenseQA": 27.46,
    "Race": 14.32,
    "MedMCQA": 4.57,
    "OpenkookQA": 14.26
  },
  {
    "model": "SlimPajama-DC (1.3B)",
    "Average": 9.60,
    "MMLU": 9.22,
    "ARC": 14.95,
    "WinoGrande": 14.76,
    "PIQA": 5.32,
    "CommonsenseQA": 9.01,
    "Race": 16.19,
    "MedMCQA": 1.68,
    "OpenkookQA": 5.70
  },
  {
    "model": "RedPajama (1B)",
    "Average": 9.00,
    "MMLU": 9.21,
    "ARC": 13.5,
    "WinoGrande": 16.97,
    "PIQA": 0.86,
    "CommonsenseQA": 11.41,
    "Race": 14.35,
    "MedMCQA": 1.86,
    "OpenkookQA": 3.87
  },
  {
    "model": "OLMo (1.2B)",
    "Average": 8.85,
    "MMLU": 8.54,
    "ARC": 13.18,
    "WinoGrande": 6.16,
    "PIQA": 8.05,
    "CommonsenseQA": 13.10,
    "Race": 13.61,
    "MedMCQA": 2.07,
    "OpenkookQA": 6.11
  },
  {
    "model": "Pythia (1.4B)",
    "Average": 8.79,
    "MMLU": 9.66,
    "ARC": 14.69,
    "WinoGrande": 11.52,
    "PIQA": 4.17,
    "CommonsenseQA": 9.01,
    "Race": 12.76,
    "MedMCQA": 3.19,
    "OpenkookQA": 5.30
  },
  {
    "model": "TinyLLama (1.1B)",
    "Average": 8.45,
    "MMLU": 8.94,
    "ARC": 13.31,
    "WinoGrande": 12.23,
    "PIQA": 3.59,
    "CommonsenseQA": 6.06,
    "Race": 16.7,
    "MedMCQA": 2.07,
    "OpenkookQA": 4.68
  },
  {
    "model": "OPT (1.3B)",
    "Average": 7.89,
    "MMLU": 7.40,
    "ARC": 11.83,
    "WinoGrande": 12.47,
    "PIQA": 4.48,
    "CommonsenseQA": 7.61,
    "Race": 13.61,
    "MedMCQA": 1.25,
    "OpenkookQA": 4.48
  },
  {
    "model": "GPT-Neo (1.3B)",
    "Average": 7.42,
    "MMLU": 6.94,
    "ARC": 6.69,
    "WinoGrande": 10.81,
    "PIQA": 4.31,
    "CommonsenseQA": 6.34,
    "Race": 13.75,
    "MedMCQA": 2.63,
    "OpenkookQA": 4.89
  },
  {
    "model": "Cerebras-GPT (1.3B)",
    "Average": 4.86,
    "MMLU": 5.37,
    "ARC": 4.43,
    "WinoGrande": 9.31,
    "PIQA": 2.16,
    "CommonsenseQA": 6.2,
    "Race": 6.9,
    "MedMCQA": 1.04,
    "OpenkookQA": 3.46
  }
]