[ { "model": "Phi-3-mini-128k-instruct (3.8B)", "Average": 40.00, "MMLU": 36.97, "ARC":60.94, "WinoGrande": 46.88, "PiQA": 32.04, "CommonsenseQA": 49.15, "Race": 37.81, "MedMCQA": 22.61, "OpenkookQA": 33.60 }, { "model": "Qwen1.5 (1.8B)", "Average": 21.68, "MMLU": 9.99, "ARC":15.84 , "WinoGrande": 40.96, "PiQA": 15.52, "CommonsenseQA": 31.13, "Race": 34.91, "MedMCQA": 4.7, "OpenkookQA": 20.37 }, { "model": "Gemma (2B)", "Average": 16.66, "MMLU": 17.52, "ARC":23.93, "WinoGrande": 16.10, "PiQA": 15.09, "CommonsenseQA": 27.46, "Race": 14.32, "MedMCQA": 4.57, "OpenkookQA": 14.26 }, { "model": "SlimPajama-DC (1.3B)", "Average": 9.60, "MMLU": 9.22, "ARC":14.95, "WinoGrande": 14.76, "PiQA": 5.32, "CommonsenseQA": 9.01, "Race": 16.19, "MedMCQA": 1.68, "OpenkookQA": 5.70 }, { "model": "RedPajama (1B)", "Average": 9.00, "MMLU": 9.21, "ARC":13.5, "WinoGrande": 16.97, "PiQA": 0.86, "CommonsenseQA": 11.41, "Race": 14.35, "MedMCQA": 1.86, "OpenkookQA": 3.87 }, { "model": "OLMo (1.2B)", "Average": 8.85, "MMLU": 8.54, "ARC":13.18, "WinoGrande": 6.16, "PiQA": 8.05, "CommonsenseQA": 13.10, "Race": 13.61, "MedMCQA": 2.07, "OpenkookQA": 6.11 }, { "model": "Pythia (1.4B)", "Average": 8.79, "MMLU": 9.66, "ARC":14.69, "WinoGrande": 11.52, "PiQA": 4.17, "CommonsenseQA": 9.01, "Race": 12.76, "MedMCQA": 3.19, "OpenkookQA": 5.30 }, { "model": "TinyLLama (1.1B)", "Average": 8.45, "MMLU": 8.94, "ARC":13.31, "WinoGrande": 12.23, "PiQA": 3.59, "CommonsenseQA": 6.06, "Race": 16.7, "MedMCQA": 2.07, "OpenkookQA": 4.68 }, { "model": "OPT (1.3B)", "Average": 7.89, "MMLU": 7.40, "ARC":11.83, "WinoGrande": 12.47, "PiQA": 4.48, "CommonsenseQA": 7.61, "Race": 13.61, "MedMCQA": 1.25, "OpenkookQA": 4.48 }, { "model": "GPT-Neo (1.3B)", "Average": 7.42, "MMLU": 6.94, "ARC": 6.69, "WinoGrande": 10.81, "PiQA": 4.31, "CommonsenseQA": 6.34, "Race": 13.75, "MedMCQA": 2.63, "OpenkookQA": 4.89 }, { "model": "Cerebras-GPT (1.3B)", "Average": 4.86, "MMLU": 5.37, "ARC":4.43, "WinoGrande": 9.31, "PiQA": 2.16, "CommonsenseQA": 6.2, "Race": 6.9, "MedMCQA": 1.04, "OpenkookQA": 3.46 } ]