[ { "model": "GPT-4", "Average": 65.94, "MMLU": 74.8, "WinoGrande": 66.2, "PiQA": 61.6, "CommonsenseQA": 63.0, "Race": 67.0, "MedMCQA": 51.8, "OpenkookQA": 60.3 }, { "model": "Claude-3 Opus", "Average": 62.64, "MMLU": 70.4, "WinoGrande": 63.5, "PiQA": 59.1, "CommonsenseQA": 63.7, "Race": 66.2, "MedMCQA": 49.1, "OpenkookQA": 54.0 }, { "model": "Mistral Large", "Average": 61.45, "MMLU": 67.8, "WinoGrande": 56.8, "PiQA": 61.2, "CommonsenseQA": 55.4, "Race": 70.1, "MedMCQA": 43.4, "OpenkookQA": 58.7 }, { "model": "GPT-3.5", "Average": 59.06, "MMLU": 65.4, "WinoGrande": 54.6, "PiQA": 54.9, "CommonsenseQA": 67.9, "Race": 60.1, "MedMCQA": 41.4, "OpenkookQA": 49.9 }, { "model": "Gemini Pro", "Average": 54.45, "MMLU": 57.7, "WinoGrande": 56.4, "PiQA": 47.7, "CommonsenseQA": 50.6, "Race": 61.0, "MedMCQA": 37.5, "OpenkookQA": 52.5 }, { "model": "Llama3-70b-instruct", "Average": 54.06, "MMLU": 64.67, "WinoGrande": 57.14, "PiQA": 43.1, "CommonsenseQA": 55.49, "Race": 58.21, "MedMCQA": 41.67, "OpenkookQA": 41.93 } ]