[ {"config": { "model_name": "ChatGPT-4o-latest (2024-09-03)", "organization": "OpenAI", "license": "Proprietary", "knowledge_cutoff": "2023-10" }, "results": { "math-algebra": {"Score": 99.19484702, "Avg Rank": 1.666666667, "Min Rank": 1, "Max Rank": 3}, "math-probability": {"Score": 100, "Avg Rank": 1, "Min Rank": 1, "Max Rank": 1}, "reasoning-logical": {"Avg Rank": 1, "Min Rank": 1, "Max Rank": 1}, "overall": {"Avg Rank": 2, "Min Rank": 2, "Max Rank": 2} }}, {"config": { "model_name": "gpt-4o-2024-08-06", "organization": "OpenAI", "license": "Proprietary", "knowledge_cutoff": "2023-10" }, "results": { "math-algebra": {"Score": 98.38969404, "Avg Rank": 1.666666667, "Min Rank": 1, "Max Rank": 2}, "math-probability": {"Score": 96.49758454, "Avg Rank": 2.666666667, "Min Rank": 2, "Max Rank": 4}, "reasoning-logical": {"Avg Rank": 4.333333333, "Min Rank": 3, "Max Rank": 5}, "overall": {"Avg Rank": 7.33, "Min Rank": 4, "Max Rank": 9} }}, {"config": { "model_name": "gpt-4o-2024-05-13", "organization": "OpenAI", "license": "Proprietary", "knowledge_cutoff": "2023-10" }, "results": { "math-algebra": {"Score": 98.15480333, "Avg Rank": 2.666666667, "Min Rank": 2, "Max Rank": 3}, "math-probability": {"Score": 94.83939431, "Avg Rank": 3.666666667, "Min Rank": 2, "Max Rank": 5}, "reasoning-logical": {"Avg Rank": 6.333333333, "Min Rank": 3, "Max Rank": 8}, "overall": {"Avg Rank": 7.67, "Min Rank": 7, "Max Rank": 9} }}, {"config": { "model_name": "gpt-4-turbo-2024-04-09", "organization": "OpenAI", "license": "Proprietary", "knowledge_cutoff": "2023-12" }, "results": { "math-algebra": {"Score": 96.03195879, "Avg Rank": 4, "Min Rank": 4, "Max Rank": 4}, "math-probability": {"Score": 93.59903382, "Avg Rank": 6.666666667, "Min Rank": 6, "Max Rank": 8}, "reasoning-logical": {"Avg Rank": 4, "Min Rank": 2, "Max Rank": 7}, "overall": {"Avg Rank": 6, "Min Rank": 5, "Max Rank": 8} }}, {"config": { "model_name": "gemini-1.5-pro-001", "organization": "Google", "license": "Proprietary", 
"knowledge_cutoff": "2024-01" }, "results": { "math-algebra": {"Score": 94.7572213, "Avg Rank": 5, "Min Rank": 5, "Max Rank": 5}, "math-probability": {"Score": 91.42512077, "Avg Rank": 9.666666667, "Min Rank": 9, "Max Rank": 10}, "reasoning-logical": {"Avg Rank": 9.666666667, "Min Rank": 9, "Max Rank": 11}, "overall": {"Avg Rank": 14, "Min Rank": 13, "Max Rank": 15} }}, {"config": { "model_name": "qwen2-72b-instruct", "organization": "Alibaba", "license": "Qianwen LICENSE", "knowledge_cutoff": "2024-02" }, "results": { "math-algebra": {"Score": 93.88818605, "Avg Rank": 6, "Min Rank": 6, "Max Rank": 6}, "math-probability": {"Score": 91.54326174, "Avg Rank": 4, "Min Rank": 3, "Max Rank": 5}, "reasoning-logical": {"Avg Rank": 15.66666667, "Min Rank": 15, "Max Rank": 17}, "overall": {"Avg Rank": 17, "Min Rank": 17, "Max Rank": 17} }}, {"config": { "model_name": "gpt-4o-mini-2024-07-18", "organization": "OpenAI", "license": "Proprietary", "knowledge_cutoff": "2024-07" }, "results": { "math-algebra": {"Score": 93.22073596, "Avg Rank": 7, "Min Rank": 7, "Max Rank": 7}, "math-probability": {"Score": 92.17351456, "Avg Rank": 3.666666667, "Min Rank": 3, "Max Rank": 5}, "reasoning-logical": {"Avg Rank": 9.666666667, "Min Rank": 9, "Max Rank": 10}, "overall": {"Avg Rank": 7, "Min Rank": 5, "Max Rank": 8} }}, {"config": { "model_name": "claude-3.5-sonnet", "organization": "Anthropic", "license": "Proprietary", "knowledge_cutoff": "2024-03" }, "results": { "math-algebra": {"Score": 91.5823805, "Avg Rank": 8.333333333, "Min Rank": 8, "Max Rank": 9}, "math-probability": {"Score": 91.55011915, "Avg Rank": 8, "Min Rank": 7, "Max Rank": 9}, "reasoning-logical": {"Avg Rank": 5, "Min Rank": 2, "Max Rank": 7}, "overall": {"Avg Rank": 5, "Min Rank": 4, "Max Rank": 7} }}, {"config": { "model_name": "o1-mini", "organization": "OpenAI", "license": "Proprietary", "knowledge_cutoff": "2024-01" }, "results": { "math-algebra": null, "math-probability": null, "reasoning-logical": null, "overall": 
{"Avg Rank": 1, "Min Rank": 1, "Max Rank": 1} }}, {"config": { "model_name": "o1-preview", "organization": "OpenAI", "license": "Proprietary", "knowledge_cutoff": "2024-01" }, "results": { "math-algebra": null, "math-probability": null, "reasoning-logical": null, "overall": {"Avg Rank": 3, "Min Rank": 3, "Max Rank": 3} }}, {"config": { "model_name": "gemini-1.5-flash-001", "organization": "Google", "license": "Proprietary", "knowledge_cutoff": "2024-02" }, "results": { "math-algebra": {"Score": 91.30211121, "Avg Rank": 11, "Min Rank": 11, "Max Rank": 11}, "math-probability": {"Score": 91.066099, "Avg Rank": 12, "Min Rank": 10, "Max Rank": 13}, "reasoning-logical": {"Avg Rank": 15.66666667, "Min Rank": 15, "Max Rank": 16}, "overall": {"Avg Rank": 14, "Min Rank": 13, "Max Rank": 15} }}, {"config": { "model_name": "gpt4-1106", "organization": "OpenAI", "license": "Proprietary", "knowledge_cutoff": "2024-04" }, "results": { "math-algebra": {"Score": 91.2227739, "Avg Rank": 12, "Min Rank": 12, "Max Rank": 12}, "math-probability": {"Score": 91.09550085, "Avg Rank": 11.66666667, "Min Rank": 11, "Max Rank": 12}, "reasoning-logical": {"Avg Rank": 12, "Min Rank": 12, "Max Rank": 12}, "overall": {"Avg Rank": 12, "Min Rank": 11, "Max Rank": 12} }}, {"config": { "model_name": "gemma-2-27b-it", "organization": "Google", "license": "Gemma License", "knowledge_cutoff": "2024-03" }, "results": { "math-algebra": {"Score": 91.08554346, "Avg Rank": 13.33333333, "Min Rank": 13, "Max Rank": 14}, "math-probability": {"Score": 91.09516215, "Avg Rank": 14, "Min Rank": 14, "Max Rank": 14}, "reasoning-logical": {"Avg Rank": 13, "Min Rank": 13, "Max Rank": 13}, "overall": {"Avg Rank": 13, "Min Rank": 12, "Max Rank": 14} }}, {"config": { "model_name": "claude-3-opus", "organization": "Anthropic", "license": "Proprietary", "knowledge_cutoff": "2024-01" }, "results": { "math-algebra": {"Score": 89.75345785, "Avg Rank": 13.66666667, "Min Rank": 13, "Max Rank": 14}, "math-probability": {"Score": 
91.06939607, "Avg Rank": 11.33333333, "Min Rank": 11, "Max Rank": 12}, "reasoning-logical": {"Avg Rank": 10.66666667, "Min Rank": 10, "Max Rank": 11}, "overall": {"Avg Rank": 12, "Min Rank": 10, "Max Rank": 15} }}, {"config": { "model_name": "gemma-2-9b-it-simpo", "organization": "Google", "license": "Gemma License", "knowledge_cutoff": "2024-02" }, "results": { "math-algebra": {"Score": 87.66368227, "Avg Rank": 15, "Min Rank": 15, "Max Rank": 15}, "math-probability": {"Score": 73.64665336, "Avg Rank": 17, "Min Rank": 17, "Max Rank": 17}, "reasoning-logical": {"Avg Rank": 19, "Min Rank": 19, "Max Rank": 19}, "overall": {"Avg Rank": 17, "Min Rank": 15, "Max Rank": 19} }}, {"config": { "model_name": "qwen1.5-72b-chat", "organization": "Alibaba", "license": "Qianwen LICENSE", "knowledge_cutoff": "2024-03" }, "results": { "math-algebra": {"Score": 86.56207015, "Avg Rank": 16, "Min Rank": 16, "Max Rank": 16}, "math-probability": {"Score": 72.7735874, "Avg Rank": 21, "Min Rank": 20, "Max Rank": 22}, "reasoning-logical": {"Avg Rank": 29.66666667, "Min Rank": 28, "Max Rank": 31}, "overall": {"Avg Rank": 23, "Min Rank": 16, "Max Rank": 31} }}, {"config": { "model_name": "qwen1.5-32b-chat", "organization": "Alibaba", "license": "Qianwen LICENSE", "knowledge_cutoff": "2024-03" }, "results": { "math-algebra": {"Score": 84.59439036, "Avg Rank": 17.33333333, "Min Rank": 17, "Max Rank": 18}, "math-probability": {"Score": 76.61348265, "Avg Rank": 22.33333333, "Min Rank": 22, "Max Rank": 23}, "reasoning-logical": {"Avg Rank": 28.66666667, "Min Rank": 27, "Max Rank": 30}, "overall": {"Avg Rank": 22, "Min Rank": 17, "Max Rank": 30} }}, {"config": { "model_name": "google-gemma-2-9b-it", "organization": "Google", "license": "Proprietary", "knowledge_cutoff": "2024-01" }, "results": { "math-algebra": {"Score": 84.18901776, "Avg Rank": 18, "Min Rank": 17, "Max Rank": 19}, "math-probability": {"Score": 74.46332504, "Avg Rank": 16, "Min Rank": 16, "Max Rank": 16}, "reasoning-logical": 
{"Avg Rank": 14, "Min Rank": 14, "Max Rank": 14}, "overall": {"Avg Rank": 16, "Min Rank": 14, "Max Rank": 19} }}, {"config": { "model_name": "yi-1.5-34b-chat", "organization": "01 AI", "license": "Proprietary", "knowledge_cutoff": "2024-01" }, "results": { "math-algebra": {"Score": 81.82921677, "Avg Rank": 18.66666667, "Min Rank": 18, "Max Rank": 19}, "math-probability": {"Score": 77.41945842, "Avg Rank": 15, "Min Rank": 15, "Max Rank": 15}, "reasoning-logical": {"Avg Rank": 17.33333333, "Min Rank": 17, "Max Rank": 18}, "overall": {"Avg Rank": 18, "Min Rank": 15, "Max Rank": 19} }}, {"config": { "model_name": "meta-llama-3.1-8b-instruct", "organization": "Meta", "license": "Llama 3.1 Community", "knowledge_cutoff": "2024-02" }, "results": { "math-algebra": {"Score": 75.57121963, "Avg Rank": 20.33333333, "Min Rank": 20, "Max Rank": 21}, "math-probability": {"Score": 75.46243493, "Avg Rank": 20.33333333, "Min Rank": 20, "Max Rank": 21}, "reasoning-logical": {"Avg Rank": 23.66666667, "Min Rank": 23, "Max Rank": 24}, "overall": {"Avg Rank": 21, "Min Rank": 20, "Max Rank": 24} }}, {"config": { "model_name": "gpt3.5-turbo-0125", "organization": "OpenAI", "license": "Proprietary", "knowledge_cutoff": "2023-12" }, "results": { "math-algebra": {"Score": 73.29235048, "Avg Rank": 21.33333333, "Min Rank": 21, "Max Rank": 22}, "math-probability": {"Score": 66.27452275, "Avg Rank": 24, "Min Rank": 24, "Max Rank": 24}, "reasoning-logical": {"Avg Rank": 42.66666667, "Min Rank": 42, "Max Rank": 44}, "overall": {"Avg Rank": 29, "Min Rank": 21, "Max Rank": 44} }}, {"config": { "model_name": "llama-3-70b-instruct", "organization": "Meta", "license": "Llama 3 Community", "knowledge_cutoff": "2024-03" }, "results": { "math-algebra": {"Score": 73.75419539, "Avg Rank": 21.33333333, "Min Rank": 20, "Max Rank": 22}, "math-probability": {"Score": 87.86358478, "Avg Rank": 18.33333333, "Min Rank": 18, "Max Rank": 19}, "reasoning-logical": {"Avg Rank": 3.333333333, "Min Rank": 2, "Max Rank": 
4}, "overall": {"Avg Rank": 15, "Min Rank": 3, "Max Rank": 22} }}, {"config": { "model_name": "claude-3-sonnet", "organization": "Anthropic", "license": "Proprietary", "knowledge_cutoff": "2024-02" }, "results": { "math-algebra": {"Score": 71.15353833, "Avg Rank": 23, "Min Rank": 23, "Max Rank": 23}, "math-probability": {"Score": 88.02362801, "Avg Rank": 18.66666667, "Min Rank": 18, "Max Rank": 19}, "reasoning-logical": {"Avg Rank": 17.33333333, "Min Rank": 16, "Max Rank": 18}, "overall": {"Avg Rank": 20, "Min Rank": 16, "Max Rank": 23} }}, {"config": { "model_name": "qwen1.5-14b-chat", "organization": "Alibaba", "license": "Qianwen LICENSE", "knowledge_cutoff": "2024-01" }, "results": { "math-algebra": {"Score": 69.70470323, "Avg Rank": 24, "Min Rank": 24, "Max Rank": 24}, "math-probability": {"Score": 66.41420544, "Avg Rank": 28.66666667, "Min Rank": 28, "Max Rank": 29}, "reasoning-logical": {"Avg Rank": 34, "Min Rank": 34, "Max Rank": 34}, "overall": {"Avg Rank": 28, "Min Rank": 24, "Max Rank": 34} }}, {"config": { "model_name": "claude-3-haiku", "organization": "Anthropic", "license": "Proprietary", "knowledge_cutoff": "2024-01" }, "results": { "math-algebra": {"Score": 68.44060149, "Avg Rank": 25, "Min Rank": 25, "Max Rank": 25}, "math-probability": {"Score": 76.46075239, "Avg Rank": 22.33333333, "Min Rank": 21, "Max Rank": 23}, "reasoning-logical": {"Avg Rank": 20, "Min Rank": 20, "Max Rank": 20}, "overall": {"Avg Rank": 22, "Min Rank": 20, "Max Rank": 25} }}, {"config": { "model_name": "claude-2.1", "organization": "Anthropic", "license": "Proprietary", "knowledge_cutoff": "2023-12" }, "results": { "math-algebra": {"Score": 67.59939121, "Avg Rank": 26, "Min Rank": 26, "Max Rank": 26}, "math-probability": {"Score": 68.89772398, "Avg Rank": 27, "Min Rank": 27, "Max Rank": 27}, "reasoning-logical": {"Avg Rank": 21, "Min Rank": 21, "Max Rank": 21}, "overall": {"Avg Rank": 25, "Min Rank": 21, "Max Rank": 27} }}, {"config": { "model_name": 
"mistral-8x7b-instruct-v0.1", "organization": "Mistral", "license": "Apache 2.0", "knowledge_cutoff": "2023-12" }, "results": { "math-algebra": {"Score": 64.71364004, "Avg Rank": 27, "Min Rank": 27, "Max Rank": 27}, "math-probability": {"Score": 67.67468595, "Avg Rank": 26, "Min Rank": 26, "Max Rank": 26}, "reasoning-logical": {"Avg Rank": 29, "Min Rank": 28, "Max Rank": 30}, "overall": {"Avg Rank": 27, "Min Rank": 26, "Max Rank": 30} }}, {"config": { "model_name": "claude-2.0", "organization": "Anthropic", "license": "Proprietary", "knowledge_cutoff": "2023-10" }, "results": { "math-algebra": {"Score": 64.77311289, "Avg Rank": 28, "Min Rank": 28, "Max Rank": 28}, "math-probability": {"Score": 74.34063069, "Avg Rank": 25, "Min Rank": 25, "Max Rank": 25}, "reasoning-logical": {"Avg Rank": 23.33333333, "Min Rank": 23, "Max Rank": 24}, "overall": {"Avg Rank": 25, "Min Rank": 23, "Max Rank": 28} }}, {"config": { "model_name": "starling-lm-7b-beta", "organization": "Nexusflow", "license": "Apache-2.0", "knowledge_cutoff": "2024-01" }, "results": { "math-algebra": {"Score": 64.01222884, "Avg Rank": 29.33333333, "Min Rank": 29, "Max Rank": 30}, "math-probability": {"Score": 70.42025806, "Avg Rank": 28.33333333, "Min Rank": 28, "Max Rank": 29}, "reasoning-logical": {"Avg Rank": 25, "Min Rank": 25, "Max Rank": 25}, "overall": {"Avg Rank": 27, "Min Rank": 25, "Max Rank": 30} }}, {"config": { "model_name": "gemini-1.0-pro-001", "organization": "Google", "license": "Proprietary", "knowledge_cutoff": "2023-11" }, "results": { "math-algebra": {"Score": 63.93365247, "Avg Rank": 29.66666667, "Min Rank": 29, "Max Rank": 30}, "math-probability": {"Score": 62.13077748, "Avg Rank": 36.66666667, "Min Rank": 36, "Max Rank": 38}, "reasoning-logical": {"Avg Rank": 37.33333333, "Min Rank": 36, "Max Rank": 40}, "overall": {"Avg Rank": 34, "Min Rank": 29, "Max Rank": 40} }}, {"config": { "model_name": "openchat-3.5-0106", "organization": "OpenChat", "license": "Apache-2.0", 
"knowledge_cutoff": "2024-01" }, "results": { "math-algebra": {"Score": 63.02959506, "Avg Rank": 31, "Min Rank": 31, "Max Rank": 31}, "math-probability": {"Score": 61.00599665, "Avg Rank": 30, "Min Rank": 30, "Max Rank": 30}, "reasoning-logical": {"Avg Rank": 27.66666667, "Min Rank": 27, "Max Rank": 29}, "overall": {"Avg Rank": 29, "Min Rank": 27, "Max Rank": 31} }}, {"config": { "model_name": "openchat-3.5", "organization": "OpenChat", "license": "Apache-2.0", "knowledge_cutoff": "2023-12" }, "results": { "math-algebra": {"Score": 61.45954168, "Avg Rank": 32.33333333, "Min Rank": 32, "Max Rank": 33}, "math-probability": {"Score": 62.56195929, "Avg Rank": 32, "Min Rank": 32, "Max Rank": 32}, "reasoning-logical": {"Avg Rank": 32, "Min Rank": 31, "Max Rank": 33}, "overall": {"Avg Rank": 32, "Min Rank": 31, "Max Rank": 33} }}, {"config": { "model_name": "command-r-(08-2024)", "organization": "Cohere", "license": "CC-BY-NC-4.0", "knowledge_cutoff": "2024-08" }, "results": { "math-algebra": {"Score": 61.0679475, "Avg Rank": 32.66666667, "Min Rank": 32, "Max Rank": 33}, "math-probability": {"Score": 66.00833826, "Avg Rank": 31, "Min Rank": 31, "Max Rank": 31}, "reasoning-logical": {"Avg Rank": 37.66666667, "Min Rank": 37, "Max Rank": 38}, "overall": {"Avg Rank": 34, "Min Rank": 31, "Max Rank": 38} }}, {"config": { "model_name": "gemma-1.1-7b-it", "organization": "Google", "license": "Gemma License", "knowledge_cutoff": "2023-11" }, "results": { "math-algebra": {"Score": 60.92904194, "Avg Rank": 34.33333333, "Min Rank": 34, "Max Rank": 35}, "math-probability": {"Score": 62.17574935, "Avg Rank": 36.66666667, "Min Rank": 36, "Max Rank": 37}, "reasoning-logical": {"Avg Rank": 30.33333333, "Min Rank": 28, "Max Rank": 32}, "overall": {"Avg Rank": 34, "Min Rank": 28, "Max Rank": 37} }}, {"config": { "model_name": "llama3-8b-instruct", "organization": "Meta", "license": "Llama 3 Community", "knowledge_cutoff": "2024-01" }, "results": { "math-algebra": {"Score": 61.06411319, "Avg 
Rank": 35, "Min Rank": 34, "Max Rank": 36}, "math-probability": {"Score": 62.13077748, "Avg Rank": 34.66666667, "Min Rank": 34, "Max Rank": 35}, "reasoning-logical": {"Avg Rank": 22, "Min Rank": 22, "Max Rank": 22}, "overall": {"Avg Rank": 30, "Min Rank": 22, "Max Rank": 36} }}, {"config": { "model_name": "gemma-2-2b-it", "organization": "Google", "license": "Gemma License", "knowledge_cutoff": "2023-12" }, "results": { "math-algebra": {"Score": 59.70248014, "Avg Rank": 36, "Min Rank": 35, "Max Rank": 37}, "math-probability": {"Score": 61.08084527, "Avg Rank": 33.66666667, "Min Rank": 33, "Max Rank": 35}, "reasoning-logical": {"Avg Rank": 26, "Min Rank": 26, "Max Rank": 26}, "overall": {"Avg Rank": 32, "Min Rank": 26, "Max Rank": 37} }}, {"config": { "model_name": "starling-lm-7b-alpha", "organization": "Nexusflow", "license": "Apache-2.0", "knowledge_cutoff": "2023-12" }, "results": { "math-algebra": {"Score": 59.574329, "Avg Rank": 36.66666667, "Min Rank": 36, "Max Rank": 37}, "math-probability": {"Score": 64.03683254, "Avg Rank": 33.66666667, "Min Rank": 33, "Max Rank": 34}, "reasoning-logical": {"Avg Rank": 35, "Min Rank": 35, "Max Rank": 35}, "overall": {"Avg Rank": 35, "Min Rank": 33, "Max Rank": 37} }}, {"config": { "model_name": "qwen1.5-4b-chat", "organization": "Alibaba", "license": "Qianwen LICENSE", "knowledge_cutoff": "2024-02" }, "results": { "math-algebra": {"Score": 56.66282914, "Avg Rank": 38.33333333, "Min Rank": 38, "Max Rank": 39}, "math-probability": {"Score": 57.39032697, "Avg Rank": 43, "Min Rank": 43, "Max Rank": 43}, "reasoning-logical": {"Avg Rank": 46, "Min Rank": 46, "Max Rank": 46}, "overall": {"Avg Rank": 42, "Min Rank": 38, "Max Rank": 46} }}, {"config": { "model_name": "command-r-(04-2024)", "organization": "Cohere", "license": "CC-BY-NC-4.0", "knowledge_cutoff": "2024-04" }, "results": { "math-algebra": {"Score": 56.19063413, "Avg Rank": 38.66666667, "Min Rank": 38, "Max Rank": 39}, "math-probability": {"Score": 54.37641509, "Avg 
Rank": 37.66666667, "Min Rank": 37, "Max Rank": 38}, "reasoning-logical": {"Avg Rank": 32.66666667, "Min Rank": 32, "Max Rank": 33}, "overall": {"Avg Rank": 36, "Min Rank": 32, "Max Rank": 39} }}, {"config": { "model_name": "vicuna-33b", "organization": "LMSYS", "license": "Non-commercial", "knowledge_cutoff": "2023-12" }, "results": { "math-algebra": {"Score": 54.71037983, "Avg Rank": 40.66666667, "Min Rank": 40, "Max Rank": 42}, "math-probability": {"Score": 55.02214588, "Avg Rank": 41, "Min Rank": 41, "Max Rank": 41}, "reasoning-logical": {"Avg Rank": 41, "Min Rank": 41, "Max Rank": 41}, "overall": {"Avg Rank": 41, "Min Rank": 40, "Max Rank": 42} }}, {"config": { "model_name": "gemma-7b-it", "organization": "Google", "license": "Gemma License", "knowledge_cutoff": "2023-12" }, "results": { "math-algebra": {"Score": 54.35817186, "Avg Rank": 40.66666667, "Min Rank": 40, "Max Rank": 41}, "math-probability": {"Score": 58.19573446, "Avg Rank": 42, "Min Rank": 42, "Max Rank": 42}, "reasoning-logical": {"Avg Rank": 39.33333333, "Min Rank": 39, "Max Rank": 40}, "overall": {"Avg Rank": 41, "Min Rank": 39, "Max Rank": 42} }}, {"config": { "model_name": "mistral-7b-instruct-2", "organization": "Mistral", "license": "Apache 2.0", "knowledge_cutoff": "2023-12" }, "results": { "math-algebra": {"Score": 54.39240703, "Avg Rank": 41.66666667, "Min Rank": 41, "Max Rank": 42}, "math-probability": {"Score": 60.35257542, "Avg Rank": 39, "Min Rank": 39, "Max Rank": 39}, "reasoning-logical": {"Avg Rank": 36.66666667, "Min Rank": 36, "Max Rank": 37}, "overall": {"Avg Rank": 39, "Min Rank": 36, "Max Rank": 42} }}, {"config": { "model_name": "mistral-7b-instruct-1", "organization": "Mistral", "license": "Apache 2.0", "knowledge_cutoff": "2023-12" }, "results": { "math-algebra": {"Score": 53.80157944, "Avg Rank": 43, "Min Rank": 43, "Max Rank": 43}, "math-probability": {"Score": 56.51960666, "Avg Rank": 40, "Min Rank": 40, "Max Rank": 40}, "reasoning-logical": {"Avg Rank": 45, "Min Rank": 
45, "Max Rank": 45}, "overall": {"Avg Rank": 43, "Min Rank": 40, "Max Rank": 45} }}, {"config": { "model_name": "vicuna-13b", "organization": "LMSYS", "license": "Non-commercial", "knowledge_cutoff": "2023-11" }, "results": { "math-algebra": {"Score": 53.5413765, "Avg Rank": 44, "Min Rank": 44, "Max Rank": 44}, "math-probability": {"Score": 53.53586693, "Avg Rank": 44, "Min Rank": 44, "Max Rank": 44}, "reasoning-logical": {"Avg Rank": 43.66666667, "Min Rank": 43, "Max Rank": 44}, "overall": {"Avg Rank": 44, "Min Rank": 43, "Max Rank": 44} }}, {"config": { "model_name": "zephyr-7b-beta", "organization": "HuggingFace", "license": "MIT", "knowledge_cutoff": "2023-10" }, "results": { "math-algebra": {"Score": 52.23039742, "Avg Rank": 46, "Min Rank": 45, "Max Rank": 48}, "math-probability": {"Score": 51.67173535, "Avg Rank": 47.33333333, "Min Rank": 47, "Max Rank": 48}, "reasoning-logical": {"Avg Rank": 50, "Min Rank": 50, "Max Rank": 50}, "overall": {"Avg Rank": 48, "Min Rank": 45, "Max Rank": 50} }}, {"config": { "model_name": "gemma-1.1-2b-it", "organization": "Google", "license": "Gemma License", "knowledge_cutoff": "2023-12" }, "results": { "math-algebra": {"Score": 52.22372428, "Avg Rank": 46, "Min Rank": 45, "Max Rank": 47}, "math-probability": {"Score": 51.74306688, "Avg Rank": 46.33333333, "Min Rank": 46, "Max Rank": 47}, "reasoning-logical": {"Avg Rank": 48, "Min Rank": 48, "Max Rank": 48}, "overall": {"Avg Rank": 47, "Min Rank": 45, "Max Rank": 48} }}, {"config": { "model_name": "llama2-7b-chat", "organization": "Meta", "license": "Llama 2 Community", "knowledge_cutoff": "2023-10" }, "results": { "math-algebra": {"Score": 51.83025857, "Avg Rank": 46.33333333, "Min Rank": 46, "Max Rank": 47}, "math-probability": {"Score": 51.19585847, "Avg Rank": 47.33333333, "Min Rank": 46, "Max Rank": 48}, "reasoning-logical": {"Avg Rank": 42.66666667, "Min Rank": 42, "Max Rank": 43}, "overall": {"Avg Rank": 45, "Min Rank": 42, "Max Rank": 48} }}, {"config": { "model_name": 
"gemma-2b-it", "organization": "Google", "license": "Gemma License", "knowledge_cutoff": "2023-11" }, "results": { "math-algebra": {"Score": 51.60281474, "Avg Rank": 47.66666667, "Min Rank": 47, "Max Rank": 48}, "math-probability": {"Score": 51.52250905, "Avg Rank": 50, "Min Rank": 50, "Max Rank": 50}, "reasoning-logical": {"Avg Rank": 51, "Min Rank": 51, "Max Rank": 51}, "overall": {"Avg Rank": 49, "Min Rank": 47, "Max Rank": 51} }}, {"config": { "model_name": "llama2-13b-chat", "organization": "Meta", "license": "Llama 2 Community", "knowledge_cutoff": "2023-12" }, "results": { "math-algebra": {"Score": 51.21273132, "Avg Rank": 49, "Min Rank": 49, "Max Rank": 49}, "math-probability": {"Score": 51.72056522, "Avg Rank": 45, "Min Rank": 45, "Max Rank": 45}, "reasoning-logical": {"Avg Rank": 39, "Min Rank": 38, "Max Rank": 40}, "overall": {"Avg Rank": 44, "Min Rank": 38, "Max Rank": 49} }}, {"config": { "model_name": "vicuna-7b", "organization": "LMSYS", "license": "Non-commercial", "knowledge_cutoff": "2023-11" }, "results": { "math-algebra": {"Score": 51.31450547, "Avg Rank": 50, "Min Rank": 50, "Max Rank": 50}, "math-probability": {"Score": 52.72504618, "Avg Rank": 49, "Min Rank": 49, "Max Rank": 49}, "reasoning-logical": {"Avg Rank": 47, "Min Rank": 47, "Max Rank": 47}, "overall": {"Avg Rank": 48, "Min Rank": 47, "Max Rank": 50} }}, {"config": { "model_name": "koala-13b", "organization": "UC Berkeley", "license": "Non-commercial", "knowledge_cutoff": "2023-10" }, "results": { "math-algebra": {"Score": 50.19054677, "Avg Rank": 51, "Min Rank": 51, "Max Rank": 51}, "math-probability": {"Score": 50.741989, "Avg Rank": 51, "Min Rank": 51, "Max Rank": 51}, "reasoning-logical": {"Avg Rank": 49, "Min Rank": 49, "Max Rank": 49}, "overall": {"Avg Rank": 50, "Min Rank": 49, "Max Rank": 51} }}, {"config": { "model_name": "openassistant-pythia-12b", "organization": "OpenAssistant", "license": "Non-commercial", "knowledge_cutoff": "2023-09" }, "results": { "math-algebra": 
{"Score": 50, "Avg Rank": 52, "Min Rank": 52, "Max Rank": 52}, "math-probability": {"Score": 50, "Avg Rank": 52, "Min Rank": 52, "Max Rank": 52}, "reasoning-logical": {"Avg Rank": 52, "Min Rank": 52, "Max Rank": 52}, "overall": {"Avg Rank": 52, "Min Rank": 52, "Max Rank": 52} }} ]