{ "Rank": { "0": 1, "1": 2, "2": 3, "3": 4, "4": 5, "5": 6, "6": 7, "7": 8, "8": 9, "9": 10, "10": 11, "11": 12, "12": 13, "13": 14, "14": 15 }, "Model Type": { "0": "api", "1": "api", "2": "api", "3": "api", "4": "api", "5": "api", "6": "api", "7": "api", "8": "OpenSource", "9": "api", "10": "OpenSource", "11": "OpenSource", "12": "OpenSource", "13": "OpenSource", "14": "OpenSource" }, "Model Size": { "0": "Unknown", "1": "Unknown", "2": "Unknown", "3": "Unknown", "4": "Unknown", "5": "Unknown", "6": "Unknown", "7": "Unknown", "8": "4B-10B", "9": "Unknown", "10": "4B-10B", "11": "10B-20B", "12": "4B-10B", "13": "4B-10B", "14": "<4B" }, "Model": { "0": "Claude3.5-Sonnet", "1": "GPT-4o(0513)", "2": "GPT-4-turbo(0409)", "3": "Claude-3-Sonnet", "4": "Claude-3-Opus", "5": "GPT-4v(1106)", "6": "Claude-3-Haiku", "7": "Gemini-Pro-Vision", "8": "InternLM-XComposer2-VL-7B", "9": "Qwen-VL-Max", "10": "LLaVA-v1.6-Mistral-7B-HF", "11": "LLaVA-v1.6-Vicuna-13B-HF", "12": "IDEFICS2-8B", "13": "Fuyu-8B", "14": "Paligemma-3B-Mix-224" }, "Params (B)": { "0": "", "1": "", "2": "", "3": "", "4": "", "5": "", "6": "", "7": "", "8": "7B", "9": "", "10": "7.57B", "11": "13.4B", "12": "8.4B", "13": "9.41B", "14": "2.92B" }, "Supports multiple images": { "0": true, "1": true, "2": true, "3": true, "4": true, "5": true, "6": true, "7": true, "8": false, "9": true, "10": false, "11": true, "12": true, "13": false, "14": false }, "Avg Score\u2b06\ufe0f": { "0": 70.7, "1": 67.06, "2": 54.58, "3": 43.6, "4": 41.99, "5": 37.74, "6": 36.42, "7": 35.96, "8": 18.71, "9": 12.89, "10": 11.13, "11": 6.7, "12": 2.08, "13": 0.0, "14": 0.0 }, "Avg Score\u2b50": { "0": 71.37, "1": 68.53, "2": 57.97, "3": 45.15, "4": 42.88, "5": 46.44, "6": 33.94, "7": 36.56, "8": 19.7, "9": 12.53, "10": 11.72, "11": 5.35, "12": 2.0, "13": 0.0, "14": 0.0 }, "HumanEval-V": { "0": 78.33, "1": 75.83, "2": 76.67, "3": 41.67, "4": 52.5, "5": 78.33, "6": 43.33, "7": 46.96, "8": 15.0, "9": 20.83, "10": 6.67, "11": 8.33, "12": 5.0, "13": 0.0, "14": 0.0 }, "HumanEval-V\u2b50": { "0": 84.17, "1": 85.83, "2": 85.83, "3": 55.83, "4": 61.67, "5": 78.33, "6": 45.0, "7": 49.57, "8": 20.83, "9": 22.5, "10": 9.17, "11": 8.33, "12": 4.17, "13": 0.0, "14": 0.0 }, "MBPP-V": { "0": 60.83, "1": 60.0, "2": 60.83, "3": 48.33, "4": 57.5, "5": 55.83, "6": 49.17, "7": 42.74, "8": 29.17, "9": 18.33, "10": 20.83, "11": 13.33, "12": 5.0, "13": 0.0, "14": 0.0 }, "MBPP-V\u2b50": { "0": 60.0, "1": 60.83, "2": 60.83, "3": 47.5, "4": 54.17, "5": 59.17, "6": 45.0, "7": 47.86, "8": 31.67, "9": 20.0, "10": 20.83, "11": 15.0, "12": 4.17, "13": 0.0, "14": 0.0 }, "GSM8K-V": { "0": 92.98, "1": 93.86, "2": 92.98, "3": 82.46, "4": 55.26, "5": 94.74, "6": 47.37, "7": 67.54, "8": 51.75, "9": 34.21, "10": 4.39, "11": 3.51, "12": 1.75, "13": 0.0, "14": 0.0 }, "GSM8K-V\u2b50": { "0": 92.11, "1": 92.98, "2": 92.98, "3": 79.82, "4": 49.12, "5": 94.74, "6": 55.26, "7": 63.16, "8": 51.75, "9": 39.47, "10": 4.39, "11": 3.51, "12": 1.75, "13": 0.0, "14": 0.0 }, "MATH-V": { "0": 53.85, "1": 53.85, "2": 50.55, "3": 29.67, "4": 18.68, "5": 39.56, "6": 10.99, "7": 14.29, "8": 8.79, "9": 4.4, "10": 4.4, "11": 6.59, "12": 1.1, "13": 0.0, "14": 0.0 }, "MATH-V\u2b50": { "0": 56.04, "1": 56.04, "2": 52.75, "3": 26.37, "4": 18.68, "5": 40.66, "6": 15.38, "7": 13.19, "8": 9.89, "9": 2.2, "10": 4.44, "11": 6.59, "12": 2.2, "13": 0.0, "14": 0.0 }, "VP": { "0": 61.54, "1": 74.36, "2": 2.56, "3": 35.9, "4": 46.15, "5": 18.8, "6": 47.86, "7": 29.06, "8": 0.85, "9": 1.74, "10": 17.95, "11": 0.0, "12": 0.0, "13": 0.0, "14": 0.0 }, "VP\u2b50": { "0": 50.43, "1": 67.52, "2": 18.8, "3": 37.61, "4": 50.43, "5": 49.57, "6": 15.38, "7": 29.06, "8": 0.0, "9": 2.65, "10": 20.51, "11": 0.0, "12": 0.0, "13": 0.0, "14": 0.0 }, "Matplotlib": { "0": 53.41, "1": 46.14, "2": 41.28, "3": 23.18, "4": 19.55, "5": 4.77, "6": 18.86, "7": 29.55, "8": 6.59, "9": 2.05, "10": 3.18, "11": 3.41, "12": 1.36, "13": 0.0, "14": 0.0 }, "Matplotlib\u2b50": { "0": 56.36, "1": 54.09, "2": 48.85, "3": 26.36, "4": 21.36, "5": 10.0, "6": 23.18, "7": 29.17, "8": 6.82, "9": 0.0, "10": 2.5, "11": 0.0, "12": 0.68, "13": 0.0, "14": 0.0 }, "SVG": { "0": 89.81, "1": 56.84, "2": 44.05, "3": 33.49, "4": 25.71, "5": 3.3, "6": 32.31, "7": 25.71, "8": 19.34, "9": 9.43, "10": 9.43, "11": 4.01, "12": 3.3, "13": 0.0, "14": 0.0 }, "SVG\u2b50": { "0": 87.26, "1": 59.43, "2": 43.1, "3": 32.78, "4": 21.93, "5": 12.74, "6": 31.37, "7": 24.76, "8": 21.23, "9": 7.55, "10": 8.25, "11": 4.72, "12": 3.54, "13": 0.0, "14": 0.0 }, "TikZ": { "0": 54.47, "1": 54.5, "2": 40.75, "3": 16.75, "4": 19.75, "5": 10.5, "6": 9.0, "7": 9.5, "8": 1.0, "9": 0.0, "10": 0.75, "11": 1.0, "12": 0.0, "13": 0.0, "14": 0.0 }, "TikZ\u2b50": { "0": 65.26, "1": 52.5, "2": 39.25, "3": 20.75, "4": 23.97, "5": 32.0, "6": 12.0, "7": 7.0, "8": 1.5, "9": 0.0, "10": 1.75, "11": 2.0, "12": 0.25, "13": 0.0, "14": 0.0 }, "Webpage": { "0": 91.12, "1": 88.12, "2": 81.56, "3": 80.99, "4": 82.79, "5": 33.81, "6": 68.85, "7": 58.26, "8": 35.86, "9": 25.0, "10": 32.58, "11": 20.08, "12": 1.23, "13": 0.0, "14": 0.0 }, "Webpage\u2b50": { "0": 90.7, "1": 87.5, "2": 79.3, "3": 79.34, "4": 84.63, "5": 40.78, "6": 62.91, "7": 65.32, "8": 33.61, "9": 18.39, "10": 33.61, "11": 7.99, "12": 1.23, "13": 0.0, "14": 0.0 } }