{ "random": { "model_size": "0B", "model_link": "https://huggingface.co/spaces/SeaEval/SeaEval_Leaderboard", "zero_shot": { "cross_mmlu": { "prompt_1": { "overall_acc": 0.23809523809523808, "language_acc": { "Malay": 0.24, "English": 0.18, "Vietnamese": 0.25333333333333335, "Spanish": 0.26666666666666666, "Indonesian": 0.26, "Filipino": 0.24666666666666667, "Chinese": 0.22 }, "consistency_score_2": 0.2482539682539682, "consistency_score_3": 0.06038095238095238, "consistency_score_4": 0.01447619047619047, "consistency_score_5": 0.0025396825396825397, "consistency_score_6": 0.0, "consistency_score_7": 0.0, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.23333333333333334, "Malay,Vietnamese": 0.24, "Malay,Spanish": 0.2866666666666667, "Malay,Indonesian": 0.22666666666666666, "Malay,Filipino": 0.2, "Malay,Chinese": 0.2733333333333333, "English,Vietnamese": 0.2866666666666667, "English,Spanish": 0.22, "English,Indonesian": 0.29333333333333333, "English,Filipino": 0.2733333333333333, "English,Chinese": 0.22666666666666666, "Vietnamese,Spanish": 0.26, "Vietnamese,Indonesian": 0.26666666666666666, "Vietnamese,Filipino": 0.26, "Vietnamese,Chinese": 0.25333333333333335, "Spanish,Indonesian": 0.20666666666666667, "Spanish,Filipino": 0.20666666666666667, "Spanish,Chinese": 0.26666666666666666, "Indonesian,Filipino": 0.21333333333333335, "Indonesian,Chinese": 0.25333333333333335, "Filipino,Chinese": 0.26666666666666666 }, "3_combine": { "Malay,English,Vietnamese": 0.07333333333333333, "Malay,English,Spanish": 0.08, "Malay,English,Indonesian": 0.06666666666666667, "Malay,English,Filipino": 0.07333333333333333, "Malay,English,Chinese": 0.05333333333333334, "Malay,Vietnamese,Spanish": 0.06, "Malay,Vietnamese,Indonesian": 0.07333333333333333, "Malay,Vietnamese,Filipino": 0.04, "Malay,Vietnamese,Chinese": 0.06, "Malay,Spanish,Indonesian": 0.03333333333333333, "Malay,Spanish,Filipino": 0.04, "Malay,Spanish,Chinese": 0.08666666666666667, "Malay,Indonesian,Filipino": 0.04, "Malay,Indonesian,Chinese": 0.06, "Malay,Filipino,Chinese": 0.06, "English,Vietnamese,Spanish": 0.08, "English,Vietnamese,Indonesian": 0.1, "English,Vietnamese,Filipino": 0.08666666666666667, "English,Vietnamese,Chinese": 0.04666666666666667, "English,Spanish,Indonesian": 0.05333333333333334, "English,Spanish,Filipino": 0.04666666666666667, "English,Spanish,Chinese": 0.04, "English,Indonesian,Filipino": 0.06, "English,Indonesian,Chinese": 0.05333333333333334, "English,Filipino,Chinese": 0.06, "Vietnamese,Spanish,Indonesian": 0.04, "Vietnamese,Spanish,Filipino": 0.05333333333333334, "Vietnamese,Spanish,Chinese": 0.08666666666666667, "Vietnamese,Indonesian,Filipino": 0.06, "Vietnamese,Indonesian,Chinese": 0.06, "Vietnamese,Filipino,Chinese": 0.09333333333333334, "Spanish,Indonesian,Filipino": 0.03333333333333333, "Spanish,Indonesian,Chinese": 0.06, "Spanish,Filipino,Chinese": 0.04, "Indonesian,Filipino,Chinese": 0.06 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.02, "Malay,English,Vietnamese,Indonesian": 0.02666666666666667, "Malay,English,Vietnamese,Filipino": 0.02, "Malay,English,Vietnamese,Chinese": 0.013333333333333334, "Malay,English,Spanish,Indonesian": 0.02, "Malay,English,Spanish,Filipino": 0.02, "Malay,English,Spanish,Chinese": 0.02666666666666667, "Malay,English,Indonesian,Filipino": 0.02, "Malay,English,Indonesian,Chinese": 0.006666666666666667, "Malay,English,Filipino,Chinese": 0.013333333333333334, "Malay,Vietnamese,Spanish,Indonesian": 0.006666666666666667, "Malay,Vietnamese,Spanish,Filipino": 0.0, "Malay,Vietnamese,Spanish,Chinese": 0.02, "Malay,Vietnamese,Indonesian,Filipino": 0.006666666666666667, "Malay,Vietnamese,Indonesian,Chinese": 0.006666666666666667, "Malay,Vietnamese,Filipino,Chinese": 0.013333333333333334, "Malay,Spanish,Indonesian,Filipino": 0.013333333333333334, "Malay,Spanish,Indonesian,Chinese": 0.02, "Malay,Spanish,Filipino,Chinese": 0.013333333333333334, "Malay,Indonesian,Filipino,Chinese": 0.013333333333333334, "English,Vietnamese,Spanish,Indonesian": 0.02666666666666667, "English,Vietnamese,Spanish,Filipino": 0.02, "English,Vietnamese,Spanish,Chinese": 0.013333333333333334, "English,Vietnamese,Indonesian,Filipino": 0.02, "English,Vietnamese,Indonesian,Chinese": 0.006666666666666667, "English,Vietnamese,Filipino,Chinese": 0.02, "English,Spanish,Indonesian,Filipino": 0.006666666666666667, "English,Spanish,Indonesian,Chinese": 0.013333333333333334, "English,Spanish,Filipino,Chinese": 0.006666666666666667, "English,Indonesian,Filipino,Chinese": 0.006666666666666667, "Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, "Vietnamese,Spanish,Indonesian,Chinese": 0.006666666666666667, "Vietnamese,Spanish,Filipino,Chinese": 0.013333333333333334, "Vietnamese,Indonesian,Filipino,Chinese": 0.03333333333333333, "Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.006666666666666667, "Malay,English,Vietnamese,Spanish,Filipino": 0.0, "Malay,English,Vietnamese,Spanish,Chinese": 0.006666666666666667, "Malay,English,Vietnamese,Indonesian,Filipino": 0.006666666666666667, "Malay,English,Vietnamese,Indonesian,Chinese": 0.0, "Malay,English,Vietnamese,Filipino,Chinese": 0.0, "Malay,English,Spanish,Indonesian,Filipino": 0.006666666666666667, "Malay,English,Spanish,Indonesian,Chinese": 0.006666666666666667, "Malay,English,Spanish,Filipino,Chinese": 0.006666666666666667, "Malay,English,Indonesian,Filipino,Chinese": 0.0, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.0, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.0, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.0, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.0, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.0, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, "English,Vietnamese,Spanish,Filipino,Chinese": 0.0, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, "English,Spanish,Indonesian,Filipino,Chinese": 0.0, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.0, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.0, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.0, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.0, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 } }, "AC3_2": 0.24306850672610034, "AC3_3": 0.09633208736554347, "AC3_4": 0.027292968458633007, "AC3_5": 0.005025757002560285, "AC3_6": 0.0, "AC3_7": 0.0 }, "prompt_2": { "overall_acc": 0.25523809523809526, "language_acc": { "Malay": 0.28, "English": 0.24666666666666667, "Vietnamese": 0.2866666666666667, "Spanish": 0.26, "Indonesian": 0.23333333333333334, "Filipino": 0.22666666666666666, "Chinese": 0.25333333333333335 }, "consistency_score_2": 0.25079365079365074, "consistency_score_3": 0.06266666666666666, "consistency_score_4": 0.015428571428571427, "consistency_score_5": 0.0028571428571428576, "consistency_score_6": 0.0, "consistency_score_7": 0.0, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.22, "Malay,Vietnamese": 0.26666666666666666, "Malay,Spanish": 0.2866666666666667, "Malay,Indonesian": 0.23333333333333334, "Malay,Filipino": 0.2866666666666667, "Malay,Chinese": 0.20666666666666667, "English,Vietnamese": 0.3, "English,Spanish": 0.24666666666666667, "English,Indonesian": 0.24666666666666667, "English,Filipino": 0.20666666666666667, "English,Chinese": 0.30666666666666664, "Vietnamese,Spanish": 0.2, "Vietnamese,Indonesian": 0.3, "Vietnamese,Filipino": 0.26666666666666666, "Vietnamese,Chinese": 0.22666666666666666, "Spanish,Indonesian": 0.29333333333333333, "Spanish,Filipino": 0.24666666666666667, "Spanish,Chinese": 0.18, "Indonesian,Filipino": 0.25333333333333335, "Indonesian,Chinese": 0.23333333333333334, "Filipino,Chinese": 0.26 }, "3_combine": { "Malay,English,Vietnamese": 0.08, "Malay,English,Spanish": 0.09333333333333334, "Malay,English,Indonesian": 0.04666666666666667, "Malay,English,Filipino": 0.03333333333333333, "Malay,English,Chinese": 0.03333333333333333, "Malay,Vietnamese,Spanish": 0.08666666666666667, "Malay,Vietnamese,Indonesian": 0.07333333333333333, "Malay,Vietnamese,Filipino": 0.06666666666666667, "Malay,Vietnamese,Chinese": 0.04, "Malay,Spanish,Indonesian": 0.08, "Malay,Spanish,Filipino": 0.08666666666666667, "Malay,Spanish,Chinese": 0.04, "Malay,Indonesian,Filipino": 0.07333333333333333, "Malay,Indonesian,Chinese": 0.04666666666666667, "Malay,Filipino,Chinese": 0.04666666666666667, "English,Vietnamese,Spanish": 0.06, "English,Vietnamese,Indonesian": 0.10666666666666667, "English,Vietnamese,Filipino": 0.07333333333333333, "English,Vietnamese,Chinese": 0.08666666666666667, "English,Spanish,Indonesian": 0.07333333333333333, "English,Spanish,Filipino": 0.06, "English,Spanish,Chinese": 0.04, "English,Indonesian,Filipino": 0.02666666666666667, "English,Indonesian,Chinese": 0.06, "English,Filipino,Chinese": 0.07333333333333333, "Vietnamese,Spanish,Indonesian": 0.08666666666666667, "Vietnamese,Spanish,Filipino": 0.04666666666666667, "Vietnamese,Spanish,Chinese": 0.02666666666666667, "Vietnamese,Indonesian,Filipino": 0.08666666666666667, "Vietnamese,Indonesian,Chinese": 0.04, "Vietnamese,Filipino,Chinese": 0.06666666666666667, "Spanish,Indonesian,Filipino": 0.07333333333333333, "Spanish,Indonesian,Chinese": 0.05333333333333334, "Spanish,Filipino,Chinese": 0.06666666666666667, "Indonesian,Filipino,Chinese": 0.06 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.03333333333333333, "Malay,English,Vietnamese,Indonesian": 0.03333333333333333, "Malay,English,Vietnamese,Filipino": 0.006666666666666667, "Malay,English,Vietnamese,Chinese": 0.006666666666666667, "Malay,English,Spanish,Indonesian": 0.02666666666666667, "Malay,English,Spanish,Filipino": 0.02666666666666667, "Malay,English,Spanish,Chinese": 0.013333333333333334, "Malay,English,Indonesian,Filipino": 0.006666666666666667, "Malay,English,Indonesian,Chinese": 0.0, "Malay,English,Filipino,Chinese": 0.006666666666666667, "Malay,Vietnamese,Spanish,Indonesian": 0.04, "Malay,Vietnamese,Spanish,Filipino": 0.013333333333333334, "Malay,Vietnamese,Spanish,Chinese": 0.006666666666666667, "Malay,Vietnamese,Indonesian,Filipino": 0.02666666666666667, "Malay,Vietnamese,Indonesian,Chinese": 0.0, "Malay,Vietnamese,Filipino,Chinese": 0.006666666666666667, "Malay,Spanish,Indonesian,Filipino": 0.02666666666666667, "Malay,Spanish,Indonesian,Chinese": 0.006666666666666667, "Malay,Spanish,Filipino,Chinese": 0.013333333333333334, "Malay,Indonesian,Filipino,Chinese": 0.013333333333333334, "English,Vietnamese,Spanish,Indonesian": 0.02, "English,Vietnamese,Spanish,Filipino": 0.013333333333333334, "English,Vietnamese,Spanish,Chinese": 0.006666666666666667, "English,Vietnamese,Indonesian,Filipino": 0.02, "English,Vietnamese,Indonesian,Chinese": 0.02, "English,Vietnamese,Filipino,Chinese": 0.02666666666666667, "English,Spanish,Indonesian,Filipino": 0.006666666666666667, "English,Spanish,Indonesian,Chinese": 0.013333333333333334, "English,Spanish,Filipino,Chinese": 0.02, "English,Indonesian,Filipino,Chinese": 0.013333333333333334, "Vietnamese,Spanish,Indonesian,Filipino": 0.02, "Vietnamese,Spanish,Indonesian,Chinese": 0.006666666666666667, "Vietnamese,Spanish,Filipino,Chinese": 0.013333333333333334, "Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, "Spanish,Indonesian,Filipino,Chinese": 0.02 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.013333333333333334, "Malay,English,Vietnamese,Spanish,Filipino": 0.0, "Malay,English,Vietnamese,Spanish,Chinese": 0.0, "Malay,English,Vietnamese,Indonesian,Filipino": 0.006666666666666667, "Malay,English,Vietnamese,Indonesian,Chinese": 0.0, "Malay,English,Vietnamese,Filipino,Chinese": 0.0, "Malay,English,Spanish,Indonesian,Filipino": 0.0, "Malay,English,Spanish,Indonesian,Chinese": 0.0, "Malay,English,Spanish,Filipino,Chinese": 0.006666666666666667, "Malay,English,Indonesian,Filipino,Chinese": 0.0, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.013333333333333334, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.0, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.0, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.0, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.0, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.0, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, "English,Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, "English,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.0, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.0, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.0, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.0, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 } }, "AC3_2": 0.25299635533029896, "AC3_3": 0.10062712199750595, "AC3_4": 0.029098220558265206, "AC3_5": 0.005651027936661305, "AC3_6": 0.0, "AC3_7": 0.0 }, "prompt_3": { "overall_acc": 0.24476190476190476, "language_acc": { "Malay": 0.26, "English": 0.23333333333333334, "Vietnamese": 0.23333333333333334, "Spanish": 0.22666666666666666, "Indonesian": 0.26666666666666666, "Filipino": 0.23333333333333334, "Chinese": 0.26 }, "consistency_score_2": 0.2504761904761905, "consistency_score_3": 0.0636190476190476, "consistency_score_4": 0.01771428571428571, "consistency_score_5": 0.005079365079365079, "consistency_score_6": 0.0009523809523809525, "consistency_score_7": 0.0, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.3, "Malay,Vietnamese": 0.24666666666666667, "Malay,Spanish": 0.26, "Malay,Indonesian": 0.3, "Malay,Filipino": 0.24, "Malay,Chinese": 0.22, "English,Vietnamese": 0.26, "English,Spanish": 0.23333333333333334, "English,Indonesian": 0.21333333333333335, "English,Filipino": 0.23333333333333334, "English,Chinese": 0.23333333333333334, "Vietnamese,Spanish": 0.25333333333333335, "Vietnamese,Indonesian": 0.28, "Vietnamese,Filipino": 0.22, "Vietnamese,Chinese": 0.26666666666666666, "Spanish,Indonesian": 0.22, "Spanish,Filipino": 0.23333333333333334, "Spanish,Chinese": 0.32666666666666666, "Indonesian,Filipino": 0.24, "Indonesian,Chinese": 0.26, "Filipino,Chinese": 0.22 }, "3_combine": { "Malay,English,Vietnamese": 0.06666666666666667, "Malay,English,Spanish": 0.08666666666666667, "Malay,English,Indonesian": 0.08, "Malay,English,Filipino": 0.08, "Malay,English,Chinese": 0.06, "Malay,Vietnamese,Spanish": 0.06666666666666667, "Malay,Vietnamese,Indonesian": 0.08666666666666667, "Malay,Vietnamese,Filipino": 0.04666666666666667, "Malay,Vietnamese,Chinese": 0.04666666666666667, "Malay,Spanish,Indonesian": 0.05333333333333334, "Malay,Spanish,Filipino": 0.05333333333333334, "Malay,Spanish,Chinese": 0.05333333333333334, "Malay,Indonesian,Filipino": 0.06666666666666667, "Malay,Indonesian,Chinese": 0.06666666666666667, "Malay,Filipino,Chinese": 0.05333333333333334, "English,Vietnamese,Spanish": 0.04, "English,Vietnamese,Indonesian": 0.06666666666666667, "English,Vietnamese,Filipino": 0.05333333333333334, "English,Vietnamese,Chinese": 0.06666666666666667, "English,Spanish,Indonesian": 0.05333333333333334, "English,Spanish,Filipino": 0.04666666666666667, "English,Spanish,Chinese": 0.08, "English,Indonesian,Filipino": 0.07333333333333333, "English,Indonesian,Chinese": 0.05333333333333334, "English,Filipino,Chinese": 0.05333333333333334, "Vietnamese,Spanish,Indonesian": 0.07333333333333333, "Vietnamese,Spanish,Filipino": 0.05333333333333334, "Vietnamese,Spanish,Chinese": 0.09333333333333334, "Vietnamese,Indonesian,Filipino": 0.06, "Vietnamese,Indonesian,Chinese": 0.06666666666666667, "Vietnamese,Filipino,Chinese": 0.05333333333333334, "Spanish,Indonesian,Filipino": 0.07333333333333333, "Spanish,Indonesian,Chinese": 0.08, "Spanish,Filipino,Chinese": 0.06, "Indonesian,Filipino,Chinese": 0.06 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.02, "Malay,English,Vietnamese,Indonesian": 0.03333333333333333, "Malay,English,Vietnamese,Filipino": 0.013333333333333334, "Malay,English,Vietnamese,Chinese": 0.006666666666666667, "Malay,English,Spanish,Indonesian": 0.013333333333333334, "Malay,English,Spanish,Filipino": 0.02, "Malay,English,Spanish,Chinese": 0.013333333333333334, "Malay,English,Indonesian,Filipino": 0.02666666666666667, "Malay,English,Indonesian,Chinese": 0.013333333333333334, "Malay,English,Filipino,Chinese": 0.02, "Malay,Vietnamese,Spanish,Indonesian": 0.02, "Malay,Vietnamese,Spanish,Filipino": 0.02666666666666667, "Malay,Vietnamese,Spanish,Chinese": 0.013333333333333334, "Malay,Vietnamese,Indonesian,Filipino": 0.02, "Malay,Vietnamese,Indonesian,Chinese": 0.013333333333333334, "Malay,Vietnamese,Filipino,Chinese": 0.02, "Malay,Spanish,Indonesian,Filipino": 0.006666666666666667, "Malay,Spanish,Indonesian,Chinese": 0.006666666666666667, "Malay,Spanish,Filipino,Chinese": 0.02, "Malay,Indonesian,Filipino,Chinese": 0.013333333333333334, "English,Vietnamese,Spanish,Indonesian": 0.02666666666666667, "English,Vietnamese,Spanish,Filipino": 0.013333333333333334, "English,Vietnamese,Spanish,Chinese": 0.013333333333333334, "English,Vietnamese,Indonesian,Filipino": 0.013333333333333334, "English,Vietnamese,Indonesian,Chinese": 0.013333333333333334, "English,Vietnamese,Filipino,Chinese": 0.006666666666666667, "English,Spanish,Indonesian,Filipino": 0.013333333333333334, "English,Spanish,Indonesian,Chinese": 0.02, "English,Spanish,Filipino,Chinese": 0.013333333333333334, "English,Indonesian,Filipino,Chinese": 0.02666666666666667, "Vietnamese,Spanish,Indonesian,Filipino": 0.02666666666666667, "Vietnamese,Spanish,Indonesian,Chinese": 0.03333333333333333, "Vietnamese,Spanish,Filipino,Chinese": 0.02, "Vietnamese,Indonesian,Filipino,Chinese": 0.02, "Spanish,Indonesian,Filipino,Chinese": 0.02 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.013333333333333334, "Malay,English,Vietnamese,Spanish,Filipino": 0.013333333333333334, "Malay,English,Vietnamese,Spanish,Chinese": 0.0, "Malay,English,Vietnamese,Indonesian,Filipino": 0.006666666666666667, "Malay,English,Vietnamese,Indonesian,Chinese": 0.0, "Malay,English,Vietnamese,Filipino,Chinese": 0.0, "Malay,English,Spanish,Indonesian,Filipino": 0.006666666666666667, "Malay,English,Spanish,Indonesian,Chinese": 0.0, "Malay,English,Spanish,Filipino,Chinese": 0.006666666666666667, "Malay,English,Indonesian,Filipino,Chinese": 0.006666666666666667, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.0, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.0, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.006666666666666667, "English,Vietnamese,Spanish,Filipino,Chinese": 0.0, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.0, "English,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.0, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.0, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.0, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 } }, "AC3_2": 0.2475860805360873, "AC3_3": 0.10098885258056405, "AC3_4": 0.03303752849663975, "AC3_5": 0.009952199427250329, "AC3_6": 0.0018973791059092432, "AC3_7": 0.0 }, "prompt_4": { "overall_acc": 0.24000000000000002, "language_acc": { "Malay": 0.24666666666666667, "English": 0.24666666666666667, "Vietnamese": 0.25333333333333335, "Spanish": 0.22, "Indonesian": 0.22666666666666666, "Filipino": 0.28, "Chinese": 0.20666666666666667 }, "consistency_score_2": 0.2498412698412698, "consistency_score_3": 0.06876190476190477, "consistency_score_4": 0.02019047619047619, "consistency_score_5": 0.005396825396825397, "consistency_score_6": 0.0009523809523809525, "consistency_score_7": 0.0, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.26, "Malay,Vietnamese": 0.25333333333333335, "Malay,Spanish": 0.2733333333333333, "Malay,Indonesian": 0.2866666666666667, "Malay,Filipino": 0.32666666666666666, "Malay,Chinese": 0.26, "English,Vietnamese": 0.24666666666666667, "English,Spanish": 0.20666666666666667, "English,Indonesian": 0.26, "English,Filipino": 0.22666666666666666, "English,Chinese": 0.20666666666666667, "Vietnamese,Spanish": 0.3, "Vietnamese,Indonesian": 0.29333333333333333, "Vietnamese,Filipino": 0.28, "Vietnamese,Chinese": 0.23333333333333334, "Spanish,Indonesian": 0.25333333333333335, "Spanish,Filipino": 0.22, "Spanish,Chinese": 0.19333333333333333, "Indonesian,Filipino": 0.28, "Indonesian,Chinese": 0.21333333333333335, "Filipino,Chinese": 0.17333333333333334 }, "3_combine": { "Malay,English,Vietnamese": 0.06, "Malay,English,Spanish": 0.09333333333333334, "Malay,English,Indonesian": 0.10666666666666667, "Malay,English,Filipino": 0.06666666666666667, "Malay,English,Chinese": 0.06, "Malay,Vietnamese,Spanish": 0.08666666666666667, "Malay,Vietnamese,Indonesian": 0.1, "Malay,Vietnamese,Filipino": 0.12666666666666668, "Malay,Vietnamese,Chinese": 0.04, "Malay,Spanish,Indonesian": 0.08666666666666667, "Malay,Spanish,Filipino": 0.08, "Malay,Spanish,Chinese": 0.08, "Malay,Indonesian,Filipino": 0.11333333333333333, "Malay,Indonesian,Chinese": 0.06, "Malay,Filipino,Chinese": 0.04666666666666667, "English,Vietnamese,Spanish": 0.06666666666666667, "English,Vietnamese,Indonesian": 0.08, "English,Vietnamese,Filipino": 0.04, "English,Vietnamese,Chinese": 0.05333333333333334, "English,Spanish,Indonesian": 0.07333333333333333, "English,Spanish,Filipino": 0.05333333333333334, "English,Spanish,Chinese": 0.04, "English,Indonesian,Filipino": 0.06666666666666667, "English,Indonesian,Chinese": 0.04666666666666667, "English,Filipino,Chinese": 0.04, "Vietnamese,Spanish,Indonesian": 0.10666666666666667, "Vietnamese,Spanish,Filipino": 0.1, "Vietnamese,Spanish,Chinese": 0.06, "Vietnamese,Indonesian,Filipino": 0.08, "Vietnamese,Indonesian,Chinese": 0.06666666666666667, "Vietnamese,Filipino,Chinese": 0.04666666666666667, "Spanish,Indonesian,Filipino": 0.06, "Spanish,Indonesian,Chinese": 0.05333333333333334, "Spanish,Filipino,Chinese": 0.02, "Indonesian,Filipino,Chinese": 0.04666666666666667 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.02666666666666667, "Malay,English,Vietnamese,Indonesian": 0.03333333333333333, "Malay,English,Vietnamese,Filipino": 0.02, "Malay,English,Vietnamese,Chinese": 0.006666666666666667, "Malay,English,Spanish,Indonesian": 0.04, "Malay,English,Spanish,Filipino": 0.02, "Malay,English,Spanish,Chinese": 0.02666666666666667, "Malay,English,Indonesian,Filipino": 0.03333333333333333, "Malay,English,Indonesian,Chinese": 0.02666666666666667, "Malay,English,Filipino,Chinese": 0.006666666666666667, "Malay,Vietnamese,Spanish,Indonesian": 0.04, "Malay,Vietnamese,Spanish,Filipino": 0.04666666666666667, "Malay,Vietnamese,Spanish,Chinese": 0.02, "Malay,Vietnamese,Indonesian,Filipino": 0.05333333333333334, "Malay,Vietnamese,Indonesian,Chinese": 0.02, "Malay,Vietnamese,Filipino,Chinese": 0.006666666666666667, "Malay,Spanish,Indonesian,Filipino": 0.03333333333333333, "Malay,Spanish,Indonesian,Chinese": 0.02666666666666667, "Malay,Spanish,Filipino,Chinese": 0.006666666666666667, "Malay,Indonesian,Filipino,Chinese": 0.006666666666666667, "English,Vietnamese,Spanish,Indonesian": 0.02666666666666667, "English,Vietnamese,Spanish,Filipino": 0.02666666666666667, "English,Vietnamese,Spanish,Chinese": 0.013333333333333334, "English,Vietnamese,Indonesian,Filipino": 0.006666666666666667, "English,Vietnamese,Indonesian,Chinese": 0.006666666666666667, "English,Vietnamese,Filipino,Chinese": 0.006666666666666667, "English,Spanish,Indonesian,Filipino": 0.02, "English,Spanish,Indonesian,Chinese": 0.013333333333333334, "English,Spanish,Filipino,Chinese": 0.006666666666666667, "English,Indonesian,Filipino,Chinese": 0.006666666666666667, "Vietnamese,Spanish,Indonesian,Filipino": 0.02, "Vietnamese,Spanish,Indonesian,Chinese": 0.02666666666666667, "Vietnamese,Spanish,Filipino,Chinese": 0.013333333333333334, "Vietnamese,Indonesian,Filipino,Chinese": 0.013333333333333334, "Spanish,Indonesian,Filipino,Chinese": 0.0 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.013333333333333334, "Malay,English,Vietnamese,Spanish,Filipino": 0.013333333333333334, "Malay,English,Vietnamese,Spanish,Chinese": 0.0, "Malay,English,Vietnamese,Indonesian,Filipino": 0.006666666666666667, "Malay,English,Vietnamese,Indonesian,Chinese": 0.006666666666666667, "Malay,English,Vietnamese,Filipino,Chinese": 0.0, "Malay,English,Spanish,Indonesian,Filipino": 0.013333333333333334, "Malay,English,Spanish,Indonesian,Chinese": 0.013333333333333334, "Malay,English,Spanish,Filipino,Chinese": 0.0, "Malay,English,Indonesian,Filipino,Chinese": 0.0, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.02, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.013333333333333334, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.0, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.0, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.0, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, "English,Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.0, "English,Spanish,Indonesian,Filipino,Chinese": 0.0, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.0, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.0, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.0, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 } }, "AC3_2": 0.24482177571152372, "AC3_3": 0.10689697713996242, "AC3_4": 0.0372474377602087, "AC3_5": 0.010556274251843174, "AC3_6": 0.0018972332007936386, "AC3_7": 0.0 }, "prompt_5": { "overall_acc": 0.2580952380952381, "language_acc": { "Malay": 0.23333333333333334, "English": 0.26, "Vietnamese": 0.2733333333333333, "Spanish": 0.2733333333333333, "Indonesian": 0.25333333333333335, "Filipino": 0.22666666666666666, "Chinese": 0.2866666666666667 }, "consistency_score_2": 0.24412698412698414, "consistency_score_3": 0.05752380952380952, "consistency_score_4": 0.01314285714285714, "consistency_score_5": 0.0025396825396825397, "consistency_score_6": 0.0, "consistency_score_7": 0.0, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.22, "Malay,Vietnamese": 0.29333333333333333, "Malay,Spanish": 0.24, "Malay,Indonesian": 0.22, "Malay,Filipino": 0.22666666666666666, "Malay,Chinese": 0.19333333333333333, "English,Vietnamese": 0.2733333333333333, "English,Spanish": 0.2866666666666667, "English,Indonesian": 0.24, "English,Filipino": 0.24, "English,Chinese": 0.19333333333333333, "Vietnamese,Spanish": 0.29333333333333333, "Vietnamese,Indonesian": 0.25333333333333335, "Vietnamese,Filipino": 0.22, "Vietnamese,Chinese": 0.23333333333333334, "Spanish,Indonesian": 0.22, "Spanish,Filipino": 0.25333333333333335, "Spanish,Chinese": 0.26, "Indonesian,Filipino": 0.21333333333333335, "Indonesian,Chinese": 0.31333333333333335, "Filipino,Chinese": 0.24 }, "3_combine": { "Malay,English,Vietnamese": 0.06, "Malay,English,Spanish": 0.06666666666666667, "Malay,English,Indonesian": 0.03333333333333333, "Malay,English,Filipino": 0.06, "Malay,English,Chinese": 0.02666666666666667, "Malay,Vietnamese,Spanish": 0.08666666666666667, "Malay,Vietnamese,Indonesian": 0.05333333333333334, "Malay,Vietnamese,Filipino": 0.05333333333333334, "Malay,Vietnamese,Chinese": 0.05333333333333334, "Malay,Spanish,Indonesian": 0.04, "Malay,Spanish,Filipino": 0.04666666666666667, "Malay,Spanish,Chinese": 0.04666666666666667, "Malay,Indonesian,Filipino": 0.02, "Malay,Indonesian,Chinese": 0.06, "Malay,Filipino,Chinese": 0.04666666666666667, "English,Vietnamese,Spanish": 0.09333333333333334, "English,Vietnamese,Indonesian": 0.07333333333333333, "English,Vietnamese,Filipino": 0.05333333333333334, "English,Vietnamese,Chinese": 0.06666666666666667, "English,Spanish,Indonesian": 0.04666666666666667, "English,Spanish,Filipino": 0.08666666666666667, "English,Spanish,Chinese": 0.06, "English,Indonesian,Filipino": 0.04, "English,Indonesian,Chinese": 0.05333333333333334, "English,Filipino,Chinese": 0.03333333333333333, "Vietnamese,Spanish,Indonesian": 0.05333333333333334, "Vietnamese,Spanish,Filipino": 0.06, "Vietnamese,Spanish,Chinese": 0.07333333333333333, "Vietnamese,Indonesian,Filipino": 0.04666666666666667, "Vietnamese,Indonesian,Chinese": 0.08, "Vietnamese,Filipino,Chinese": 0.04666666666666667, "Spanish,Indonesian,Filipino": 0.06666666666666667, "Spanish,Indonesian,Chinese": 0.06666666666666667, "Spanish,Filipino,Chinese": 0.08666666666666667, "Indonesian,Filipino,Chinese": 0.07333333333333333 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.03333333333333333, "Malay,English,Vietnamese,Indonesian": 0.0, "Malay,English,Vietnamese,Filipino": 0.013333333333333334, "Malay,English,Vietnamese,Chinese": 0.006666666666666667, "Malay,English,Spanish,Indonesian": 0.0, "Malay,English,Spanish,Filipino": 0.02, "Malay,English,Spanish,Chinese": 0.006666666666666667, "Malay,English,Indonesian,Filipino": 0.006666666666666667, "Malay,English,Indonesian,Chinese": 0.0, "Malay,English,Filipino,Chinese": 0.006666666666666667, "Malay,Vietnamese,Spanish,Indonesian": 0.013333333333333334, "Malay,Vietnamese,Spanish,Filipino": 0.013333333333333334, "Malay,Vietnamese,Spanish,Chinese": 0.013333333333333334, "Malay,Vietnamese,Indonesian,Filipino": 0.0, "Malay,Vietnamese,Indonesian,Chinese": 0.02, "Malay,Vietnamese,Filipino,Chinese": 0.006666666666666667, "Malay,Spanish,Indonesian,Filipino": 0.006666666666666667, "Malay,Spanish,Indonesian,Chinese": 0.02, "Malay,Spanish,Filipino,Chinese": 0.013333333333333334, "Malay,Indonesian,Filipino,Chinese": 0.006666666666666667, "English,Vietnamese,Spanish,Indonesian": 0.02, "English,Vietnamese,Spanish,Filipino": 0.02666666666666667, "English,Vietnamese,Spanish,Chinese": 0.02666666666666667, "English,Vietnamese,Indonesian,Filipino": 0.013333333333333334, "English,Vietnamese,Indonesian,Chinese": 0.02, "English,Vietnamese,Filipino,Chinese": 0.006666666666666667, "English,Spanish,Indonesian,Filipino": 0.006666666666666667, "English,Spanish,Indonesian,Chinese": 0.02, "English,Spanish,Filipino,Chinese": 0.006666666666666667, "English,Indonesian,Filipino,Chinese": 0.013333333333333334, "Vietnamese,Spanish,Indonesian,Filipino": 0.013333333333333334, "Vietnamese,Spanish,Indonesian,Chinese": 0.02, "Vietnamese,Spanish,Filipino,Chinese": 0.02, "Vietnamese,Indonesian,Filipino,Chinese": 0.02, "Spanish,Indonesian,Filipino,Chinese": 0.02 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.0, "Malay,English,Vietnamese,Spanish,Filipino": 0.006666666666666667, "Malay,English,Vietnamese,Spanish,Chinese": 0.006666666666666667, "Malay,English,Vietnamese,Indonesian,Filipino": 0.0, "Malay,English,Vietnamese,Indonesian,Chinese": 0.0, "Malay,English,Vietnamese,Filipino,Chinese": 0.0, "Malay,English,Spanish,Indonesian,Filipino": 0.0, "Malay,English,Spanish,Indonesian,Chinese": 0.0, "Malay,English,Spanish,Filipino,Chinese": 0.0, "Malay,English,Indonesian,Filipino,Chinese": 0.0, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.0, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.006666666666666667, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.0, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.0, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.006666666666666667, "English,Vietnamese,Spanish,Filipino,Chinese": 0.0, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, "English,Spanish,Indonesian,Filipino,Chinese": 0.0, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.0, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.0, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.0, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.0, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 } }, "AC3_2": 0.250916862270188, "AC3_3": 0.09407937462892704, "AC3_4": 0.025012038514053044, "AC3_5": 0.005029870655224603, "AC3_6": 0.0, "AC3_7": 0.0 } }, "cross_logiqa": { "prompt_1": { "overall_acc": 0.24837662337662336, "language_acc": { "English": 0.26136363636363635, "Vietnamese": 0.23295454545454544, "Chinese": 0.26704545454545453, "Indonesian": 0.2727272727272727, "Filipino": 0.23295454545454544, "Spanish": 0.25, "Malay": 0.2215909090909091 }, "consistency_score_2": 0.2462121212121212, "consistency_score_3": 0.06120129870129869, "consistency_score_4": 0.014772727272727267, "consistency_score_5": 0.0032467532467532465, "consistency_score_6": 0.0008116883116883117, "consistency_score_7": 0.0, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.23295454545454544, "English,Chinese": 0.29545454545454547, "English,Indonesian": 0.2556818181818182, "English,Filipino": 0.26136363636363635, "English,Spanish": 0.23863636363636365, "English,Malay": 0.23863636363636365, "Vietnamese,Chinese": 0.24431818181818182, "Vietnamese,Indonesian": 0.26136363636363635, "Vietnamese,Filipino": 0.23295454545454544, "Vietnamese,Spanish": 0.2215909090909091, "Vietnamese,Malay": 0.21022727272727273, "Chinese,Indonesian": 0.26704545454545453, "Chinese,Filipino": 0.2215909090909091, "Chinese,Spanish": 0.2727272727272727, "Chinese,Malay": 0.26704545454545453, "Indonesian,Filipino": 0.21022727272727273, "Indonesian,Spanish": 0.23863636363636365, "Indonesian,Malay": 0.2727272727272727, "Filipino,Spanish": 0.2159090909090909, "Filipino,Malay": 0.20454545454545456, "Spanish,Malay": 0.3068181818181818 }, "3_combine": { "English,Vietnamese,Chinese": 0.056818181818181816, "English,Vietnamese,Indonesian": 0.03977272727272727, "English,Vietnamese,Filipino": 0.03977272727272727, "English,Vietnamese,Spanish": 0.06818181818181818, "English,Vietnamese,Malay": 0.056818181818181816, "English,Chinese,Indonesian": 0.08522727272727272, "English,Chinese,Filipino": 0.08522727272727272, "English,Chinese,Spanish": 0.07386363636363637, "English,Chinese,Malay": 0.09090909090909091, "English,Indonesian,Filipino": 0.05113636363636364, "English,Indonesian,Spanish": 0.056818181818181816, "English,Indonesian,Malay": 0.06818181818181818, "English,Filipino,Spanish": 0.06818181818181818, "English,Filipino,Malay": 0.03409090909090909, "English,Spanish,Malay": 0.0625, "Vietnamese,Chinese,Indonesian": 0.07954545454545454, "Vietnamese,Chinese,Filipino": 0.03977272727272727, "Vietnamese,Chinese,Spanish": 0.0625, "Vietnamese,Chinese,Malay": 0.06818181818181818, "Vietnamese,Indonesian,Filipino": 0.0625, "Vietnamese,Indonesian,Spanish": 0.0625, "Vietnamese,Indonesian,Malay": 0.06818181818181818, "Vietnamese,Filipino,Spanish": 0.03409090909090909, "Vietnamese,Filipino,Malay": 0.03977272727272727, "Vietnamese,Spanish,Malay": 0.03977272727272727, "Chinese,Indonesian,Filipino": 0.045454545454545456, "Chinese,Indonesian,Spanish": 0.0625, "Chinese,Indonesian,Malay": 0.09090909090909091, "Chinese,Filipino,Spanish": 0.0625, "Chinese,Filipino,Malay": 0.056818181818181816, "Chinese,Spanish,Malay": 0.09090909090909091, "Indonesian,Filipino,Spanish": 0.03409090909090909, "Indonesian,Filipino,Malay": 0.05113636363636364, "Indonesian,Spanish,Malay": 0.07954545454545454, "Filipino,Spanish,Malay": 0.07386363636363637 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.011363636363636364, "English,Vietnamese,Chinese,Filipino": 0.005681818181818182, "English,Vietnamese,Chinese,Spanish": 0.011363636363636364, "English,Vietnamese,Chinese,Malay": 0.005681818181818182, "English,Vietnamese,Indonesian,Filipino": 0.005681818181818182, "English,Vietnamese,Indonesian,Spanish": 0.011363636363636364, "English,Vietnamese,Indonesian,Malay": 0.005681818181818182, "English,Vietnamese,Filipino,Spanish": 0.011363636363636364, "English,Vietnamese,Filipino,Malay": 0.0, "English,Vietnamese,Spanish,Malay": 0.011363636363636364, "English,Chinese,Indonesian,Filipino": 0.011363636363636364, "English,Chinese,Indonesian,Spanish": 0.005681818181818182, "English,Chinese,Indonesian,Malay": 0.028409090909090908, "English,Chinese,Filipino,Spanish": 0.022727272727272728, "English,Chinese,Filipino,Malay": 0.022727272727272728, "English,Chinese,Spanish,Malay": 0.028409090909090908, "English,Indonesian,Filipino,Spanish": 0.005681818181818182, "English,Indonesian,Filipino,Malay": 0.017045454545454544, "English,Indonesian,Spanish,Malay": 0.017045454545454544, "English,Filipino,Spanish,Malay": 0.011363636363636364, "Vietnamese,Chinese,Indonesian,Filipino": 0.017045454545454544, "Vietnamese,Chinese,Indonesian,Spanish": 0.017045454545454544, "Vietnamese,Chinese,Indonesian,Malay": 0.03409090909090909, "Vietnamese,Chinese,Filipino,Spanish": 0.011363636363636364, "Vietnamese,Chinese,Filipino,Malay": 0.011363636363636364, "Vietnamese,Chinese,Spanish,Malay": 0.022727272727272728, "Vietnamese,Indonesian,Filipino,Spanish": 0.011363636363636364, "Vietnamese,Indonesian,Filipino,Malay": 0.011363636363636364, "Vietnamese,Indonesian,Spanish,Malay": 0.017045454545454544, "Vietnamese,Filipino,Spanish,Malay": 0.011363636363636364, "Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, "Chinese,Indonesian,Filipino,Malay": 0.011363636363636364, "Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, "Chinese,Filipino,Spanish,Malay": 0.028409090909090908, "Indonesian,Filipino,Spanish,Malay": 0.011363636363636364 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.0, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.0, "English,Vietnamese,Chinese,Indonesian,Malay": 0.0, "English,Vietnamese,Chinese,Filipino,Spanish": 0.0, "English,Vietnamese,Chinese,Filipino,Malay": 0.0, "English,Vietnamese,Chinese,Spanish,Malay": 0.0, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.0, "English,Vietnamese,Indonesian,Filipino,Malay": 0.0, "English,Vietnamese,Indonesian,Spanish,Malay": 0.005681818181818182, "English,Vietnamese,Filipino,Spanish,Malay": 0.0, "English,Chinese,Indonesian,Filipino,Spanish": 0.0, "English,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, "English,Chinese,Indonesian,Spanish,Malay": 0.0, "English,Chinese,Filipino,Spanish,Malay": 0.011363636363636364, "English,Indonesian,Filipino,Spanish,Malay": 0.0, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.011363636363636364, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.011363636363636364, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 } }, "AC3_2": 0.2472896359157857, "AC3_3": 0.09820449604901942, "AC3_4": 0.02788682630312594, "AC3_5": 0.006409719310397773, "AC3_6": 0.0016180887509898965, "AC3_7": 0.0 }, "prompt_2": { "overall_acc": 0.2362012987012987, "language_acc": { "English": 0.25, "Vietnamese": 0.1875, "Chinese": 0.23295454545454544, "Indonesian": 0.22727272727272727, "Filipino": 0.2556818181818182, "Spanish": 0.25, "Malay": 0.25 }, "consistency_score_2": 0.24891774891774893, "consistency_score_3": 0.06282467532467532, "consistency_score_4": 0.016396103896103895, "consistency_score_5": 0.004329004329004328, "consistency_score_6": 0.0008116883116883117, "consistency_score_7": 0.0, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.26704545454545453, "English,Chinese": 0.2784090909090909, "English,Indonesian": 0.21022727272727273, "English,Filipino": 0.26136363636363635, "English,Spanish": 0.25, "English,Malay": 0.23863636363636365, "Vietnamese,Chinese": 0.25, "Vietnamese,Indonesian": 0.21022727272727273, "Vietnamese,Filipino": 0.23295454545454544, "Vietnamese,Spanish": 0.25, "Vietnamese,Malay": 0.26704545454545453, "Chinese,Indonesian": 0.2727272727272727, "Chinese,Filipino": 0.26704545454545453, "Chinese,Spanish": 0.2784090909090909, "Chinese,Malay": 0.20454545454545456, "Indonesian,Filipino": 0.22727272727272727, "Indonesian,Spanish": 0.26704545454545453, "Indonesian,Malay": 0.25, "Filipino,Spanish": 0.24431818181818182, "Filipino,Malay": 0.24431818181818182, "Spanish,Malay": 0.2556818181818182 }, "3_combine": { "English,Vietnamese,Chinese": 0.05113636363636364, "English,Vietnamese,Indonesian": 0.03409090909090909, "English,Vietnamese,Filipino": 0.07954545454545454, "English,Vietnamese,Spanish": 0.07954545454545454, "English,Vietnamese,Malay": 0.056818181818181816, "English,Chinese,Indonesian": 0.09659090909090909, "English,Chinese,Filipino": 0.06818181818181818, "English,Chinese,Spanish": 0.10795454545454546, "English,Chinese,Malay": 0.05113636363636364, "English,Indonesian,Filipino": 0.03409090909090909, "English,Indonesian,Spanish": 0.08522727272727272, "English,Indonesian,Malay": 0.056818181818181816, "English,Filipino,Spanish": 0.056818181818181816, "English,Filipino,Malay": 0.045454545454545456, "English,Spanish,Malay": 0.06818181818181818, "Vietnamese,Chinese,Indonesian": 0.07954545454545454, "Vietnamese,Chinese,Filipino": 0.0625, "Vietnamese,Chinese,Spanish": 0.0625, "Vietnamese,Chinese,Malay": 0.05113636363636364, "Vietnamese,Indonesian,Filipino": 0.03409090909090909, "Vietnamese,Indonesian,Spanish": 0.03977272727272727, "Vietnamese,Indonesian,Malay": 0.056818181818181816, "Vietnamese,Filipino,Spanish": 0.08522727272727272, "Vietnamese,Filipino,Malay": 0.06818181818181818, "Vietnamese,Spanish,Malay": 0.07386363636363637, "Chinese,Indonesian,Filipino": 0.07386363636363637, "Chinese,Indonesian,Spanish": 0.10227272727272728, "Chinese,Indonesian,Malay": 0.03977272727272727, "Chinese,Filipino,Spanish": 0.07954545454545454, "Chinese,Filipino,Malay": 0.06818181818181818, "Chinese,Spanish,Malay": 0.05113636363636364, "Indonesian,Filipino,Spanish": 0.03409090909090909, "Indonesian,Filipino,Malay": 0.045454545454545456, "Indonesian,Spanish,Malay": 0.056818181818181816, "Filipino,Spanish,Malay": 0.0625 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.011363636363636364, "English,Vietnamese,Chinese,Filipino": 0.028409090909090908, "English,Vietnamese,Chinese,Spanish": 0.017045454545454544, "English,Vietnamese,Chinese,Malay": 0.011363636363636364, "English,Vietnamese,Indonesian,Filipino": 0.0, "English,Vietnamese,Indonesian,Spanish": 0.005681818181818182, "English,Vietnamese,Indonesian,Malay": 0.017045454545454544, "English,Vietnamese,Filipino,Spanish": 0.03409090909090909, "English,Vietnamese,Filipino,Malay": 0.022727272727272728, "English,Vietnamese,Spanish,Malay": 0.03409090909090909, "English,Chinese,Indonesian,Filipino": 0.017045454545454544, "English,Chinese,Indonesian,Spanish": 0.056818181818181816, "English,Chinese,Indonesian,Malay": 0.011363636363636364, "English,Chinese,Filipino,Spanish": 0.028409090909090908, "English,Chinese,Filipino,Malay": 0.011363636363636364, "English,Chinese,Spanish,Malay": 0.011363636363636364, "English,Indonesian,Filipino,Spanish": 0.005681818181818182, "English,Indonesian,Filipino,Malay": 0.0, "English,Indonesian,Spanish,Malay": 0.017045454545454544, "English,Filipino,Spanish,Malay": 0.028409090909090908, "Vietnamese,Chinese,Indonesian,Filipino": 0.005681818181818182, "Vietnamese,Chinese,Indonesian,Spanish": 0.017045454545454544, "Vietnamese,Chinese,Indonesian,Malay": 0.017045454545454544, "Vietnamese,Chinese,Filipino,Spanish": 0.022727272727272728, "Vietnamese,Chinese,Filipino,Malay": 0.017045454545454544, "Vietnamese,Chinese,Spanish,Malay": 0.005681818181818182, "Vietnamese,Indonesian,Filipino,Spanish": 0.005681818181818182, "Vietnamese,Indonesian,Filipino,Malay": 0.011363636363636364, "Vietnamese,Indonesian,Spanish,Malay": 0.011363636363636364, "Vietnamese,Filipino,Spanish,Malay": 0.03409090909090909, "Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, "Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, "Chinese,Indonesian,Spanish,Malay": 0.017045454545454544, "Chinese,Filipino,Spanish,Malay": 0.017045454545454544, "Indonesian,Filipino,Spanish,Malay": 0.0 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.0, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.0, "English,Vietnamese,Chinese,Indonesian,Malay": 0.005681818181818182, "English,Vietnamese,Chinese,Filipino,Spanish": 0.017045454545454544, "English,Vietnamese,Chinese,Filipino,Malay": 0.005681818181818182, "English,Vietnamese,Chinese,Spanish,Malay": 0.005681818181818182, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.0, "English,Vietnamese,Indonesian,Filipino,Malay": 0.0, "English,Vietnamese,Indonesian,Spanish,Malay": 0.005681818181818182, "English,Vietnamese,Filipino,Spanish,Malay": 0.022727272727272728, "English,Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, "English,Chinese,Indonesian,Filipino,Malay": 0.0, "English,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, "English,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, "English,Indonesian,Filipino,Spanish,Malay": 0.0, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 } }, "AC3_2": 0.2423928552820977, "AC3_3": 0.09925070854162146, "AC3_4": 0.03066366472928889, "AC3_5": 0.008502183976714454, "AC3_6": 0.001617817113709871, "AC3_7": 0.0 }, "prompt_3": { "overall_acc": 0.23782467532467533, "language_acc": { "English": 0.22727272727272727, "Vietnamese": 0.30113636363636365, "Chinese": 0.23295454545454544, "Indonesian": 0.24431818181818182, "Filipino": 0.1590909090909091, "Spanish": 0.25, "Malay": 0.25 }, "consistency_score_2": 0.25135281385281383, "consistency_score_3": 0.060714285714285714, "consistency_score_4": 0.011850649350649344, "consistency_score_5": 0.001352813852813853, "consistency_score_6": 0.0, "consistency_score_7": 0.0, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.17613636363636365, "English,Chinese": 0.24431818181818182, "English,Indonesian": 0.24431818181818182, "English,Filipino": 0.29545454545454547, "English,Spanish": 0.25, "English,Malay": 0.30113636363636365, "Vietnamese,Chinese": 0.30113636363636365, "Vietnamese,Indonesian": 0.2727272727272727, "Vietnamese,Filipino": 0.19318181818181818, "Vietnamese,Spanish": 0.24431818181818182, "Vietnamese,Malay": 0.30113636363636365, "Chinese,Indonesian": 0.20454545454545456, "Chinese,Filipino": 0.24431818181818182, "Chinese,Spanish": 0.2727272727272727, "Chinese,Malay": 0.2840909090909091, "Indonesian,Filipino": 0.2159090909090909, "Indonesian,Spanish": 0.21022727272727273, "Indonesian,Malay": 0.23863636363636365, "Filipino,Spanish": 0.2840909090909091, "Filipino,Malay": 0.30113636363636365, "Spanish,Malay": 0.19886363636363635 }, "3_combine": { "English,Vietnamese,Chinese": 0.056818181818181816, "English,Vietnamese,Indonesian": 0.028409090909090908, "English,Vietnamese,Filipino": 0.0625, "English,Vietnamese,Spanish": 0.045454545454545456, "English,Vietnamese,Malay": 0.05113636363636364, "English,Chinese,Indonesian": 0.05113636363636364, "English,Chinese,Filipino": 0.07954545454545454, "English,Chinese,Spanish": 0.05113636363636364, "English,Chinese,Malay": 0.06818181818181818, "English,Indonesian,Filipino": 0.045454545454545456, "English,Indonesian,Spanish": 0.05113636363636364, "English,Indonesian,Malay": 0.06818181818181818, "English,Filipino,Spanish": 0.09090909090909091, "English,Filipino,Malay": 0.13068181818181818, "English,Spanish,Malay": 0.0625, "Vietnamese,Chinese,Indonesian": 0.08522727272727272, "Vietnamese,Chinese,Filipino": 0.056818181818181816, "Vietnamese,Chinese,Spanish": 0.06818181818181818, "Vietnamese,Chinese,Malay": 0.10795454545454546, "Vietnamese,Indonesian,Filipino": 0.03977272727272727, "Vietnamese,Indonesian,Spanish": 0.07386363636363637, "Vietnamese,Indonesian,Malay": 0.06818181818181818, "Vietnamese,Filipino,Spanish": 0.03409090909090909, "Vietnamese,Filipino,Malay": 0.0625, "Vietnamese,Spanish,Malay": 0.056818181818181816, "Chinese,Indonesian,Filipino": 0.03409090909090909, "Chinese,Indonesian,Spanish": 0.028409090909090908, "Chinese,Indonesian,Malay": 0.05113636363636364, "Chinese,Filipino,Spanish": 0.06818181818181818, "Chinese,Filipino,Malay": 0.07386363636363637, "Chinese,Spanish,Malay": 0.045454545454545456, "Indonesian,Filipino,Spanish": 0.07386363636363637, "Indonesian,Filipino,Malay": 0.05113636363636364, "Indonesian,Spanish,Malay": 0.028409090909090908, "Filipino,Spanish,Malay": 0.07386363636363637 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.0, "English,Vietnamese,Chinese,Filipino": 0.017045454545454544, "English,Vietnamese,Chinese,Spanish": 0.011363636363636364, "English,Vietnamese,Chinese,Malay": 0.022727272727272728, "English,Vietnamese,Indonesian,Filipino": 0.005681818181818182, "English,Vietnamese,Indonesian,Spanish": 0.011363636363636364, "English,Vietnamese,Indonesian,Malay": 0.0, "English,Vietnamese,Filipino,Spanish": 0.011363636363636364, "English,Vietnamese,Filipino,Malay": 0.017045454545454544, "English,Vietnamese,Spanish,Malay": 0.005681818181818182, "English,Chinese,Indonesian,Filipino": 0.005681818181818182, "English,Chinese,Indonesian,Spanish": 0.005681818181818182, "English,Chinese,Indonesian,Malay": 0.011363636363636364, "English,Chinese,Filipino,Spanish": 0.022727272727272728, "English,Chinese,Filipino,Malay": 0.017045454545454544, "English,Chinese,Spanish,Malay": 0.005681818181818182, "English,Indonesian,Filipino,Spanish": 0.017045454545454544, "English,Indonesian,Filipino,Malay": 0.011363636363636364, "English,Indonesian,Spanish,Malay": 0.005681818181818182, "English,Filipino,Spanish,Malay": 0.03409090909090909, "Vietnamese,Chinese,Indonesian,Filipino": 0.017045454545454544, "Vietnamese,Chinese,Indonesian,Spanish": 0.022727272727272728, "Vietnamese,Chinese,Indonesian,Malay": 0.022727272727272728, "Vietnamese,Chinese,Filipino,Spanish": 0.011363636363636364, "Vietnamese,Chinese,Filipino,Malay": 0.017045454545454544, "Vietnamese,Chinese,Spanish,Malay": 0.017045454545454544, "Vietnamese,Indonesian,Filipino,Spanish": 0.011363636363636364, "Vietnamese,Indonesian,Filipino,Malay": 0.011363636363636364, "Vietnamese,Indonesian,Spanish,Malay": 0.011363636363636364, "Vietnamese,Filipino,Spanish,Malay": 0.0, "Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, "Chinese,Indonesian,Filipino,Malay": 0.0, "Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, "Chinese,Filipino,Spanish,Malay": 0.011363636363636364, "Indonesian,Filipino,Spanish,Malay": 0.011363636363636364 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.0, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.0, "English,Vietnamese,Chinese,Indonesian,Malay": 0.0, "English,Vietnamese,Chinese,Filipino,Spanish": 0.005681818181818182, "English,Vietnamese,Chinese,Filipino,Malay": 0.0, "English,Vietnamese,Chinese,Spanish,Malay": 0.0, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.0, "English,Vietnamese,Indonesian,Filipino,Malay": 0.0, "English,Vietnamese,Indonesian,Spanish,Malay": 0.0, "English,Vietnamese,Filipino,Spanish,Malay": 0.0, "English,Chinese,Indonesian,Filipino,Spanish": 0.0, "English,Chinese,Indonesian,Filipino,Malay": 0.0, "English,Chinese,Indonesian,Spanish,Malay": 0.0, "English,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, "English,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 } }, "AC3_2": 0.24440168510117027, "AC3_3": 0.09673347312847706, "AC3_4": 0.022576334579646005, "AC3_5": 0.0026903243803760293, "AC3_6": 0.0, "AC3_7": 0.0 }, "prompt_4": { "overall_acc": 0.23701298701298704, "language_acc": { "English": 0.2556818181818182, "Vietnamese": 0.2159090909090909, "Chinese": 0.24431818181818182, "Indonesian": 0.23863636363636365, "Filipino": 0.2159090909090909, "Spanish": 0.2215909090909091, "Malay": 0.26704545454545453 }, "consistency_score_2": 0.25243506493506496, "consistency_score_3": 0.0642857142857143, "consistency_score_4": 0.015746753246753245, "consistency_score_5": 0.0027056277056277064, "consistency_score_6": 0.0, "consistency_score_7": 0.0, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.23295454545454544, "English,Chinese": 0.25, "English,Indonesian": 0.2840909090909091, "English,Filipino": 0.2784090909090909, "English,Spanish": 0.19886363636363635, "English,Malay": 0.2727272727272727, "Vietnamese,Chinese": 0.3125, "Vietnamese,Indonesian": 0.2556818181818182, "Vietnamese,Filipino": 0.26136363636363635, "Vietnamese,Spanish": 0.24431818181818182, "Vietnamese,Malay": 0.20454545454545456, "Chinese,Indonesian": 0.26136363636363635, "Chinese,Filipino": 0.24431818181818182, "Chinese,Spanish": 0.23863636363636365, "Chinese,Malay": 0.24431818181818182, "Indonesian,Filipino": 0.29545454545454547, "Indonesian,Spanish": 0.2556818181818182, "Indonesian,Malay": 0.19886363636363635, "Filipino,Spanish": 0.22727272727272727, "Filipino,Malay": 0.23295454545454544, "Spanish,Malay": 0.3068181818181818 }, "3_combine": { "English,Vietnamese,Chinese": 0.07386363636363637, "English,Vietnamese,Indonesian": 0.07386363636363637, "English,Vietnamese,Filipino": 0.0625, "English,Vietnamese,Spanish": 0.05113636363636364, "English,Vietnamese,Malay": 0.0625, "English,Chinese,Indonesian": 0.0625, "English,Chinese,Filipino": 0.0625, "English,Chinese,Spanish": 0.03977272727272727, "English,Chinese,Malay": 0.06818181818181818, "English,Indonesian,Filipino": 0.09090909090909091, "English,Indonesian,Spanish": 0.05113636363636364, "English,Indonesian,Malay": 0.045454545454545456, "English,Filipino,Spanish": 0.06818181818181818, "English,Filipino,Malay": 0.07386363636363637, "English,Spanish,Malay": 0.09659090909090909, "Vietnamese,Chinese,Indonesian": 0.09659090909090909, "Vietnamese,Chinese,Filipino": 0.07954545454545454, "Vietnamese,Chinese,Spanish": 0.07954545454545454, "Vietnamese,Chinese,Malay": 0.07954545454545454, "Vietnamese,Indonesian,Filipino": 0.09090909090909091, "Vietnamese,Indonesian,Spanish": 0.03409090909090909, "Vietnamese,Indonesian,Malay": 0.028409090909090908, "Vietnamese,Filipino,Spanish": 0.05113636363636364, "Vietnamese,Filipino,Malay": 0.022727272727272728, "Vietnamese,Spanish,Malay": 0.07954545454545454, "Chinese,Indonesian,Filipino": 0.07954545454545454, "Chinese,Indonesian,Spanish": 0.07386363636363637, "Chinese,Indonesian,Malay": 0.0625, "Chinese,Filipino,Spanish": 0.03977272727272727, "Chinese,Filipino,Malay": 0.045454545454545456, "Chinese,Spanish,Malay": 0.08522727272727272, "Indonesian,Filipino,Spanish": 0.056818181818181816, "Indonesian,Filipino,Malay": 0.05113636363636364, "Indonesian,Spanish,Malay": 0.0625, "Filipino,Spanish,Malay": 0.06818181818181818 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.017045454545454544, "English,Vietnamese,Chinese,Filipino": 0.017045454545454544, "English,Vietnamese,Chinese,Spanish": 0.011363636363636364, "English,Vietnamese,Chinese,Malay": 0.028409090909090908, "English,Vietnamese,Indonesian,Filipino": 0.022727272727272728, "English,Vietnamese,Indonesian,Spanish": 0.005681818181818182, "English,Vietnamese,Indonesian,Malay": 0.011363636363636364, "English,Vietnamese,Filipino,Spanish": 0.017045454545454544, "English,Vietnamese,Filipino,Malay": 0.005681818181818182, "English,Vietnamese,Spanish,Malay": 0.022727272727272728, "English,Chinese,Indonesian,Filipino": 0.022727272727272728, "English,Chinese,Indonesian,Spanish": 0.017045454545454544, "English,Chinese,Indonesian,Malay": 0.011363636363636364, "English,Chinese,Filipino,Spanish": 0.011363636363636364, "English,Chinese,Filipino,Malay": 0.005681818181818182, "English,Chinese,Spanish,Malay": 0.017045454545454544, "English,Indonesian,Filipino,Spanish": 0.017045454545454544, "English,Indonesian,Filipino,Malay": 0.017045454545454544, "English,Indonesian,Spanish,Malay": 0.011363636363636364, "English,Filipino,Spanish,Malay": 0.022727272727272728, "Vietnamese,Chinese,Indonesian,Filipino": 0.045454545454545456, "Vietnamese,Chinese,Indonesian,Spanish": 0.017045454545454544, "Vietnamese,Chinese,Indonesian,Malay": 0.011363636363636364, "Vietnamese,Chinese,Filipino,Spanish": 0.017045454545454544, "Vietnamese,Chinese,Filipino,Malay": 0.0, "Vietnamese,Chinese,Spanish,Malay": 0.03409090909090909, "Vietnamese,Indonesian,Filipino,Spanish": 0.017045454545454544, "Vietnamese,Indonesian,Filipino,Malay": 0.0, "Vietnamese,Indonesian,Spanish,Malay": 0.005681818181818182, "Vietnamese,Filipino,Spanish,Malay": 0.017045454545454544, "Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, "Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, "Chinese,Indonesian,Spanish,Malay": 0.028409090909090908, "Chinese,Filipino,Spanish,Malay": 0.005681818181818182, "Indonesian,Filipino,Spanish,Malay": 0.0 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.005681818181818182, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.0, "English,Vietnamese,Chinese,Indonesian,Malay": 0.0, "English,Vietnamese,Chinese,Filipino,Spanish": 0.005681818181818182, "English,Vietnamese,Chinese,Filipino,Malay": 0.0, "English,Vietnamese,Chinese,Spanish,Malay": 0.005681818181818182, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.0, "English,Vietnamese,Indonesian,Filipino,Malay": 0.0, "English,Vietnamese,Indonesian,Spanish,Malay": 0.0, "English,Vietnamese,Filipino,Spanish,Malay": 0.005681818181818182, "English,Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, "English,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, "English,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, "English,Chinese,Filipino,Spanish,Malay": 0.0, "English,Indonesian,Filipino,Spanish,Malay": 0.0, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.011363636363636364, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 } }, "AC3_2": 0.2444810578639434, "AC3_3": 0.10113916252800861, "AC3_4": 0.0295314832770251, "AC3_5": 0.005350180291515028, "AC3_6": 0.0, "AC3_7": 0.0 }, "prompt_5": { "overall_acc": 0.2573051948051948, "language_acc": { "English": 0.3125, "Vietnamese": 0.3068181818181818, "Chinese": 0.22727272727272727, "Indonesian": 0.25, "Filipino": 0.2215909090909091, "Spanish": 0.2159090909090909, "Malay": 0.26704545454545453 }, "consistency_score_2": 0.2489177489177489, "consistency_score_3": 0.06217532467532468, "consistency_score_4": 0.015584415584415577, "consistency_score_5": 0.003517316017316017, "consistency_score_6": 0.0008116883116883117, "consistency_score_7": 0.0, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.26704545454545453, "English,Chinese": 0.2556818181818182, "English,Indonesian": 0.20454545454545456, "English,Filipino": 0.24431818181818182, "English,Spanish": 0.2159090909090909, "English,Malay": 0.2784090909090909, "Vietnamese,Chinese": 0.25, "Vietnamese,Indonesian": 0.2784090909090909, "Vietnamese,Filipino": 0.23863636363636365, "Vietnamese,Spanish": 0.2840909090909091, "Vietnamese,Malay": 0.22727272727272727, "Chinese,Indonesian": 0.25, "Chinese,Filipino": 0.22727272727272727, "Chinese,Spanish": 0.25, "Chinese,Malay": 0.2840909090909091, "Indonesian,Filipino": 0.20454545454545456, "Indonesian,Spanish": 0.23863636363636365, "Indonesian,Malay": 0.24431818181818182, "Filipino,Spanish": 0.2897727272727273, "Filipino,Malay": 0.21022727272727273, "Spanish,Malay": 0.2840909090909091 }, "3_combine": { "English,Vietnamese,Chinese": 0.0625, "English,Vietnamese,Indonesian": 0.0625, "English,Vietnamese,Filipino": 0.0625, "English,Vietnamese,Spanish": 0.07386363636363637, "English,Vietnamese,Malay": 0.09090909090909091, "English,Chinese,Indonesian": 0.03409090909090909, "English,Chinese,Filipino": 0.0625, "English,Chinese,Spanish": 0.0625, "English,Chinese,Malay": 0.06818181818181818, "English,Indonesian,Filipino": 0.03977272727272727, "English,Indonesian,Spanish": 0.056818181818181816, "English,Indonesian,Malay": 0.06818181818181818, "English,Filipino,Spanish": 0.056818181818181816, "English,Filipino,Malay": 0.056818181818181816, "English,Spanish,Malay": 0.07954545454545454, "Vietnamese,Chinese,Indonesian": 0.06818181818181818, "Vietnamese,Chinese,Filipino": 0.028409090909090908, "Vietnamese,Chinese,Spanish": 0.07386363636363637, "Vietnamese,Chinese,Malay": 0.07386363636363637, "Vietnamese,Indonesian,Filipino": 0.06818181818181818, "Vietnamese,Indonesian,Spanish": 0.07954545454545454, "Vietnamese,Indonesian,Malay": 0.05113636363636364, "Vietnamese,Filipino,Spanish": 0.07386363636363637, "Vietnamese,Filipino,Malay": 0.03409090909090909, "Vietnamese,Spanish,Malay": 0.06818181818181818, "Chinese,Indonesian,Filipino": 0.03977272727272727, "Chinese,Indonesian,Spanish": 0.06818181818181818, "Chinese,Indonesian,Malay": 0.045454545454545456, "Chinese,Filipino,Spanish": 0.07954545454545454, "Chinese,Filipino,Malay": 0.0625, "Chinese,Spanish,Malay": 0.07954545454545454, "Indonesian,Filipino,Spanish": 0.056818181818181816, "Indonesian,Filipino,Malay": 0.05113636363636364, "Indonesian,Spanish,Malay": 0.07386363636363637, "Filipino,Spanish,Malay": 0.0625 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.011363636363636364, "English,Vietnamese,Chinese,Filipino": 0.005681818181818182, "English,Vietnamese,Chinese,Spanish": 0.011363636363636364, "English,Vietnamese,Chinese,Malay": 0.028409090909090908, "English,Vietnamese,Indonesian,Filipino": 0.011363636363636364, "English,Vietnamese,Indonesian,Spanish": 0.017045454545454544, "English,Vietnamese,Indonesian,Malay": 0.028409090909090908, "English,Vietnamese,Filipino,Spanish": 0.017045454545454544, "English,Vietnamese,Filipino,Malay": 0.017045454545454544, "English,Vietnamese,Spanish,Malay": 0.03977272727272727, "English,Chinese,Indonesian,Filipino": 0.005681818181818182, "English,Chinese,Indonesian,Spanish": 0.011363636363636364, "English,Chinese,Indonesian,Malay": 0.0, "English,Chinese,Filipino,Spanish": 0.022727272727272728, "English,Chinese,Filipino,Malay": 0.011363636363636364, "English,Chinese,Spanish,Malay": 0.028409090909090908, "English,Indonesian,Filipino,Spanish": 0.017045454545454544, "English,Indonesian,Filipino,Malay": 0.017045454545454544, "English,Indonesian,Spanish,Malay": 0.028409090909090908, "English,Filipino,Spanish,Malay": 0.011363636363636364, "Vietnamese,Chinese,Indonesian,Filipino": 0.0, "Vietnamese,Chinese,Indonesian,Spanish": 0.028409090909090908, "Vietnamese,Chinese,Indonesian,Malay": 0.011363636363636364, "Vietnamese,Chinese,Filipino,Spanish": 0.011363636363636364, "Vietnamese,Chinese,Filipino,Malay": 0.0, "Vietnamese,Chinese,Spanish,Malay": 0.017045454545454544, "Vietnamese,Indonesian,Filipino,Spanish": 0.022727272727272728, "Vietnamese,Indonesian,Filipino,Malay": 0.017045454545454544, "Vietnamese,Indonesian,Spanish,Malay": 0.017045454545454544, "Vietnamese,Filipino,Spanish,Malay": 0.011363636363636364, "Chinese,Indonesian,Filipino,Spanish": 0.011363636363636364, "Chinese,Indonesian,Filipino,Malay": 0.011363636363636364, "Chinese,Indonesian,Spanish,Malay": 0.0, "Chinese,Filipino,Spanish,Malay": 0.028409090909090908, "Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.0, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.0, "English,Vietnamese,Chinese,Indonesian,Malay": 0.0, "English,Vietnamese,Chinese,Filipino,Spanish": 0.0, "English,Vietnamese,Chinese,Filipino,Malay": 0.0, "English,Vietnamese,Chinese,Spanish,Malay": 0.011363636363636364, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.005681818181818182, "English,Vietnamese,Indonesian,Filipino,Malay": 0.011363636363636364, "English,Vietnamese,Indonesian,Spanish,Malay": 0.017045454545454544, "English,Vietnamese,Filipino,Spanish,Malay": 0.005681818181818182, "English,Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, "English,Chinese,Indonesian,Filipino,Malay": 0.0, "English,Chinese,Indonesian,Spanish,Malay": 0.0, "English,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, "English,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 } }, "AC3_2": 0.25304198735864997, "AC3_3": 0.1001502942881538, "AC3_4": 0.029388814624921992, "AC3_5": 0.006939766662209674, "AC3_6": 0.0016182716648145236, "AC3_7": 0.0 } }, "sg_eval": { "prompt_1": { "accuracy": 0.3106796116504854 }, "prompt_2": { "accuracy": 0.3592233009708738 }, "prompt_3": { "accuracy": 0.2815533980582524 }, "prompt_4": { "accuracy": 0.22330097087378642 }, "prompt_5": { "accuracy": 0.2815533980582524 } }, "cn_eval": { "prompt_1": { "accuracy": 0.21904761904761905 }, "prompt_2": { "accuracy": 0.20952380952380953 }, "prompt_3": { "accuracy": 0.22857142857142856 }, "prompt_4": { "accuracy": 0.26666666666666666 }, "prompt_5": { "accuracy": 0.2571428571428571 } }, "us_eval": { "prompt_1": { "accuracy": 0.2523364485981308 }, "prompt_2": { "accuracy": 0.14018691588785046 }, "prompt_3": { "accuracy": 0.205607476635514 }, "prompt_4": { "accuracy": 0.308411214953271 }, "prompt_5": { "accuracy": 0.19626168224299065 } }, "ph_eval": { "prompt_1": { "accuracy": 0.27, "category_acc": { "brand": 0.4, "demographics": 0.4, "biology": 0.2, "history": 0.3333333333333333, "literature": 0.2, "politics": 0.2, "culture": 0.3, "film": 0.4, "law": 0.2, "geography": 0.1 } }, "prompt_2": { "accuracy": 0.22, "category_acc": { "brand": 0.4, "demographics": 0.2, "biology": 0.1, "history": 0.2, "literature": 0.4, "politics": 0.1, "culture": 0.2, "film": 0.4, "law": 0.1, "geography": 0.1 } }, "prompt_3": { "accuracy": 0.19, "category_acc": { "brand": 0.4, "demographics": 0.2, "biology": 0.1, "history": 0.26666666666666666, "literature": 0.1, "politics": 0.2, "culture": 0.2, "film": 0.1, "law": 0.1, "geography": 0.2 } }, "prompt_4": { "accuracy": 0.29, "category_acc": { "brand": 0.5, "demographics": 0.2, "biology": 0.2, "history": 0.26666666666666666, "literature": 0.3, "politics": 0.2, "culture": 0.3, "film": 0.5, "law": 0.2, "geography": 0.2 } }, "prompt_5": { "accuracy": 0.24, "category_acc": { "brand": 0.1, "demographics": 0.2, "biology": 0.5, "history": 0.3333333333333333, "literature": 0.3, "politics": 0.5, "culture": 0.1, "film": 0.2, "law": 0.1, "geography": 0.0 } } }, "sing2eng": { "prompt_1": { "bleu_score": 0.008508397135511301 }, "prompt_2": { "bleu_score": 0.008508397135511301 }, "prompt_3": { "bleu_score": 0.008508397135511301 }, "prompt_4": { "bleu_score": 0.008508397135511301 }, "prompt_5": { "bleu_score": 0.008508397135511301 } }, "flores_ind2eng": { "prompt_1": { "bleu_score": 0.008557988557763307 }, "prompt_2": { "bleu_score": 0.008557988557763307 }, "prompt_3": { "bleu_score": 0.008557988557763307 }, "prompt_4": { "bleu_score": 0.008557988557763307 }, "prompt_5": { "bleu_score": 0.008557988557763307 } }, "flores_vie2eng": { "prompt_1": { "bleu_score": 0.008557988557763307 }, "prompt_2": { "bleu_score": 0.008557988557763307 }, "prompt_3": { "bleu_score": 0.008557988557763307 }, "prompt_4": { "bleu_score": 0.008557988557763307 }, "prompt_5": { "bleu_score": 0.008557988557763307 } }, "flores_zho2eng": { "prompt_1": { "bleu_score": 0.008557988557763307 }, "prompt_2": { "bleu_score": 0.008557988557763307 }, "prompt_3": { "bleu_score": 0.008557988557763307 }, "prompt_4": { "bleu_score": 0.008557988557763307 }, "prompt_5": { "bleu_score": 0.008557988557763307 } }, "flores_zsm2eng": { "prompt_1": { "bleu_score": 0.008557988557763307 }, "prompt_2": { "bleu_score": 0.008557988557763307 }, "prompt_3": { "bleu_score": 0.008557988557763307 }, "prompt_4": { "bleu_score": 0.008557988557763307 }, "prompt_5": { "bleu_score": 0.008557988557763307 } }, "mmlu": { "prompt_1": { "accuracy": 0.26487747957992996 }, "prompt_2": { "accuracy": 0.24970828471411902 }, "prompt_3": { "accuracy": 0.25670945157526254 }, "prompt_4": { "accuracy": 0.2613768961493582 }, "prompt_5": { "accuracy": 0.2532088681446908 } }, "mmlu_full": { "prompt_1": { "accuracy": 0.24590632820879513, "category_acc": { "high_school_european_history": 0.25609756097560976, "business_ethics": 0.31313131313131315, "clinical_knowledge": 0.24621212121212122, "medical_genetics": 0.1919191919191919, "high_school_us_history": 0.19704433497536947, "high_school_physics": 0.23333333333333334, "high_school_world_history": 0.2711864406779661, "virology": 0.21818181818181817, "high_school_microeconomics": 0.25316455696202533, "econometrics": 0.23008849557522124, "college_computer_science": 0.2727272727272727, "high_school_biology": 0.255663430420712, "abstract_algebra": 0.23232323232323232, "professional_accounting": 0.2526690391459075, "philosophy": 0.1935483870967742, "professional_medicine": 0.28413284132841327, "nutrition": 0.25573770491803277, "global_facts": 0.18181818181818182, "machine_learning": 0.16216216216216217, "security_studies": 0.26639344262295084, "public_relations": 0.25688073394495414, "professional_psychology": 0.23731587561374795, "prehistory": 0.21052631578947367, "anatomy": 0.23134328358208955, "human_sexuality": 0.25384615384615383, "college_medicine": 0.2558139534883721, "high_school_government_and_politics": 0.2552083333333333, "college_chemistry": 0.24242424242424243, "logical_fallacies": 0.2345679012345679, "high_school_geography": 0.23857868020304568, "elementary_mathematics": 0.23872679045092837, "human_aging": 0.22522522522522523, "college_mathematics": 0.1717171717171717, "high_school_psychology": 0.23345588235294118, "formal_logic": 0.24, "high_school_statistics": 0.2651162790697674, "international_law": 0.18333333333333332, "high_school_mathematics": 0.3048327137546468, "high_school_computer_science": 0.26262626262626265, "conceptual_physics": 0.1794871794871795, "miscellaneous": 0.23145780051150894, "high_school_chemistry": 0.2871287128712871, "marketing": 0.26180257510729615, "professional_law": 0.24266144814090018, "management": 0.23529411764705882, "college_physics": 0.24752475247524752, "jurisprudence": 0.2803738317757009, "world_religions": 0.21176470588235294, "sociology": 0.27, "us_foreign_policy": 0.32323232323232326, "high_school_macroeconomics": 0.2982005141388175, "computer_security": 0.2222222222222222, "moral_scenarios": 0.27293064876957496, "moral_disputes": 0.2753623188405797, "electrical_engineering": 0.2708333333333333, "astronomy": 0.19205298013245034, "college_biology": 0.25874125874125875 } }, "prompt_2": { "accuracy": 0.2462638541294244, "category_acc": { "high_school_european_history": 0.2682926829268293, "business_ethics": 0.1919191919191919, "clinical_knowledge": 0.2196969696969697, "medical_genetics": 0.32323232323232326, "high_school_us_history": 0.2413793103448276, "high_school_physics": 0.2733333333333333, "high_school_world_history": 0.2288135593220339, "virology": 0.21818181818181817, "high_school_microeconomics": 0.23628691983122363, "econometrics": 0.18584070796460178, "college_computer_science": 0.18181818181818182, "high_school_biology": 0.24271844660194175, "abstract_algebra": 0.2828282828282828, "professional_accounting": 0.2669039145907473, "philosophy": 0.24838709677419354, "professional_medicine": 0.30996309963099633, "nutrition": 0.23934426229508196, "global_facts": 0.24242424242424243, "machine_learning": 0.2882882882882883, "security_studies": 0.21721311475409835, "public_relations": 0.21100917431192662, "professional_psychology": 0.2635024549918167, "prehistory": 0.24458204334365324, "anatomy": 0.3208955223880597, "human_sexuality": 0.2076923076923077, "college_medicine": 0.23255813953488372, "high_school_government_and_politics": 0.21875, "college_chemistry": 0.1717171717171717, "logical_fallacies": 0.19753086419753085, "high_school_geography": 0.2233502538071066, "elementary_mathematics": 0.23607427055702918, "human_aging": 0.22972972972972974, "college_mathematics": 0.2222222222222222, "high_school_psychology": 0.2536764705882353, "formal_logic": 0.224, "high_school_statistics": 0.2, "international_law": 0.20833333333333334, "high_school_mathematics": 0.23048327137546468, "high_school_computer_science": 0.31313131313131315, "conceptual_physics": 0.32051282051282054, "miscellaneous": 0.24808184143222506, "high_school_chemistry": 0.2623762376237624, "marketing": 0.2875536480686695, "professional_law": 0.25244618395303325, "management": 0.3333333333333333, "college_physics": 0.2079207920792079, "jurisprudence": 0.19626168224299065, "world_religions": 0.24705882352941178, "sociology": 0.23, "us_foreign_policy": 0.24242424242424243, "high_school_macroeconomics": 0.23393316195372751, "computer_security": 0.24242424242424243, "moral_scenarios": 0.26174496644295303, "moral_disputes": 0.22318840579710145, "electrical_engineering": 0.2569444444444444, "astronomy": 0.24503311258278146, "college_biology": 0.23776223776223776 } }, "prompt_3": { "accuracy": 0.25048265999284947, "category_acc": { "high_school_european_history": 0.2621951219512195, "business_ethics": 0.2828282828282828, "clinical_knowledge": 0.2689393939393939, "medical_genetics": 0.25252525252525254, "high_school_us_history": 0.2512315270935961, "high_school_physics": 0.26666666666666666, "high_school_world_history": 0.24152542372881355, "virology": 0.2545454545454545, "high_school_microeconomics": 0.25738396624472576, "econometrics": 0.21238938053097345, "college_computer_science": 0.24242424242424243, "high_school_biology": 0.26537216828478966, "abstract_algebra": 0.2222222222222222, "professional_accounting": 0.2277580071174377, "philosophy": 0.23870967741935484, "professional_medicine": 0.2730627306273063, "nutrition": 0.25245901639344265, "global_facts": 0.23232323232323232, "machine_learning": 0.22522522522522523, "security_studies": 0.25, "public_relations": 0.1926605504587156, "professional_psychology": 0.2520458265139116, "prehistory": 0.23529411764705882, "anatomy": 0.30597014925373134, "human_sexuality": 0.2692307692307692, "college_medicine": 0.23255813953488372, "high_school_government_and_politics": 0.22916666666666666, "college_chemistry": 0.23232323232323232, "logical_fallacies": 0.2839506172839506, "high_school_geography": 0.26903553299492383, "elementary_mathematics": 0.2625994694960212, "human_aging": 0.22972972972972974, "college_mathematics": 0.2828282828282828, "high_school_psychology": 0.2665441176470588, "formal_logic": 0.192, "high_school_statistics": 0.2744186046511628, "international_law": 0.3, "high_school_mathematics": 0.24535315985130113, "high_school_computer_science": 0.2727272727272727, "conceptual_physics": 0.2564102564102564, "miscellaneous": 0.22762148337595908, "high_school_chemistry": 0.2524752475247525, "marketing": 0.31759656652360513, "professional_law": 0.2491846053489889, "management": 0.2549019607843137, "college_physics": 0.26732673267326734, "jurisprudence": 0.1588785046728972, "world_religions": 0.18235294117647058, "sociology": 0.275, "us_foreign_policy": 0.20202020202020202, "high_school_macroeconomics": 0.2185089974293059, "computer_security": 0.18181818181818182, "moral_scenarios": 0.26174496644295303, "moral_disputes": 0.2753623188405797, "electrical_engineering": 0.25, "astronomy": 0.31125827814569534, "college_biology": 0.21678321678321677 } }, "prompt_4": { "accuracy": 0.254486950303897, "category_acc": { "high_school_european_history": 0.3475609756097561, "business_ethics": 0.20202020202020202, "clinical_knowledge": 0.25, "medical_genetics": 0.2828282828282828, "high_school_us_history": 0.2561576354679803, "high_school_physics": 0.24666666666666667, "high_school_world_history": 0.2838983050847458, "virology": 0.3090909090909091, "high_school_microeconomics": 0.24472573839662448, "econometrics": 0.19469026548672566, "college_computer_science": 0.30303030303030304, "high_school_biology": 0.255663430420712, "abstract_algebra": 0.25252525252525254, "professional_accounting": 0.23843416370106763, "philosophy": 0.267741935483871, "professional_medicine": 0.2140221402214022, "nutrition": 0.23934426229508196, "global_facts": 0.25252525252525254, "machine_learning": 0.24324324324324326, "security_studies": 0.20081967213114754, "public_relations": 0.21100917431192662, "professional_psychology": 0.265139116202946, "prehistory": 0.30340557275541796, "anatomy": 0.2462686567164179, "human_sexuality": 0.27692307692307694, "college_medicine": 0.27906976744186046, "high_school_government_and_politics": 0.23958333333333334, "college_chemistry": 0.26262626262626265, "logical_fallacies": 0.24074074074074073, "high_school_geography": 0.3096446700507614, "elementary_mathematics": 0.246684350132626, "human_aging": 0.23873873873873874, "college_mathematics": 0.21212121212121213, "high_school_psychology": 0.2702205882352941, "formal_logic": 0.304, "high_school_statistics": 0.22790697674418606, "international_law": 0.25, "high_school_mathematics": 0.2527881040892193, "high_school_computer_science": 0.1919191919191919, "conceptual_physics": 0.23504273504273504, "miscellaneous": 0.24552429667519182, "high_school_chemistry": 0.2524752475247525, "marketing": 0.2703862660944206, "professional_law": 0.24983692106979777, "management": 0.27450980392156865, "college_physics": 0.3069306930693069, "jurisprudence": 0.2336448598130841, "world_religions": 0.22941176470588234, "sociology": 0.215, "us_foreign_policy": 0.23232323232323232, "high_school_macroeconomics": 0.2159383033419023, "computer_security": 0.25252525252525254, "moral_scenarios": 0.2639821029082774, "moral_disputes": 0.3188405797101449, "electrical_engineering": 0.2708333333333333, "astronomy": 0.25165562913907286, "college_biology": 0.2097902097902098 } }, "prompt_5": { "accuracy": 0.25176975330711476, "category_acc": { "high_school_european_history": 0.2804878048780488, "business_ethics": 0.21212121212121213, "clinical_knowledge": 0.24621212121212122, "medical_genetics": 0.2222222222222222, "high_school_us_history": 0.23645320197044334, "high_school_physics": 0.21333333333333335, "high_school_world_history": 0.2669491525423729, "virology": 0.26666666666666666, "high_school_microeconomics": 0.25738396624472576, "econometrics": 0.23893805309734514, "college_computer_science": 0.2727272727272727, "high_school_biology": 0.26537216828478966, "abstract_algebra": 0.25252525252525254, "professional_accounting": 0.28825622775800713, "philosophy": 0.24516129032258063, "professional_medicine": 0.23247232472324722, "nutrition": 0.26229508196721313, "global_facts": 0.2828282828282828, "machine_learning": 0.27927927927927926, "security_studies": 0.2459016393442623, "public_relations": 0.3394495412844037, "professional_psychology": 0.2553191489361702, "prehistory": 0.2724458204334365, "anatomy": 0.2462686567164179, "human_sexuality": 0.26153846153846155, "college_medicine": 0.27325581395348836, "high_school_government_and_politics": 0.3072916666666667, "college_chemistry": 0.21212121212121213, "logical_fallacies": 0.29012345679012347, "high_school_geography": 0.2893401015228426, "elementary_mathematics": 0.22811671087533156, "human_aging": 0.2702702702702703, "college_mathematics": 0.2727272727272727, "high_school_psychology": 0.23897058823529413, "formal_logic": 0.24, "high_school_statistics": 0.2372093023255814, "international_law": 0.225, "high_school_mathematics": 0.32342007434944237, "high_school_computer_science": 0.21212121212121213, "conceptual_physics": 0.28205128205128205, "miscellaneous": 0.21994884910485935, "high_school_chemistry": 0.2376237623762376, "marketing": 0.24034334763948498, "professional_law": 0.24331376386170905, "management": 0.23529411764705882, "college_physics": 0.27722772277227725, "jurisprudence": 0.2616822429906542, "world_religions": 0.3058823529411765, "sociology": 0.255, "us_foreign_policy": 0.18181818181818182, "high_school_macroeconomics": 0.2467866323907455, "computer_security": 0.20202020202020202, "moral_scenarios": 0.2360178970917226, "moral_disputes": 0.27246376811594203, "electrical_engineering": 0.24305555555555555, "astronomy": 0.2119205298013245, "college_biology": 0.25874125874125875 } } }, "c_eval": { "prompt_1": { "accuracy": 0.24219910846953938 }, "prompt_2": { "accuracy": 0.24219910846953938 }, "prompt_3": { "accuracy": 0.25482912332838037 }, "prompt_4": { "accuracy": 0.2288261515601783 }, "prompt_5": { "accuracy": 0.2563150074294205 } }, "c_eval_full": { "prompt_1": { "accuracy": 0.24719800747198006, "category_acc": { "computer_network": 0.20833333333333334, "operating_system": 0.2916666666666667, "computer_architecture": 0.3076923076923077, "college_programming": 0.35714285714285715, "college_physics": 0.125, "college_chemistry": 0.27586206896551724, "advanced_mathematics": 0.2916666666666667, "probability_and_statistics": 0.30434782608695654, "discrete_mathematics": 0.3333333333333333, "electrical_engineer": 0.21428571428571427, "metrology_engineer": 0.10344827586206896, "high_school_mathematics": 0.30434782608695654, "high_school_physics": 0.4583333333333333, "high_school_chemistry": 0.25, "high_school_biology": 0.20833333333333334, "middle_school_mathematics": 0.20833333333333334, "middle_school_biology": 0.3076923076923077, "middle_school_physics": 0.20833333333333334, "middle_school_chemistry": 0.12, "veterinary_medicine": 0.21428571428571427, "college_economics": 0.26666666666666666, "business_administration": 0.21052631578947367, "marxism": 0.25, "mao_zedong_thought": 0.2413793103448276, "education_science": 0.20588235294117646, "teacher_qualification": 0.24489795918367346, "high_school_politics": 0.041666666666666664, "high_school_geography": 0.20833333333333334, "middle_school_politics": 0.23076923076923078, "middle_school_geography": 0.29411764705882354, "modern_chinese_history": 0.03571428571428571, "ideological_and_moral_cultivation": 0.16666666666666666, "logic": 0.4444444444444444, "law": 0.20689655172413793, "chinese_language_and_literature": 0.32142857142857145, "art_studies": 0.21052631578947367, "professional_tour_guide": 0.29411764705882354, "legal_professional": 0.21428571428571427, "high_school_chinese": 0.20833333333333334, "high_school_history": 0.08, "middle_school_history": 0.1111111111111111, "civil_servant": 0.34615384615384615, "sports_science": 0.20833333333333334, "plant_protection": 0.2222222222222222, "basic_medicine": 0.2916666666666667, "clinical_medicine": 0.25925925925925924, "urban_and_rural_planner": 0.29411764705882354, "accountant": 0.2962962962962963, "fire_engineer": 0.3611111111111111, "environmental_impact_assessment_engineer": 0.2222222222222222, "tax_accountant": 0.3148148148148148, "physician": 0.2037037037037037 } }, "prompt_2": { "accuracy": 0.24844333748443337, "category_acc": { "computer_network": 0.25, "operating_system": 0.4166666666666667, "computer_architecture": 0.4230769230769231, "college_programming": 0.30952380952380953, "college_physics": 0.3333333333333333, "college_chemistry": 0.3103448275862069, "advanced_mathematics": 0.16666666666666666, "probability_and_statistics": 0.2608695652173913, "discrete_mathematics": 0.19047619047619047, "electrical_engineer": 0.3333333333333333, "metrology_engineer": 0.20689655172413793, "high_school_mathematics": 0.21739130434782608, "high_school_physics": 0.20833333333333334, "high_school_chemistry": 0.25, "high_school_biology": 0.25, "middle_school_mathematics": 0.16666666666666666, "middle_school_biology": 0.2692307692307692, "middle_school_physics": 0.25, "middle_school_chemistry": 0.12, "veterinary_medicine": 0.14285714285714285, "college_economics": 0.31666666666666665, "business_administration": 0.21052631578947367, "marxism": 0.20833333333333334, "mao_zedong_thought": 0.3103448275862069, "education_science": 0.4117647058823529, "teacher_qualification": 0.30612244897959184, "high_school_politics": 0.20833333333333334, "high_school_geography": 0.16666666666666666, "middle_school_politics": 0.11538461538461539, "middle_school_geography": 0.35294117647058826, "modern_chinese_history": 0.2857142857142857, "ideological_and_moral_cultivation": 0.5, "logic": 0.18518518518518517, "law": 0.13793103448275862, "chinese_language_and_literature": 0.35714285714285715, "art_studies": 0.21052631578947367, "professional_tour_guide": 0.14705882352941177, "legal_professional": 0.17857142857142858, "high_school_chinese": 0.16666666666666666, "high_school_history": 0.24, "middle_school_history": 0.18518518518518517, "civil_servant": 0.19230769230769232, "sports_science": 0.20833333333333334, "plant_protection": 0.14814814814814814, "basic_medicine": 0.25, "clinical_medicine": 0.2962962962962963, "urban_and_rural_planner": 0.3333333333333333, "accountant": 0.3148148148148148, "fire_engineer": 0.1388888888888889, "environmental_impact_assessment_engineer": 0.19444444444444445, "tax_accountant": 0.25925925925925924, "physician": 0.16666666666666666 } }, "prompt_3": { "accuracy": 0.2559153175591532, "category_acc": { "computer_network": 0.125, "operating_system": 0.25, "computer_architecture": 0.3076923076923077, "college_programming": 0.16666666666666666, "college_physics": 0.3333333333333333, "college_chemistry": 0.27586206896551724, "advanced_mathematics": 0.25, "probability_and_statistics": 0.30434782608695654, "discrete_mathematics": 0.09523809523809523, "electrical_engineer": 0.2619047619047619, "metrology_engineer": 0.3793103448275862, "high_school_mathematics": 0.30434782608695654, "high_school_physics": 0.375, "high_school_chemistry": 0.25, "high_school_biology": 0.16666666666666666, "middle_school_mathematics": 0.3333333333333333, "middle_school_biology": 0.23076923076923078, "middle_school_physics": 0.25, "middle_school_chemistry": 0.24, "veterinary_medicine": 0.2857142857142857, "college_economics": 0.2833333333333333, "business_administration": 0.21052631578947367, "marxism": 0.25, "mao_zedong_thought": 0.3793103448275862, "education_science": 0.29411764705882354, "teacher_qualification": 0.22448979591836735, "high_school_politics": 0.2916666666666667, "high_school_geography": 0.16666666666666666, "middle_school_politics": 0.2692307692307692, "middle_school_geography": 0.23529411764705882, "modern_chinese_history": 0.17857142857142858, "ideological_and_moral_cultivation": 0.3333333333333333, "logic": 0.3333333333333333, "law": 0.4482758620689655, "chinese_language_and_literature": 0.42857142857142855, "art_studies": 0.21052631578947367, "professional_tour_guide": 0.11764705882352941, "legal_professional": 0.2857142857142857, "high_school_chinese": 0.20833333333333334, "high_school_history": 0.32, "middle_school_history": 0.2962962962962963, "civil_servant": 0.21153846153846154, "sports_science": 0.20833333333333334, "plant_protection": 0.2222222222222222, "basic_medicine": 0.375, "clinical_medicine": 0.18518518518518517, "urban_and_rural_planner": 0.17647058823529413, "accountant": 0.16666666666666666, "fire_engineer": 0.2222222222222222, "environmental_impact_assessment_engineer": 0.2777777777777778, "tax_accountant": 0.25925925925925924, "physician": 0.2777777777777778 } }, "prompt_4": { "accuracy": 0.24782067247820672, "category_acc": { "computer_network": 0.25, "operating_system": 0.25, "computer_architecture": 0.4230769230769231, "college_programming": 0.30952380952380953, "college_physics": 0.20833333333333334, "college_chemistry": 0.20689655172413793, "advanced_mathematics": 0.2916666666666667, "probability_and_statistics": 0.13043478260869565, "discrete_mathematics": 0.23809523809523808, "electrical_engineer": 0.30952380952380953, "metrology_engineer": 0.10344827586206896, "high_school_mathematics": 0.17391304347826086, "high_school_physics": 0.20833333333333334, "high_school_chemistry": 0.25, "high_school_biology": 0.3333333333333333, "middle_school_mathematics": 0.4166666666666667, "middle_school_biology": 0.38461538461538464, "middle_school_physics": 0.25, "middle_school_chemistry": 0.24, "veterinary_medicine": 0.25, "college_economics": 0.23333333333333334, "business_administration": 0.2631578947368421, "marxism": 0.25, "mao_zedong_thought": 0.1724137931034483, "education_science": 0.2647058823529412, "teacher_qualification": 0.2653061224489796, "high_school_politics": 0.2916666666666667, "high_school_geography": 0.125, "middle_school_politics": 0.23076923076923078, "middle_school_geography": 0.23529411764705882, "modern_chinese_history": 0.14285714285714285, "ideological_and_moral_cultivation": 0.4166666666666667, "logic": 0.3333333333333333, "law": 0.20689655172413793, "chinese_language_and_literature": 0.14285714285714285, "art_studies": 0.21052631578947367, "professional_tour_guide": 0.2647058823529412, "legal_professional": 0.2857142857142857, "high_school_chinese": 0.041666666666666664, "high_school_history": 0.16, "middle_school_history": 0.18518518518518517, "civil_servant": 0.2692307692307692, "sports_science": 0.3333333333333333, "plant_protection": 0.18518518518518517, "basic_medicine": 0.3333333333333333, "clinical_medicine": 0.18518518518518517, "urban_and_rural_planner": 0.23529411764705882, "accountant": 0.14814814814814814, "fire_engineer": 0.3333333333333333, "environmental_impact_assessment_engineer": 0.3333333333333333, "tax_accountant": 0.2962962962962963, "physician": 0.24074074074074073 } }, "prompt_5": { "accuracy": 0.24906600249066002, "category_acc": { "computer_network": 0.16666666666666666, "operating_system": 0.3333333333333333, "computer_architecture": 0.4230769230769231, "college_programming": 0.23809523809523808, "college_physics": 0.125, "college_chemistry": 0.20689655172413793, "advanced_mathematics": 0.08333333333333333, "probability_and_statistics": 0.30434782608695654, "discrete_mathematics": 0.38095238095238093, "electrical_engineer": 0.16666666666666666, "metrology_engineer": 0.20689655172413793, "high_school_mathematics": 0.13043478260869565, "high_school_physics": 0.3333333333333333, "high_school_chemistry": 0.16666666666666666, "high_school_biology": 0.2916666666666667, "middle_school_mathematics": 0.16666666666666666, "middle_school_biology": 0.3076923076923077, "middle_school_physics": 0.4583333333333333, "middle_school_chemistry": 0.24, "veterinary_medicine": 0.25, "college_economics": 0.3, "business_administration": 0.2894736842105263, "marxism": 0.125, "mao_zedong_thought": 0.2413793103448276, "education_science": 0.17647058823529413, "teacher_qualification": 0.2857142857142857, "high_school_politics": 0.16666666666666666, "high_school_geography": 0.25, "middle_school_politics": 0.23076923076923078, "middle_school_geography": 0.35294117647058826, "modern_chinese_history": 0.39285714285714285, "ideological_and_moral_cultivation": 0.25, "logic": 0.37037037037037035, "law": 0.27586206896551724, "chinese_language_and_literature": 0.21428571428571427, "art_studies": 0.3157894736842105, "professional_tour_guide": 0.2647058823529412, "legal_professional": 0.2857142857142857, "high_school_chinese": 0.20833333333333334, "high_school_history": 0.16, "middle_school_history": 0.3333333333333333, "civil_servant": 0.11538461538461539, "sports_science": 0.20833333333333334, "plant_protection": 0.2962962962962963, "basic_medicine": 0.16666666666666666, "clinical_medicine": 0.37037037037037035, "urban_and_rural_planner": 0.2549019607843137, "accountant": 0.24074074074074073, "fire_engineer": 0.25, "environmental_impact_assessment_engineer": 0.3055555555555556, "tax_accountant": 0.16666666666666666, "physician": 0.24074074074074073 } } }, "cmmlu": { "prompt_1": { "accuracy": 0.25089605734767023 }, "prompt_2": { "accuracy": 0.2867383512544803 }, "prompt_3": { "accuracy": 0.26523297491039427 }, "prompt_4": { "accuracy": 0.3046594982078853 }, "prompt_5": { "accuracy": 0.26523297491039427 } }, "cmmlu_full": { "prompt_1": { "accuracy": 0.2528060783975134, "category_acc": { "agronomy": 0.2781065088757396, "anatomy": 0.24324324324324326, "ancient_chinese": 0.21951219512195122, "arts": 0.2375, "astronomy": 0.22424242424242424, "business_ethics": 0.20095693779904306, "chinese_civil_service_exam": 0.24375, "chinese_driving_rule": 0.2595419847328244, "chinese_food_culture": 0.25, "chinese_foreign_policy": 0.2803738317757009, "chinese_history": 0.26625386996904027, "chinese_literature": 0.2549019607843137, "chinese_teacher_qualification": 0.2122905027932961, "clinical_knowledge": 0.26582278481012656, "college_actuarial_science": 0.20754716981132076, "college_education": 0.2336448598130841, "college_engineering_hydrology": 0.2358490566037736, "college_law": 0.2037037037037037, "college_mathematics": 0.2857142857142857, "college_medical_statistics": 0.2641509433962264, "college_medicine": 0.2087912087912088, "computer_science": 0.21568627450980393, "computer_security": 0.29239766081871343, "conceptual_physics": 0.23129251700680273, "construction_project_management": 0.2805755395683453, "economics": 0.2578616352201258, "education": 0.25153374233128833, "electrical_engineering": 0.23837209302325582, "elementary_chinese": 0.24206349206349206, "elementary_commonsense": 0.30303030303030304, "elementary_information_and_technology": 0.25210084033613445, "elementary_mathematics": 0.25217391304347825, "ethnology": 0.35555555555555557, "food_science": 0.3356643356643357, "genetics": 0.23863636363636365, "global_facts": 0.2214765100671141, "high_school_biology": 0.26627218934911245, "high_school_chemistry": 0.22727272727272727, "high_school_geography": 0.2711864406779661, "high_school_mathematics": 0.22560975609756098, "high_school_physics": 0.24545454545454545, "high_school_politics": 0.1888111888111888, "human_sexuality": 0.20634920634920634, "international_law": 0.22702702702702704, "journalism": 0.23255813953488372, "jurisprudence": 0.25547445255474455, "legal_and_moral_basis": 0.24299065420560748, "logical": 0.22764227642276422, "machine_learning": 0.2540983606557377, "management": 0.2619047619047619, "marketing": 0.22777777777777777, "marxist_theory": 0.23809523809523808, "modern_chinese": 0.21551724137931033, "nutrition": 0.25517241379310346, "philosophy": 0.3333333333333333, "professional_accounting": 0.2342857142857143, "professional_law": 0.24170616113744076, "professional_medicine": 0.29521276595744683, "professional_psychology": 0.2672413793103448, "public_relations": 0.3275862068965517, "security_study": 0.3333333333333333, "sociology": 0.26991150442477874, "sports_science": 0.2727272727272727, "traditional_chinese_medicine": 0.20540540540540542, "virology": 0.3254437869822485, "world_history": 0.2795031055900621, "world_religions": 0.225 } }, "prompt_2": { "accuracy": 0.2487480573303402, "category_acc": { "agronomy": 0.2781065088757396, "anatomy": 0.2905405405405405, "ancient_chinese": 0.23170731707317074, "arts": 0.30625, "astronomy": 0.23030303030303031, "business_ethics": 0.3014354066985646, "chinese_civil_service_exam": 0.23125, "chinese_driving_rule": 0.26717557251908397, "chinese_food_culture": 0.19117647058823528, "chinese_foreign_policy": 0.21495327102803738, "chinese_history": 0.20743034055727555, "chinese_literature": 0.2647058823529412, "chinese_teacher_qualification": 0.20670391061452514, "clinical_knowledge": 0.23628691983122363, "college_actuarial_science": 0.18867924528301888, "college_education": 0.2616822429906542, "college_engineering_hydrology": 0.18867924528301888, "college_law": 0.2222222222222222, "college_mathematics": 0.29523809523809524, "college_medical_statistics": 0.22641509433962265, "college_medicine": 0.26373626373626374, "computer_science": 0.23039215686274508, "computer_security": 0.2046783625730994, "conceptual_physics": 0.20408163265306123, "construction_project_management": 0.2733812949640288, "economics": 0.24528301886792453, "education": 0.24539877300613497, "electrical_engineering": 0.20348837209302326, "elementary_chinese": 0.2698412698412698, "elementary_commonsense": 0.22727272727272727, "elementary_information_and_technology": 0.2689075630252101, "elementary_mathematics": 0.32608695652173914, "ethnology": 0.2222222222222222, "food_science": 0.16783216783216784, "genetics": 0.30113636363636365, "global_facts": 0.2483221476510067, "high_school_biology": 0.3254437869822485, "high_school_chemistry": 0.25757575757575757, "high_school_geography": 0.2711864406779661, "high_school_mathematics": 0.29878048780487804, "high_school_physics": 0.21818181818181817, "high_school_politics": 0.23076923076923078, "human_sexuality": 0.2777777777777778, "international_law": 0.2918918918918919, "journalism": 0.26744186046511625, "jurisprudence": 0.25060827250608275, "legal_and_moral_basis": 0.205607476635514, "logical": 0.1951219512195122, "machine_learning": 0.27049180327868855, "management": 0.21904761904761905, "marketing": 0.17222222222222222, "marxist_theory": 0.19576719576719576, "modern_chinese": 0.29310344827586204, "nutrition": 0.18620689655172415, "philosophy": 0.2, "professional_accounting": 0.26285714285714284, "professional_law": 0.26066350710900477, "professional_medicine": 0.31382978723404253, "professional_psychology": 0.28879310344827586, "public_relations": 0.20114942528735633, "security_study": 0.3111111111111111, "sociology": 0.3053097345132743, "sports_science": 0.26666666666666666, "traditional_chinese_medicine": 0.21621621621621623, "virology": 0.21301775147928995, "world_history": 0.2546583850931677, "world_religions": 0.2125 } }, "prompt_3": { "accuracy": 0.24633051286479019, "category_acc": { "agronomy": 0.21893491124260356, "anatomy": 0.2635135135135135, "ancient_chinese": 0.23780487804878048, "arts": 0.25, "astronomy": 0.24848484848484848, "business_ethics": 0.2727272727272727, "chinese_civil_service_exam": 0.275, "chinese_driving_rule": 0.22137404580152673, "chinese_food_culture": 0.3088235294117647, "chinese_foreign_policy": 0.2616822429906542, "chinese_history": 0.26006191950464397, "chinese_literature": 0.20588235294117646, "chinese_teacher_qualification": 0.18994413407821228, "clinical_knowledge": 0.22362869198312235, "college_actuarial_science": 0.27358490566037735, "college_education": 0.18691588785046728, "college_engineering_hydrology": 0.22641509433962265, "college_law": 0.28703703703703703, "college_mathematics": 0.22857142857142856, "college_medical_statistics": 0.22641509433962265, "college_medicine": 0.2600732600732601, "computer_science": 0.15196078431372548, "computer_security": 0.30994152046783624, "conceptual_physics": 0.2585034013605442, "construction_project_management": 0.31654676258992803, "economics": 0.24528301886792453, "education": 0.25153374233128833, "electrical_engineering": 0.23255813953488372, "elementary_chinese": 0.2777777777777778, "elementary_commonsense": 0.2474747474747475, "elementary_information_and_technology": 0.22268907563025211, "elementary_mathematics": 0.27391304347826084, "ethnology": 0.25925925925925924, "food_science": 0.23076923076923078, "genetics": 0.3068181818181818, "global_facts": 0.20134228187919462, "high_school_biology": 0.2781065088757396, "high_school_chemistry": 0.22727272727272727, "high_school_geography": 0.2542372881355932, "high_school_mathematics": 0.20121951219512196, "high_school_physics": 0.23636363636363636, "high_school_politics": 0.25874125874125875, "human_sexuality": 0.2222222222222222, "international_law": 0.22162162162162163, "journalism": 0.27906976744186046, "jurisprudence": 0.2773722627737226, "legal_and_moral_basis": 0.29906542056074764, "logical": 0.21951219512195122, "machine_learning": 0.2786885245901639, "management": 0.23809523809523808, "marketing": 0.19444444444444445, "marxist_theory": 0.25396825396825395, "modern_chinese": 0.1810344827586207, "nutrition": 0.2689655172413793, "philosophy": 0.26666666666666666, "professional_accounting": 0.2342857142857143, "professional_law": 0.22748815165876776, "professional_medicine": 0.2579787234042553, "professional_psychology": 0.2801724137931034, "public_relations": 0.23563218390804597, "security_study": 0.25925925925925924, "sociology": 0.1902654867256637, "sports_science": 0.23636363636363636, "traditional_chinese_medicine": 0.22702702702702704, "virology": 0.21893491124260356, "world_history": 0.2732919254658385, "world_religions": 0.225 } }, "prompt_4": { "accuracy": 0.24969780694180624, "category_acc": { "agronomy": 0.2603550295857988, "anatomy": 0.2702702702702703, "ancient_chinese": 0.2865853658536585, "arts": 0.1875, "astronomy": 0.21818181818181817, "business_ethics": 0.31100478468899523, "chinese_civil_service_exam": 0.25625, "chinese_driving_rule": 0.2824427480916031, "chinese_food_culture": 0.27205882352941174, "chinese_foreign_policy": 0.205607476635514, "chinese_history": 0.23839009287925697, "chinese_literature": 0.28431372549019607, "chinese_teacher_qualification": 0.2849162011173184, "clinical_knowledge": 0.25738396624472576, "college_actuarial_science": 0.2830188679245283, "college_education": 0.24299065420560748, "college_engineering_hydrology": 0.3018867924528302, "college_law": 0.18518518518518517, "college_mathematics": 0.19047619047619047, "college_medical_statistics": 0.25471698113207547, "college_medicine": 0.27106227106227104, "computer_science": 0.22058823529411764, "computer_security": 0.22807017543859648, "conceptual_physics": 0.2653061224489796, "construction_project_management": 0.22302158273381295, "economics": 0.2830188679245283, "education": 0.2883435582822086, "electrical_engineering": 0.22674418604651161, "elementary_chinese": 0.23015873015873015, "elementary_commonsense": 0.1919191919191919, "elementary_information_and_technology": 0.23109243697478993, "elementary_mathematics": 0.2608695652173913, "ethnology": 0.2962962962962963, "food_science": 0.25874125874125875, "genetics": 0.23863636363636365, "global_facts": 0.2684563758389262, "high_school_biology": 0.17751479289940827, "high_school_chemistry": 0.30303030303030304, "high_school_geography": 0.2457627118644068, "high_school_mathematics": 0.25, "high_school_physics": 0.23636363636363636, "high_school_politics": 0.2937062937062937, "human_sexuality": 0.24603174603174602, "international_law": 0.2648648648648649, "journalism": 0.28488372093023256, "jurisprudence": 0.25304136253041365, "legal_and_moral_basis": 0.22897196261682243, "logical": 0.21951219512195122, "machine_learning": 0.28688524590163933, "management": 0.23809523809523808, "marketing": 0.2722222222222222, "marxist_theory": 0.24867724867724866, "modern_chinese": 0.23275862068965517, "nutrition": 0.19310344827586207, "philosophy": 0.2571428571428571, "professional_accounting": 0.2914285714285714, "professional_law": 0.23696682464454977, "professional_medicine": 0.2632978723404255, "professional_psychology": 0.21982758620689655, "public_relations": 0.2413793103448276, "security_study": 0.3037037037037037, "sociology": 0.23008849557522124, "sports_science": 0.24848484848484848, "traditional_chinese_medicine": 0.23783783783783785, "virology": 0.24260355029585798, "world_history": 0.19875776397515527, "world_religions": 0.2375 } }, "prompt_5": { "accuracy": 0.25090657917458126, "category_acc": { "agronomy": 0.27218934911242604, "anatomy": 0.25675675675675674, "ancient_chinese": 0.2926829268292683, "arts": 0.23125, "astronomy": 0.2727272727272727, "business_ethics": 0.2822966507177033, "chinese_civil_service_exam": 0.275, "chinese_driving_rule": 0.3053435114503817, "chinese_food_culture": 0.23529411764705882, "chinese_foreign_policy": 0.2336448598130841, "chinese_history": 0.2260061919504644, "chinese_literature": 0.2549019607843137, "chinese_teacher_qualification": 0.24022346368715083, "clinical_knowledge": 0.25738396624472576, "college_actuarial_science": 0.2641509433962264, "college_education": 0.2336448598130841, "college_engineering_hydrology": 0.20754716981132076, "college_law": 0.2037037037037037, "college_mathematics": 0.23809523809523808, "college_medical_statistics": 0.22641509433962265, "college_medicine": 0.22344322344322345, "computer_science": 0.22058823529411764, "computer_security": 0.24561403508771928, "conceptual_physics": 0.2108843537414966, "construction_project_management": 0.23741007194244604, "economics": 0.27672955974842767, "education": 0.22699386503067484, "electrical_engineering": 0.26744186046511625, "elementary_chinese": 0.21031746031746032, "elementary_commonsense": 0.23737373737373738, "elementary_information_and_technology": 0.28991596638655465, "elementary_mathematics": 0.26521739130434785, "ethnology": 0.21481481481481482, "food_science": 0.25874125874125875, "genetics": 0.29545454545454547, "global_facts": 0.3825503355704698, "high_school_biology": 0.21301775147928995, "high_school_chemistry": 0.22727272727272727, "high_school_geography": 0.23728813559322035, "high_school_mathematics": 0.21341463414634146, "high_school_physics": 0.13636363636363635, "high_school_politics": 0.2727272727272727, "human_sexuality": 0.30158730158730157, "international_law": 0.23783783783783785, "journalism": 0.23837209302325582, "jurisprudence": 0.2384428223844282, "legal_and_moral_basis": 0.2803738317757009, "logical": 0.24390243902439024, "machine_learning": 0.22131147540983606, "management": 0.24761904761904763, "marketing": 0.25555555555555554, "marxist_theory": 0.25396825396825395, "modern_chinese": 0.2672413793103448, "nutrition": 0.2689655172413793, "philosophy": 0.29523809523809524, "professional_accounting": 0.22285714285714286, "professional_law": 0.22748815165876776, "professional_medicine": 0.24468085106382978, "professional_psychology": 0.2629310344827586, "public_relations": 0.21839080459770116, "security_study": 0.2518518518518518, "sociology": 0.252212389380531, "sports_science": 0.3212121212121212, "traditional_chinese_medicine": 0.2918918918918919, "virology": 0.22485207100591717, "world_history": 0.2857142857142857, "world_religions": 0.28125 } } }, "zbench": { "prompt_1": { "accuracy": 0.30303030303030304 }, "prompt_2": { "accuracy": 0.3333333333333333 }, "prompt_3": { "accuracy": 0.21212121212121213 }, "prompt_4": { "accuracy": 0.15151515151515152 }, "prompt_5": { "accuracy": 0.30303030303030304 } }, "ind_emotion": { "prompt_1": { "accuracy": 0.19090909090909092 }, "prompt_2": { "accuracy": 0.17272727272727273 }, "prompt_3": { "accuracy": 0.17727272727272728 }, "prompt_4": { "accuracy": 0.17727272727272728 }, "prompt_5": { "accuracy": 0.18863636363636363 } }, "ocnli": { "prompt_1": { "accuracy": 0.32949152542372884 }, "prompt_2": { "accuracy": 0.3142372881355932 }, "prompt_3": { "accuracy": 0.33661016949152545 }, "prompt_4": { "accuracy": 0.32440677966101694 }, "prompt_5": { "accuracy": 0.3494915254237288 } }, "c3": { "prompt_1": { "accuracy": 0.26925953627524307 }, "prompt_2": { "accuracy": 0.2677636499626028 }, "prompt_3": { "accuracy": 0.2857142857142857 }, "prompt_4": { "accuracy": 0.28721017202692595 }, "prompt_5": { "accuracy": 0.28421839940164545 } }, "dream": { "prompt_1": { "accuracy": 0.34149926506614403 }, "prompt_2": { "accuracy": 0.3145516903478687 }, "prompt_3": { "accuracy": 0.3400293973542381 }, "prompt_4": { "accuracy": 0.3419892209701127 }, "prompt_5": { "accuracy": 0.3370896619304263 } }, "samsum": { "prompt_1": { "rouge1": 0.030176401898066273, "rouge2": 0.0, "rougeL": 0.029301949331489687, "avg_rouge": 0.019826117076518653 }, "prompt_2": { "rouge1": 0.030176401898066273, "rouge2": 0.0, "rougeL": 0.029301949331489687, "avg_rouge": 0.019826117076518653 }, "prompt_3": { "rouge1": 0.030176401898066273, "rouge2": 0.0, "rougeL": 0.029301949331489687, "avg_rouge": 0.019826117076518653 }, "prompt_4": { "rouge1": 0.030176401898066273, "rouge2": 0.0, "rougeL": 0.029301949331489687, "avg_rouge": 0.019826117076518653 }, "prompt_5": { "rouge1": 0.030176401898066273, "rouge2": 0.0, "rougeL": 0.029301949331489687, "avg_rouge": 0.019826117076518653 } }, "dialogsum": { "prompt_1": { "rouge1": 0.03339865867499272, "rouge2": 0.0, "rougeL": 0.03201375559955033, "avg_rouge": 0.021804138091514352 }, "prompt_2": { "rouge1": 0.03339865867499272, "rouge2": 0.0, "rougeL": 0.03201375559955033, "avg_rouge": 0.021804138091514352 }, "prompt_3": { "rouge1": 0.03339865867499272, "rouge2": 0.0, "rougeL": 0.03201375559955033, "avg_rouge": 0.021804138091514352 }, "prompt_4": { "rouge1": 0.03339865867499272, "rouge2": 0.0, "rougeL": 0.03201375559955033, "avg_rouge": 0.021804138091514352 }, "prompt_5": { "rouge1": 0.03339865867499272, "rouge2": 0.0, "rougeL": 0.03201375559955033, "avg_rouge": 0.021804138091514352 } }, "sst2": { "prompt_1": { "accuracy": 0.5252293577981652 }, "prompt_2": { "accuracy": 0.48853211009174313 }, "prompt_3": { "accuracy": 0.5011467889908257 }, "prompt_4": { "accuracy": 0.4827981651376147 }, "prompt_5": { "accuracy": 0.5126146788990825 } }, "cola": { "prompt_1": { "accuracy": 0.4956855225311601 }, "prompt_2": { "accuracy": 0.4966442953020134 }, "prompt_3": { "accuracy": 0.5091083413231065 }, "prompt_4": { "accuracy": 0.48705656759348037 }, "prompt_5": { "accuracy": 0.4813039309683605 } }, "qqp": { "prompt_1": { "accuracy": 0.4985654217165471 }, "prompt_2": { "accuracy": 0.49849121939154095 }, "prompt_3": { "accuracy": 0.4992332426416028 }, "prompt_4": { "accuracy": 0.4981696759831808 }, "prompt_5": { "accuracy": 0.5001484046500123 } }, "mnli": { "prompt_1": { "accuracy": 0.3344021988089785 }, "prompt_2": { "accuracy": 0.3386267623555759 }, "prompt_3": { "accuracy": 0.32493510459612157 }, "prompt_4": { "accuracy": 0.3382195755077111 }, "prompt_5": { "accuracy": 0.3359291494884715 } }, "qnli": { "prompt_1": { "accuracy": 0.49990847519677833 }, "prompt_2": { "accuracy": 0.49807797913234486 }, "prompt_3": { "accuracy": 0.49697968149368477 }, "prompt_4": { "accuracy": 0.4911220940874977 }, "prompt_5": { "accuracy": 0.49478308621636463 } }, "wnli": { "prompt_1": { "accuracy": 0.5211267605633803 }, "prompt_2": { "accuracy": 0.4507042253521127 }, "prompt_3": { "accuracy": 0.5492957746478874 }, "prompt_4": { "accuracy": 0.4084507042253521 }, "prompt_5": { "accuracy": 0.6056338028169014 } }, "rte": { "prompt_1": { "accuracy": 0.5270758122743683 }, "prompt_2": { "accuracy": 0.4981949458483754 }, "prompt_3": { "accuracy": 0.44404332129963897 }, "prompt_4": { "accuracy": 0.5270758122743683 }, "prompt_5": { "accuracy": 0.49097472924187724 } }, "mrpc": { "prompt_1": { "accuracy": 0.47794117647058826 }, "prompt_2": { "accuracy": 0.4681372549019608 }, "prompt_3": { "accuracy": 0.5073529411764706 }, "prompt_4": { "accuracy": 0.5318627450980392 }, "prompt_5": { "accuracy": 0.4681372549019608 } } }, "five_shot": { "cross_mmlu": { "prompt_1": { "overall_acc": 0.25142857142857145, "language_acc": { "Malay": 0.26666666666666666, "English": 0.25333333333333335, "Vietnamese": 0.26666666666666666, "Spanish": 0.21333333333333335, "Indonesian": 0.24666666666666667, "Filipino": 0.28, "Chinese": 0.23333333333333334 }, "consistency_score_2": 0.2460317460317461, "consistency_score_3": 0.05904761904761904, "consistency_score_4": 0.013904761904761899, "consistency_score_5": 0.003492063492063492, "consistency_score_6": 0.0009523809523809525, "consistency_score_7": 0.0, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.23333333333333334, "Malay,Vietnamese": 0.26, "Malay,Spanish": 0.18666666666666668, "Malay,Indonesian": 0.26, "Malay,Filipino": 0.25333333333333335, "Malay,Chinese": 0.26, "English,Vietnamese": 0.30666666666666664, "English,Spanish": 0.20666666666666667, "English,Indonesian": 0.2, "English,Filipino": 0.28, "English,Chinese": 0.2866666666666667, "Vietnamese,Spanish": 0.23333333333333334, "Vietnamese,Indonesian": 0.22, "Vietnamese,Filipino": 0.3, "Vietnamese,Chinese": 0.18666666666666668, "Spanish,Indonesian": 0.26666666666666666, "Spanish,Filipino": 0.24666666666666667, "Spanish,Chinese": 0.28, "Indonesian,Filipino": 0.22, "Indonesian,Chinese": 0.23333333333333334, "Filipino,Chinese": 0.24666666666666667 }, "3_combine": { "Malay,English,Vietnamese": 0.08, "Malay,English,Spanish": 0.02666666666666667, "Malay,English,Indonesian": 0.04666666666666667, "Malay,English,Filipino": 0.07333333333333333, "Malay,English,Chinese": 0.06666666666666667, "Malay,Vietnamese,Spanish": 0.03333333333333333, "Malay,Vietnamese,Indonesian": 0.07333333333333333, "Malay,Vietnamese,Filipino": 0.07333333333333333, "Malay,Vietnamese,Chinese": 0.08, "Malay,Spanish,Indonesian": 0.06, "Malay,Spanish,Filipino": 0.06, "Malay,Spanish,Chinese": 0.04666666666666667, "Malay,Indonesian,Filipino": 0.04, "Malay,Indonesian,Chinese": 0.08, "Malay,Filipino,Chinese": 0.05333333333333334, "English,Vietnamese,Spanish": 0.08, "English,Vietnamese,Indonesian": 0.04, "English,Vietnamese,Filipino": 0.12666666666666668, "English,Vietnamese,Chinese": 0.1, "English,Spanish,Indonesian": 0.02666666666666667, "English,Spanish,Filipino": 0.04, "English,Spanish,Chinese": 0.08, "English,Indonesian,Filipino": 0.02666666666666667, "English,Indonesian,Chinese": 0.05333333333333334, "English,Filipino,Chinese": 0.06, "Vietnamese,Spanish,Indonesian": 0.03333333333333333, "Vietnamese,Spanish,Filipino": 0.05333333333333334, "Vietnamese,Spanish,Chinese": 0.04, "Vietnamese,Indonesian,Filipino": 0.04666666666666667, "Vietnamese,Indonesian,Chinese": 0.03333333333333333, "Vietnamese,Filipino,Chinese": 0.06666666666666667, "Spanish,Indonesian,Filipino": 0.07333333333333333, "Spanish,Indonesian,Chinese": 0.06666666666666667, "Spanish,Filipino,Chinese": 0.07333333333333333, "Indonesian,Filipino,Chinese": 0.05333333333333334 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.006666666666666667, "Malay,English,Vietnamese,Indonesian": 0.02, "Malay,English,Vietnamese,Filipino": 0.02666666666666667, "Malay,English,Vietnamese,Chinese": 0.04, "Malay,English,Spanish,Indonesian": 0.0, "Malay,English,Spanish,Filipino": 0.013333333333333334, "Malay,English,Spanish,Chinese": 0.006666666666666667, "Malay,English,Indonesian,Filipino": 0.006666666666666667, "Malay,English,Indonesian,Chinese": 0.02, "Malay,English,Filipino,Chinese": 0.013333333333333334, "Malay,Vietnamese,Spanish,Indonesian": 0.013333333333333334, "Malay,Vietnamese,Spanish,Filipino": 0.006666666666666667, "Malay,Vietnamese,Spanish,Chinese": 0.013333333333333334, "Malay,Vietnamese,Indonesian,Filipino": 0.013333333333333334, "Malay,Vietnamese,Indonesian,Chinese": 0.02, "Malay,Vietnamese,Filipino,Chinese": 0.013333333333333334, "Malay,Spanish,Indonesian,Filipino": 0.013333333333333334, "Malay,Spanish,Indonesian,Chinese": 0.02, "Malay,Spanish,Filipino,Chinese": 0.006666666666666667, "Malay,Indonesian,Filipino,Chinese": 0.013333333333333334, "English,Vietnamese,Spanish,Indonesian": 0.006666666666666667, "English,Vietnamese,Spanish,Filipino": 0.013333333333333334, "English,Vietnamese,Spanish,Chinese": 0.02, "English,Vietnamese,Indonesian,Filipino": 0.02, "English,Vietnamese,Indonesian,Chinese": 0.006666666666666667, "English,Vietnamese,Filipino,Chinese": 0.04, "English,Spanish,Indonesian,Filipino": 0.006666666666666667, "English,Spanish,Indonesian,Chinese": 0.006666666666666667, "English,Spanish,Filipino,Chinese": 0.006666666666666667, "English,Indonesian,Filipino,Chinese": 0.006666666666666667, "Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, "Vietnamese,Spanish,Indonesian,Chinese": 0.013333333333333334, "Vietnamese,Spanish,Filipino,Chinese": 0.02, "Vietnamese,Indonesian,Filipino,Chinese": 0.013333333333333334, "Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.0, "Malay,English,Vietnamese,Spanish,Filipino": 0.0, "Malay,English,Vietnamese,Spanish,Chinese": 0.006666666666666667, "Malay,English,Vietnamese,Indonesian,Filipino": 0.006666666666666667, "Malay,English,Vietnamese,Indonesian,Chinese": 0.006666666666666667, "Malay,English,Vietnamese,Filipino,Chinese": 0.013333333333333334, "Malay,English,Spanish,Indonesian,Filipino": 0.0, "Malay,English,Spanish,Indonesian,Chinese": 0.0, "Malay,English,Spanish,Filipino,Chinese": 0.0, "Malay,English,Indonesian,Filipino,Chinese": 0.006666666666666667, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.0, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.006666666666666667, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.0, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.0, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.0, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, "English,Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, "English,Spanish,Indonesian,Filipino,Chinese": 0.0, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.0, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.0, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.0, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 } }, "AC3_2": 0.24870088426033501, "AC3_3": 0.09563540750644521, "AC3_4": 0.026352168998375104, "AC3_5": 0.0068884540090394875, "AC3_6": 0.0018975741232373495, "AC3_7": 0.0 } }, "cross_logiqa": { "prompt_1": { "overall_acc": 0.2313311688311688, "language_acc": { "English": 0.23863636363636365, "Vietnamese": 0.2159090909090909, "Chinese": 0.2784090909090909, "Indonesian": 0.2159090909090909, "Filipino": 0.26136363636363635, "Spanish": 0.17613636363636365, "Malay": 0.23295454545454544 }, "consistency_score_2": 0.24864718614718612, "consistency_score_3": 0.060227272727272727, "consistency_score_4": 0.014772727272727264, "consistency_score_5": 0.003787878787878787, "consistency_score_6": 0.0008116883116883117, "consistency_score_7": 0.0, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.24431818181818182, "English,Chinese": 0.23863636363636365, "English,Indonesian": 0.21022727272727273, "English,Filipino": 0.26136363636363635, "English,Spanish": 0.2840909090909091, "English,Malay": 0.3068181818181818, "Vietnamese,Chinese": 0.2556818181818182, "Vietnamese,Indonesian": 0.24431818181818182, "Vietnamese,Filipino": 0.1875, "Vietnamese,Spanish": 0.20454545454545456, "Vietnamese,Malay": 0.2215909090909091, "Chinese,Indonesian": 0.26136363636363635, "Chinese,Filipino": 0.2784090909090909, "Chinese,Spanish": 0.2727272727272727, "Chinese,Malay": 0.2784090909090909, "Indonesian,Filipino": 0.2159090909090909, "Indonesian,Spanish": 0.2784090909090909, "Indonesian,Malay": 0.21022727272727273, "Filipino,Spanish": 0.26704545454545453, "Filipino,Malay": 0.25, "Spanish,Malay": 0.25 }, "3_combine": { "English,Vietnamese,Chinese": 0.056818181818181816, "English,Vietnamese,Indonesian": 0.05113636363636364, "English,Vietnamese,Filipino": 0.03977272727272727, "English,Vietnamese,Spanish": 0.05113636363636364, "English,Vietnamese,Malay": 0.056818181818181816, "English,Chinese,Indonesian": 0.03977272727272727, "English,Chinese,Filipino": 0.09090909090909091, "English,Chinese,Spanish": 0.07954545454545454, "English,Chinese,Malay": 0.07954545454545454, "English,Indonesian,Filipino": 0.03409090909090909, "English,Indonesian,Spanish": 0.0625, "English,Indonesian,Malay": 0.056818181818181816, "English,Filipino,Spanish": 0.0625, "English,Filipino,Malay": 0.10227272727272728, "English,Spanish,Malay": 0.09090909090909091, "Vietnamese,Chinese,Indonesian": 0.07954545454545454, "Vietnamese,Chinese,Filipino": 0.05113636363636364, "Vietnamese,Chinese,Spanish": 0.056818181818181816, "Vietnamese,Chinese,Malay": 0.0625, "Vietnamese,Indonesian,Filipino": 0.056818181818181816, "Vietnamese,Indonesian,Spanish": 0.03409090909090909, "Vietnamese,Indonesian,Malay": 0.05113636363636364, "Vietnamese,Filipino,Spanish": 0.056818181818181816, "Vietnamese,Filipino,Malay": 0.017045454545454544, "Vietnamese,Spanish,Malay": 0.03977272727272727, "Chinese,Indonesian,Filipino": 0.03977272727272727, "Chinese,Indonesian,Spanish": 0.0625, "Chinese,Indonesian,Malay": 0.06818181818181818, "Chinese,Filipino,Spanish": 0.09090909090909091, "Chinese,Filipino,Malay": 0.09659090909090909, "Chinese,Spanish,Malay": 0.07386363636363637, "Indonesian,Filipino,Spanish": 0.056818181818181816, "Indonesian,Filipino,Malay": 0.03977272727272727, "Indonesian,Spanish,Malay": 0.07386363636363637, "Filipino,Spanish,Malay": 0.045454545454545456 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.017045454545454544, "English,Vietnamese,Chinese,Filipino": 0.005681818181818182, "English,Vietnamese,Chinese,Spanish": 0.011363636363636364, "English,Vietnamese,Chinese,Malay": 0.011363636363636364, "English,Vietnamese,Indonesian,Filipino": 0.011363636363636364, "English,Vietnamese,Indonesian,Spanish": 0.005681818181818182, "English,Vietnamese,Indonesian,Malay": 0.017045454545454544, "English,Vietnamese,Filipino,Spanish": 0.005681818181818182, "English,Vietnamese,Filipino,Malay": 0.0, "English,Vietnamese,Spanish,Malay": 0.022727272727272728, "English,Chinese,Indonesian,Filipino": 0.011363636363636364, "English,Chinese,Indonesian,Spanish": 0.017045454545454544, "English,Chinese,Indonesian,Malay": 0.017045454545454544, "English,Chinese,Filipino,Spanish": 0.03409090909090909, "English,Chinese,Filipino,Malay": 0.03977272727272727, "English,Chinese,Spanish,Malay": 0.028409090909090908, "English,Indonesian,Filipino,Spanish": 0.011363636363636364, "English,Indonesian,Filipino,Malay": 0.011363636363636364, "English,Indonesian,Spanish,Malay": 0.028409090909090908, "English,Filipino,Spanish,Malay": 0.011363636363636364, "Vietnamese,Chinese,Indonesian,Filipino": 0.005681818181818182, "Vietnamese,Chinese,Indonesian,Spanish": 0.005681818181818182, "Vietnamese,Chinese,Indonesian,Malay": 0.028409090909090908, "Vietnamese,Chinese,Filipino,Spanish": 0.011363636363636364, "Vietnamese,Chinese,Filipino,Malay": 0.017045454545454544, "Vietnamese,Chinese,Spanish,Malay": 0.011363636363636364, "Vietnamese,Indonesian,Filipino,Spanish": 0.011363636363636364, "Vietnamese,Indonesian,Filipino,Malay": 0.005681818181818182, "Vietnamese,Indonesian,Spanish,Malay": 0.005681818181818182, "Vietnamese,Filipino,Spanish,Malay": 0.005681818181818182, "Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, "Chinese,Indonesian,Filipino,Malay": 0.011363636363636364, "Chinese,Indonesian,Spanish,Malay": 0.017045454545454544, "Chinese,Filipino,Spanish,Malay": 0.03409090909090909, "Indonesian,Filipino,Spanish,Malay": 0.011363636363636364 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.0, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.0, "English,Vietnamese,Chinese,Indonesian,Malay": 0.005681818181818182, "English,Vietnamese,Chinese,Filipino,Spanish": 0.0, "English,Vietnamese,Chinese,Filipino,Malay": 0.0, "English,Vietnamese,Chinese,Spanish,Malay": 0.005681818181818182, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.0, "English,Vietnamese,Indonesian,Filipino,Malay": 0.0, "English,Vietnamese,Indonesian,Spanish,Malay": 0.005681818181818182, "English,Vietnamese,Filipino,Spanish,Malay": 0.0, "English,Chinese,Indonesian,Filipino,Spanish": 0.011363636363636364, "English,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, "English,Chinese,Indonesian,Spanish,Malay": 0.011363636363636364, "English,Chinese,Filipino,Spanish,Malay": 0.011363636363636364, "English,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 } }, "AC3_2": 0.2396768253794271, "AC3_3": 0.09557223118811513, "AC3_4": 0.02777194769799814, "AC3_5": 0.007453708543748948, "AC3_6": 0.0016177004806399893, "AC3_7": 0.0 } }, "sg_eval": { "prompt_1": { "accuracy": 0.22330097087378642 } }, "cn_eval": { "prompt_1": { "accuracy": 0.18095238095238095 } }, "us_eval": { "prompt_1": { "accuracy": 0.32710280373831774 } }, "ph_eval": { "prompt_1": { "accuracy": 0.32, "category_acc": { "brand": 0.3, "demographics": 0.4, "biology": 0.2, "history": 0.4666666666666667, "literature": 0.4, "politics": 0.2, "culture": 0.2, "film": 0.2, "law": 0.3, "geography": 0.5 } } }, "sing2eng": { "prompt_1": { "bleu_score": 0.008508397135511301 } }, "flores_ind2eng": { "prompt_1": { "bleu_score": 0.008557988557763307 } }, "flores_vie2eng": { "prompt_1": { "bleu_score": 0.008557988557763307 } }, "flores_zho2eng": { "prompt_1": { "bleu_score": 0.008557988557763307 } }, "flores_zsm2eng": { "prompt_1": { "bleu_score": 0.008557988557763307 } }, "mmlu": { "prompt_1": { "accuracy": 0.2590431738623104 } }, "mmlu_full": { "prompt_1": { "accuracy": 0.25176975330711476, "category_acc": { "high_school_european_history": 0.23170731707317074, "business_ethics": 0.1919191919191919, "clinical_knowledge": 0.23863636363636365, "medical_genetics": 0.20202020202020202, "high_school_us_history": 0.2413793103448276, "high_school_physics": 0.31333333333333335, "high_school_world_history": 0.2245762711864407, "virology": 0.3515151515151515, "high_school_microeconomics": 0.2742616033755274, "econometrics": 0.20353982300884957, "college_computer_science": 0.2222222222222222, "high_school_biology": 0.22330097087378642, "abstract_algebra": 0.21212121212121213, "professional_accounting": 0.20640569395017794, "philosophy": 0.26129032258064516, "professional_medicine": 0.28044280442804426, "nutrition": 0.2262295081967213, "global_facts": 0.2222222222222222, "machine_learning": 0.21621621621621623, "security_studies": 0.21311475409836064, "public_relations": 0.23853211009174313, "professional_psychology": 0.24058919803600654, "prehistory": 0.2755417956656347, "anatomy": 0.23880597014925373, "human_sexuality": 0.25384615384615383, "college_medicine": 0.25, "high_school_government_and_politics": 0.25, "college_chemistry": 0.2727272727272727, "logical_fallacies": 0.22839506172839505, "high_school_geography": 0.27918781725888325, "elementary_mathematics": 0.22546419098143236, "human_aging": 0.22072072072072071, "college_mathematics": 0.2828282828282828, "high_school_psychology": 0.2775735294117647, "formal_logic": 0.256, "high_school_statistics": 0.2, "international_law": 0.25833333333333336, "high_school_mathematics": 0.2862453531598513, "high_school_computer_science": 0.32323232323232326, "conceptual_physics": 0.24786324786324787, "miscellaneous": 0.2506393861892583, "high_school_chemistry": 0.2722772277227723, "marketing": 0.2317596566523605, "professional_law": 0.2654924983692107, "management": 0.30392156862745096, "college_physics": 0.33663366336633666, "jurisprudence": 0.3644859813084112, "world_religions": 0.27647058823529413, "sociology": 0.22, "us_foreign_policy": 0.26262626262626265, "high_school_macroeconomics": 0.2647814910025707, "computer_security": 0.23232323232323232, "moral_scenarios": 0.23825503355704697, "moral_disputes": 0.25507246376811593, "electrical_engineering": 0.24305555555555555, "astronomy": 0.23841059602649006, "college_biology": 0.26573426573426573 } } }, "c_eval": { "prompt_1": { "accuracy": 0.2585438335809807 } }, "c_eval_full": { "prompt_1": { "accuracy": 0.2266500622665006, "category_acc": { "computer_network": 0.16666666666666666, "operating_system": 0.16666666666666666, "computer_architecture": 0.3076923076923077, "college_programming": 0.23809523809523808, "college_physics": 0.3333333333333333, "college_chemistry": 0.06896551724137931, "advanced_mathematics": 0.20833333333333334, "probability_and_statistics": 0.30434782608695654, "discrete_mathematics": 0.19047619047619047, "electrical_engineer": 0.23809523809523808, "metrology_engineer": 0.27586206896551724, "high_school_mathematics": 0.17391304347826086, "high_school_physics": 0.25, "high_school_chemistry": 0.20833333333333334, "high_school_biology": 0.25, "middle_school_mathematics": 0.16666666666666666, "middle_school_biology": 0.23076923076923078, "middle_school_physics": 0.125, "middle_school_chemistry": 0.28, "veterinary_medicine": 0.2857142857142857, "college_economics": 0.16666666666666666, "business_administration": 0.18421052631578946, "marxism": 0.2916666666666667, "mao_zedong_thought": 0.13793103448275862, "education_science": 0.29411764705882354, "teacher_qualification": 0.22448979591836735, "high_school_politics": 0.16666666666666666, "high_school_geography": 0.3333333333333333, "middle_school_politics": 0.15384615384615385, "middle_school_geography": 0.058823529411764705, "modern_chinese_history": 0.21428571428571427, "ideological_and_moral_cultivation": 0.20833333333333334, "logic": 0.2962962962962963, "law": 0.034482758620689655, "chinese_language_and_literature": 0.25, "art_studies": 0.23684210526315788, "professional_tour_guide": 0.17647058823529413, "legal_professional": 0.4642857142857143, "high_school_chinese": 0.25, "high_school_history": 0.4, "middle_school_history": 0.2962962962962963, "civil_servant": 0.17307692307692307, "sports_science": 0.2916666666666667, "plant_protection": 0.2962962962962963, "basic_medicine": 0.16666666666666666, "clinical_medicine": 0.18518518518518517, "urban_and_rural_planner": 0.27450980392156865, "accountant": 0.2037037037037037, "fire_engineer": 0.19444444444444445, "environmental_impact_assessment_engineer": 0.19444444444444445, "tax_accountant": 0.2962962962962963, "physician": 0.2222222222222222 } } }, "cmmlu": { "prompt_1": { "accuracy": 0.26881720430107525 } }, "cmmlu_full": { "prompt_1": { "accuracy": 0.2528060783975134, "category_acc": { "agronomy": 0.2958579881656805, "anatomy": 0.23648648648648649, "ancient_chinese": 0.2865853658536585, "arts": 0.275, "astronomy": 0.2909090909090909, "business_ethics": 0.2631578947368421, "chinese_civil_service_exam": 0.25625, "chinese_driving_rule": 0.24427480916030533, "chinese_food_culture": 0.25, "chinese_foreign_policy": 0.2897196261682243, "chinese_history": 0.2755417956656347, "chinese_literature": 0.2549019607843137, "chinese_teacher_qualification": 0.22346368715083798, "clinical_knowledge": 0.1940928270042194, "college_actuarial_science": 0.2358490566037736, "college_education": 0.2523364485981308, "college_engineering_hydrology": 0.32075471698113206, "college_law": 0.26851851851851855, "college_mathematics": 0.21904761904761905, "college_medical_statistics": 0.2830188679245283, "college_medicine": 0.2490842490842491, "computer_science": 0.28431372549019607, "computer_security": 0.24561403508771928, "conceptual_physics": 0.24489795918367346, "construction_project_management": 0.2517985611510791, "economics": 0.2641509433962264, "education": 0.2331288343558282, "electrical_engineering": 0.25, "elementary_chinese": 0.25, "elementary_commonsense": 0.26262626262626265, "elementary_information_and_technology": 0.23109243697478993, "elementary_mathematics": 0.23478260869565218, "ethnology": 0.26666666666666666, "food_science": 0.27972027972027974, "genetics": 0.30113636363636365, "global_facts": 0.21476510067114093, "high_school_biology": 0.30177514792899407, "high_school_chemistry": 0.26515151515151514, "high_school_geography": 0.22033898305084745, "high_school_mathematics": 0.2073170731707317, "high_school_physics": 0.2545454545454545, "high_school_politics": 0.27972027972027974, "human_sexuality": 0.20634920634920634, "international_law": 0.2648648648648649, "journalism": 0.2558139534883721, "jurisprudence": 0.24330900243309003, "legal_and_moral_basis": 0.24766355140186916, "logical": 0.2764227642276423, "machine_learning": 0.28688524590163933, "management": 0.28095238095238095, "marketing": 0.23333333333333334, "marxist_theory": 0.2698412698412698, "modern_chinese": 0.27586206896551724, "nutrition": 0.2413793103448276, "philosophy": 0.24761904761904763, "professional_accounting": 0.19428571428571428, "professional_law": 0.2843601895734597, "professional_medicine": 0.2526595744680851, "professional_psychology": 0.25, "public_relations": 0.2413793103448276, "security_study": 0.23703703703703705, "sociology": 0.23008849557522124, "sports_science": 0.23030303030303031, "traditional_chinese_medicine": 0.22162162162162163, "virology": 0.22485207100591717, "world_history": 0.2360248447204969, "world_religions": 0.25625 } } }, "zbench": { "prompt_1": { "accuracy": 0.18181818181818182 } }, "ind_emotion": { "prompt_1": { "accuracy": 0.16136363636363638 } }, "ocnli": { "prompt_1": { "accuracy": 0.3277966101694915 } }, "c3": { "prompt_1": { "accuracy": 0.2801047120418848 } }, "dream": { "prompt_1": { "accuracy": 0.3365997060264576 } }, "samsum": { "prompt_1": { "rouge1": 0.030176401898066273, "rouge2": 0.0, "rougeL": 0.029301949331489687, "avg_rouge": 0.019826117076518653 } }, "dialogsum": { "prompt_1": { "rouge1": 0.03339865867499272, "rouge2": 0.0, "rougeL": 0.03201375559955033, "avg_rouge": 0.021804138091514352 } }, "sst2": { "prompt_1": { "accuracy": 0.49655963302752293 } }, "cola": { "prompt_1": { "accuracy": 0.5110258868648131 } }, "qqp": { "prompt_1": { "accuracy": 0.4981696759831808 } }, "mnli": { "prompt_1": { "accuracy": 0.3377614903038632 } }, "qnli": { "prompt_1": { "accuracy": 0.49881017755811824 } }, "wnli": { "prompt_1": { "accuracy": 0.43661971830985913 } }, "rte": { "prompt_1": { "accuracy": 0.48014440433212996 } }, "mrpc": { "prompt_1": { "accuracy": 0.49019607843137253 } } } }, "alpaca-7b": { "model_size": "7B", "model_link": "https://github.com/tatsu-lab/stanford_alpaca", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "flan-t5-small": { "model_size": "0.06B", "model_link": "https://huggingface.co/google/flan-t5-small", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "flan-t5-base": { "model_size": "0.25B", "model_link": "https://huggingface.co/google/flan-t5-base", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "flan-t5-large": { "model_size": "0.78B", "model_link": "https://huggingface.co/google/flan-t5-large", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "flan-t5-xl": { "model_size": "3B", "model_link": "https://huggingface.co/google/flan-t5-xl", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "flan-t5-xxl": { "model_size": "11B", "model_link": "https://huggingface.co/google/flan-t5-xxl", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "flan-ul2": { "model_size": "20B", "model_link": "https://huggingface.co/google/flan-t5-ul2", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "vicuna-7b": { "model_size": "7B", "model_link": "https://huggingface.co/lmsys/vicuna-7b-v1.3", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "vicuna-13b": { "model_size": "13B", "model_link": "https://huggingface.co/lmsys/vicuna-13b-v1.3", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "vicuna-33b": { "model_size": "33B", "model_link": "https://huggingface.co/lmsys/vicuna-33b-v1.3", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "llama-7b": { "model_size": "7B", "model_link": "https://huggingface.co/huggyllama/llama-7b", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "llama-13b": { "model_size": "13B", "model_link": "https://huggingface.co/huggyllama/llama-13b", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "llama-30b": { "model_size": "30B", "model_link": "https://huggingface.co/huggyllama/llama-30b", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "llama-65b": { "model_size": "65B", "model_link": "https://huggingface.co/huggyllama/llama-65b", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "llama-2-7b": { "model_size": "7B", "model_link": "https://huggingface.co/meta-llama/Llama-2-7b-hf", "zero_shot": { "cross_mmlu": { "prompt_1": { "overall_acc": 0.2942857142857143, "language_acc": { "Malay": 0.24666666666666667, "English": 0.36, "Vietnamese": 0.32, "Spanish": 0.26, "Indonesian": 0.29333333333333333, "Filipino": 0.34, "Chinese": 0.24 }, "consistency_score_2": 0.6158730158730159, "consistency_score_3": 0.4632380952380953, "consistency_score_4": 0.376952380952381, "consistency_score_5": 0.31968253968253973, "consistency_score_6": 0.2780952380952381, "consistency_score_7": 0.24666666666666667, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.5733333333333334, "Malay,Vietnamese": 0.6066666666666667, "Malay,Spanish": 0.6133333333333333, "Malay,Indonesian": 0.7133333333333334, "Malay,Filipino": 0.64, "Malay,Chinese": 0.66, "English,Vietnamese": 0.54, "English,Spanish": 0.6066666666666667, "English,Indonesian": 0.5666666666666667, "English,Filipino": 0.58, "English,Chinese": 0.5666666666666667, "Vietnamese,Spanish": 0.6333333333333333, "Vietnamese,Indonesian": 0.5866666666666667, "Vietnamese,Filipino": 0.6, "Vietnamese,Chinese": 0.6266666666666667, "Spanish,Indonesian": 0.6466666666666666, "Spanish,Filipino": 0.6133333333333333, "Spanish,Chinese": 0.68, "Indonesian,Filipino": 0.62, "Indonesian,Chinese": 0.6733333333333333, "Filipino,Chinese": 0.5866666666666667 }, "3_combine": { "Malay,English,Vietnamese": 0.3933333333333333, "Malay,English,Spanish": 0.44666666666666666, "Malay,English,Indonesian": 0.4666666666666667, "Malay,English,Filipino": 0.44, "Malay,English,Chinese": 0.44, "Malay,Vietnamese,Spanish": 0.48, "Malay,Vietnamese,Indonesian": 0.4866666666666667, "Malay,Vietnamese,Filipino": 0.46, "Malay,Vietnamese,Chinese": 0.5, "Malay,Spanish,Indonesian": 0.5133333333333333, "Malay,Spanish,Filipino": 0.47333333333333333, "Malay,Spanish,Chinese": 0.52, "Malay,Indonesian,Filipino": 0.52, "Malay,Indonesian,Chinese": 0.54, "Malay,Filipino,Chinese": 0.4866666666666667, "English,Vietnamese,Spanish": 0.43333333333333335, "English,Vietnamese,Indonesian": 0.3933333333333333, "English,Vietnamese,Filipino": 0.4066666666666667, "English,Vietnamese,Chinese": 0.42, "English,Spanish,Indonesian": 0.44, "English,Spanish,Filipino": 0.44666666666666666, "English,Spanish,Chinese": 0.4666666666666667, "English,Indonesian,Filipino": 0.43333333333333335, "English,Indonesian,Chinese": 0.43333333333333335, "English,Filipino,Chinese": 0.4066666666666667, "Vietnamese,Spanish,Indonesian": 0.48, "Vietnamese,Spanish,Filipino": 0.4666666666666667, "Vietnamese,Spanish,Chinese": 0.52, "Vietnamese,Indonesian,Filipino": 0.4533333333333333, "Vietnamese,Indonesian,Chinese": 0.48, "Vietnamese,Filipino,Chinese": 0.44666666666666666, "Spanish,Indonesian,Filipino": 0.46, "Spanish,Indonesian,Chinese": 0.52, "Spanish,Filipino,Chinese": 0.47333333333333333, "Indonesian,Filipino,Chinese": 0.4666666666666667 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.36, "Malay,English,Vietnamese,Indonesian": 0.3466666666666667, "Malay,English,Vietnamese,Filipino": 0.32666666666666666, "Malay,English,Vietnamese,Chinese": 0.3466666666666667, "Malay,English,Spanish,Indonesian": 0.38666666666666666, "Malay,English,Spanish,Filipino": 0.36666666666666664, "Malay,English,Spanish,Chinese": 0.38666666666666666, "Malay,English,Indonesian,Filipino": 0.38666666666666666, "Malay,English,Indonesian,Chinese": 0.38, "Malay,English,Filipino,Chinese": 0.35333333333333333, "Malay,Vietnamese,Spanish,Indonesian": 0.41333333333333333, "Malay,Vietnamese,Spanish,Filipino": 0.37333333333333335, "Malay,Vietnamese,Spanish,Chinese": 0.44, "Malay,Vietnamese,Indonesian,Filipino": 0.38666666666666666, "Malay,Vietnamese,Indonesian,Chinese": 0.4266666666666667, "Malay,Vietnamese,Filipino,Chinese": 0.4, "Malay,Spanish,Indonesian,Filipino": 0.4066666666666667, "Malay,Spanish,Indonesian,Chinese": 0.43333333333333335, "Malay,Spanish,Filipino,Chinese": 0.42, "Malay,Indonesian,Filipino,Chinese": 0.42, "English,Vietnamese,Spanish,Indonesian": 0.3466666666666667, "English,Vietnamese,Spanish,Filipino": 0.35333333333333333, "English,Vietnamese,Spanish,Chinese": 0.37333333333333335, "English,Vietnamese,Indonesian,Filipino": 0.32666666666666666, "English,Vietnamese,Indonesian,Chinese": 0.3333333333333333, "English,Vietnamese,Filipino,Chinese": 0.32666666666666666, "English,Spanish,Indonesian,Filipino": 0.35333333333333333, "English,Spanish,Indonesian,Chinese": 0.36666666666666664, "English,Spanish,Filipino,Chinese": 0.36666666666666664, "English,Indonesian,Filipino,Chinese": 0.34, "Vietnamese,Spanish,Indonesian,Filipino": 0.36666666666666664, "Vietnamese,Spanish,Indonesian,Chinese": 0.41333333333333333, "Vietnamese,Spanish,Filipino,Chinese": 0.3933333333333333, "Vietnamese,Indonesian,Filipino,Chinese": 0.38, "Spanish,Indonesian,Filipino,Chinese": 0.3933333333333333 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.32, "Malay,English,Vietnamese,Spanish,Filipino": 0.3, "Malay,English,Vietnamese,Spanish,Chinese": 0.3333333333333333, "Malay,English,Vietnamese,Indonesian,Filipino": 0.29333333333333333, "Malay,English,Vietnamese,Indonesian,Chinese": 0.30666666666666664, "Malay,English,Vietnamese,Filipino,Chinese": 0.29333333333333333, "Malay,English,Spanish,Indonesian,Filipino": 0.32666666666666666, "Malay,English,Spanish,Indonesian,Chinese": 0.3333333333333333, "Malay,English,Spanish,Filipino,Chinese": 0.32666666666666666, "Malay,English,Indonesian,Filipino,Chinese": 0.31333333333333335, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.32, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.37333333333333335, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.35333333333333333, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.3466666666666667, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.36, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.2866666666666667, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.30666666666666664, "English,Vietnamese,Spanish,Filipino,Chinese": 0.30666666666666664, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.28, "English,Spanish,Indonesian,Filipino,Chinese": 0.30666666666666664, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.32666666666666666 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.26666666666666666, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.29333333333333333, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.28, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.26, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.2866666666666667, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.3, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.26 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667 } }, "AC3_2": 0.3982659823170972, "AC3_3": 0.3599209741254101, "AC3_4": 0.33052862002534894, "AC3_5": 0.3064588565020135, "AC3_6": 0.2859614927002183, "AC3_7": 0.26838028164052835 }, "prompt_2": { "overall_acc": 0.2714285714285714, "language_acc": { "Malay": 0.29333333333333333, "English": 0.32, "Vietnamese": 0.2733333333333333, "Spanish": 0.25333333333333335, "Indonesian": 0.24666666666666667, "Filipino": 0.24666666666666667, "Chinese": 0.26666666666666666 }, "consistency_score_2": 0.6857142857142859, "consistency_score_3": 0.556, "consistency_score_4": 0.47447619047619055, "consistency_score_5": 0.41682539682539677, "consistency_score_6": 0.37333333333333335, "consistency_score_7": 0.34, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.6666666666666666, "Malay,Vietnamese": 0.6466666666666666, "Malay,Spanish": 0.78, "Malay,Indonesian": 0.76, "Malay,Filipino": 0.7466666666666667, "Malay,Chinese": 0.6466666666666666, "English,Vietnamese": 0.62, "English,Spanish": 0.7666666666666667, "English,Indonesian": 0.6933333333333334, "English,Filipino": 0.6666666666666666, "English,Chinese": 0.6666666666666666, "Vietnamese,Spanish": 0.7, "Vietnamese,Indonesian": 0.6133333333333333, "Vietnamese,Filipino": 0.6066666666666667, "Vietnamese,Chinese": 0.6266666666666667, "Spanish,Indonesian": 0.7533333333333333, "Spanish,Filipino": 0.7, "Spanish,Chinese": 0.6866666666666666, "Indonesian,Filipino": 0.6866666666666666, "Indonesian,Chinese": 0.72, "Filipino,Chinese": 0.6466666666666666 }, "3_combine": { "Malay,English,Vietnamese": 0.5, "Malay,English,Spanish": 0.6133333333333333, "Malay,English,Indonesian": 0.5866666666666667, "Malay,English,Filipino": 0.56, "Malay,English,Chinese": 0.5133333333333333, "Malay,Vietnamese,Spanish": 0.58, "Malay,Vietnamese,Indonesian": 0.5266666666666666, "Malay,Vietnamese,Filipino": 0.5266666666666666, "Malay,Vietnamese,Chinese": 0.5, "Malay,Spanish,Indonesian": 0.6666666666666666, "Malay,Spanish,Filipino": 0.6333333333333333, "Malay,Spanish,Chinese": 0.5733333333333334, "Malay,Indonesian,Filipino": 0.62, "Malay,Indonesian,Chinese": 0.58, "Malay,Filipino,Chinese": 0.5533333333333333, "English,Vietnamese,Spanish": 0.5733333333333334, "English,Vietnamese,Indonesian": 0.49333333333333335, "English,Vietnamese,Filipino": 0.48, "English,Vietnamese,Chinese": 0.5066666666666667, "English,Spanish,Indonesian": 0.62, "English,Spanish,Filipino": 0.58, "English,Spanish,Chinese": 0.58, "English,Indonesian,Filipino": 0.5533333333333333, "English,Indonesian,Chinese": 0.5733333333333334, "English,Filipino,Chinese": 0.5266666666666666, "Vietnamese,Spanish,Indonesian": 0.5533333333333333, "Vietnamese,Spanish,Filipino": 0.5266666666666666, "Vietnamese,Spanish,Chinese": 0.54, "Vietnamese,Indonesian,Filipino": 0.4866666666666667, "Vietnamese,Indonesian,Chinese": 0.54, "Vietnamese,Filipino,Chinese": 0.48, "Spanish,Indonesian,Filipino": 0.6066666666666667, "Spanish,Indonesian,Chinese": 0.6, "Spanish,Filipino,Chinese": 0.5466666666666666, "Indonesian,Filipino,Chinese": 0.56 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.48, "Malay,English,Vietnamese,Indonesian": 0.44, "Malay,English,Vietnamese,Filipino": 0.4266666666666667, "Malay,English,Vietnamese,Chinese": 0.4266666666666667, "Malay,English,Spanish,Indonesian": 0.5466666666666666, "Malay,English,Spanish,Filipino": 0.5266666666666666, "Malay,English,Spanish,Chinese": 0.47333333333333333, "Malay,English,Indonesian,Filipino": 0.5, "Malay,English,Indonesian,Chinese": 0.48, "Malay,English,Filipino,Chinese": 0.44666666666666666, "Malay,Vietnamese,Spanish,Indonesian": 0.5066666666666667, "Malay,Vietnamese,Spanish,Filipino": 0.49333333333333335, "Malay,Vietnamese,Spanish,Chinese": 0.47333333333333333, "Malay,Vietnamese,Indonesian,Filipino": 0.4533333333333333, "Malay,Vietnamese,Indonesian,Chinese": 0.4666666666666667, "Malay,Vietnamese,Filipino,Chinese": 0.43333333333333335, "Malay,Spanish,Indonesian,Filipino": 0.5733333333333334, "Malay,Spanish,Indonesian,Chinese": 0.5333333333333333, "Malay,Spanish,Filipino,Chinese": 0.5, "Malay,Indonesian,Filipino,Chinese": 0.49333333333333335, "English,Vietnamese,Spanish,Indonesian": 0.4666666666666667, "English,Vietnamese,Spanish,Filipino": 0.44, "English,Vietnamese,Spanish,Chinese": 0.47333333333333333, "English,Vietnamese,Indonesian,Filipino": 0.4066666666666667, "English,Vietnamese,Indonesian,Chinese": 0.44666666666666666, "English,Vietnamese,Filipino,Chinese": 0.41333333333333333, "English,Spanish,Indonesian,Filipino": 0.5133333333333333, "English,Spanish,Indonesian,Chinese": 0.52, "English,Spanish,Filipino,Chinese": 0.4666666666666667, "English,Indonesian,Filipino,Chinese": 0.4666666666666667, "Vietnamese,Spanish,Indonesian,Filipino": 0.46, "Vietnamese,Spanish,Indonesian,Chinese": 0.48, "Vietnamese,Spanish,Filipino,Chinese": 0.44666666666666666, "Vietnamese,Indonesian,Filipino,Chinese": 0.43333333333333335, "Spanish,Indonesian,Filipino,Chinese": 0.5 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.4266666666666667, "Malay,English,Vietnamese,Spanish,Filipino": 0.41333333333333333, "Malay,English,Vietnamese,Spanish,Chinese": 0.4066666666666667, "Malay,English,Vietnamese,Indonesian,Filipino": 0.38, "Malay,English,Vietnamese,Indonesian,Chinese": 0.4, "Malay,English,Vietnamese,Filipino,Chinese": 0.37333333333333335, "Malay,English,Spanish,Indonesian,Filipino": 0.48, "Malay,English,Spanish,Indonesian,Chinese": 0.4533333333333333, "Malay,English,Spanish,Filipino,Chinese": 0.42, "Malay,English,Indonesian,Filipino,Chinese": 0.41333333333333333, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.44666666666666666, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.44666666666666666, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.42, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.4, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.4666666666666667, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.38666666666666666, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.42, "English,Vietnamese,Spanish,Filipino,Chinese": 0.38666666666666666, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.37333333333333335, "English,Spanish,Indonesian,Filipino,Chinese": 0.43333333333333335, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.4066666666666667 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.37333333333333335, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.38666666666666666, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.36, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.3466666666666667, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.4, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.3933333333333333, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.35333333333333333 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.34 } }, "AC3_2": 0.38891257991672346, "AC3_3": 0.364779005480776, "AC3_4": 0.3453159199887263, "AC3_5": 0.328769109071894, "AC3_6": 0.314327917233376, "AC3_7": 0.30186915882913357 }, "prompt_3": { "overall_acc": 0.27999999999999997, "language_acc": { "Malay": 0.28, "English": 0.31333333333333335, "Vietnamese": 0.29333333333333333, "Spanish": 0.2866666666666667, "Indonesian": 0.24666666666666667, "Filipino": 0.26, "Chinese": 0.28 }, "consistency_score_2": 0.6517460317460317, "consistency_score_3": 0.5036190476190476, "consistency_score_4": 0.4123809523809524, "consistency_score_5": 0.3473015873015873, "consistency_score_6": 0.29809523809523814, "consistency_score_7": 0.26, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.6266666666666667, "Malay,Vietnamese": 0.6266666666666667, "Malay,Spanish": 0.6333333333333333, "Malay,Indonesian": 0.6866666666666666, "Malay,Filipino": 0.6666666666666666, "Malay,Chinese": 0.6333333333333333, "English,Vietnamese": 0.62, "English,Spanish": 0.7066666666666667, "English,Indonesian": 0.6666666666666666, "English,Filipino": 0.7066666666666667, "English,Chinese": 0.68, "Vietnamese,Spanish": 0.6266666666666667, "Vietnamese,Indonesian": 0.62, "Vietnamese,Filipino": 0.6466666666666666, "Vietnamese,Chinese": 0.6266666666666667, "Spanish,Indonesian": 0.6533333333333333, "Spanish,Filipino": 0.6333333333333333, "Spanish,Chinese": 0.6266666666666667, "Indonesian,Filipino": 0.6933333333333334, "Indonesian,Chinese": 0.68, "Filipino,Chinese": 0.6266666666666667 }, "3_combine": { "Malay,English,Vietnamese": 0.48, "Malay,English,Spanish": 0.49333333333333335, "Malay,English,Indonesian": 0.5133333333333333, "Malay,English,Filipino": 0.5333333333333333, "Malay,English,Chinese": 0.5, "Malay,Vietnamese,Spanish": 0.47333333333333333, "Malay,Vietnamese,Indonesian": 0.4866666666666667, "Malay,Vietnamese,Filipino": 0.5066666666666667, "Malay,Vietnamese,Chinese": 0.47333333333333333, "Malay,Spanish,Indonesian": 0.5066666666666667, "Malay,Spanish,Filipino": 0.4866666666666667, "Malay,Spanish,Chinese": 0.4666666666666667, "Malay,Indonesian,Filipino": 0.54, "Malay,Indonesian,Chinese": 0.5133333333333333, "Malay,Filipino,Chinese": 0.4866666666666667, "English,Vietnamese,Spanish": 0.5, "English,Vietnamese,Indonesian": 0.4866666666666667, "English,Vietnamese,Filipino": 0.5133333333333333, "English,Vietnamese,Chinese": 0.4866666666666667, "English,Spanish,Indonesian": 0.5333333333333333, "English,Spanish,Filipino": 0.54, "English,Spanish,Chinese": 0.5333333333333333, "English,Indonesian,Filipino": 0.5666666666666667, "English,Indonesian,Chinese": 0.54, "English,Filipino,Chinese": 0.54, "Vietnamese,Spanish,Indonesian": 0.47333333333333333, "Vietnamese,Spanish,Filipino": 0.49333333333333335, "Vietnamese,Spanish,Chinese": 0.47333333333333333, "Vietnamese,Indonesian,Filipino": 0.52, "Vietnamese,Indonesian,Chinese": 0.48, "Vietnamese,Filipino,Chinese": 0.48, "Spanish,Indonesian,Filipino": 0.52, "Spanish,Indonesian,Chinese": 0.4866666666666667, "Spanish,Filipino,Chinese": 0.47333333333333333, "Indonesian,Filipino,Chinese": 0.5266666666666666 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.3933333333333333, "Malay,English,Vietnamese,Indonesian": 0.41333333333333333, "Malay,English,Vietnamese,Filipino": 0.4266666666666667, "Malay,English,Vietnamese,Chinese": 0.3933333333333333, "Malay,English,Spanish,Indonesian": 0.4266666666666667, "Malay,English,Spanish,Filipino": 0.4266666666666667, "Malay,English,Spanish,Chinese": 0.41333333333333333, "Malay,English,Indonesian,Filipino": 0.4533333333333333, "Malay,English,Indonesian,Chinese": 0.4266666666666667, "Malay,English,Filipino,Chinese": 0.43333333333333335, "Malay,Vietnamese,Spanish,Indonesian": 0.3933333333333333, "Malay,Vietnamese,Spanish,Filipino": 0.4066666666666667, "Malay,Vietnamese,Spanish,Chinese": 0.36666666666666664, "Malay,Vietnamese,Indonesian,Filipino": 0.4266666666666667, "Malay,Vietnamese,Indonesian,Chinese": 0.38666666666666666, "Malay,Vietnamese,Filipino,Chinese": 0.3933333333333333, "Malay,Spanish,Indonesian,Filipino": 0.42, "Malay,Spanish,Indonesian,Chinese": 0.38666666666666666, "Malay,Spanish,Filipino,Chinese": 0.38, "Malay,Indonesian,Filipino,Chinese": 0.42, "English,Vietnamese,Spanish,Indonesian": 0.41333333333333333, "English,Vietnamese,Spanish,Filipino": 0.4266666666666667, "English,Vietnamese,Spanish,Chinese": 0.4066666666666667, "English,Vietnamese,Indonesian,Filipino": 0.44, "English,Vietnamese,Indonesian,Chinese": 0.3933333333333333, "English,Vietnamese,Filipino,Chinese": 0.42, "English,Spanish,Indonesian,Filipino": 0.46, "English,Spanish,Indonesian,Chinese": 0.4266666666666667, "English,Spanish,Filipino,Chinese": 0.4266666666666667, "English,Indonesian,Filipino,Chinese": 0.46, "Vietnamese,Spanish,Indonesian,Filipino": 0.42, "Vietnamese,Spanish,Indonesian,Chinese": 0.36666666666666664, "Vietnamese,Spanish,Filipino,Chinese": 0.38666666666666666, "Vietnamese,Indonesian,Filipino,Chinese": 0.4, "Spanish,Indonesian,Filipino,Chinese": 0.4 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.35333333333333333, "Malay,English,Vietnamese,Spanish,Filipino": 0.35333333333333333, "Malay,English,Vietnamese,Spanish,Chinese": 0.32666666666666666, "Malay,English,Vietnamese,Indonesian,Filipino": 0.37333333333333335, "Malay,English,Vietnamese,Indonesian,Chinese": 0.34, "Malay,English,Vietnamese,Filipino,Chinese": 0.35333333333333333, "Malay,English,Spanish,Indonesian,Filipino": 0.38, "Malay,English,Spanish,Indonesian,Chinese": 0.35333333333333333, "Malay,English,Spanish,Filipino,Chinese": 0.35333333333333333, "Malay,English,Indonesian,Filipino,Chinese": 0.38, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.35333333333333333, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.30666666666666664, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.32, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.3333333333333333, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.32666666666666666, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.37333333333333335, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.32666666666666666, "English,Vietnamese,Spanish,Filipino,Chinese": 0.3466666666666667, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.35333333333333333, "English,Spanish,Indonesian,Filipino,Chinese": 0.36666666666666664, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.32 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.32, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.2866666666666667, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.29333333333333333, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.30666666666666664, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.31333333333333335, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.2733333333333333, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.29333333333333333 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.26 } }, "AC3_2": 0.3917137989358126, "AC3_3": 0.35990277097983736, "AC3_4": 0.33353507560519785, "AC3_5": 0.310040485780535, "AC3_6": 0.2887644151065564, "AC3_7": 0.26962962957969816 }, "prompt_4": { "overall_acc": 0.27904761904761904, "language_acc": { "Malay": 0.25333333333333335, "English": 0.36666666666666664, "Vietnamese": 0.2866666666666667, "Spanish": 0.25333333333333335, "Indonesian": 0.22666666666666666, "Filipino": 0.26666666666666666, "Chinese": 0.3 }, "consistency_score_2": 0.5285714285714286, "consistency_score_3": 0.3571428571428571, "consistency_score_4": 0.2754285714285714, "consistency_score_5": 0.2285714285714286, "consistency_score_6": 0.19714285714285712, "consistency_score_7": 0.17333333333333334, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.48, "Malay,Vietnamese": 0.48, "Malay,Spanish": 0.5933333333333334, "Malay,Indonesian": 0.62, "Malay,Filipino": 0.5266666666666666, "Malay,Chinese": 0.46, "English,Vietnamese": 0.54, "English,Spanish": 0.5466666666666666, "English,Indonesian": 0.46, "English,Filipino": 0.49333333333333335, "English,Chinese": 0.5533333333333333, "Vietnamese,Spanish": 0.5666666666666667, "Vietnamese,Indonesian": 0.48, "Vietnamese,Filipino": 0.5133333333333333, "Vietnamese,Chinese": 0.5933333333333334, "Spanish,Indonesian": 0.5066666666666667, "Spanish,Filipino": 0.5533333333333333, "Spanish,Chinese": 0.5866666666666667, "Indonesian,Filipino": 0.62, "Indonesian,Chinese": 0.46, "Filipino,Chinese": 0.4666666666666667 }, "3_combine": { "Malay,English,Vietnamese": 0.31333333333333335, "Malay,English,Spanish": 0.36666666666666664, "Malay,English,Indonesian": 0.34, "Malay,English,Filipino": 0.32, "Malay,English,Chinese": 0.32666666666666666, "Malay,Vietnamese,Spanish": 0.37333333333333335, "Malay,Vietnamese,Indonesian": 0.36666666666666664, "Malay,Vietnamese,Filipino": 0.3333333333333333, "Malay,Vietnamese,Chinese": 0.32666666666666666, "Malay,Spanish,Indonesian": 0.42, "Malay,Spanish,Filipino": 0.38666666666666666, "Malay,Spanish,Chinese": 0.38, "Malay,Indonesian,Filipino": 0.44666666666666666, "Malay,Indonesian,Chinese": 0.32, "Malay,Filipino,Chinese": 0.3, "English,Vietnamese,Spanish": 0.37333333333333335, "English,Vietnamese,Indonesian": 0.32666666666666666, "English,Vietnamese,Filipino": 0.3466666666666667, "English,Vietnamese,Chinese": 0.4066666666666667, "English,Spanish,Indonesian": 0.32666666666666666, "English,Spanish,Filipino": 0.34, "English,Spanish,Chinese": 0.41333333333333333, "English,Indonesian,Filipino": 0.35333333333333333, "English,Indonesian,Chinese": 0.32, "English,Filipino,Chinese": 0.32666666666666666, "Vietnamese,Spanish,Indonesian": 0.35333333333333333, "Vietnamese,Spanish,Filipino": 0.36666666666666664, "Vietnamese,Spanish,Chinese": 0.4266666666666667, "Vietnamese,Indonesian,Filipino": 0.37333333333333335, "Vietnamese,Indonesian,Chinese": 0.34, "Vietnamese,Filipino,Chinese": 0.36, "Spanish,Indonesian,Filipino": 0.4, "Spanish,Indonesian,Chinese": 0.3333333333333333, "Spanish,Filipino,Chinese": 0.35333333333333333, "Indonesian,Filipino,Chinese": 0.34 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.28, "Malay,English,Vietnamese,Indonesian": 0.25333333333333335, "Malay,English,Vietnamese,Filipino": 0.24, "Malay,English,Vietnamese,Chinese": 0.26666666666666666, "Malay,English,Spanish,Indonesian": 0.2733333333333333, "Malay,English,Spanish,Filipino": 0.26666666666666666, "Malay,English,Spanish,Chinese": 0.3, "Malay,English,Indonesian,Filipino": 0.28, "Malay,English,Indonesian,Chinese": 0.25333333333333335, "Malay,English,Filipino,Chinese": 0.24666666666666667, "Malay,Vietnamese,Spanish,Indonesian": 0.3, "Malay,Vietnamese,Spanish,Filipino": 0.28, "Malay,Vietnamese,Spanish,Chinese": 0.3, "Malay,Vietnamese,Indonesian,Filipino": 0.3, "Malay,Vietnamese,Indonesian,Chinese": 0.26, "Malay,Vietnamese,Filipino,Chinese": 0.24666666666666667, "Malay,Spanish,Indonesian,Filipino": 0.34, "Malay,Spanish,Indonesian,Chinese": 0.28, "Malay,Spanish,Filipino,Chinese": 0.2733333333333333, "Malay,Indonesian,Filipino,Chinese": 0.26, "English,Vietnamese,Spanish,Indonesian": 0.26, "English,Vietnamese,Spanish,Filipino": 0.26666666666666666, "English,Vietnamese,Spanish,Chinese": 0.30666666666666664, "English,Vietnamese,Indonesian,Filipino": 0.26666666666666666, "English,Vietnamese,Indonesian,Chinese": 0.26666666666666666, "English,Vietnamese,Filipino,Chinese": 0.2733333333333333, "English,Spanish,Indonesian,Filipino": 0.2733333333333333, "English,Spanish,Indonesian,Chinese": 0.26666666666666666, "English,Spanish,Filipino,Chinese": 0.26666666666666666, "English,Indonesian,Filipino,Chinese": 0.26666666666666666, "Vietnamese,Spanish,Indonesian,Filipino": 0.2866666666666667, "Vietnamese,Spanish,Indonesian,Chinese": 0.28, "Vietnamese,Spanish,Filipino,Chinese": 0.29333333333333333, "Vietnamese,Indonesian,Filipino,Chinese": 0.28, "Spanish,Indonesian,Filipino,Chinese": 0.2866666666666667 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.22666666666666666, "Malay,English,Vietnamese,Spanish,Filipino": 0.22, "Malay,English,Vietnamese,Spanish,Chinese": 0.24666666666666667, "Malay,English,Vietnamese,Indonesian,Filipino": 0.22, "Malay,English,Vietnamese,Indonesian,Chinese": 0.21333333333333335, "Malay,English,Vietnamese,Filipino,Chinese": 0.20666666666666667, "Malay,English,Spanish,Indonesian,Filipino": 0.24, "Malay,English,Spanish,Indonesian,Chinese": 0.23333333333333334, "Malay,English,Spanish,Filipino,Chinese": 0.22666666666666666, "Malay,English,Indonesian,Filipino,Chinese": 0.22, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.25333333333333335, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.24, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.23333333333333334, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.22, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.24, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.22, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.22, "English,Vietnamese,Spanish,Filipino,Chinese": 0.22, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.22666666666666666, "English,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.2, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.19333333333333333, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.19333333333333333, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.18666666666666668, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334 } }, "AC3_2": 0.3652628031892742, "AC3_3": 0.31330196744433764, "AC3_4": 0.27722628448648934, "AC3_5": 0.25129991954309694, "AC3_6": 0.23105142852290775, "AC3_7": 0.2138385964439585 }, "prompt_5": { "overall_acc": 0.30380952380952386, "language_acc": { "Malay": 0.2866666666666667, "English": 0.35333333333333333, "Vietnamese": 0.29333333333333333, "Spanish": 0.3333333333333333, "Indonesian": 0.2866666666666667, "Filipino": 0.31333333333333335, "Chinese": 0.26 }, "consistency_score_2": 0.6752380952380953, "consistency_score_3": 0.5575238095238095, "consistency_score_4": 0.4982857142857142, "consistency_score_5": 0.4619047619047619, "consistency_score_6": 0.43523809523809526, "consistency_score_7": 0.41333333333333333, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.6066666666666667, "Malay,Vietnamese": 0.6666666666666666, "Malay,Spanish": 0.6333333333333333, "Malay,Indonesian": 0.7266666666666667, "Malay,Filipino": 0.7133333333333334, "Malay,Chinese": 0.68, "English,Vietnamese": 0.58, "English,Spanish": 0.6866666666666666, "English,Indonesian": 0.6266666666666667, "English,Filipino": 0.6666666666666666, "English,Chinese": 0.6866666666666666, "Vietnamese,Spanish": 0.7, "Vietnamese,Indonesian": 0.6733333333333333, "Vietnamese,Filipino": 0.6666666666666666, "Vietnamese,Chinese": 0.68, "Spanish,Indonesian": 0.6866666666666666, "Spanish,Filipino": 0.6733333333333333, "Spanish,Chinese": 0.6866666666666666, "Indonesian,Filipino": 0.7133333333333334, "Indonesian,Chinese": 0.6866666666666666, "Filipino,Chinese": 0.74 }, "3_combine": { "Malay,English,Vietnamese": 0.5066666666666667, "Malay,English,Spanish": 0.5133333333333333, "Malay,English,Indonesian": 0.52, "Malay,English,Filipino": 0.5466666666666666, "Malay,English,Chinese": 0.5266666666666666, "Malay,Vietnamese,Spanish": 0.5466666666666666, "Malay,Vietnamese,Indonesian": 0.5866666666666667, "Malay,Vietnamese,Filipino": 0.5733333333333334, "Malay,Vietnamese,Chinese": 0.5533333333333333, "Malay,Spanish,Indonesian": 0.5733333333333334, "Malay,Spanish,Filipino": 0.5533333333333333, "Malay,Spanish,Chinese": 0.54, "Malay,Indonesian,Filipino": 0.6133333333333333, "Malay,Indonesian,Chinese": 0.58, "Malay,Filipino,Chinese": 0.6, "English,Vietnamese,Spanish": 0.5466666666666666, "English,Vietnamese,Indonesian": 0.5066666666666667, "English,Vietnamese,Filipino": 0.5266666666666666, "English,Vietnamese,Chinese": 0.52, "English,Spanish,Indonesian": 0.5266666666666666, "English,Spanish,Filipino": 0.5466666666666666, "English,Spanish,Chinese": 0.5733333333333334, "English,Indonesian,Filipino": 0.5466666666666666, "English,Indonesian,Chinese": 0.5466666666666666, "English,Filipino,Chinese": 0.5733333333333334, "Vietnamese,Spanish,Indonesian": 0.5733333333333334, "Vietnamese,Spanish,Filipino": 0.5866666666666667, "Vietnamese,Spanish,Chinese": 0.5733333333333334, "Vietnamese,Indonesian,Filipino": 0.5666666666666667, "Vietnamese,Indonesian,Chinese": 0.56, "Vietnamese,Filipino,Chinese": 0.58, "Spanish,Indonesian,Filipino": 0.58, "Spanish,Indonesian,Chinese": 0.5666666666666667, "Spanish,Filipino,Chinese": 0.5733333333333334, "Indonesian,Filipino,Chinese": 0.6066666666666667 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.47333333333333333, "Malay,English,Vietnamese,Indonesian": 0.47333333333333333, "Malay,English,Vietnamese,Filipino": 0.49333333333333335, "Malay,English,Vietnamese,Chinese": 0.4666666666666667, "Malay,English,Spanish,Indonesian": 0.48, "Malay,English,Spanish,Filipino": 0.4866666666666667, "Malay,English,Spanish,Chinese": 0.47333333333333333, "Malay,English,Indonesian,Filipino": 0.5, "Malay,English,Indonesian,Chinese": 0.47333333333333333, "Malay,English,Filipino,Chinese": 0.49333333333333335, "Malay,Vietnamese,Spanish,Indonesian": 0.5066666666666667, "Malay,Vietnamese,Spanish,Filipino": 0.52, "Malay,Vietnamese,Spanish,Chinese": 0.49333333333333335, "Malay,Vietnamese,Indonesian,Filipino": 0.5333333333333333, "Malay,Vietnamese,Indonesian,Chinese": 0.5066666666666667, "Malay,Vietnamese,Filipino,Chinese": 0.5266666666666666, "Malay,Spanish,Indonesian,Filipino": 0.52, "Malay,Spanish,Indonesian,Chinese": 0.5, "Malay,Spanish,Filipino,Chinese": 0.5066666666666667, "Malay,Indonesian,Filipino,Chinese": 0.5466666666666666, "English,Vietnamese,Spanish,Indonesian": 0.48, "English,Vietnamese,Spanish,Filipino": 0.5, "English,Vietnamese,Spanish,Chinese": 0.5, "English,Vietnamese,Indonesian,Filipino": 0.48, "English,Vietnamese,Indonesian,Chinese": 0.47333333333333333, "English,Vietnamese,Filipino,Chinese": 0.4866666666666667, "English,Spanish,Indonesian,Filipino": 0.48, "English,Spanish,Indonesian,Chinese": 0.49333333333333335, "English,Spanish,Filipino,Chinese": 0.5066666666666667, "English,Indonesian,Filipino,Chinese": 0.49333333333333335, "Vietnamese,Spanish,Indonesian,Filipino": 0.5133333333333333, "Vietnamese,Spanish,Indonesian,Chinese": 0.5066666666666667, "Vietnamese,Spanish,Filipino,Chinese": 0.52, "Vietnamese,Indonesian,Filipino,Chinese": 0.52, "Spanish,Indonesian,Filipino,Chinese": 0.5133333333333333 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.44666666666666666, "Malay,English,Vietnamese,Spanish,Filipino": 0.4666666666666667, "Malay,English,Vietnamese,Spanish,Chinese": 0.44666666666666666, "Malay,English,Vietnamese,Indonesian,Filipino": 0.4666666666666667, "Malay,English,Vietnamese,Indonesian,Chinese": 0.44, "Malay,English,Vietnamese,Filipino,Chinese": 0.46, "Malay,English,Spanish,Indonesian,Filipino": 0.46, "Malay,English,Spanish,Indonesian,Chinese": 0.44666666666666666, "Malay,English,Spanish,Filipino,Chinese": 0.46, "Malay,English,Indonesian,Filipino,Chinese": 0.46, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.4866666666666667, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.46, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.48, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.49333333333333335, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.48, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.4533333333333333, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.4533333333333333, "English,Vietnamese,Spanish,Filipino,Chinese": 0.4666666666666667, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.44666666666666666, "English,Spanish,Indonesian,Filipino,Chinese": 0.4533333333333333, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.47333333333333333 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.44, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.42, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.44, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.43333333333333335, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.43333333333333335, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.4533333333333333, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.4266666666666667 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.41333333333333333 } }, "AC3_2": 0.41906800069834893, "AC3_3": 0.3932996355980695, "AC3_4": 0.37747124872316695, "AC3_5": 0.36653636574222853, "AC3_6": 0.3578375060880565, "AC3_7": 0.3502080566134062 } }, "cross_logiqa": { "prompt_1": { "overall_acc": 0.29383116883116883, "language_acc": { "English": 0.32954545454545453, "Vietnamese": 0.32954545454545453, "Chinese": 0.26704545454545453, "Indonesian": 0.2727272727272727, "Filipino": 0.23295454545454544, "Spanish": 0.29545454545454547, "Malay": 0.32954545454545453 }, "consistency_score_2": 0.4491341991341991, "consistency_score_3": 0.25113636363636366, "consistency_score_4": 0.15503246753246755, "consistency_score_5": 0.10200216450216451, "consistency_score_6": 0.0706168831168831, "consistency_score_7": 0.05113636363636364, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.32386363636363635, "English,Chinese": 0.3693181818181818, "English,Indonesian": 0.4375, "English,Filipino": 0.4431818181818182, "English,Spanish": 0.4772727272727273, "English,Malay": 0.3977272727272727, "Vietnamese,Chinese": 0.26136363636363635, "Vietnamese,Indonesian": 0.4034090909090909, "Vietnamese,Filipino": 0.26136363636363635, "Vietnamese,Spanish": 0.2897727272727273, "Vietnamese,Malay": 0.39204545454545453, "Chinese,Indonesian": 0.5113636363636364, "Chinese,Filipino": 0.5056818181818182, "Chinese,Spanish": 0.4943181818181818, "Chinese,Malay": 0.4431818181818182, "Indonesian,Filipino": 0.5340909090909091, "Indonesian,Spanish": 0.5625, "Indonesian,Malay": 0.6477272727272727, "Filipino,Spanish": 0.5625, "Filipino,Malay": 0.5227272727272727, "Spanish,Malay": 0.5909090909090909 }, "3_combine": { "English,Vietnamese,Chinese": 0.10795454545454546, "English,Vietnamese,Indonesian": 0.18181818181818182, "English,Vietnamese,Filipino": 0.125, "English,Vietnamese,Spanish": 0.13636363636363635, "English,Vietnamese,Malay": 0.16477272727272727, "English,Chinese,Indonesian": 0.2159090909090909, "English,Chinese,Filipino": 0.2215909090909091, "English,Chinese,Spanish": 0.22727272727272727, "English,Chinese,Malay": 0.18181818181818182, "English,Indonesian,Filipino": 0.2556818181818182, "English,Indonesian,Spanish": 0.2897727272727273, "English,Indonesian,Malay": 0.30113636363636365, "English,Filipino,Spanish": 0.3125, "English,Filipino,Malay": 0.2556818181818182, "English,Spanish,Malay": 0.29545454545454547, "Vietnamese,Chinese,Indonesian": 0.19886363636363635, "Vietnamese,Chinese,Filipino": 0.14772727272727273, "Vietnamese,Chinese,Spanish": 0.1590909090909091, "Vietnamese,Chinese,Malay": 0.1875, "Vietnamese,Indonesian,Filipino": 0.20454545454545456, "Vietnamese,Indonesian,Spanish": 0.2215909090909091, "Vietnamese,Indonesian,Malay": 0.3125, "Vietnamese,Filipino,Spanish": 0.1590909090909091, "Vietnamese,Filipino,Malay": 0.19318181818181818, "Vietnamese,Spanish,Malay": 0.23295454545454544, "Chinese,Indonesian,Filipino": 0.32386363636363635, "Chinese,Indonesian,Spanish": 0.3352272727272727, "Chinese,Indonesian,Malay": 0.3465909090909091, "Chinese,Filipino,Spanish": 0.32386363636363635, "Chinese,Filipino,Malay": 0.2784090909090909, "Chinese,Spanish,Malay": 0.3181818181818182, "Indonesian,Filipino,Spanish": 0.3693181818181818, "Indonesian,Filipino,Malay": 0.3806818181818182, "Indonesian,Spanish,Malay": 0.4431818181818182, "Filipino,Spanish,Malay": 0.3806818181818182 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.09090909090909091, "English,Vietnamese,Chinese,Filipino": 0.06818181818181818, "English,Vietnamese,Chinese,Spanish": 0.06818181818181818, "English,Vietnamese,Chinese,Malay": 0.07954545454545454, "English,Vietnamese,Indonesian,Filipino": 0.09659090909090909, "English,Vietnamese,Indonesian,Spanish": 0.10795454545454546, "English,Vietnamese,Indonesian,Malay": 0.14204545454545456, "English,Vietnamese,Filipino,Spanish": 0.07954545454545454, "English,Vietnamese,Filipino,Malay": 0.10227272727272728, "English,Vietnamese,Spanish,Malay": 0.11363636363636363, "English,Chinese,Indonesian,Filipino": 0.14204545454545456, "English,Chinese,Indonesian,Spanish": 0.1534090909090909, "English,Chinese,Indonesian,Malay": 0.1590909090909091, "English,Chinese,Filipino,Spanish": 0.16477272727272727, "English,Chinese,Filipino,Malay": 0.125, "English,Chinese,Spanish,Malay": 0.1534090909090909, "English,Indonesian,Filipino,Spanish": 0.19886363636363635, "English,Indonesian,Filipino,Malay": 0.20454545454545456, "English,Indonesian,Spanish,Malay": 0.23295454545454544, "English,Filipino,Spanish,Malay": 0.21022727272727273, "Vietnamese,Chinese,Indonesian,Filipino": 0.14204545454545456, "Vietnamese,Chinese,Indonesian,Spanish": 0.14204545454545456, "Vietnamese,Chinese,Indonesian,Malay": 0.16477272727272727, "Vietnamese,Chinese,Filipino,Spanish": 0.10795454545454546, "Vietnamese,Chinese,Filipino,Malay": 0.11931818181818182, "Vietnamese,Chinese,Spanish,Malay": 0.14204545454545456, "Vietnamese,Indonesian,Filipino,Spanish": 0.14204545454545456, "Vietnamese,Indonesian,Filipino,Malay": 0.17613636363636365, "Vietnamese,Indonesian,Spanish,Malay": 0.20454545454545456, "Vietnamese,Filipino,Spanish,Malay": 0.13636363636363635, "Chinese,Indonesian,Filipino,Spanish": 0.23295454545454544, "Chinese,Indonesian,Filipino,Malay": 0.23295454545454544, "Chinese,Indonesian,Spanish,Malay": 0.2784090909090909, "Chinese,Filipino,Spanish,Malay": 0.21022727272727273, "Indonesian,Filipino,Spanish,Malay": 0.30113636363636365 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.0625, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.06818181818181818, "English,Vietnamese,Chinese,Indonesian,Malay": 0.07954545454545454, "English,Vietnamese,Chinese,Filipino,Spanish": 0.05113636363636364, "English,Vietnamese,Chinese,Filipino,Malay": 0.0625, "English,Vietnamese,Chinese,Spanish,Malay": 0.06818181818181818, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.06818181818181818, "English,Vietnamese,Indonesian,Filipino,Malay": 0.09090909090909091, "English,Vietnamese,Indonesian,Spanish,Malay": 0.10227272727272728, "English,Vietnamese,Filipino,Spanish,Malay": 0.07386363636363637, "English,Chinese,Indonesian,Filipino,Spanish": 0.10795454545454546, "English,Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, "English,Chinese,Indonesian,Spanish,Malay": 0.13636363636363635, "English,Chinese,Filipino,Spanish,Malay": 0.10795454545454546, "English,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.10795454545454546, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13068181818181818, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09659090909090909, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.19318181818181818 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0625, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 } }, "AC3_2": 0.35525108535621985, "AC3_3": 0.2708113304131119, "AC3_4": 0.2029719827640151, "AC3_5": 0.15143451896290575, "AC3_6": 0.11386775804586828, "AC3_7": 0.08711229943998833 }, "prompt_2": { "overall_acc": 0.30357142857142855, "language_acc": { "English": 0.3352272727272727, "Vietnamese": 0.29545454545454547, "Chinese": 0.3125, "Indonesian": 0.3125, "Filipino": 0.30113636363636365, "Spanish": 0.30113636363636365, "Malay": 0.26704545454545453 }, "consistency_score_2": 0.4626623376623378, "consistency_score_3": 0.26866883116883117, "consistency_score_4": 0.1699675324675325, "consistency_score_5": 0.11228354978354978, "consistency_score_6": 0.07548701298701299, "consistency_score_7": 0.05113636363636364, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.2897727272727273, "English,Chinese": 0.48295454545454547, "English,Indonesian": 0.45454545454545453, "English,Filipino": 0.5, "English,Spanish": 0.4943181818181818, "English,Malay": 0.42045454545454547, "Vietnamese,Chinese": 0.36363636363636365, "Vietnamese,Indonesian": 0.4659090909090909, "Vietnamese,Filipino": 0.3465909090909091, "Vietnamese,Spanish": 0.36363636363636365, "Vietnamese,Malay": 0.4943181818181818, "Chinese,Indonesian": 0.4375, "Chinese,Filipino": 0.5, "Chinese,Spanish": 0.5113636363636364, "Chinese,Malay": 0.48295454545454547, "Indonesian,Filipino": 0.5340909090909091, "Indonesian,Spanish": 0.5113636363636364, "Indonesian,Malay": 0.6534090909090909, "Filipino,Spanish": 0.44886363636363635, "Filipino,Malay": 0.5, "Spanish,Malay": 0.4602272727272727 }, "3_combine": { "English,Vietnamese,Chinese": 0.18181818181818182, "English,Vietnamese,Indonesian": 0.19886363636363635, "English,Vietnamese,Filipino": 0.14772727272727273, "English,Vietnamese,Spanish": 0.1875, "English,Vietnamese,Malay": 0.20454545454545456, "English,Chinese,Indonesian": 0.26704545454545453, "English,Chinese,Filipino": 0.29545454545454547, "English,Chinese,Spanish": 0.3125, "English,Chinese,Malay": 0.2556818181818182, "English,Indonesian,Filipino": 0.3125, "English,Indonesian,Spanish": 0.29545454545454547, "English,Indonesian,Malay": 0.32386363636363635, "English,Filipino,Spanish": 0.29545454545454547, "English,Filipino,Malay": 0.26136363636363635, "English,Spanish,Malay": 0.24431818181818182, "Vietnamese,Chinese,Indonesian": 0.25, "Vietnamese,Chinese,Filipino": 0.20454545454545456, "Vietnamese,Chinese,Spanish": 0.2159090909090909, "Vietnamese,Chinese,Malay": 0.24431818181818182, "Vietnamese,Indonesian,Filipino": 0.25, "Vietnamese,Indonesian,Spanish": 0.26704545454545453, "Vietnamese,Indonesian,Malay": 0.38636363636363635, "Vietnamese,Filipino,Spanish": 0.19318181818181818, "Vietnamese,Filipino,Malay": 0.25, "Vietnamese,Spanish,Malay": 0.25, "Chinese,Indonesian,Filipino": 0.2784090909090909, "Chinese,Indonesian,Spanish": 0.29545454545454547, "Chinese,Indonesian,Malay": 0.3409090909090909, "Chinese,Filipino,Spanish": 0.2784090909090909, "Chinese,Filipino,Malay": 0.2897727272727273, "Chinese,Spanish,Malay": 0.29545454545454547, "Indonesian,Filipino,Spanish": 0.3068181818181818, "Indonesian,Filipino,Malay": 0.39204545454545453, "Indonesian,Spanish,Malay": 0.36363636363636365, "Filipino,Spanish,Malay": 0.26704545454545453 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.14772727272727273, "English,Vietnamese,Chinese,Filipino": 0.09659090909090909, "English,Vietnamese,Chinese,Spanish": 0.13636363636363635, "English,Vietnamese,Chinese,Malay": 0.14204545454545456, "English,Vietnamese,Indonesian,Filipino": 0.10795454545454546, "English,Vietnamese,Indonesian,Spanish": 0.13636363636363635, "English,Vietnamese,Indonesian,Malay": 0.16477272727272727, "English,Vietnamese,Filipino,Spanish": 0.10795454545454546, "English,Vietnamese,Filipino,Malay": 0.10227272727272728, "English,Vietnamese,Spanish,Malay": 0.125, "English,Chinese,Indonesian,Filipino": 0.17613636363636365, "English,Chinese,Indonesian,Spanish": 0.19886363636363635, "English,Chinese,Indonesian,Malay": 0.21022727272727273, "English,Chinese,Filipino,Spanish": 0.19886363636363635, "English,Chinese,Filipino,Malay": 0.17045454545454544, "English,Chinese,Spanish,Malay": 0.17613636363636365, "English,Indonesian,Filipino,Spanish": 0.2159090909090909, "English,Indonesian,Filipino,Malay": 0.2215909090909091, "English,Indonesian,Spanish,Malay": 0.20454545454545456, "English,Filipino,Spanish,Malay": 0.17045454545454544, "Vietnamese,Chinese,Indonesian,Filipino": 0.14772727272727273, "Vietnamese,Chinese,Indonesian,Spanish": 0.16477272727272727, "Vietnamese,Chinese,Indonesian,Malay": 0.20454545454545456, "Vietnamese,Chinese,Filipino,Spanish": 0.125, "Vietnamese,Chinese,Filipino,Malay": 0.14772727272727273, "Vietnamese,Chinese,Spanish,Malay": 0.16477272727272727, "Vietnamese,Indonesian,Filipino,Spanish": 0.1534090909090909, "Vietnamese,Indonesian,Filipino,Malay": 0.21022727272727273, "Vietnamese,Indonesian,Spanish,Malay": 0.2215909090909091, "Vietnamese,Filipino,Spanish,Malay": 0.14772727272727273, "Chinese,Indonesian,Filipino,Spanish": 0.19318181818181818, "Chinese,Indonesian,Filipino,Malay": 0.2215909090909091, "Chinese,Indonesian,Spanish,Malay": 0.2215909090909091, "Chinese,Filipino,Spanish,Malay": 0.18181818181818182, "Indonesian,Filipino,Spanish,Malay": 0.23295454545454544 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.07954545454545454, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.10795454545454546, "English,Vietnamese,Chinese,Indonesian,Malay": 0.11931818181818182, "English,Vietnamese,Chinese,Filipino,Spanish": 0.07954545454545454, "English,Vietnamese,Chinese,Filipino,Malay": 0.07386363636363637, "English,Vietnamese,Chinese,Spanish,Malay": 0.09659090909090909, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.08522727272727272, "English,Vietnamese,Indonesian,Filipino,Malay": 0.08522727272727272, "English,Vietnamese,Indonesian,Spanish,Malay": 0.10795454545454546, "English,Vietnamese,Filipino,Spanish,Malay": 0.07386363636363637, "English,Chinese,Indonesian,Filipino,Spanish": 0.14204545454545456, "English,Chinese,Indonesian,Filipino,Malay": 0.14204545454545456, "English,Chinese,Indonesian,Spanish,Malay": 0.14204545454545456, "English,Chinese,Filipino,Spanish,Malay": 0.125, "English,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.10227272727272728, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13636363636363635, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.10227272727272728, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.06818181818181818, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0625, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.07954545454545454, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.056818181818181816, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 } }, "AC3_2": 0.36660108954053316, "AC3_3": 0.28505572436761256, "AC3_4": 0.2179220333542446, "AC3_5": 0.16393252157039442, "AC3_6": 0.12090853468820081, "AC3_7": 0.08752860409431687 }, "prompt_3": { "overall_acc": 0.3092532467532468, "language_acc": { "English": 0.3409090909090909, "Vietnamese": 0.3068181818181818, "Chinese": 0.2897727272727273, "Indonesian": 0.30113636363636365, "Filipino": 0.3068181818181818, "Spanish": 0.3181818181818182, "Malay": 0.30113636363636365 }, "consistency_score_2": 0.46212121212121204, "consistency_score_3": 0.26850649350649347, "consistency_score_4": 0.17159090909090907, "consistency_score_5": 0.11444805194805197, "consistency_score_6": 0.07873376623376624, "consistency_score_7": 0.056818181818181816, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.22727272727272727, "English,Chinese": 0.44886363636363635, "English,Indonesian": 0.4715909090909091, "English,Filipino": 0.42045454545454547, "English,Spanish": 0.4772727272727273, "English,Malay": 0.4034090909090909, "Vietnamese,Chinese": 0.3125, "Vietnamese,Indonesian": 0.5340909090909091, "Vietnamese,Filipino": 0.32954545454545453, "Vietnamese,Spanish": 0.36363636363636365, "Vietnamese,Malay": 0.5625, "Chinese,Indonesian": 0.4375, "Chinese,Filipino": 0.4659090909090909, "Chinese,Spanish": 0.45454545454545453, "Chinese,Malay": 0.4715909090909091, "Indonesian,Filipino": 0.5511363636363636, "Indonesian,Spanish": 0.5340909090909091, "Indonesian,Malay": 0.7386363636363636, "Filipino,Spanish": 0.4602272727272727, "Filipino,Malay": 0.5340909090909091, "Spanish,Malay": 0.5056818181818182 }, "3_combine": { "English,Vietnamese,Chinese": 0.11931818181818182, "English,Vietnamese,Indonesian": 0.19886363636363635, "English,Vietnamese,Filipino": 0.14204545454545456, "English,Vietnamese,Spanish": 0.14204545454545456, "English,Vietnamese,Malay": 0.19318181818181818, "English,Chinese,Indonesian": 0.25, "English,Chinese,Filipino": 0.23863636363636365, "English,Chinese,Spanish": 0.2784090909090909, "English,Chinese,Malay": 0.24431818181818182, "English,Indonesian,Filipino": 0.29545454545454547, "English,Indonesian,Spanish": 0.3068181818181818, "English,Indonesian,Malay": 0.3522727272727273, "English,Filipino,Spanish": 0.2556818181818182, "English,Filipino,Malay": 0.26704545454545453, "English,Spanish,Malay": 0.2897727272727273, "Vietnamese,Chinese,Indonesian": 0.23863636363636365, "Vietnamese,Chinese,Filipino": 0.17045454545454544, "Vietnamese,Chinese,Spanish": 0.1534090909090909, "Vietnamese,Chinese,Malay": 0.25, "Vietnamese,Indonesian,Filipino": 0.26136363636363635, "Vietnamese,Indonesian,Spanish": 0.2784090909090909, "Vietnamese,Indonesian,Malay": 0.45454545454545453, "Vietnamese,Filipino,Spanish": 0.17045454545454544, "Vietnamese,Filipino,Malay": 0.26704545454545453, "Vietnamese,Spanish,Malay": 0.26704545454545453, "Chinese,Indonesian,Filipino": 0.2727272727272727, "Chinese,Indonesian,Spanish": 0.2840909090909091, "Chinese,Indonesian,Malay": 0.36363636363636365, "Chinese,Filipino,Spanish": 0.2727272727272727, "Chinese,Filipino,Malay": 0.3068181818181818, "Chinese,Spanish,Malay": 0.30113636363636365, "Indonesian,Filipino,Spanish": 0.3181818181818182, "Indonesian,Filipino,Malay": 0.4318181818181818, "Indonesian,Spanish,Malay": 0.4318181818181818, "Filipino,Spanish,Malay": 0.32954545454545453 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.10795454545454546, "English,Vietnamese,Chinese,Filipino": 0.08522727272727272, "English,Vietnamese,Chinese,Spanish": 0.07954545454545454, "English,Vietnamese,Chinese,Malay": 0.10227272727272728, "English,Vietnamese,Indonesian,Filipino": 0.13068181818181818, "English,Vietnamese,Indonesian,Spanish": 0.13636363636363635, "English,Vietnamese,Indonesian,Malay": 0.17613636363636365, "English,Vietnamese,Filipino,Spanish": 0.10227272727272728, "English,Vietnamese,Filipino,Malay": 0.13068181818181818, "English,Vietnamese,Spanish,Malay": 0.125, "English,Chinese,Indonesian,Filipino": 0.17045454545454544, "English,Chinese,Indonesian,Spanish": 0.1875, "English,Chinese,Indonesian,Malay": 0.20454545454545456, "English,Chinese,Filipino,Spanish": 0.17613636363636365, "English,Chinese,Filipino,Malay": 0.18181818181818182, "English,Chinese,Spanish,Malay": 0.1875, "English,Indonesian,Filipino,Spanish": 0.1875, "English,Indonesian,Filipino,Malay": 0.22727272727272727, "English,Indonesian,Spanish,Malay": 0.2556818181818182, "English,Filipino,Spanish,Malay": 0.19886363636363635, "Vietnamese,Chinese,Indonesian,Filipino": 0.14204545454545456, "Vietnamese,Chinese,Indonesian,Spanish": 0.14204545454545456, "Vietnamese,Chinese,Indonesian,Malay": 0.2215909090909091, "Vietnamese,Chinese,Filipino,Spanish": 0.09659090909090909, "Vietnamese,Chinese,Filipino,Malay": 0.14772727272727273, "Vietnamese,Chinese,Spanish,Malay": 0.14204545454545456, "Vietnamese,Indonesian,Filipino,Spanish": 0.1590909090909091, "Vietnamese,Indonesian,Filipino,Malay": 0.23863636363636365, "Vietnamese,Indonesian,Spanish,Malay": 0.24431818181818182, "Vietnamese,Filipino,Spanish,Malay": 0.1590909090909091, "Chinese,Indonesian,Filipino,Spanish": 0.19318181818181818, "Chinese,Indonesian,Filipino,Malay": 0.23295454545454544, "Chinese,Indonesian,Spanish,Malay": 0.24431818181818182, "Chinese,Filipino,Spanish,Malay": 0.2159090909090909, "Indonesian,Filipino,Spanish,Malay": 0.2727272727272727 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.07954545454545454, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.07954545454545454, "English,Vietnamese,Chinese,Indonesian,Malay": 0.09659090909090909, "English,Vietnamese,Chinese,Filipino,Spanish": 0.0625, "English,Vietnamese,Chinese,Filipino,Malay": 0.07954545454545454, "English,Vietnamese,Chinese,Spanish,Malay": 0.06818181818181818, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.09659090909090909, "English,Vietnamese,Indonesian,Filipino,Malay": 0.11931818181818182, "English,Vietnamese,Indonesian,Spanish,Malay": 0.11931818181818182, "English,Vietnamese,Filipino,Spanish,Malay": 0.09090909090909091, "English,Chinese,Indonesian,Filipino,Spanish": 0.13068181818181818, "English,Chinese,Indonesian,Filipino,Malay": 0.14772727272727273, "English,Chinese,Indonesian,Spanish,Malay": 0.1590909090909091, "English,Chinese,Filipino,Spanish,Malay": 0.14772727272727273, "English,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09090909090909091, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.13068181818181818, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13068181818181818, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.14772727272727273, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0625, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07386363636363637, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.056818181818181816, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816 } }, "AC3_2": 0.3705398424314764, "AC3_3": 0.2874430289639436, "AC3_4": 0.22071619294409264, "AC3_5": 0.16706784590772575, "AC3_6": 0.12551282396373786, "AC3_7": 0.095998790540194 }, "prompt_4": { "overall_acc": 0.3173701298701298, "language_acc": { "English": 0.30113636363636365, "Vietnamese": 0.32954545454545453, "Chinese": 0.3181818181818182, "Indonesian": 0.2897727272727273, "Filipino": 0.30113636363636365, "Spanish": 0.32386363636363635, "Malay": 0.35795454545454547 }, "consistency_score_2": 0.43100649350649356, "consistency_score_3": 0.22759740259740266, "consistency_score_4": 0.1345779220779221, "consistency_score_5": 0.08603896103896104, "consistency_score_6": 0.05600649350649351, "consistency_score_7": 0.03409090909090909, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.39204545454545453, "English,Chinese": 0.4090909090909091, "English,Indonesian": 0.45454545454545453, "English,Filipino": 0.35795454545454547, "English,Spanish": 0.4715909090909091, "English,Malay": 0.42613636363636365, "Vietnamese,Chinese": 0.3522727272727273, "Vietnamese,Indonesian": 0.48863636363636365, "Vietnamese,Filipino": 0.38636363636363635, "Vietnamese,Spanish": 0.4772727272727273, "Vietnamese,Malay": 0.5113636363636364, "Chinese,Indonesian": 0.38636363636363635, "Chinese,Filipino": 0.4602272727272727, "Chinese,Spanish": 0.4090909090909091, "Chinese,Malay": 0.35795454545454547, "Indonesian,Filipino": 0.4375, "Indonesian,Spanish": 0.38636363636363635, "Indonesian,Malay": 0.5965909090909091, "Filipino,Spanish": 0.42613636363636365, "Filipino,Malay": 0.44886363636363635, "Spanish,Malay": 0.4147727272727273 }, "3_combine": { "English,Vietnamese,Chinese": 0.1875, "English,Vietnamese,Indonesian": 0.26704545454545453, "English,Vietnamese,Filipino": 0.19886363636363635, "English,Vietnamese,Spanish": 0.26136363636363635, "English,Vietnamese,Malay": 0.25, "English,Chinese,Indonesian": 0.2215909090909091, "English,Chinese,Filipino": 0.21022727272727273, "English,Chinese,Spanish": 0.22727272727272727, "English,Chinese,Malay": 0.21022727272727273, "English,Indonesian,Filipino": 0.2159090909090909, "English,Indonesian,Spanish": 0.24431818181818182, "English,Indonesian,Malay": 0.29545454545454547, "English,Filipino,Spanish": 0.19886363636363635, "English,Filipino,Malay": 0.21022727272727273, "English,Spanish,Malay": 0.23863636363636365, "Vietnamese,Chinese,Indonesian": 0.1875, "Vietnamese,Chinese,Filipino": 0.19318181818181818, "Vietnamese,Chinese,Spanish": 0.1875, "Vietnamese,Chinese,Malay": 0.19318181818181818, "Vietnamese,Indonesian,Filipino": 0.23295454545454544, "Vietnamese,Indonesian,Spanish": 0.26136363636363635, "Vietnamese,Indonesian,Malay": 0.3522727272727273, "Vietnamese,Filipino,Spanish": 0.22727272727272727, "Vietnamese,Filipino,Malay": 0.23863636363636365, "Vietnamese,Spanish,Malay": 0.2897727272727273, "Chinese,Indonesian,Filipino": 0.2215909090909091, "Chinese,Indonesian,Spanish": 0.17613636363636365, "Chinese,Indonesian,Malay": 0.23295454545454544, "Chinese,Filipino,Spanish": 0.2215909090909091, "Chinese,Filipino,Malay": 0.19886363636363635, "Chinese,Spanish,Malay": 0.17613636363636365, "Indonesian,Filipino,Spanish": 0.1875, "Indonesian,Filipino,Malay": 0.2840909090909091, "Indonesian,Spanish,Malay": 0.25, "Filipino,Spanish,Malay": 0.2159090909090909 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.13068181818181818, "English,Vietnamese,Chinese,Filipino": 0.125, "English,Vietnamese,Chinese,Spanish": 0.11931818181818182, "English,Vietnamese,Chinese,Malay": 0.11931818181818182, "English,Vietnamese,Indonesian,Filipino": 0.13636363636363635, "English,Vietnamese,Indonesian,Spanish": 0.17045454545454544, "English,Vietnamese,Indonesian,Malay": 0.19886363636363635, "English,Vietnamese,Filipino,Spanish": 0.125, "English,Vietnamese,Filipino,Malay": 0.13068181818181818, "English,Vietnamese,Spanish,Malay": 0.1590909090909091, "English,Chinese,Indonesian,Filipino": 0.125, "English,Chinese,Indonesian,Spanish": 0.13068181818181818, "English,Chinese,Indonesian,Malay": 0.16477272727272727, "English,Chinese,Filipino,Spanish": 0.11363636363636363, "English,Chinese,Filipino,Malay": 0.11363636363636363, "English,Chinese,Spanish,Malay": 0.11931818181818182, "English,Indonesian,Filipino,Spanish": 0.11931818181818182, "English,Indonesian,Filipino,Malay": 0.14772727272727273, "English,Indonesian,Spanish,Malay": 0.1590909090909091, "English,Filipino,Spanish,Malay": 0.125, "Vietnamese,Chinese,Indonesian,Filipino": 0.11931818181818182, "Vietnamese,Chinese,Indonesian,Spanish": 0.11363636363636363, "Vietnamese,Chinese,Indonesian,Malay": 0.13636363636363635, "Vietnamese,Chinese,Filipino,Spanish": 0.125, "Vietnamese,Chinese,Filipino,Malay": 0.10795454545454546, "Vietnamese,Chinese,Spanish,Malay": 0.11363636363636363, "Vietnamese,Indonesian,Filipino,Spanish": 0.13636363636363635, "Vietnamese,Indonesian,Filipino,Malay": 0.17613636363636365, "Vietnamese,Indonesian,Spanish,Malay": 0.19318181818181818, "Vietnamese,Filipino,Spanish,Malay": 0.14772727272727273, "Chinese,Indonesian,Filipino,Spanish": 0.10227272727272728, "Chinese,Indonesian,Filipino,Malay": 0.13636363636363635, "Chinese,Indonesian,Spanish,Malay": 0.125, "Chinese,Filipino,Spanish,Malay": 0.10227272727272728, "Indonesian,Filipino,Spanish,Malay": 0.14204545454545456 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.08522727272727272, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.08522727272727272, "English,Vietnamese,Chinese,Indonesian,Malay": 0.10795454545454546, "English,Vietnamese,Chinese,Filipino,Spanish": 0.07386363636363637, "English,Vietnamese,Chinese,Filipino,Malay": 0.07386363636363637, "English,Vietnamese,Chinese,Spanish,Malay": 0.07386363636363637, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.08522727272727272, "English,Vietnamese,Indonesian,Filipino,Malay": 0.10795454545454546, "English,Vietnamese,Indonesian,Spanish,Malay": 0.11931818181818182, "English,Vietnamese,Filipino,Spanish,Malay": 0.08522727272727272, "English,Chinese,Indonesian,Filipino,Spanish": 0.07386363636363637, "English,Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, "English,Chinese,Indonesian,Spanish,Malay": 0.09659090909090909, "English,Chinese,Filipino,Spanish,Malay": 0.0625, "English,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.07386363636363637, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.08522727272727272, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.06818181818181818, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.06818181818181818, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0625, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.03977272727272727, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 } }, "AC3_2": 0.36556082199245205, "AC3_3": 0.2650896169176494, "AC3_4": 0.18900850402595049, "AC3_5": 0.13537719822090355, "AC3_6": 0.09521103893553895, "AC3_7": 0.06156833926444732 }, "prompt_5": { "overall_acc": 0.28733766233766234, "language_acc": { "English": 0.3068181818181818, "Vietnamese": 0.3125, "Chinese": 0.26704545454545453, "Indonesian": 0.30113636363636365, "Filipino": 0.2556818181818182, "Spanish": 0.2727272727272727, "Malay": 0.29545454545454547 }, "consistency_score_2": 0.44480519480519487, "consistency_score_3": 0.24626623376623377, "consistency_score_4": 0.15519480519480522, "consistency_score_5": 0.108495670995671, "consistency_score_6": 0.08279220779220778, "consistency_score_7": 0.06818181818181818, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.32386363636363635, "English,Chinese": 0.45454545454545453, "English,Indonesian": 0.4602272727272727, "English,Filipino": 0.39204545454545453, "English,Spanish": 0.4318181818181818, "English,Malay": 0.44886363636363635, "Vietnamese,Chinese": 0.35795454545454547, "Vietnamese,Indonesian": 0.4090909090909091, "Vietnamese,Filipino": 0.2840909090909091, "Vietnamese,Spanish": 0.3977272727272727, "Vietnamese,Malay": 0.4034090909090909, "Chinese,Indonesian": 0.5284090909090909, "Chinese,Filipino": 0.5056818181818182, "Chinese,Spanish": 0.5738636363636364, "Chinese,Malay": 0.4034090909090909, "Indonesian,Filipino": 0.5, "Indonesian,Spanish": 0.5227272727272727, "Indonesian,Malay": 0.5625, "Filipino,Spanish": 0.4943181818181818, "Filipino,Malay": 0.4375, "Spanish,Malay": 0.44886363636363635 }, "3_combine": { "English,Vietnamese,Chinese": 0.17045454545454544, "English,Vietnamese,Indonesian": 0.20454545454545456, "English,Vietnamese,Filipino": 0.13636363636363635, "English,Vietnamese,Spanish": 0.1875, "English,Vietnamese,Malay": 0.19318181818181818, "English,Chinese,Indonesian": 0.29545454545454547, "English,Chinese,Filipino": 0.26136363636363635, "English,Chinese,Spanish": 0.29545454545454547, "English,Chinese,Malay": 0.22727272727272727, "English,Indonesian,Filipino": 0.24431818181818182, "English,Indonesian,Spanish": 0.26704545454545453, "English,Indonesian,Malay": 0.2840909090909091, "English,Filipino,Spanish": 0.23295454545454544, "English,Filipino,Malay": 0.19886363636363635, "English,Spanish,Malay": 0.22727272727272727, "Vietnamese,Chinese,Indonesian": 0.23295454545454544, "Vietnamese,Chinese,Filipino": 0.1875, "Vietnamese,Chinese,Spanish": 0.24431818181818182, "Vietnamese,Chinese,Malay": 0.19886363636363635, "Vietnamese,Indonesian,Filipino": 0.2159090909090909, "Vietnamese,Indonesian,Spanish": 0.23863636363636365, "Vietnamese,Indonesian,Malay": 0.2727272727272727, "Vietnamese,Filipino,Spanish": 0.17045454545454544, "Vietnamese,Filipino,Malay": 0.1875, "Vietnamese,Spanish,Malay": 0.22727272727272727, "Chinese,Indonesian,Filipino": 0.32954545454545453, "Chinese,Indonesian,Spanish": 0.36363636363636365, "Chinese,Indonesian,Malay": 0.30113636363636365, "Chinese,Filipino,Spanish": 0.3409090909090909, "Chinese,Filipino,Malay": 0.23863636363636365, "Chinese,Spanish,Malay": 0.26136363636363635, "Indonesian,Filipino,Spanish": 0.3181818181818182, "Indonesian,Filipino,Malay": 0.30113636363636365, "Indonesian,Spanish,Malay": 0.3181818181818182, "Filipino,Spanish,Malay": 0.24431818181818182 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.13636363636363635, "English,Vietnamese,Chinese,Filipino": 0.11363636363636363, "English,Vietnamese,Chinese,Spanish": 0.14204545454545456, "English,Vietnamese,Chinese,Malay": 0.10795454545454546, "English,Vietnamese,Indonesian,Filipino": 0.11931818181818182, "English,Vietnamese,Indonesian,Spanish": 0.13636363636363635, "English,Vietnamese,Indonesian,Malay": 0.13636363636363635, "English,Vietnamese,Filipino,Spanish": 0.10795454545454546, "English,Vietnamese,Filipino,Malay": 0.09659090909090909, "English,Vietnamese,Spanish,Malay": 0.11931818181818182, "English,Chinese,Indonesian,Filipino": 0.1875, "English,Chinese,Indonesian,Spanish": 0.20454545454545456, "English,Chinese,Indonesian,Malay": 0.19318181818181818, "English,Chinese,Filipino,Spanish": 0.1875, "English,Chinese,Filipino,Malay": 0.13636363636363635, "English,Chinese,Spanish,Malay": 0.1534090909090909, "English,Indonesian,Filipino,Spanish": 0.1590909090909091, "English,Indonesian,Filipino,Malay": 0.1534090909090909, "English,Indonesian,Spanish,Malay": 0.17613636363636365, "English,Filipino,Spanish,Malay": 0.13068181818181818, "Vietnamese,Chinese,Indonesian,Filipino": 0.1590909090909091, "Vietnamese,Chinese,Indonesian,Spanish": 0.18181818181818182, "Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, "Vietnamese,Chinese,Filipino,Spanish": 0.14772727272727273, "Vietnamese,Chinese,Filipino,Malay": 0.13636363636363635, "Vietnamese,Chinese,Spanish,Malay": 0.14772727272727273, "Vietnamese,Indonesian,Filipino,Spanish": 0.14204545454545456, "Vietnamese,Indonesian,Filipino,Malay": 0.16477272727272727, "Vietnamese,Indonesian,Spanish,Malay": 0.17045454545454544, "Vietnamese,Filipino,Spanish,Malay": 0.11931818181818182, "Chinese,Indonesian,Filipino,Spanish": 0.24431818181818182, "Chinese,Indonesian,Filipino,Malay": 0.19318181818181818, "Chinese,Indonesian,Spanish,Malay": 0.21022727272727273, "Chinese,Filipino,Spanish,Malay": 0.17045454545454544, "Indonesian,Filipino,Spanish,Malay": 0.19318181818181818 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.10227272727272728, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.11363636363636363, "English,Vietnamese,Chinese,Indonesian,Malay": 0.09090909090909091, "English,Vietnamese,Chinese,Filipino,Spanish": 0.10227272727272728, "English,Vietnamese,Chinese,Filipino,Malay": 0.08522727272727272, "English,Vietnamese,Chinese,Spanish,Malay": 0.08522727272727272, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.09090909090909091, "English,Vietnamese,Indonesian,Filipino,Malay": 0.09090909090909091, "English,Vietnamese,Indonesian,Spanish,Malay": 0.09090909090909091, "English,Vietnamese,Filipino,Spanish,Malay": 0.07386363636363637, "English,Chinese,Indonesian,Filipino,Spanish": 0.14204545454545456, "English,Chinese,Indonesian,Filipino,Malay": 0.125, "English,Chinese,Indonesian,Spanish,Malay": 0.13636363636363635, "English,Chinese,Filipino,Spanish,Malay": 0.10795454545454546, "English,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13068181818181818, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.11931818181818182, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.10795454545454546, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.14772727272727273 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09090909090909091, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.07386363636363637, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.07386363636363637, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818 } }, "AC3_2": 0.3491375586242397, "AC3_3": 0.2652213165102194, "AC3_4": 0.201536907654242, "AC3_5": 0.15751524616308105, "AC3_6": 0.12854579627422427, "AC3_7": 0.11021170607111686 } }, "sg_eval": { "prompt_1": { "accuracy": 0.3300970873786408 }, "prompt_2": { "accuracy": 0.27184466019417475 }, "prompt_3": { "accuracy": 0.22330097087378642 }, "prompt_4": { "accuracy": 0.2815533980582524 }, "prompt_5": { "accuracy": 0.3300970873786408 } }, "cn_eval": { "prompt_1": { "accuracy": 0.2571428571428571 }, "prompt_2": { "accuracy": 0.2571428571428571 }, "prompt_3": { "accuracy": 0.2571428571428571 }, "prompt_4": { "accuracy": 0.2571428571428571 }, "prompt_5": { "accuracy": 0.2761904761904762 } }, "us_eval": { "prompt_1": { "accuracy": 0.3925233644859813 }, "prompt_2": { "accuracy": 0.3364485981308411 }, "prompt_3": { "accuracy": 0.2897196261682243 }, "prompt_4": { "accuracy": 0.27102803738317754 }, "prompt_5": { "accuracy": 0.37383177570093457 } }, "ph_eval": { "prompt_1": { "accuracy": 0.36, "category_acc": { "brand": 0.2, "demographics": 0.4, "biology": 0.3, "history": 0.4, "literature": 0.3, "politics": 0.6, "culture": 0.3, "film": 0.3, "law": 0.5, "geography": 0.3 } }, "prompt_2": { "accuracy": 0.29, "category_acc": { "brand": 0.2, "demographics": 0.4, "biology": 0.0, "history": 0.26666666666666666, "literature": 0.3, "politics": 0.7, "culture": 0.3, "film": 0.3, "law": 0.2, "geography": 0.3 } }, "prompt_3": { "accuracy": 0.32, "category_acc": { "brand": 0.4, "demographics": 0.4, "biology": 0.1, "history": 0.2, "literature": 0.3, "politics": 0.7, "culture": 0.2, "film": 0.4, "law": 0.3, "geography": 0.3 } }, "prompt_4": { "accuracy": 0.33, "category_acc": { "brand": 0.3, "demographics": 0.2, "biology": 0.3, "history": 0.26666666666666666, "literature": 0.3, "politics": 0.7, "culture": 0.2, "film": 0.4, "law": 0.3, "geography": 0.3 } }, "prompt_5": { "accuracy": 0.32, "category_acc": { "brand": 0.2, "demographics": 0.4, "biology": 0.3, "history": 0.26666666666666666, "literature": 0.2, "politics": 0.5, "culture": 0.3, "film": 0.4, "law": 0.4, "geography": 0.3 } } }, "sing2eng": { "prompt_1": { "bleu_score": 0.06548239042166944 }, "prompt_2": { "bleu_score": 0.08859739904074258 }, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": { "bleu_score": 0.09081980052265458 }, "prompt_2": { "bleu_score": 0.08990369723404892 }, "prompt_3": { "bleu_score": 0.07147675829443079 }, "prompt_4": { "bleu_score": 0.10530185202741413 }, "prompt_5": { "bleu_score": 0.08233748734434669 } }, "flores_vie2eng": { "prompt_1": { "bleu_score": 0.0756133901331998 }, "prompt_2": { "bleu_score": 0.07429706989745946 }, "prompt_3": { "bleu_score": 0.06335761424422617 }, "prompt_4": { "bleu_score": 0.08629075575051948 }, "prompt_5": { "bleu_score": 0.07114287465662482 } }, "flores_zho2eng": { "prompt_1": { "bleu_score": 0.05368708519318244 }, "prompt_2": { "bleu_score": 0.05671210457695124 }, "prompt_3": { "bleu_score": 0.046571853073635525 }, "prompt_4": { "bleu_score": 0.08039188090582502 }, "prompt_5": { "bleu_score": 0.05095075778676779 } }, "flores_zsm2eng": { "prompt_1": { "bleu_score": 0.09117383437868172 }, "prompt_2": { "bleu_score": 0.08757746309355022 }, "prompt_3": { "bleu_score": 0.06322565513222463 }, "prompt_4": { "bleu_score": 0.09460942924966294 }, "prompt_5": { "bleu_score": 0.0721033720675653 } }, "mmlu": { "prompt_1": { "accuracy": 0.3267211201866978 }, "prompt_2": { "accuracy": 0.3395565927654609 }, "prompt_3": { "accuracy": 0.3547257876312719 }, "prompt_4": { "accuracy": 0.35822637106184363 }, "prompt_5": { "accuracy": 0.34655775962660446 } }, "mmlu_full": { "prompt_1": { "accuracy": 0.306614229531641, "category_acc": { "high_school_european_history": 0.3170731707317073, "business_ethics": 0.4444444444444444, "clinical_knowledge": 0.36742424242424243, "medical_genetics": 0.40404040404040403, "high_school_us_history": 0.4039408866995074, "high_school_physics": 0.25333333333333335, "high_school_world_history": 0.3813559322033898, "virology": 0.2909090909090909, "high_school_microeconomics": 0.2911392405063291, "econometrics": 0.26548672566371684, "college_computer_science": 0.2828282828282828, "high_school_biology": 0.3300970873786408, "abstract_algebra": 0.24242424242424243, "professional_accounting": 0.28113879003558717, "philosophy": 0.25806451612903225, "professional_medicine": 0.2656826568265683, "nutrition": 0.3442622950819672, "global_facts": 0.18181818181818182, "machine_learning": 0.32432432432432434, "security_studies": 0.319672131147541, "public_relations": 0.25688073394495414, "professional_psychology": 0.33878887070376434, "prehistory": 0.29721362229102166, "anatomy": 0.291044776119403, "human_sexuality": 0.33076923076923076, "college_medicine": 0.27325581395348836, "high_school_government_and_politics": 0.4583333333333333, "college_chemistry": 0.29292929292929293, "logical_fallacies": 0.345679012345679, "high_school_geography": 0.38578680203045684, "elementary_mathematics": 0.20424403183023873, "human_aging": 0.36036036036036034, "college_mathematics": 0.25252525252525254, "high_school_psychology": 0.35294117647058826, "formal_logic": 0.32, "high_school_statistics": 0.19069767441860466, "international_law": 0.35, "high_school_mathematics": 0.21189591078066913, "high_school_computer_science": 0.32323232323232326, "conceptual_physics": 0.2948717948717949, "miscellaneous": 0.35294117647058826, "high_school_chemistry": 0.16336633663366337, "marketing": 0.48068669527896996, "professional_law": 0.2831050228310502, "management": 0.30392156862745096, "college_physics": 0.19801980198019803, "jurisprudence": 0.37383177570093457, "world_religions": 0.38235294117647056, "sociology": 0.325, "us_foreign_policy": 0.4444444444444444, "high_school_macroeconomics": 0.2622107969151671, "computer_security": 0.3838383838383838, "moral_scenarios": 0.24272930648769575, "moral_disputes": 0.3072463768115942, "electrical_engineering": 0.2638888888888889, "astronomy": 0.2847682119205298, "college_biology": 0.3356643356643357 } }, "prompt_2": { "accuracy": 0.3116195924204505, "category_acc": { "high_school_european_history": 0.4573170731707317, "business_ethics": 0.41414141414141414, "clinical_knowledge": 0.3560606060606061, "medical_genetics": 0.3939393939393939, "high_school_us_history": 0.39408866995073893, "high_school_physics": 0.2, "high_school_world_history": 0.4788135593220339, "virology": 0.28484848484848485, "high_school_microeconomics": 0.28270042194092826, "econometrics": 0.26548672566371684, "college_computer_science": 0.3434343434343434, "high_school_biology": 0.28802588996763756, "abstract_algebra": 0.20202020202020202, "professional_accounting": 0.27402135231316727, "philosophy": 0.2967741935483871, "professional_medicine": 0.28413284132841327, "nutrition": 0.29508196721311475, "global_facts": 0.2222222222222222, "machine_learning": 0.3063063063063063, "security_studies": 0.3073770491803279, "public_relations": 0.23853211009174313, "professional_psychology": 0.33387888707037644, "prehistory": 0.3219814241486068, "anatomy": 0.30597014925373134, "human_sexuality": 0.38461538461538464, "college_medicine": 0.32558139534883723, "high_school_government_and_politics": 0.3854166666666667, "college_chemistry": 0.2828282828282828, "logical_fallacies": 0.29012345679012347, "high_school_geography": 0.39593908629441626, "elementary_mathematics": 0.22015915119363394, "human_aging": 0.33783783783783783, "college_mathematics": 0.2222222222222222, "high_school_psychology": 0.32536764705882354, "formal_logic": 0.304, "high_school_statistics": 0.1813953488372093, "international_law": 0.4083333333333333, "high_school_mathematics": 0.241635687732342, "high_school_computer_science": 0.3333333333333333, "conceptual_physics": 0.2863247863247863, "miscellaneous": 0.29411764705882354, "high_school_chemistry": 0.2079207920792079, "marketing": 0.5150214592274678, "professional_law": 0.31898238747553814, "management": 0.2647058823529412, "college_physics": 0.21782178217821782, "jurisprudence": 0.35514018691588783, "world_religions": 0.37058823529411766, "sociology": 0.31, "us_foreign_policy": 0.5050505050505051, "high_school_macroeconomics": 0.30077120822622105, "computer_security": 0.32323232323232326, "moral_scenarios": 0.27069351230425054, "moral_disputes": 0.32463768115942027, "electrical_engineering": 0.2916666666666667, "astronomy": 0.2847682119205298, "college_biology": 0.3146853146853147 } }, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": { "accuracy": 0.27191679049034173 }, "prompt_2": { "accuracy": 0.2771173848439822 }, "prompt_3": { "accuracy": 0.26448736998514116 }, "prompt_4": { "accuracy": 0.2704309063893016 }, "prompt_5": -1 }, "c_eval_full": { "prompt_1": { "accuracy": 0.2820672478206725, "category_acc": { "computer_network": 0.2916666666666667, "operating_system": 0.375, "computer_architecture": 0.34615384615384615, "college_programming": 0.23809523809523808, "college_physics": 0.16666666666666666, "college_chemistry": 0.1724137931034483, "advanced_mathematics": 0.3333333333333333, "probability_and_statistics": 0.2608695652173913, "discrete_mathematics": 0.23809523809523808, "electrical_engineer": 0.35714285714285715, "metrology_engineer": 0.2413793103448276, "high_school_mathematics": 0.30434782608695654, "high_school_physics": 0.3333333333333333, "high_school_chemistry": 0.125, "high_school_biology": 0.16666666666666666, "middle_school_mathematics": 0.25, "middle_school_biology": 0.3076923076923077, "middle_school_physics": 0.3333333333333333, "middle_school_chemistry": 0.32, "veterinary_medicine": 0.2857142857142857, "college_economics": 0.35, "business_administration": 0.39473684210526316, "marxism": 0.4166666666666667, "mao_zedong_thought": 0.27586206896551724, "education_science": 0.29411764705882354, "teacher_qualification": 0.2653061224489796, "high_school_politics": 0.3333333333333333, "high_school_geography": 0.4166666666666667, "middle_school_politics": 0.3076923076923077, "middle_school_geography": 0.11764705882352941, "modern_chinese_history": 0.25, "ideological_and_moral_cultivation": 0.2916666666666667, "logic": 0.3333333333333333, "law": 0.3103448275862069, "chinese_language_and_literature": 0.21428571428571427, "art_studies": 0.42105263157894735, "professional_tour_guide": 0.3235294117647059, "legal_professional": 0.17857142857142858, "high_school_chinese": 0.3333333333333333, "high_school_history": 0.32, "middle_school_history": 0.2962962962962963, "civil_servant": 0.1346153846153846, "sports_science": 0.2916666666666667, "plant_protection": 0.37037037037037035, "basic_medicine": 0.20833333333333334, "clinical_medicine": 0.18518518518518517, "urban_and_rural_planner": 0.2549019607843137, "accountant": 0.35185185185185186, "fire_engineer": 0.3333333333333333, "environmental_impact_assessment_engineer": 0.25, "tax_accountant": 0.18518518518518517, "physician": 0.2222222222222222 } }, "prompt_2": { "accuracy": 0.2683686176836862, "category_acc": { "computer_network": 0.2916666666666667, "operating_system": 0.2916666666666667, "computer_architecture": 0.34615384615384615, "college_programming": 0.35714285714285715, "college_physics": 0.2916666666666667, "college_chemistry": 0.20689655172413793, "advanced_mathematics": 0.2916666666666667, "probability_and_statistics": 0.2608695652173913, "discrete_mathematics": 0.23809523809523808, "electrical_engineer": 0.2619047619047619, "metrology_engineer": 0.20689655172413793, "high_school_mathematics": 0.391304347826087, "high_school_physics": 0.3333333333333333, "high_school_chemistry": 0.16666666666666666, "high_school_biology": 0.25, "middle_school_mathematics": 0.20833333333333334, "middle_school_biology": 0.19230769230769232, "middle_school_physics": 0.25, "middle_school_chemistry": 0.28, "veterinary_medicine": 0.25, "college_economics": 0.31666666666666665, "business_administration": 0.2894736842105263, "marxism": 0.25, "mao_zedong_thought": 0.2413793103448276, "education_science": 0.2647058823529412, "teacher_qualification": 0.2653061224489796, "high_school_politics": 0.2916666666666667, "high_school_geography": 0.2916666666666667, "middle_school_politics": 0.3076923076923077, "middle_school_geography": 0.23529411764705882, "modern_chinese_history": 0.25, "ideological_and_moral_cultivation": 0.25, "logic": 0.3333333333333333, "law": 0.3103448275862069, "chinese_language_and_literature": 0.17857142857142858, "art_studies": 0.39473684210526316, "professional_tour_guide": 0.3235294117647059, "legal_professional": 0.07142857142857142, "high_school_chinese": 0.2916666666666667, "high_school_history": 0.32, "middle_school_history": 0.2962962962962963, "civil_servant": 0.19230769230769232, "sports_science": 0.25, "plant_protection": 0.3333333333333333, "basic_medicine": 0.125, "clinical_medicine": 0.2222222222222222, "urban_and_rural_planner": 0.21568627450980393, "accountant": 0.3333333333333333, "fire_engineer": 0.2777777777777778, "environmental_impact_assessment_engineer": 0.2222222222222222, "tax_accountant": 0.2777777777777778, "physician": 0.25925925925925924 } }, "prompt_3": { "accuracy": 0.2646326276463263, "category_acc": { "computer_network": 0.20833333333333334, "operating_system": 0.2916666666666667, "computer_architecture": 0.3076923076923077, "college_programming": 0.19047619047619047, "college_physics": 0.20833333333333334, "college_chemistry": 0.20689655172413793, "advanced_mathematics": 0.2916666666666667, "probability_and_statistics": 0.2608695652173913, "discrete_mathematics": 0.14285714285714285, "electrical_engineer": 0.30952380952380953, "metrology_engineer": 0.2413793103448276, "high_school_mathematics": 0.34782608695652173, "high_school_physics": 0.2916666666666667, "high_school_chemistry": 0.16666666666666666, "high_school_biology": 0.20833333333333334, "middle_school_mathematics": 0.25, "middle_school_biology": 0.3076923076923077, "middle_school_physics": 0.2916666666666667, "middle_school_chemistry": 0.16, "veterinary_medicine": 0.2857142857142857, "college_economics": 0.36666666666666664, "business_administration": 0.2894736842105263, "marxism": 0.2916666666666667, "mao_zedong_thought": 0.3448275862068966, "education_science": 0.23529411764705882, "teacher_qualification": 0.24489795918367346, "high_school_politics": 0.3333333333333333, "high_school_geography": 0.375, "middle_school_politics": 0.3076923076923077, "middle_school_geography": 0.23529411764705882, "modern_chinese_history": 0.25, "ideological_and_moral_cultivation": 0.25, "logic": 0.25925925925925924, "law": 0.27586206896551724, "chinese_language_and_literature": 0.17857142857142858, "art_studies": 0.34210526315789475, "professional_tour_guide": 0.3235294117647059, "legal_professional": 0.10714285714285714, "high_school_chinese": 0.125, "high_school_history": 0.36, "middle_school_history": 0.3333333333333333, "civil_servant": 0.15384615384615385, "sports_science": 0.3333333333333333, "plant_protection": 0.2962962962962963, "basic_medicine": 0.375, "clinical_medicine": 0.18518518518518517, "urban_and_rural_planner": 0.21568627450980393, "accountant": 0.35185185185185186, "fire_engineer": 0.3055555555555556, "environmental_impact_assessment_engineer": 0.25, "tax_accountant": 0.18518518518518517, "physician": 0.2777777777777778 } }, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": { "accuracy": 0.3010752688172043 }, "prompt_2": { "accuracy": 0.26881720430107525 }, "prompt_3": { "accuracy": 0.3225806451612903 }, "prompt_4": { "accuracy": 0.2974910394265233 }, "prompt_5": { "accuracy": 0.2939068100358423 } }, "cmmlu_full": { "prompt_1": { "accuracy": 0.27283716111207046, "category_acc": { "agronomy": 0.25443786982248523, "anatomy": 0.22972972972972974, "ancient_chinese": 0.25609756097560976, "arts": 0.24375, "astronomy": 0.2727272727272727, "business_ethics": 0.2822966507177033, "chinese_civil_service_exam": 0.275, "chinese_driving_rule": 0.31297709923664124, "chinese_food_culture": 0.25, "chinese_foreign_policy": 0.2897196261682243, "chinese_history": 0.28792569659442724, "chinese_literature": 0.27450980392156865, "chinese_teacher_qualification": 0.2346368715083799, "clinical_knowledge": 0.27848101265822783, "college_actuarial_science": 0.22641509433962265, "college_education": 0.27102803738317754, "college_engineering_hydrology": 0.3018867924528302, "college_law": 0.24074074074074073, "college_mathematics": 0.22857142857142856, "college_medical_statistics": 0.2830188679245283, "college_medicine": 0.2564102564102564, "computer_science": 0.28921568627450983, "computer_security": 0.28654970760233917, "conceptual_physics": 0.3197278911564626, "construction_project_management": 0.34532374100719426, "economics": 0.27044025157232704, "education": 0.2822085889570552, "electrical_engineering": 0.29069767441860467, "elementary_chinese": 0.28174603174603174, "elementary_commonsense": 0.23737373737373738, "elementary_information_and_technology": 0.25210084033613445, "elementary_mathematics": 0.27391304347826084, "ethnology": 0.28888888888888886, "food_science": 0.27972027972027974, "genetics": 0.25, "global_facts": 0.28187919463087246, "high_school_biology": 0.1952662721893491, "high_school_chemistry": 0.26515151515151514, "high_school_geography": 0.2627118644067797, "high_school_mathematics": 0.2621951219512195, "high_school_physics": 0.2727272727272727, "high_school_politics": 0.18181818181818182, "human_sexuality": 0.30158730158730157, "international_law": 0.3027027027027027, "journalism": 0.29651162790697677, "jurisprudence": 0.291970802919708, "legal_and_moral_basis": 0.38317757009345793, "logical": 0.2764227642276423, "machine_learning": 0.2540983606557377, "management": 0.2571428571428571, "marketing": 0.29444444444444445, "marxist_theory": 0.291005291005291, "modern_chinese": 0.23275862068965517, "nutrition": 0.21379310344827587, "philosophy": 0.2761904761904762, "professional_accounting": 0.2857142857142857, "professional_law": 0.2559241706161137, "professional_medicine": 0.2553191489361702, "professional_psychology": 0.2672413793103448, "public_relations": 0.29310344827586204, "security_study": 0.362962962962963, "sociology": 0.26991150442477874, "sports_science": 0.2606060606060606, "traditional_chinese_medicine": 0.2594594594594595, "virology": 0.27218934911242604, "world_history": 0.2857142857142857, "world_religions": 0.26875 } }, "prompt_2": { "accuracy": 0.26368502849248837, "category_acc": { "agronomy": 0.2485207100591716, "anatomy": 0.2635135135135135, "ancient_chinese": 0.25609756097560976, "arts": 0.24375, "astronomy": 0.2545454545454545, "business_ethics": 0.27751196172248804, "chinese_civil_service_exam": 0.2375, "chinese_driving_rule": 0.3435114503816794, "chinese_food_culture": 0.23529411764705882, "chinese_foreign_policy": 0.2803738317757009, "chinese_history": 0.29411764705882354, "chinese_literature": 0.2549019607843137, "chinese_teacher_qualification": 0.2569832402234637, "clinical_knowledge": 0.29535864978902954, "college_actuarial_science": 0.2358490566037736, "college_education": 0.3177570093457944, "college_engineering_hydrology": 0.2641509433962264, "college_law": 0.26851851851851855, "college_mathematics": 0.23809523809523808, "college_medical_statistics": 0.2830188679245283, "college_medicine": 0.23443223443223443, "computer_science": 0.25980392156862747, "computer_security": 0.26900584795321636, "conceptual_physics": 0.35374149659863946, "construction_project_management": 0.2302158273381295, "economics": 0.24528301886792453, "education": 0.25153374233128833, "electrical_engineering": 0.27325581395348836, "elementary_chinese": 0.25396825396825395, "elementary_commonsense": 0.2222222222222222, "elementary_information_and_technology": 0.2689075630252101, "elementary_mathematics": 0.26956521739130435, "ethnology": 0.26666666666666666, "food_science": 0.2937062937062937, "genetics": 0.23295454545454544, "global_facts": 0.26174496644295303, "high_school_biology": 0.1952662721893491, "high_school_chemistry": 0.2803030303030303, "high_school_geography": 0.2711864406779661, "high_school_mathematics": 0.24390243902439024, "high_school_physics": 0.24545454545454545, "high_school_politics": 0.24475524475524477, "human_sexuality": 0.29365079365079366, "international_law": 0.2972972972972973, "journalism": 0.25, "jurisprudence": 0.26763990267639903, "legal_and_moral_basis": 0.35046728971962615, "logical": 0.25203252032520324, "machine_learning": 0.23770491803278687, "management": 0.24761904761904763, "marketing": 0.2833333333333333, "marxist_theory": 0.25925925925925924, "modern_chinese": 0.25862068965517243, "nutrition": 0.23448275862068965, "philosophy": 0.3142857142857143, "professional_accounting": 0.24, "professional_law": 0.26540284360189575, "professional_medicine": 0.2473404255319149, "professional_psychology": 0.24568965517241378, "public_relations": 0.27586206896551724, "security_study": 0.23703703703703705, "sociology": 0.27876106194690264, "sports_science": 0.2727272727272727, "traditional_chinese_medicine": 0.2648648648648649, "virology": 0.2958579881656805, "world_history": 0.22981366459627328, "world_religions": 0.2625 } }, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": { "accuracy": 0.30303030303030304 }, "prompt_2": { "accuracy": 0.30303030303030304 }, "prompt_3": { "accuracy": 0.30303030303030304 }, "prompt_4": { "accuracy": 0.30303030303030304 }, "prompt_5": { "accuracy": 0.30303030303030304 } }, "ind_emotion": { "prompt_1": { "accuracy": 0.17045454545454544 }, "prompt_2": { "accuracy": 0.12954545454545455 }, "prompt_3": { "accuracy": 0.16590909090909092 }, "prompt_4": { "accuracy": 0.18409090909090908 }, "prompt_5": { "accuracy": 0.16363636363636364 } }, "ocnli": { "prompt_1": { "accuracy": 0.3206779661016949 }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": { "accuracy": 0.39827973074046374 }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": { "accuracy": 0.5164135227829495 }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": { "rouge1": 0.28118448681569536, "rouge2": 0.09331387602913904, "rougeL": 0.2138809568322139, "avg_rouge": 0.19612643989234943 }, "prompt_2": { "rouge1": 0.24552471302516946, "rouge2": 0.09095585006587337, "rougeL": 0.18942433566482253, "avg_rouge": 0.17530163291862177 }, "prompt_3": { "rouge1": 0.24743888885496398, "rouge2": 0.08474727575295327, "rougeL": 0.1887867551689575, "avg_rouge": 0.1736576399256249 }, "prompt_4": { "rouge1": 0.28671673455192875, "rouge2": 0.09321229706184096, "rougeL": 0.21608740008314425, "avg_rouge": 0.19867214389897134 }, "prompt_5": { "rouge1": 0.27268733909259185, "rouge2": 0.08940982363348364, "rougeL": 0.20594642927007592, "avg_rouge": 0.18934786399871714 } }, "dialogsum": { "prompt_1": { "rouge1": 0.2199240504646152, "rouge2": 0.06060248503052913, "rougeL": 0.16336037920498936, "avg_rouge": 0.14796230490004456 }, "prompt_2": { "rouge1": 0.22536703471066732, "rouge2": 0.06141649842395884, "rougeL": 0.16734155049930152, "avg_rouge": 0.15137502787797588 }, "prompt_3": { "rouge1": 0.2222788038678049, "rouge2": 0.06076147057263186, "rougeL": 0.16555015921222727, "avg_rouge": 0.149530144550888 }, "prompt_4": { "rouge1": 0.21505697288969472, "rouge2": 0.060482585415315926, "rougeL": 0.15943192555385252, "avg_rouge": 0.14499049461962107 }, "prompt_5": { "rouge1": 0.22239609469882782, "rouge2": 0.05979346403703034, "rougeL": 0.16366890653848695, "avg_rouge": 0.1486194884247817 } }, "sst2": { "prompt_1": { "accuracy": 0.4919724770642202 }, "prompt_2": { "accuracy": 0.5561926605504587 }, "prompt_3": { "accuracy": 0.5022935779816514 }, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": { "overall_acc": 0.3238095238095238, "language_acc": { "Malay": 0.28, "English": 0.32666666666666666, "Vietnamese": 0.28, "Spanish": 0.4266666666666667, "Indonesian": 0.32, "Filipino": 0.32666666666666666, "Chinese": 0.30666666666666664 }, "consistency_score_2": 0.36412698412698413, "consistency_score_3": 0.1561904761904762, "consistency_score_4": 0.07371428571428572, "consistency_score_5": 0.03873015873015873, "consistency_score_6": 0.024761904761904763, "consistency_score_7": 0.02, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.38666666666666666, "Malay,Vietnamese": 0.31333333333333335, "Malay,Spanish": 0.32, "Malay,Indonesian": 0.38, "Malay,Filipino": 0.3333333333333333, "Malay,Chinese": 0.34, "English,Vietnamese": 0.36666666666666664, "English,Spanish": 0.5, "English,Indonesian": 0.4533333333333333, "English,Filipino": 0.34, "English,Chinese": 0.4066666666666667, "Vietnamese,Spanish": 0.4066666666666667, "Vietnamese,Indonesian": 0.34, "Vietnamese,Filipino": 0.32, "Vietnamese,Chinese": 0.38, "Spanish,Indonesian": 0.36666666666666664, "Spanish,Filipino": 0.32666666666666666, "Spanish,Chinese": 0.35333333333333333, "Indonesian,Filipino": 0.34, "Indonesian,Chinese": 0.36, "Filipino,Chinese": 0.31333333333333335 }, "3_combine": { "Malay,English,Vietnamese": 0.14666666666666667, "Malay,English,Spanish": 0.2, "Malay,English,Indonesian": 0.18, "Malay,English,Filipino": 0.12666666666666668, "Malay,English,Chinese": 0.18, "Malay,Vietnamese,Spanish": 0.13333333333333333, "Malay,Vietnamese,Indonesian": 0.15333333333333332, "Malay,Vietnamese,Filipino": 0.10666666666666667, "Malay,Vietnamese,Chinese": 0.13333333333333333, "Malay,Spanish,Indonesian": 0.12666666666666668, "Malay,Spanish,Filipino": 0.10666666666666667, "Malay,Spanish,Chinese": 0.12666666666666668, "Malay,Indonesian,Filipino": 0.13333333333333333, "Malay,Indonesian,Chinese": 0.16666666666666666, "Malay,Filipino,Chinese": 0.12, "English,Vietnamese,Spanish": 0.22, "English,Vietnamese,Indonesian": 0.18666666666666668, "English,Vietnamese,Filipino": 0.12666666666666668, "English,Vietnamese,Chinese": 0.16666666666666666, "English,Spanish,Indonesian": 0.24, "English,Spanish,Filipino": 0.16666666666666666, "English,Spanish,Chinese": 0.22, "English,Indonesian,Filipino": 0.18, "English,Indonesian,Chinese": 0.2, "English,Filipino,Chinese": 0.13333333333333333, "Vietnamese,Spanish,Indonesian": 0.18666666666666668, "Vietnamese,Spanish,Filipino": 0.14666666666666667, "Vietnamese,Spanish,Chinese": 0.16666666666666666, "Vietnamese,Indonesian,Filipino": 0.12666666666666668, "Vietnamese,Indonesian,Chinese": 0.16, "Vietnamese,Filipino,Chinese": 0.12666666666666668, "Spanish,Indonesian,Filipino": 0.14666666666666667, "Spanish,Indonesian,Chinese": 0.18, "Spanish,Filipino,Chinese": 0.11333333333333333, "Indonesian,Filipino,Chinese": 0.13333333333333333 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.08, "Malay,English,Vietnamese,Indonesian": 0.08, "Malay,English,Vietnamese,Filipino": 0.06, "Malay,English,Vietnamese,Chinese": 0.06, "Malay,English,Spanish,Indonesian": 0.08666666666666667, "Malay,English,Spanish,Filipino": 0.06666666666666667, "Malay,English,Spanish,Chinese": 0.08666666666666667, "Malay,English,Indonesian,Filipino": 0.05333333333333334, "Malay,English,Indonesian,Chinese": 0.08666666666666667, "Malay,English,Filipino,Chinese": 0.06, "Malay,Vietnamese,Spanish,Indonesian": 0.06666666666666667, "Malay,Vietnamese,Spanish,Filipino": 0.05333333333333334, "Malay,Vietnamese,Spanish,Chinese": 0.06666666666666667, "Malay,Vietnamese,Indonesian,Filipino": 0.06, "Malay,Vietnamese,Indonesian,Chinese": 0.08666666666666667, "Malay,Vietnamese,Filipino,Chinese": 0.06, "Malay,Spanish,Indonesian,Filipino": 0.04666666666666667, "Malay,Spanish,Indonesian,Chinese": 0.06, "Malay,Spanish,Filipino,Chinese": 0.04, "Malay,Indonesian,Filipino,Chinese": 0.06, "English,Vietnamese,Spanish,Indonesian": 0.12, "English,Vietnamese,Spanish,Filipino": 0.07333333333333333, "English,Vietnamese,Spanish,Chinese": 0.1, "English,Vietnamese,Indonesian,Filipino": 0.08, "English,Vietnamese,Indonesian,Chinese": 0.08666666666666667, "English,Vietnamese,Filipino,Chinese": 0.06666666666666667, "English,Spanish,Indonesian,Filipino": 0.1, "English,Spanish,Indonesian,Chinese": 0.12666666666666668, "English,Spanish,Filipino,Chinese": 0.07333333333333333, "English,Indonesian,Filipino,Chinese": 0.06666666666666667, "Vietnamese,Spanish,Indonesian,Filipino": 0.08, "Vietnamese,Spanish,Indonesian,Chinese": 0.09333333333333334, "Vietnamese,Spanish,Filipino,Chinese": 0.06, "Vietnamese,Indonesian,Filipino,Chinese": 0.06, "Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.04, "Malay,English,Vietnamese,Spanish,Filipino": 0.04, "Malay,English,Vietnamese,Spanish,Chinese": 0.04, "Malay,English,Vietnamese,Indonesian,Filipino": 0.02666666666666667, "Malay,English,Vietnamese,Indonesian,Chinese": 0.04, "Malay,English,Vietnamese,Filipino,Chinese": 0.03333333333333333, "Malay,English,Spanish,Indonesian,Filipino": 0.02666666666666667, "Malay,English,Spanish,Indonesian,Chinese": 0.04, "Malay,English,Spanish,Filipino,Chinese": 0.03333333333333333, "Malay,English,Indonesian,Filipino,Chinese": 0.02666666666666667, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.03333333333333333, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.04, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.03333333333333333, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.04, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.05333333333333334, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.06666666666666667, "English,Vietnamese,Spanish,Filipino,Chinese": 0.04, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.04, "English,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.02, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.02666666666666667, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.02666666666666667, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.02, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.02, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02 } }, "AC3_2": 0.3427868239552699, "AC3_3": 0.21073318211785086, "AC3_4": 0.12009035523024628, "AC3_5": 0.06918522223093736, "AC3_6": 0.046005724681050915, "AC3_7": 0.03767313018294825 } }, "cross_logiqa": { "prompt_1": { "overall_acc": 0.3157467532467532, "language_acc": { "English": 0.3409090909090909, "Vietnamese": 0.29545454545454547, "Chinese": 0.3352272727272727, "Indonesian": 0.3068181818181818, "Filipino": 0.2784090909090909, "Spanish": 0.3181818181818182, "Malay": 0.3352272727272727 }, "consistency_score_2": 0.36390692640692646, "consistency_score_3": 0.1637987012987013, "consistency_score_4": 0.08230519480519481, "consistency_score_5": 0.04437229437229437, "consistency_score_6": 0.025974025974025976, "consistency_score_7": 0.017045454545454544, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.35795454545454547, "English,Chinese": 0.2784090909090909, "English,Indonesian": 0.4090909090909091, "English,Filipino": 0.3125, "English,Spanish": 0.4318181818181818, "English,Malay": 0.3522727272727273, "Vietnamese,Chinese": 0.29545454545454547, "Vietnamese,Indonesian": 0.42045454545454547, "Vietnamese,Filipino": 0.3465909090909091, "Vietnamese,Spanish": 0.3125, "Vietnamese,Malay": 0.38636363636363635, "Chinese,Indonesian": 0.36363636363636365, "Chinese,Filipino": 0.3352272727272727, "Chinese,Spanish": 0.32386363636363635, "Chinese,Malay": 0.36363636363636365, "Indonesian,Filipino": 0.39204545454545453, "Indonesian,Spanish": 0.4090909090909091, "Indonesian,Malay": 0.42045454545454547, "Filipino,Spanish": 0.35795454545454547, "Filipino,Malay": 0.4147727272727273, "Spanish,Malay": 0.35795454545454547 }, "3_combine": { "English,Vietnamese,Chinese": 0.09659090909090909, "English,Vietnamese,Indonesian": 0.2159090909090909, "English,Vietnamese,Filipino": 0.14204545454545456, "English,Vietnamese,Spanish": 0.17045454545454544, "English,Vietnamese,Malay": 0.18181818181818182, "English,Chinese,Indonesian": 0.14772727272727273, "English,Chinese,Filipino": 0.09659090909090909, "English,Chinese,Spanish": 0.13636363636363635, "English,Chinese,Malay": 0.13636363636363635, "English,Indonesian,Filipino": 0.17613636363636365, "English,Indonesian,Spanish": 0.23295454545454544, "English,Indonesian,Malay": 0.2215909090909091, "English,Filipino,Spanish": 0.1875, "English,Filipino,Malay": 0.16477272727272727, "English,Spanish,Malay": 0.19318181818181818, "Vietnamese,Chinese,Indonesian": 0.17045454545454544, "Vietnamese,Chinese,Filipino": 0.13068181818181818, "Vietnamese,Chinese,Spanish": 0.10227272727272728, "Vietnamese,Chinese,Malay": 0.13068181818181818, "Vietnamese,Indonesian,Filipino": 0.1875, "Vietnamese,Indonesian,Spanish": 0.18181818181818182, "Vietnamese,Indonesian,Malay": 0.22727272727272727, "Vietnamese,Filipino,Spanish": 0.125, "Vietnamese,Filipino,Malay": 0.1875, "Vietnamese,Spanish,Malay": 0.14204545454545456, "Chinese,Indonesian,Filipino": 0.14204545454545456, "Chinese,Indonesian,Spanish": 0.17045454545454544, "Chinese,Indonesian,Malay": 0.14772727272727273, "Chinese,Filipino,Spanish": 0.13636363636363635, "Chinese,Filipino,Malay": 0.17045454545454544, "Chinese,Spanish,Malay": 0.13068181818181818, "Indonesian,Filipino,Spanish": 0.18181818181818182, "Indonesian,Filipino,Malay": 0.19318181818181818, "Indonesian,Spanish,Malay": 0.19886363636363635, "Filipino,Spanish,Malay": 0.17613636363636365 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.06818181818181818, "English,Vietnamese,Chinese,Filipino": 0.03409090909090909, "English,Vietnamese,Chinese,Spanish": 0.056818181818181816, "English,Vietnamese,Chinese,Malay": 0.056818181818181816, "English,Vietnamese,Indonesian,Filipino": 0.10227272727272728, "English,Vietnamese,Indonesian,Spanish": 0.11931818181818182, "English,Vietnamese,Indonesian,Malay": 0.14204545454545456, "English,Vietnamese,Filipino,Spanish": 0.07954545454545454, "English,Vietnamese,Filipino,Malay": 0.09659090909090909, "English,Vietnamese,Spanish,Malay": 0.09659090909090909, "English,Chinese,Indonesian,Filipino": 0.056818181818181816, "English,Chinese,Indonesian,Spanish": 0.08522727272727272, "English,Chinese,Indonesian,Malay": 0.07386363636363637, "English,Chinese,Filipino,Spanish": 0.07954545454545454, "English,Chinese,Filipino,Malay": 0.0625, "English,Chinese,Spanish,Malay": 0.08522727272727272, "English,Indonesian,Filipino,Spanish": 0.11363636363636363, "English,Indonesian,Filipino,Malay": 0.11363636363636363, "English,Indonesian,Spanish,Malay": 0.13636363636363635, "English,Filipino,Spanish,Malay": 0.10227272727272728, "Vietnamese,Chinese,Indonesian,Filipino": 0.06818181818181818, "Vietnamese,Chinese,Indonesian,Spanish": 0.07954545454545454, "Vietnamese,Chinese,Indonesian,Malay": 0.07954545454545454, "Vietnamese,Chinese,Filipino,Spanish": 0.03977272727272727, "Vietnamese,Chinese,Filipino,Malay": 0.07954545454545454, "Vietnamese,Chinese,Spanish,Malay": 0.045454545454545456, "Vietnamese,Indonesian,Filipino,Spanish": 0.06818181818181818, "Vietnamese,Indonesian,Filipino,Malay": 0.13068181818181818, "Vietnamese,Indonesian,Spanish,Malay": 0.10227272727272728, "Vietnamese,Filipino,Spanish,Malay": 0.0625, "Chinese,Indonesian,Filipino,Spanish": 0.07954545454545454, "Chinese,Indonesian,Filipino,Malay": 0.06818181818181818, "Chinese,Indonesian,Spanish,Malay": 0.0625, "Chinese,Filipino,Spanish,Malay": 0.06818181818181818, "Indonesian,Filipino,Spanish,Malay": 0.08522727272727272 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.022727272727272728, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.05113636363636364, "English,Vietnamese,Chinese,Indonesian,Malay": 0.03977272727272727, "English,Vietnamese,Chinese,Filipino,Spanish": 0.028409090909090908, "English,Vietnamese,Chinese,Filipino,Malay": 0.028409090909090908, "English,Vietnamese,Chinese,Spanish,Malay": 0.03977272727272727, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.05113636363636364, "English,Vietnamese,Indonesian,Filipino,Malay": 0.07954545454545454, "English,Vietnamese,Indonesian,Spanish,Malay": 0.08522727272727272, "English,Vietnamese,Filipino,Spanish,Malay": 0.045454545454545456, "English,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, "English,Chinese,Indonesian,Filipino,Malay": 0.03409090909090909, "English,Chinese,Indonesian,Spanish,Malay": 0.05113636363636364, "English,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, "English,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.045454545454545456, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.022727272727272728, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.017045454545454544, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 } }, "AC3_2": 0.33812052790955294, "AC3_3": 0.21569971155012765, "AC3_4": 0.1305739020031706, "AC3_5": 0.07780986854708981, "AC3_6": 0.047999506417764064, "AC3_7": 0.032344789347265264 } }, "sg_eval": { "prompt_1": { "accuracy": 0.22330097087378642 } }, "cn_eval": { "prompt_1": { "accuracy": 0.26666666666666666 } }, "us_eval": { "prompt_1": { "accuracy": 0.3925233644859813 } }, "ph_eval": { "prompt_1": { "accuracy": 0.34, "category_acc": { "brand": 0.3, "demographics": 0.6, "biology": 0.2, "history": 0.26666666666666666, "literature": 0.1, "politics": 0.6, "culture": 0.3, "film": 0.5, "law": 0.3, "geography": 0.4 } } }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": { "bleu_score": 0.12061753243824382 } }, "flores_vie2eng": { "prompt_1": { "bleu_score": 0.11266537207817916 } }, "flores_zho2eng": { "prompt_1": { "bleu_score": 0.11647848956005542 } }, "flores_zsm2eng": { "prompt_1": { "bleu_score": 0.11570955404384092 } }, "mmlu": { "prompt_1": { "accuracy": 0.35822637106184363 } }, "mmlu_full": { "prompt_1": { "accuracy": 0.31583839828387555, "category_acc": { "high_school_european_history": 0.35365853658536583, "business_ethics": 0.3939393939393939, "clinical_knowledge": 0.3446969696969697, "medical_genetics": 0.42424242424242425, "high_school_us_history": 0.35960591133004927, "high_school_physics": 0.26, "high_school_world_history": 0.3940677966101695, "virology": 0.3212121212121212, "high_school_microeconomics": 0.3333333333333333, "econometrics": 0.22123893805309736, "college_computer_science": 0.24242424242424243, "high_school_biology": 0.34627831715210355, "abstract_algebra": 0.25252525252525254, "professional_accounting": 0.27402135231316727, "philosophy": 0.36451612903225805, "professional_medicine": 0.33948339483394835, "nutrition": 0.31475409836065577, "global_facts": 0.26262626262626265, "machine_learning": 0.32432432432432434, "security_studies": 0.29918032786885246, "public_relations": 0.25688073394495414, "professional_psychology": 0.32078559738134205, "prehistory": 0.3498452012383901, "anatomy": 0.29850746268656714, "human_sexuality": 0.2923076923076923, "college_medicine": 0.313953488372093, "high_school_government_and_politics": 0.421875, "college_chemistry": 0.30303030303030304, "logical_fallacies": 0.3271604938271605, "high_school_geography": 0.3248730964467005, "elementary_mathematics": 0.21220159151193635, "human_aging": 0.36036036036036034, "college_mathematics": 0.25252525252525254, "high_school_psychology": 0.38235294117647056, "formal_logic": 0.272, "high_school_statistics": 0.2651162790697674, "international_law": 0.4583333333333333, "high_school_mathematics": 0.2342007434944238, "high_school_computer_science": 0.31313131313131315, "conceptual_physics": 0.23931623931623933, "miscellaneous": 0.36061381074168797, "high_school_chemistry": 0.24257425742574257, "marketing": 0.38197424892703863, "professional_law": 0.2857142857142857, "management": 0.28431372549019607, "college_physics": 0.27722772277227725, "jurisprudence": 0.29906542056074764, "world_religions": 0.4411764705882353, "sociology": 0.445, "us_foreign_policy": 0.5252525252525253, "high_school_macroeconomics": 0.29562982005141386, "computer_security": 0.37373737373737376, "moral_scenarios": 0.23937360178970918, "moral_disputes": 0.3217391304347826, "electrical_engineering": 0.3055555555555556, "astronomy": 0.271523178807947, "college_biology": 0.3146853146853147 } } }, "c_eval": { "prompt_1": { "accuracy": 0.3060921248142645 } }, "c_eval_full": { "prompt_1": { "accuracy": 0.28144458281444584, "category_acc": { "computer_network": 0.25, "operating_system": 0.375, "computer_architecture": 0.15384615384615385, "college_programming": 0.40476190476190477, "college_physics": 0.125, "college_chemistry": 0.27586206896551724, "advanced_mathematics": 0.25, "probability_and_statistics": 0.21739130434782608, "discrete_mathematics": 0.3333333333333333, "electrical_engineer": 0.2857142857142857, "metrology_engineer": 0.20689655172413793, "high_school_mathematics": 0.17391304347826086, "high_school_physics": 0.25, "high_school_chemistry": 0.375, "high_school_biology": 0.16666666666666666, "middle_school_mathematics": 0.3333333333333333, "middle_school_biology": 0.23076923076923078, "middle_school_physics": 0.4166666666666667, "middle_school_chemistry": 0.28, "veterinary_medicine": 0.21428571428571427, "college_economics": 0.3, "business_administration": 0.2894736842105263, "marxism": 0.4166666666666667, "mao_zedong_thought": 0.3793103448275862, "education_science": 0.2647058823529412, "teacher_qualification": 0.2653061224489796, "high_school_politics": 0.16666666666666666, "high_school_geography": 0.3333333333333333, "middle_school_politics": 0.5769230769230769, "middle_school_geography": 0.11764705882352941, "modern_chinese_history": 0.21428571428571427, "ideological_and_moral_cultivation": 0.4583333333333333, "logic": 0.4444444444444444, "law": 0.2413793103448276, "chinese_language_and_literature": 0.35714285714285715, "art_studies": 0.3684210526315789, "professional_tour_guide": 0.3235294117647059, "legal_professional": 0.14285714285714285, "high_school_chinese": 0.20833333333333334, "high_school_history": 0.2, "middle_school_history": 0.3333333333333333, "civil_servant": 0.19230769230769232, "sports_science": 0.3333333333333333, "plant_protection": 0.48148148148148145, "basic_medicine": 0.25, "clinical_medicine": 0.2962962962962963, "urban_and_rural_planner": 0.2549019607843137, "accountant": 0.18518518518518517, "fire_engineer": 0.2777777777777778, "environmental_impact_assessment_engineer": 0.2777777777777778, "tax_accountant": 0.24074074074074073, "physician": 0.24074074074074073 } } }, "cmmlu": { "prompt_1": { "accuracy": 0.3010752688172043 } }, "cmmlu_full": { "prompt_1": { "accuracy": 0.2924365394577793, "category_acc": { "agronomy": 0.28402366863905326, "anatomy": 0.22972972972972974, "ancient_chinese": 0.23780487804878048, "arts": 0.3375, "astronomy": 0.28484848484848485, "business_ethics": 0.3588516746411483, "chinese_civil_service_exam": 0.25, "chinese_driving_rule": 0.3282442748091603, "chinese_food_culture": 0.3382352941176471, "chinese_foreign_policy": 0.2616822429906542, "chinese_history": 0.29102167182662536, "chinese_literature": 0.30392156862745096, "chinese_teacher_qualification": 0.29608938547486036, "clinical_knowledge": 0.24050632911392406, "college_actuarial_science": 0.18867924528301888, "college_education": 0.34579439252336447, "college_engineering_hydrology": 0.32075471698113206, "college_law": 0.2222222222222222, "college_mathematics": 0.21904761904761905, "college_medical_statistics": 0.3867924528301887, "college_medicine": 0.24175824175824176, "computer_science": 0.3088235294117647, "computer_security": 0.27485380116959063, "conceptual_physics": 0.24489795918367346, "construction_project_management": 0.3381294964028777, "economics": 0.3081761006289308, "education": 0.26380368098159507, "electrical_engineering": 0.313953488372093, "elementary_chinese": 0.2777777777777778, "elementary_commonsense": 0.30808080808080807, "elementary_information_and_technology": 0.38235294117647056, "elementary_mathematics": 0.27391304347826084, "ethnology": 0.2962962962962963, "food_science": 0.34265734265734266, "genetics": 0.2897727272727273, "global_facts": 0.2348993288590604, "high_school_biology": 0.24260355029585798, "high_school_chemistry": 0.25757575757575757, "high_school_geography": 0.2966101694915254, "high_school_mathematics": 0.1951219512195122, "high_school_physics": 0.32727272727272727, "high_school_politics": 0.2937062937062937, "human_sexuality": 0.2857142857142857, "international_law": 0.2864864864864865, "journalism": 0.36627906976744184, "jurisprudence": 0.2944038929440389, "legal_and_moral_basis": 0.40654205607476634, "logical": 0.22764227642276422, "machine_learning": 0.2459016393442623, "management": 0.28095238095238095, "marketing": 0.3277777777777778, "marxist_theory": 0.3386243386243386, "modern_chinese": 0.23275862068965517, "nutrition": 0.2620689655172414, "philosophy": 0.3142857142857143, "professional_accounting": 0.24571428571428572, "professional_law": 0.26540284360189575, "professional_medicine": 0.2845744680851064, "professional_psychology": 0.3103448275862069, "public_relations": 0.23563218390804597, "security_study": 0.2814814814814815, "sociology": 0.4026548672566372, "sports_science": 0.2727272727272727, "traditional_chinese_medicine": 0.23783783783783785, "virology": 0.3727810650887574, "world_history": 0.3167701863354037, "world_religions": 0.3375 } } }, "zbench": { "prompt_1": { "accuracy": 0.2727272727272727 } }, "ind_emotion": { "prompt_1": { "accuracy": 0.16136363636363638 } }, "ocnli": { "prompt_1": { "accuracy": 0.33932203389830506 } }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "llama-2-7b-chat": { "model_size": "7B", "model_link": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf", "zero_shot": { "cross_mmlu": { "prompt_1": { "overall_acc": 0.3952380952380952, "language_acc": { "Malay": 0.3466666666666667, "English": 0.5533333333333333, "Vietnamese": 0.36666666666666664, "Spanish": 0.44666666666666666, "Indonesian": 0.32, "Filipino": 0.36, "Chinese": 0.37333333333333335 }, "consistency_score_2": 0.5076190476190476, "consistency_score_3": 0.3369523809523809, "consistency_score_4": 0.2552380952380952, "consistency_score_5": 0.20634920634920634, "consistency_score_6": 0.17238095238095238, "consistency_score_7": 0.14666666666666667, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.41333333333333333, "Malay,Vietnamese": 0.52, "Malay,Spanish": 0.43333333333333335, "Malay,Indonesian": 0.6066666666666667, "Malay,Filipino": 0.5266666666666666, "Malay,Chinese": 0.52, "English,Vietnamese": 0.46, "English,Spanish": 0.5466666666666666, "English,Indonesian": 0.44, "English,Filipino": 0.44666666666666666, "English,Chinese": 0.49333333333333335, "Vietnamese,Spanish": 0.5133333333333333, "Vietnamese,Indonesian": 0.6, "Vietnamese,Filipino": 0.47333333333333333, "Vietnamese,Chinese": 0.54, "Spanish,Indonesian": 0.56, "Spanish,Filipino": 0.5133333333333333, "Spanish,Chinese": 0.5, "Indonesian,Filipino": 0.5533333333333333, "Indonesian,Chinese": 0.56, "Filipino,Chinese": 0.44 }, "3_combine": { "Malay,English,Vietnamese": 0.2733333333333333, "Malay,English,Spanish": 0.2733333333333333, "Malay,English,Indonesian": 0.32666666666666666, "Malay,English,Filipino": 0.26666666666666666, "Malay,English,Chinese": 0.30666666666666664, "Malay,Vietnamese,Spanish": 0.32666666666666666, "Malay,Vietnamese,Indonesian": 0.41333333333333333, "Malay,Vietnamese,Filipino": 0.3333333333333333, "Malay,Vietnamese,Chinese": 0.36666666666666664, "Malay,Spanish,Indonesian": 0.36666666666666664, "Malay,Spanish,Filipino": 0.30666666666666664, "Malay,Spanish,Chinese": 0.32, "Malay,Indonesian,Filipino": 0.41333333333333333, "Malay,Indonesian,Chinese": 0.4066666666666667, "Malay,Filipino,Chinese": 0.31333333333333335, "English,Vietnamese,Spanish": 0.32666666666666666, "English,Vietnamese,Indonesian": 0.32, "English,Vietnamese,Filipino": 0.2733333333333333, "English,Vietnamese,Chinese": 0.30666666666666664, "English,Spanish,Indonesian": 0.34, "English,Spanish,Filipino": 0.32, "English,Spanish,Chinese": 0.32666666666666666, "English,Indonesian,Filipino": 0.31333333333333335, "English,Indonesian,Chinese": 0.32, "English,Filipino,Chinese": 0.2866666666666667, "Vietnamese,Spanish,Indonesian": 0.3933333333333333, "Vietnamese,Spanish,Filipino": 0.3333333333333333, "Vietnamese,Spanish,Chinese": 0.3466666666666667, "Vietnamese,Indonesian,Filipino": 0.3933333333333333, "Vietnamese,Indonesian,Chinese": 0.41333333333333333, "Vietnamese,Filipino,Chinese": 0.3333333333333333, "Spanish,Indonesian,Filipino": 0.36, "Spanish,Indonesian,Chinese": 0.3933333333333333, "Spanish,Filipino,Chinese": 0.31333333333333335, "Indonesian,Filipino,Chinese": 0.36666666666666664 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.22, "Malay,English,Vietnamese,Indonesian": 0.24, "Malay,English,Vietnamese,Filipino": 0.20666666666666667, "Malay,English,Vietnamese,Chinese": 0.22666666666666666, "Malay,English,Spanish,Indonesian": 0.24666666666666667, "Malay,English,Spanish,Filipino": 0.2, "Malay,English,Spanish,Chinese": 0.24, "Malay,English,Indonesian,Filipino": 0.24666666666666667, "Malay,English,Indonesian,Chinese": 0.26, "Malay,English,Filipino,Chinese": 0.20666666666666667, "Malay,Vietnamese,Spanish,Indonesian": 0.29333333333333333, "Malay,Vietnamese,Spanish,Filipino": 0.25333333333333335, "Malay,Vietnamese,Spanish,Chinese": 0.26, "Malay,Vietnamese,Indonesian,Filipino": 0.30666666666666664, "Malay,Vietnamese,Indonesian,Chinese": 0.32666666666666666, "Malay,Vietnamese,Filipino,Chinese": 0.26, "Malay,Spanish,Indonesian,Filipino": 0.2733333333333333, "Malay,Spanish,Indonesian,Chinese": 0.29333333333333333, "Malay,Spanish,Filipino,Chinese": 0.22666666666666666, "Malay,Indonesian,Filipino,Chinese": 0.3, "English,Vietnamese,Spanish,Indonesian": 0.26, "English,Vietnamese,Spanish,Filipino": 0.22, "English,Vietnamese,Spanish,Chinese": 0.23333333333333334, "English,Vietnamese,Indonesian,Filipino": 0.24, "English,Vietnamese,Indonesian,Chinese": 0.24666666666666667, "English,Vietnamese,Filipino,Chinese": 0.22, "English,Spanish,Indonesian,Filipino": 0.25333333333333335, "English,Spanish,Indonesian,Chinese": 0.26666666666666666, "English,Spanish,Filipino,Chinese": 0.22666666666666666, "English,Indonesian,Filipino,Chinese": 0.24666666666666667, "Vietnamese,Spanish,Indonesian,Filipino": 0.2866666666666667, "Vietnamese,Spanish,Indonesian,Chinese": 0.3, "Vietnamese,Spanish,Filipino,Chinese": 0.25333333333333335, "Vietnamese,Indonesian,Filipino,Chinese": 0.31333333333333335, "Spanish,Indonesian,Filipino,Chinese": 0.28 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.2, "Malay,English,Vietnamese,Spanish,Filipino": 0.17333333333333334, "Malay,English,Vietnamese,Spanish,Chinese": 0.19333333333333333, "Malay,English,Vietnamese,Indonesian,Filipino": 0.2, "Malay,English,Vietnamese,Indonesian,Chinese": 0.20666666666666667, "Malay,English,Vietnamese,Filipino,Chinese": 0.17333333333333334, "Malay,English,Spanish,Indonesian,Filipino": 0.19333333333333333, "Malay,English,Spanish,Indonesian,Chinese": 0.22, "Malay,English,Spanish,Filipino,Chinese": 0.16666666666666666, "Malay,English,Indonesian,Filipino,Chinese": 0.2, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.23333333333333334, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.24666666666666667, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.2, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.26, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.2, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.20666666666666667, "English,Vietnamese,Spanish,Filipino,Chinese": 0.18, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.20666666666666667, "English,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.16666666666666666, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.18, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.17333333333333334, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.2, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667 } }, "AC3_2": 0.4444343981826413, "AC3_3": 0.363775333184547, "AC3_4": 0.3101722093924611, "AC3_5": 0.27113959035573343, "AC3_6": 0.2400607222331948, "AC3_7": 0.21394258929854018 }, "prompt_2": { "overall_acc": 0.39333333333333337, "language_acc": { "Malay": 0.3333333333333333, "English": 0.5133333333333333, "Vietnamese": 0.4533333333333333, "Spanish": 0.41333333333333333, "Indonesian": 0.36666666666666664, "Filipino": 0.3, "Chinese": 0.37333333333333335 }, "consistency_score_2": 0.4517460317460319, "consistency_score_3": 0.27314285714285713, "consistency_score_4": 0.19142857142857145, "consistency_score_5": 0.14476190476190473, "consistency_score_6": 0.1142857142857143, "consistency_score_7": 0.09333333333333334, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.41333333333333333, "Malay,Vietnamese": 0.47333333333333333, "Malay,Spanish": 0.47333333333333333, "Malay,Indonesian": 0.4866666666666667, "Malay,Filipino": 0.4866666666666667, "Malay,Chinese": 0.4266666666666667, "English,Vietnamese": 0.4666666666666667, "English,Spanish": 0.46, "English,Indonesian": 0.4533333333333333, "English,Filipino": 0.4, "English,Chinese": 0.37333333333333335, "Vietnamese,Spanish": 0.4666666666666667, "Vietnamese,Indonesian": 0.49333333333333335, "Vietnamese,Filipino": 0.41333333333333333, "Vietnamese,Chinese": 0.48, "Spanish,Indonesian": 0.5, "Spanish,Filipino": 0.4066666666666667, "Spanish,Chinese": 0.46, "Indonesian,Filipino": 0.44666666666666666, "Indonesian,Chinese": 0.46, "Filipino,Chinese": 0.44666666666666666 }, "3_combine": { "Malay,English,Vietnamese": 0.2733333333333333, "Malay,English,Spanish": 0.26, "Malay,English,Indonesian": 0.2733333333333333, "Malay,English,Filipino": 0.23333333333333334, "Malay,English,Chinese": 0.22666666666666666, "Malay,Vietnamese,Spanish": 0.3333333333333333, "Malay,Vietnamese,Indonesian": 0.32, "Malay,Vietnamese,Filipino": 0.26666666666666666, "Malay,Vietnamese,Chinese": 0.29333333333333333, "Malay,Spanish,Indonesian": 0.32, "Malay,Spanish,Filipino": 0.28, "Malay,Spanish,Chinese": 0.29333333333333333, "Malay,Indonesian,Filipino": 0.2733333333333333, "Malay,Indonesian,Chinese": 0.28, "Malay,Filipino,Chinese": 0.26666666666666666, "English,Vietnamese,Spanish": 0.2866666666666667, "English,Vietnamese,Indonesian": 0.31333333333333335, "English,Vietnamese,Filipino": 0.24666666666666667, "English,Vietnamese,Chinese": 0.26, "English,Spanish,Indonesian": 0.2866666666666667, "English,Spanish,Filipino": 0.21333333333333335, "English,Spanish,Chinese": 0.23333333333333334, "English,Indonesian,Filipino": 0.25333333333333335, "English,Indonesian,Chinese": 0.24666666666666667, "English,Filipino,Chinese": 0.21333333333333335, "Vietnamese,Spanish,Indonesian": 0.31333333333333335, "Vietnamese,Spanish,Filipino": 0.26, "Vietnamese,Spanish,Chinese": 0.30666666666666664, "Vietnamese,Indonesian,Filipino": 0.2866666666666667, "Vietnamese,Indonesian,Chinese": 0.31333333333333335, "Vietnamese,Filipino,Chinese": 0.28, "Spanish,Indonesian,Filipino": 0.24666666666666667, "Spanish,Indonesian,Chinese": 0.2733333333333333, "Spanish,Filipino,Chinese": 0.26666666666666666, "Indonesian,Filipino,Chinese": 0.26666666666666666 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.22, "Malay,English,Vietnamese,Indonesian": 0.22666666666666666, "Malay,English,Vietnamese,Filipino": 0.16666666666666666, "Malay,English,Vietnamese,Chinese": 0.18666666666666668, "Malay,English,Spanish,Indonesian": 0.20666666666666667, "Malay,English,Spanish,Filipino": 0.16, "Malay,English,Spanish,Chinese": 0.18666666666666668, "Malay,English,Indonesian,Filipino": 0.16, "Malay,English,Indonesian,Chinese": 0.18666666666666668, "Malay,English,Filipino,Chinese": 0.14, "Malay,Vietnamese,Spanish,Indonesian": 0.25333333333333335, "Malay,Vietnamese,Spanish,Filipino": 0.22, "Malay,Vietnamese,Spanish,Chinese": 0.23333333333333334, "Malay,Vietnamese,Indonesian,Filipino": 0.19333333333333333, "Malay,Vietnamese,Indonesian,Chinese": 0.22, "Malay,Vietnamese,Filipino,Chinese": 0.18666666666666668, "Malay,Spanish,Indonesian,Filipino": 0.2, "Malay,Spanish,Indonesian,Chinese": 0.21333333333333335, "Malay,Spanish,Filipino,Chinese": 0.2, "Malay,Indonesian,Filipino,Chinese": 0.18666666666666668, "English,Vietnamese,Spanish,Indonesian": 0.20666666666666667, "English,Vietnamese,Spanish,Filipino": 0.16666666666666666, "English,Vietnamese,Spanish,Chinese": 0.19333333333333333, "English,Vietnamese,Indonesian,Filipino": 0.19333333333333333, "English,Vietnamese,Indonesian,Chinese": 0.2, "English,Vietnamese,Filipino,Chinese": 0.16666666666666666, "English,Spanish,Indonesian,Filipino": 0.16, "English,Spanish,Indonesian,Chinese": 0.18, "English,Spanish,Filipino,Chinese": 0.14666666666666667, "English,Indonesian,Filipino,Chinese": 0.14666666666666667, "Vietnamese,Spanish,Indonesian,Filipino": 0.19333333333333333, "Vietnamese,Spanish,Indonesian,Chinese": 0.22, "Vietnamese,Spanish,Filipino,Chinese": 0.2, "Vietnamese,Indonesian,Filipino,Chinese": 0.2, "Spanish,Indonesian,Filipino,Chinese": 0.18 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.18, "Malay,English,Vietnamese,Spanish,Filipino": 0.14666666666666667, "Malay,English,Vietnamese,Spanish,Chinese": 0.16666666666666666, "Malay,English,Vietnamese,Indonesian,Filipino": 0.14, "Malay,English,Vietnamese,Indonesian,Chinese": 0.16, "Malay,English,Vietnamese,Filipino,Chinese": 0.12, "Malay,English,Spanish,Indonesian,Filipino": 0.13333333333333333, "Malay,English,Spanish,Indonesian,Chinese": 0.15333333333333332, "Malay,English,Spanish,Filipino,Chinese": 0.12, "Malay,English,Indonesian,Filipino,Chinese": 0.11333333333333333, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.16666666666666666, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.18, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.16666666666666666, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.14, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.13333333333333333, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, "English,Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.12666666666666668, "English,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.12, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.14, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.1, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.1, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334 } }, "AC3_2": 0.4205209115455307, "AC3_3": 0.32240068586188614, "AC3_4": 0.25752442992338753, "AC3_5": 0.21163421824975528, "AC3_6": 0.17711069414897446, "AC3_7": 0.15086757987767563 }, "prompt_3": { "overall_acc": 0.38761904761904764, "language_acc": { "Malay": 0.31333333333333335, "English": 0.4533333333333333, "Vietnamese": 0.38, "Spanish": 0.41333333333333333, "Indonesian": 0.41333333333333333, "Filipino": 0.37333333333333335, "Chinese": 0.36666666666666664 }, "consistency_score_2": 0.43396825396825406, "consistency_score_3": 0.24704761904761904, "consistency_score_4": 0.1603809523809524, "consistency_score_5": 0.11047619047619049, "consistency_score_6": 0.07904761904761905, "consistency_score_7": 0.06, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.36, "Malay,Vietnamese": 0.43333333333333335, "Malay,Spanish": 0.37333333333333335, "Malay,Indonesian": 0.5066666666666667, "Malay,Filipino": 0.4, "Malay,Chinese": 0.42, "English,Vietnamese": 0.42, "English,Spanish": 0.41333333333333333, "English,Indonesian": 0.38666666666666666, "English,Filipino": 0.4266666666666667, "English,Chinese": 0.41333333333333333, "Vietnamese,Spanish": 0.5, "Vietnamese,Indonesian": 0.48, "Vietnamese,Filipino": 0.43333333333333335, "Vietnamese,Chinese": 0.44, "Spanish,Indonesian": 0.44, "Spanish,Filipino": 0.4533333333333333, "Spanish,Chinese": 0.42, "Indonesian,Filipino": 0.46, "Indonesian,Chinese": 0.44, "Filipino,Chinese": 0.49333333333333335 }, "3_combine": { "Malay,English,Vietnamese": 0.20666666666666667, "Malay,English,Spanish": 0.17333333333333334, "Malay,English,Indonesian": 0.21333333333333335, "Malay,English,Filipino": 0.21333333333333335, "Malay,English,Chinese": 0.2, "Malay,Vietnamese,Spanish": 0.25333333333333335, "Malay,Vietnamese,Indonesian": 0.29333333333333333, "Malay,Vietnamese,Filipino": 0.26, "Malay,Vietnamese,Chinese": 0.25333333333333335, "Malay,Spanish,Indonesian": 0.23333333333333334, "Malay,Spanish,Filipino": 0.21333333333333335, "Malay,Spanish,Chinese": 0.20666666666666667, "Malay,Indonesian,Filipino": 0.26666666666666666, "Malay,Indonesian,Chinese": 0.2733333333333333, "Malay,Filipino,Chinese": 0.25333333333333335, "English,Vietnamese,Spanish": 0.24666666666666667, "English,Vietnamese,Indonesian": 0.22666666666666666, "English,Vietnamese,Filipino": 0.23333333333333334, "English,Vietnamese,Chinese": 0.23333333333333334, "English,Spanish,Indonesian": 0.22666666666666666, "English,Spanish,Filipino": 0.24666666666666667, "English,Spanish,Chinese": 0.22, "English,Indonesian,Filipino": 0.24666666666666667, "English,Indonesian,Chinese": 0.22666666666666666, "English,Filipino,Chinese": 0.26666666666666666, "Vietnamese,Spanish,Indonesian": 0.3, "Vietnamese,Spanish,Filipino": 0.29333333333333333, "Vietnamese,Spanish,Chinese": 0.28, "Vietnamese,Indonesian,Filipino": 0.29333333333333333, "Vietnamese,Indonesian,Chinese": 0.2866666666666667, "Vietnamese,Filipino,Chinese": 0.2733333333333333, "Spanish,Indonesian,Filipino": 0.28, "Spanish,Indonesian,Chinese": 0.24, "Spanish,Filipino,Chinese": 0.24666666666666667, "Indonesian,Filipino,Chinese": 0.26666666666666666 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.12666666666666668, "Malay,English,Vietnamese,Indonesian": 0.14666666666666667, "Malay,English,Vietnamese,Filipino": 0.14666666666666667, "Malay,English,Vietnamese,Chinese": 0.14, "Malay,English,Spanish,Indonesian": 0.12, "Malay,English,Spanish,Filipino": 0.13333333333333333, "Malay,English,Spanish,Chinese": 0.1, "Malay,English,Indonesian,Filipino": 0.14666666666666667, "Malay,English,Indonesian,Chinese": 0.14, "Malay,English,Filipino,Chinese": 0.16666666666666666, "Malay,Vietnamese,Spanish,Indonesian": 0.18, "Malay,Vietnamese,Spanish,Filipino": 0.17333333333333334, "Malay,Vietnamese,Spanish,Chinese": 0.15333333333333332, "Malay,Vietnamese,Indonesian,Filipino": 0.18666666666666668, "Malay,Vietnamese,Indonesian,Chinese": 0.2, "Malay,Vietnamese,Filipino,Chinese": 0.18, "Malay,Spanish,Indonesian,Filipino": 0.16666666666666666, "Malay,Spanish,Indonesian,Chinese": 0.14666666666666667, "Malay,Spanish,Filipino,Chinese": 0.14, "Malay,Indonesian,Filipino,Chinese": 0.17333333333333334, "English,Vietnamese,Spanish,Indonesian": 0.16, "English,Vietnamese,Spanish,Filipino": 0.17333333333333334, "English,Vietnamese,Spanish,Chinese": 0.16666666666666666, "English,Vietnamese,Indonesian,Filipino": 0.16, "English,Vietnamese,Indonesian,Chinese": 0.14666666666666667, "English,Vietnamese,Filipino,Chinese": 0.17333333333333334, "English,Spanish,Indonesian,Filipino": 0.16666666666666666, "English,Spanish,Indonesian,Chinese": 0.13333333333333333, "English,Spanish,Filipino,Chinese": 0.16, "English,Indonesian,Filipino,Chinese": 0.16, "Vietnamese,Spanish,Indonesian,Filipino": 0.22, "Vietnamese,Spanish,Indonesian,Chinese": 0.19333333333333333, "Vietnamese,Spanish,Filipino,Chinese": 0.18666666666666668, "Vietnamese,Indonesian,Filipino,Chinese": 0.19333333333333333, "Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.09333333333333334, "Malay,English,Vietnamese,Spanish,Filipino": 0.10666666666666667, "Malay,English,Vietnamese,Spanish,Chinese": 0.08666666666666667, "Malay,English,Vietnamese,Indonesian,Filipino": 0.10666666666666667, "Malay,English,Vietnamese,Indonesian,Chinese": 0.1, "Malay,English,Vietnamese,Filipino,Chinese": 0.12, "Malay,English,Spanish,Indonesian,Filipino": 0.10666666666666667, "Malay,English,Spanish,Indonesian,Chinese": 0.07333333333333333, "Malay,English,Spanish,Filipino,Chinese": 0.09333333333333334, "Malay,English,Indonesian,Filipino,Chinese": 0.10666666666666667, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.14, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.13333333333333333, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.1, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.13333333333333333, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, "English,Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, "English,Spanish,Indonesian,Filipino,Chinese": 0.1, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.08666666666666667, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.06666666666666667, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.08, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.08, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 } }, "AC3_2": 0.40948627359408357, "AC3_3": 0.3017658491492744, "AC3_4": 0.22688581024378213, "AC3_5": 0.1719457342821506, "AC3_6": 0.13131584059382395, "AC3_7": 0.10391489359380625 }, "prompt_4": { "overall_acc": 0.38, "language_acc": { "Malay": 0.32666666666666666, "English": 0.5466666666666666, "Vietnamese": 0.36666666666666664, "Spanish": 0.3933333333333333, "Indonesian": 0.32, "Filipino": 0.35333333333333333, "Chinese": 0.35333333333333333 }, "consistency_score_2": 0.47428571428571425, "consistency_score_3": 0.29219047619047617, "consistency_score_4": 0.2076190476190476, "consistency_score_5": 0.15809523809523812, "consistency_score_6": 0.12476190476190477, "consistency_score_7": 0.1, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.46, "Malay,Vietnamese": 0.49333333333333335, "Malay,Spanish": 0.43333333333333335, "Malay,Indonesian": 0.5533333333333333, "Malay,Filipino": 0.5, "Malay,Chinese": 0.5533333333333333, "English,Vietnamese": 0.4533333333333333, "English,Spanish": 0.47333333333333333, "English,Indonesian": 0.38666666666666666, "English,Filipino": 0.4666666666666667, "English,Chinese": 0.4266666666666667, "Vietnamese,Spanish": 0.4266666666666667, "Vietnamese,Indonesian": 0.47333333333333333, "Vietnamese,Filipino": 0.48, "Vietnamese,Chinese": 0.4866666666666667, "Spanish,Indonesian": 0.52, "Spanish,Filipino": 0.44, "Spanish,Chinese": 0.47333333333333333, "Indonesian,Filipino": 0.5133333333333333, "Indonesian,Chinese": 0.4666666666666667, "Filipino,Chinese": 0.48 }, "3_combine": { "Malay,English,Vietnamese": 0.2866666666666667, "Malay,English,Spanish": 0.26666666666666666, "Malay,English,Indonesian": 0.29333333333333333, "Malay,English,Filipino": 0.29333333333333333, "Malay,English,Chinese": 0.3, "Malay,Vietnamese,Spanish": 0.26666666666666666, "Malay,Vietnamese,Indonesian": 0.34, "Malay,Vietnamese,Filipino": 0.30666666666666664, "Malay,Vietnamese,Chinese": 0.32, "Malay,Spanish,Indonesian": 0.30666666666666664, "Malay,Spanish,Filipino": 0.26666666666666666, "Malay,Spanish,Chinese": 0.32666666666666666, "Malay,Indonesian,Filipino": 0.3466666666666667, "Malay,Indonesian,Chinese": 0.36666666666666664, "Malay,Filipino,Chinese": 0.34, "English,Vietnamese,Spanish": 0.25333333333333335, "English,Vietnamese,Indonesian": 0.2733333333333333, "English,Vietnamese,Filipino": 0.29333333333333333, "English,Vietnamese,Chinese": 0.2733333333333333, "English,Spanish,Indonesian": 0.26, "English,Spanish,Filipino": 0.2733333333333333, "English,Spanish,Chinese": 0.28, "English,Indonesian,Filipino": 0.28, "English,Indonesian,Chinese": 0.24, "English,Filipino,Chinese": 0.2733333333333333, "Vietnamese,Spanish,Indonesian": 0.29333333333333333, "Vietnamese,Spanish,Filipino": 0.24666666666666667, "Vietnamese,Spanish,Chinese": 0.2733333333333333, "Vietnamese,Indonesian,Filipino": 0.31333333333333335, "Vietnamese,Indonesian,Chinese": 0.2866666666666667, "Vietnamese,Filipino,Chinese": 0.3, "Spanish,Indonesian,Filipino": 0.30666666666666664, "Spanish,Indonesian,Chinese": 0.30666666666666664, "Spanish,Filipino,Chinese": 0.2866666666666667, "Indonesian,Filipino,Chinese": 0.2866666666666667 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.18, "Malay,English,Vietnamese,Indonesian": 0.23333333333333334, "Malay,English,Vietnamese,Filipino": 0.20666666666666667, "Malay,English,Vietnamese,Chinese": 0.21333333333333335, "Malay,English,Spanish,Indonesian": 0.2, "Malay,English,Spanish,Filipino": 0.18666666666666668, "Malay,English,Spanish,Chinese": 0.22, "Malay,English,Indonesian,Filipino": 0.22666666666666666, "Malay,English,Indonesian,Chinese": 0.21333333333333335, "Malay,English,Filipino,Chinese": 0.22666666666666666, "Malay,Vietnamese,Spanish,Indonesian": 0.22666666666666666, "Malay,Vietnamese,Spanish,Filipino": 0.17333333333333334, "Malay,Vietnamese,Spanish,Chinese": 0.21333333333333335, "Malay,Vietnamese,Indonesian,Filipino": 0.24666666666666667, "Malay,Vietnamese,Indonesian,Chinese": 0.24666666666666667, "Malay,Vietnamese,Filipino,Chinese": 0.22, "Malay,Spanish,Indonesian,Filipino": 0.20666666666666667, "Malay,Spanish,Indonesian,Chinese": 0.24666666666666667, "Malay,Spanish,Filipino,Chinese": 0.22, "Malay,Indonesian,Filipino,Chinese": 0.25333333333333335, "English,Vietnamese,Spanish,Indonesian": 0.18666666666666668, "English,Vietnamese,Spanish,Filipino": 0.17333333333333334, "English,Vietnamese,Spanish,Chinese": 0.18666666666666668, "English,Vietnamese,Indonesian,Filipino": 0.20666666666666667, "English,Vietnamese,Indonesian,Chinese": 0.19333333333333333, "English,Vietnamese,Filipino,Chinese": 0.19333333333333333, "English,Spanish,Indonesian,Filipino": 0.2, "English,Spanish,Indonesian,Chinese": 0.18666666666666668, "English,Spanish,Filipino,Chinese": 0.19333333333333333, "English,Indonesian,Filipino,Chinese": 0.18, "Vietnamese,Spanish,Indonesian,Filipino": 0.2, "Vietnamese,Spanish,Indonesian,Chinese": 0.21333333333333335, "Vietnamese,Spanish,Filipino,Chinese": 0.19333333333333333, "Vietnamese,Indonesian,Filipino,Chinese": 0.2, "Spanish,Indonesian,Filipino,Chinese": 0.2 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.16, "Malay,English,Vietnamese,Spanish,Filipino": 0.13333333333333333, "Malay,English,Vietnamese,Spanish,Chinese": 0.15333333333333332, "Malay,English,Vietnamese,Indonesian,Filipino": 0.18, "Malay,English,Vietnamese,Indonesian,Chinese": 0.18, "Malay,English,Vietnamese,Filipino,Chinese": 0.16, "Malay,English,Spanish,Indonesian,Filipino": 0.15333333333333332, "Malay,English,Spanish,Indonesian,Chinese": 0.16666666666666666, "Malay,English,Spanish,Filipino,Chinese": 0.16, "Malay,English,Indonesian,Filipino,Chinese": 0.16666666666666666, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.16, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.18666666666666668, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.18, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.14, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, "English,Vietnamese,Spanish,Filipino,Chinese": 0.13333333333333333, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.14, "English,Spanish,Indonesian,Filipino,Chinese": 0.14, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.12, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.14, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.13333333333333333, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1 } }, "AC3_2": 0.4219397992817127, "AC3_3": 0.33035987526964034, "AC3_4": 0.26852512151021857, "AC3_5": 0.22329203535673334, "AC3_6": 0.1878490565665582, "AC3_7": 0.15833333330034724 }, "prompt_5": { "overall_acc": 0.399047619047619, "language_acc": { "Malay": 0.3933333333333333, "English": 0.4866666666666667, "Vietnamese": 0.48, "Spanish": 0.4, "Indonesian": 0.32, "Filipino": 0.36666666666666664, "Chinese": 0.3466666666666667 }, "consistency_score_2": 0.47777777777777775, "consistency_score_3": 0.30571428571428577, "consistency_score_4": 0.22361904761904758, "consistency_score_5": 0.1733333333333333, "consistency_score_6": 0.13904761904761906, "consistency_score_7": 0.11333333333333333, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.4533333333333333, "Malay,Vietnamese": 0.5133333333333333, "Malay,Spanish": 0.46, "Malay,Indonesian": 0.56, "Malay,Filipino": 0.52, "Malay,Chinese": 0.5, "English,Vietnamese": 0.52, "English,Spanish": 0.5066666666666667, "English,Indonesian": 0.44666666666666666, "English,Filipino": 0.5, "English,Chinese": 0.4266666666666667, "Vietnamese,Spanish": 0.4866666666666667, "Vietnamese,Indonesian": 0.52, "Vietnamese,Filipino": 0.44666666666666666, "Vietnamese,Chinese": 0.4666666666666667, "Spanish,Indonesian": 0.44, "Spanish,Filipino": 0.44, "Spanish,Chinese": 0.4, "Indonesian,Filipino": 0.46, "Indonesian,Chinese": 0.52, "Filipino,Chinese": 0.44666666666666666 }, "3_combine": { "Malay,English,Vietnamese": 0.32, "Malay,English,Spanish": 0.2866666666666667, "Malay,English,Indonesian": 0.31333333333333335, "Malay,English,Filipino": 0.30666666666666664, "Malay,English,Chinese": 0.29333333333333333, "Malay,Vietnamese,Spanish": 0.32, "Malay,Vietnamese,Indonesian": 0.36666666666666664, "Malay,Vietnamese,Filipino": 0.30666666666666664, "Malay,Vietnamese,Chinese": 0.3333333333333333, "Malay,Spanish,Indonesian": 0.34, "Malay,Spanish,Filipino": 0.2866666666666667, "Malay,Spanish,Chinese": 0.3, "Malay,Indonesian,Filipino": 0.36, "Malay,Indonesian,Chinese": 0.38666666666666666, "Malay,Filipino,Chinese": 0.31333333333333335, "English,Vietnamese,Spanish": 0.32666666666666666, "English,Vietnamese,Indonesian": 0.31333333333333335, "English,Vietnamese,Filipino": 0.30666666666666664, "English,Vietnamese,Chinese": 0.29333333333333333, "English,Spanish,Indonesian": 0.28, "English,Spanish,Filipino": 0.29333333333333333, "English,Spanish,Chinese": 0.24666666666666667, "English,Indonesian,Filipino": 0.29333333333333333, "English,Indonesian,Chinese": 0.29333333333333333, "English,Filipino,Chinese": 0.28, "Vietnamese,Spanish,Indonesian": 0.31333333333333335, "Vietnamese,Spanish,Filipino": 0.2733333333333333, "Vietnamese,Spanish,Chinese": 0.26666666666666666, "Vietnamese,Indonesian,Filipino": 0.31333333333333335, "Vietnamese,Indonesian,Chinese": 0.3466666666666667, "Vietnamese,Filipino,Chinese": 0.28, "Spanish,Indonesian,Filipino": 0.26666666666666666, "Spanish,Indonesian,Chinese": 0.30666666666666664, "Spanish,Filipino,Chinese": 0.26666666666666666, "Indonesian,Filipino,Chinese": 0.30666666666666664 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.24, "Malay,English,Vietnamese,Indonesian": 0.25333333333333335, "Malay,English,Vietnamese,Filipino": 0.21333333333333335, "Malay,English,Vietnamese,Chinese": 0.23333333333333334, "Malay,English,Spanish,Indonesian": 0.23333333333333334, "Malay,English,Spanish,Filipino": 0.20666666666666667, "Malay,English,Spanish,Chinese": 0.18666666666666668, "Malay,English,Indonesian,Filipino": 0.24, "Malay,English,Indonesian,Chinese": 0.24, "Malay,English,Filipino,Chinese": 0.21333333333333335, "Malay,Vietnamese,Spanish,Indonesian": 0.25333333333333335, "Malay,Vietnamese,Spanish,Filipino": 0.21333333333333335, "Malay,Vietnamese,Spanish,Chinese": 0.21333333333333335, "Malay,Vietnamese,Indonesian,Filipino": 0.24666666666666667, "Malay,Vietnamese,Indonesian,Chinese": 0.2866666666666667, "Malay,Vietnamese,Filipino,Chinese": 0.22, "Malay,Spanish,Indonesian,Filipino": 0.24, "Malay,Spanish,Indonesian,Chinese": 0.26666666666666666, "Malay,Spanish,Filipino,Chinese": 0.21333333333333335, "Malay,Indonesian,Filipino,Chinese": 0.26, "English,Vietnamese,Spanish,Indonesian": 0.22666666666666666, "English,Vietnamese,Spanish,Filipino": 0.22, "English,Vietnamese,Spanish,Chinese": 0.2, "English,Vietnamese,Indonesian,Filipino": 0.23333333333333334, "English,Vietnamese,Indonesian,Chinese": 0.22666666666666666, "English,Vietnamese,Filipino,Chinese": 0.20666666666666667, "English,Spanish,Indonesian,Filipino": 0.2, "English,Spanish,Indonesian,Chinese": 0.19333333333333333, "English,Spanish,Filipino,Chinese": 0.17333333333333334, "English,Indonesian,Filipino,Chinese": 0.2, "Vietnamese,Spanish,Indonesian,Filipino": 0.21333333333333335, "Vietnamese,Spanish,Indonesian,Chinese": 0.22, "Vietnamese,Spanish,Filipino,Chinese": 0.19333333333333333, "Vietnamese,Indonesian,Filipino,Chinese": 0.23333333333333334, "Spanish,Indonesian,Filipino,Chinese": 0.21333333333333335 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.2, "Malay,English,Vietnamese,Spanish,Filipino": 0.16666666666666666, "Malay,English,Vietnamese,Spanish,Chinese": 0.16, "Malay,English,Vietnamese,Indonesian,Filipino": 0.18666666666666668, "Malay,English,Vietnamese,Indonesian,Chinese": 0.2, "Malay,English,Vietnamese,Filipino,Chinese": 0.16666666666666666, "Malay,English,Spanish,Indonesian,Filipino": 0.18, "Malay,English,Spanish,Indonesian,Chinese": 0.17333333333333334, "Malay,English,Spanish,Filipino,Chinese": 0.14, "Malay,English,Indonesian,Filipino,Chinese": 0.18, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.18666666666666668, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.19333333333333333, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.16, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.19333333333333333, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.17333333333333334, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.16, "English,Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.16666666666666666, "English,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.14666666666666667, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.12, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.14666666666666667, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 } }, "AC3_2": 0.43487810760186335, "AC3_3": 0.3462007721516491, "AC3_4": 0.2866209266972571, "AC3_5": 0.24168607871540143, "AC3_6": 0.20623345971725707, "AC3_7": 0.17653035932118522 } }, "cross_logiqa": { "prompt_1": { "overall_acc": 0.36525974025974023, "language_acc": { "English": 0.42613636363636365, "Vietnamese": 0.32386363636363635, "Chinese": 0.36363636363636365, "Indonesian": 0.35795454545454547, "Filipino": 0.3125, "Spanish": 0.4318181818181818, "Malay": 0.3409090909090909 }, "consistency_score_2": 0.5457251082251082, "consistency_score_3": 0.37094155844155835, "consistency_score_4": 0.2797077922077921, "consistency_score_5": 0.22321428571428573, "consistency_score_6": 0.18344155844155846, "consistency_score_7": 0.1534090909090909, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.4715909090909091, "English,Chinese": 0.6022727272727273, "English,Indonesian": 0.5340909090909091, "English,Filipino": 0.4375, "English,Spanish": 0.6534090909090909, "English,Malay": 0.4943181818181818, "Vietnamese,Chinese": 0.4715909090909091, "Vietnamese,Indonesian": 0.6136363636363636, "Vietnamese,Filipino": 0.4715909090909091, "Vietnamese,Spanish": 0.48295454545454547, "Vietnamese,Malay": 0.5340909090909091, "Chinese,Indonesian": 0.5909090909090909, "Chinese,Filipino": 0.5113636363636364, "Chinese,Spanish": 0.5681818181818182, "Chinese,Malay": 0.5795454545454546, "Indonesian,Filipino": 0.5397727272727273, "Indonesian,Spanish": 0.6079545454545454, "Indonesian,Malay": 0.6647727272727273, "Filipino,Spanish": 0.5170454545454546, "Filipino,Malay": 0.5625, "Spanish,Malay": 0.5511363636363636 }, "3_combine": { "English,Vietnamese,Chinese": 0.32954545454545453, "English,Vietnamese,Indonesian": 0.3693181818181818, "English,Vietnamese,Filipino": 0.26704545454545453, "English,Vietnamese,Spanish": 0.3465909090909091, "English,Vietnamese,Malay": 0.3181818181818182, "English,Chinese,Indonesian": 0.3977272727272727, "English,Chinese,Filipino": 0.3409090909090909, "English,Chinese,Spanish": 0.4431818181818182, "English,Chinese,Malay": 0.38636363636363635, "English,Indonesian,Filipino": 0.3125, "English,Indonesian,Spanish": 0.4318181818181818, "English,Indonesian,Malay": 0.3806818181818182, "English,Filipino,Spanish": 0.3465909090909091, "English,Filipino,Malay": 0.3125, "English,Spanish,Malay": 0.38636363636363635, "Vietnamese,Chinese,Indonesian": 0.39204545454545453, "Vietnamese,Chinese,Filipino": 0.3068181818181818, "Vietnamese,Chinese,Spanish": 0.32954545454545453, "Vietnamese,Chinese,Malay": 0.35795454545454547, "Vietnamese,Indonesian,Filipino": 0.35795454545454547, "Vietnamese,Indonesian,Spanish": 0.4034090909090909, "Vietnamese,Indonesian,Malay": 0.45454545454545453, "Vietnamese,Filipino,Spanish": 0.30113636363636365, "Vietnamese,Filipino,Malay": 0.3352272727272727, "Vietnamese,Spanish,Malay": 0.3465909090909091, "Chinese,Indonesian,Filipino": 0.3693181818181818, "Chinese,Indonesian,Spanish": 0.4375, "Chinese,Indonesian,Malay": 0.45454545454545453, "Chinese,Filipino,Spanish": 0.36363636363636365, "Chinese,Filipino,Malay": 0.375, "Chinese,Spanish,Malay": 0.4090909090909091, "Indonesian,Filipino,Spanish": 0.3806818181818182, "Indonesian,Filipino,Malay": 0.42045454545454547, "Indonesian,Spanish,Malay": 0.4602272727272727, "Filipino,Spanish,Malay": 0.35795454545454547 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.2897727272727273, "English,Vietnamese,Chinese,Filipino": 0.22727272727272727, "English,Vietnamese,Chinese,Spanish": 0.26704545454545453, "English,Vietnamese,Chinese,Malay": 0.2556818181818182, "English,Vietnamese,Indonesian,Filipino": 0.24431818181818182, "English,Vietnamese,Indonesian,Spanish": 0.29545454545454547, "English,Vietnamese,Indonesian,Malay": 0.29545454545454547, "English,Vietnamese,Filipino,Spanish": 0.2215909090909091, "English,Vietnamese,Filipino,Malay": 0.2159090909090909, "English,Vietnamese,Spanish,Malay": 0.24431818181818182, "English,Chinese,Indonesian,Filipino": 0.26136363636363635, "English,Chinese,Indonesian,Spanish": 0.3465909090909091, "English,Chinese,Indonesian,Malay": 0.3181818181818182, "English,Chinese,Filipino,Spanish": 0.2784090909090909, "English,Chinese,Filipino,Malay": 0.26136363636363635, "English,Chinese,Spanish,Malay": 0.3125, "English,Indonesian,Filipino,Spanish": 0.2727272727272727, "English,Indonesian,Filipino,Malay": 0.26136363636363635, "English,Indonesian,Spanish,Malay": 0.3181818181818182, "English,Filipino,Spanish,Malay": 0.2556818181818182, "Vietnamese,Chinese,Indonesian,Filipino": 0.26136363636363635, "Vietnamese,Chinese,Indonesian,Spanish": 0.3068181818181818, "Vietnamese,Chinese,Indonesian,Malay": 0.32954545454545453, "Vietnamese,Chinese,Filipino,Spanish": 0.23295454545454544, "Vietnamese,Chinese,Filipino,Malay": 0.24431818181818182, "Vietnamese,Chinese,Spanish,Malay": 0.2727272727272727, "Vietnamese,Indonesian,Filipino,Spanish": 0.2727272727272727, "Vietnamese,Indonesian,Filipino,Malay": 0.29545454545454547, "Vietnamese,Indonesian,Spanish,Malay": 0.32954545454545453, "Vietnamese,Filipino,Spanish,Malay": 0.23863636363636365, "Chinese,Indonesian,Filipino,Spanish": 0.29545454545454547, "Chinese,Indonesian,Filipino,Malay": 0.3125, "Chinese,Indonesian,Spanish,Malay": 0.36363636363636365, "Chinese,Filipino,Spanish,Malay": 0.2897727272727273, "Indonesian,Filipino,Spanish,Malay": 0.30113636363636365 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.20454545454545456, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.25, "English,Vietnamese,Chinese,Indonesian,Malay": 0.24431818181818182, "English,Vietnamese,Chinese,Filipino,Spanish": 0.19318181818181818, "English,Vietnamese,Chinese,Filipino,Malay": 0.18181818181818182, "English,Vietnamese,Chinese,Spanish,Malay": 0.21022727272727273, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.21022727272727273, "English,Vietnamese,Indonesian,Filipino,Malay": 0.21022727272727273, "English,Vietnamese,Indonesian,Spanish,Malay": 0.23863636363636365, "English,Vietnamese,Filipino,Spanish,Malay": 0.17613636363636365, "English,Chinese,Indonesian,Filipino,Spanish": 0.23295454545454544, "English,Chinese,Indonesian,Filipino,Malay": 0.22727272727272727, "English,Chinese,Indonesian,Spanish,Malay": 0.2784090909090909, "English,Chinese,Filipino,Spanish,Malay": 0.2215909090909091, "English,Indonesian,Filipino,Spanish,Malay": 0.2215909090909091, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.2159090909090909, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.22727272727272727, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.26704545454545453, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.19318181818181818, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.22727272727272727, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.2556818181818182 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.18181818181818182, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.17613636363636365, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.21022727272727273, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.1534090909090909, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.17613636363636365, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.19886363636363635, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1875 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909 } }, "AC3_2": 0.43761740185507836, "AC3_3": 0.3680787238721668, "AC3_4": 0.3168097318751346, "AC3_5": 0.2770935960120265, "AC3_6": 0.2442269268747246, "AC3_7": 0.21606914208382175 }, "prompt_2": { "overall_acc": 0.36525974025974023, "language_acc": { "English": 0.45454545454545453, "Vietnamese": 0.32954545454545453, "Chinese": 0.3465909090909091, "Indonesian": 0.3522727272727273, "Filipino": 0.3181818181818182, "Spanish": 0.4090909090909091, "Malay": 0.3465909090909091 }, "consistency_score_2": 0.5522186147186148, "consistency_score_3": 0.37824675324675316, "consistency_score_4": 0.28587662337662345, "consistency_score_5": 0.22916666666666663, "consistency_score_6": 0.19155844155844154, "consistency_score_7": 0.16477272727272727, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.4715909090909091, "English,Chinese": 0.5795454545454546, "English,Indonesian": 0.4772727272727273, "English,Filipino": 0.4375, "English,Spanish": 0.6306818181818182, "English,Malay": 0.5284090909090909, "Vietnamese,Chinese": 0.5056818181818182, "Vietnamese,Indonesian": 0.5965909090909091, "Vietnamese,Filipino": 0.5113636363636364, "Vietnamese,Spanish": 0.5965909090909091, "Vietnamese,Malay": 0.5454545454545454, "Chinese,Indonesian": 0.5284090909090909, "Chinese,Filipino": 0.4772727272727273, "Chinese,Spanish": 0.5681818181818182, "Chinese,Malay": 0.5454545454545454, "Indonesian,Filipino": 0.5965909090909091, "Indonesian,Spanish": 0.6420454545454546, "Indonesian,Malay": 0.6647727272727273, "Filipino,Spanish": 0.5227272727272727, "Filipino,Malay": 0.5795454545454546, "Spanish,Malay": 0.5909090909090909 }, "3_combine": { "English,Vietnamese,Chinese": 0.36363636363636365, "English,Vietnamese,Indonesian": 0.3352272727272727, "English,Vietnamese,Filipino": 0.26704545454545453, "English,Vietnamese,Spanish": 0.39204545454545453, "English,Vietnamese,Malay": 0.3181818181818182, "English,Chinese,Indonesian": 0.3409090909090909, "English,Chinese,Filipino": 0.32386363636363635, "English,Chinese,Spanish": 0.4375, "English,Chinese,Malay": 0.3806818181818182, "English,Indonesian,Filipino": 0.3181818181818182, "English,Indonesian,Spanish": 0.4147727272727273, "English,Indonesian,Malay": 0.3806818181818182, "English,Filipino,Spanish": 0.3352272727272727, "English,Filipino,Malay": 0.32954545454545453, "English,Spanish,Malay": 0.4034090909090909, "Vietnamese,Chinese,Indonesian": 0.3806818181818182, "Vietnamese,Chinese,Filipino": 0.3068181818181818, "Vietnamese,Chinese,Spanish": 0.39204545454545453, "Vietnamese,Chinese,Malay": 0.35795454545454547, "Vietnamese,Indonesian,Filipino": 0.39204545454545453, "Vietnamese,Indonesian,Spanish": 0.44886363636363635, "Vietnamese,Indonesian,Malay": 0.4375, "Vietnamese,Filipino,Spanish": 0.36363636363636365, "Vietnamese,Filipino,Malay": 0.3522727272727273, "Vietnamese,Spanish,Malay": 0.38636363636363635, "Chinese,Indonesian,Filipino": 0.3693181818181818, "Chinese,Indonesian,Spanish": 0.4147727272727273, "Chinese,Indonesian,Malay": 0.4147727272727273, "Chinese,Filipino,Spanish": 0.3465909090909091, "Chinese,Filipino,Malay": 0.375, "Chinese,Spanish,Malay": 0.4034090909090909, "Indonesian,Filipino,Spanish": 0.42613636363636365, "Indonesian,Filipino,Malay": 0.4602272727272727, "Indonesian,Spanish,Malay": 0.48295454545454547, "Filipino,Spanish,Malay": 0.38636363636363635 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.2727272727272727, "English,Vietnamese,Chinese,Filipino": 0.23295454545454544, "English,Vietnamese,Chinese,Spanish": 0.3181818181818182, "English,Vietnamese,Chinese,Malay": 0.26136363636363635, "English,Vietnamese,Indonesian,Filipino": 0.23295454545454544, "English,Vietnamese,Indonesian,Spanish": 0.29545454545454547, "English,Vietnamese,Indonesian,Malay": 0.2727272727272727, "English,Vietnamese,Filipino,Spanish": 0.23295454545454544, "English,Vietnamese,Filipino,Malay": 0.21022727272727273, "English,Vietnamese,Spanish,Malay": 0.2727272727272727, "English,Chinese,Indonesian,Filipino": 0.25, "English,Chinese,Indonesian,Spanish": 0.3125, "English,Chinese,Indonesian,Malay": 0.2840909090909091, "English,Chinese,Filipino,Spanish": 0.2727272727272727, "English,Chinese,Filipino,Malay": 0.26136363636363635, "English,Chinese,Spanish,Malay": 0.3181818181818182, "English,Indonesian,Filipino,Spanish": 0.2784090909090909, "English,Indonesian,Filipino,Malay": 0.2784090909090909, "English,Indonesian,Spanish,Malay": 0.32386363636363635, "English,Filipino,Spanish,Malay": 0.26136363636363635, "Vietnamese,Chinese,Indonesian,Filipino": 0.2727272727272727, "Vietnamese,Chinese,Indonesian,Spanish": 0.32386363636363635, "Vietnamese,Chinese,Indonesian,Malay": 0.3125, "Vietnamese,Chinese,Filipino,Spanish": 0.26136363636363635, "Vietnamese,Chinese,Filipino,Malay": 0.2556818181818182, "Vietnamese,Chinese,Spanish,Malay": 0.2897727272727273, "Vietnamese,Indonesian,Filipino,Spanish": 0.3181818181818182, "Vietnamese,Indonesian,Filipino,Malay": 0.3181818181818182, "Vietnamese,Indonesian,Spanish,Malay": 0.3409090909090909, "Vietnamese,Filipino,Spanish,Malay": 0.26704545454545453, "Chinese,Indonesian,Filipino,Spanish": 0.29545454545454547, "Chinese,Indonesian,Filipino,Malay": 0.32386363636363635, "Chinese,Indonesian,Spanish,Malay": 0.3409090909090909, "Chinese,Filipino,Spanish,Malay": 0.2840909090909091, "Indonesian,Filipino,Spanish,Malay": 0.35795454545454547 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.20454545454545456, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.25, "English,Vietnamese,Chinese,Indonesian,Malay": 0.22727272727272727, "English,Vietnamese,Chinese,Filipino,Spanish": 0.21022727272727273, "English,Vietnamese,Chinese,Filipino,Malay": 0.1875, "English,Vietnamese,Chinese,Spanish,Malay": 0.23295454545454544, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.21022727272727273, "English,Vietnamese,Indonesian,Filipino,Malay": 0.20454545454545456, "English,Vietnamese,Indonesian,Spanish,Malay": 0.23295454545454544, "English,Vietnamese,Filipino,Spanish,Malay": 0.1875, "English,Chinese,Indonesian,Filipino,Spanish": 0.22727272727272727, "English,Chinese,Indonesian,Filipino,Malay": 0.22727272727272727, "English,Chinese,Indonesian,Spanish,Malay": 0.2556818181818182, "English,Chinese,Filipino,Spanish,Malay": 0.2215909090909091, "English,Indonesian,Filipino,Spanish,Malay": 0.24431818181818182, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.23863636363636365, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.24431818181818182, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.26136363636363635, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.2159090909090909, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.26136363636363635, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.26704545454545453 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.1875, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.18181818181818182, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.20454545454545456, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.17045454545454544, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.18181818181818182, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.20454545454545456, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.21022727272727273 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727 } }, "AC3_2": 0.4396904333759216, "AC3_3": 0.3716398230090522, "AC3_4": 0.3207291959604179, "AC3_5": 0.2816340463795666, "AC3_6": 0.2513157396087978, "AC3_7": 0.22709870523350076 }, "prompt_3": { "overall_acc": 0.3514610389610389, "language_acc": { "English": 0.39204545454545453, "Vietnamese": 0.32386363636363635, "Chinese": 0.3693181818181818, "Indonesian": 0.36363636363636365, "Filipino": 0.26136363636363635, "Spanish": 0.3977272727272727, "Malay": 0.3522727272727273 }, "consistency_score_2": 0.49675324675324684, "consistency_score_3": 0.3131493506493507, "consistency_score_4": 0.2183441558441558, "consistency_score_5": 0.16071428571428573, "consistency_score_6": 0.12256493506493506, "consistency_score_7": 0.09659090909090909, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.42045454545454547, "English,Chinese": 0.5568181818181818, "English,Indonesian": 0.4602272727272727, "English,Filipino": 0.375, "English,Spanish": 0.5511363636363636, "English,Malay": 0.4318181818181818, "Vietnamese,Chinese": 0.4659090909090909, "Vietnamese,Indonesian": 0.5284090909090909, "Vietnamese,Filipino": 0.5, "Vietnamese,Spanish": 0.4943181818181818, "Vietnamese,Malay": 0.5170454545454546, "Chinese,Indonesian": 0.5284090909090909, "Chinese,Filipino": 0.44886363636363635, "Chinese,Spanish": 0.48295454545454547, "Chinese,Malay": 0.5340909090909091, "Indonesian,Filipino": 0.5113636363636364, "Indonesian,Spanish": 0.5397727272727273, "Indonesian,Malay": 0.6477272727272727, "Filipino,Spanish": 0.45454545454545453, "Filipino,Malay": 0.5113636363636364, "Spanish,Malay": 0.4715909090909091 }, "3_combine": { "English,Vietnamese,Chinese": 0.3125, "English,Vietnamese,Indonesian": 0.2784090909090909, "English,Vietnamese,Filipino": 0.23295454545454544, "English,Vietnamese,Spanish": 0.3181818181818182, "English,Vietnamese,Malay": 0.2556818181818182, "English,Chinese,Indonesian": 0.32386363636363635, "English,Chinese,Filipino": 0.2727272727272727, "English,Chinese,Spanish": 0.35795454545454547, "English,Chinese,Malay": 0.3125, "English,Indonesian,Filipino": 0.24431818181818182, "English,Indonesian,Spanish": 0.32954545454545453, "English,Indonesian,Malay": 0.3125, "English,Filipino,Spanish": 0.26704545454545453, "English,Filipino,Malay": 0.24431818181818182, "English,Spanish,Malay": 0.29545454545454547, "Vietnamese,Chinese,Indonesian": 0.3352272727272727, "Vietnamese,Chinese,Filipino": 0.2840909090909091, "Vietnamese,Chinese,Spanish": 0.3125, "Vietnamese,Chinese,Malay": 0.32386363636363635, "Vietnamese,Indonesian,Filipino": 0.32386363636363635, "Vietnamese,Indonesian,Spanish": 0.3465909090909091, "Vietnamese,Indonesian,Malay": 0.3977272727272727, "Vietnamese,Filipino,Spanish": 0.30113636363636365, "Vietnamese,Filipino,Malay": 0.3068181818181818, "Vietnamese,Spanish,Malay": 0.3125, "Chinese,Indonesian,Filipino": 0.3181818181818182, "Chinese,Indonesian,Spanish": 0.3352272727272727, "Chinese,Indonesian,Malay": 0.4034090909090909, "Chinese,Filipino,Spanish": 0.30113636363636365, "Chinese,Filipino,Malay": 0.3068181818181818, "Chinese,Spanish,Malay": 0.3125, "Indonesian,Filipino,Spanish": 0.32386363636363635, "Indonesian,Filipino,Malay": 0.3806818181818182, "Indonesian,Spanish,Malay": 0.375, "Filipino,Spanish,Malay": 0.30113636363636365 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.23295454545454544, "English,Vietnamese,Chinese,Filipino": 0.1875, "English,Vietnamese,Chinese,Spanish": 0.25, "English,Vietnamese,Chinese,Malay": 0.19886363636363635, "English,Vietnamese,Indonesian,Filipino": 0.16477272727272727, "English,Vietnamese,Indonesian,Spanish": 0.2215909090909091, "English,Vietnamese,Indonesian,Malay": 0.20454545454545456, "English,Vietnamese,Filipino,Spanish": 0.19318181818181818, "English,Vietnamese,Filipino,Malay": 0.1534090909090909, "English,Vietnamese,Spanish,Malay": 0.19318181818181818, "English,Chinese,Indonesian,Filipino": 0.19318181818181818, "English,Chinese,Indonesian,Spanish": 0.24431818181818182, "English,Chinese,Indonesian,Malay": 0.23863636363636365, "English,Chinese,Filipino,Spanish": 0.22727272727272727, "English,Chinese,Filipino,Malay": 0.18181818181818182, "English,Chinese,Spanish,Malay": 0.23295454545454544, "English,Indonesian,Filipino,Spanish": 0.19886363636363635, "English,Indonesian,Filipino,Malay": 0.18181818181818182, "English,Indonesian,Spanish,Malay": 0.2215909090909091, "English,Filipino,Spanish,Malay": 0.1875, "Vietnamese,Chinese,Indonesian,Filipino": 0.2159090909090909, "Vietnamese,Chinese,Indonesian,Spanish": 0.24431818181818182, "Vietnamese,Chinese,Indonesian,Malay": 0.26704545454545453, "Vietnamese,Chinese,Filipino,Spanish": 0.2215909090909091, "Vietnamese,Chinese,Filipino,Malay": 0.19318181818181818, "Vietnamese,Chinese,Spanish,Malay": 0.21022727272727273, "Vietnamese,Indonesian,Filipino,Spanish": 0.24431818181818182, "Vietnamese,Indonesian,Filipino,Malay": 0.25, "Vietnamese,Indonesian,Spanish,Malay": 0.2727272727272727, "Vietnamese,Filipino,Spanish,Malay": 0.2159090909090909, "Chinese,Indonesian,Filipino,Spanish": 0.2215909090909091, "Chinese,Indonesian,Filipino,Malay": 0.2556818181818182, "Chinese,Indonesian,Spanish,Malay": 0.2556818181818182, "Chinese,Filipino,Spanish,Malay": 0.2159090909090909, "Indonesian,Filipino,Spanish,Malay": 0.25 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.14204545454545456, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.1875, "English,Vietnamese,Chinese,Indonesian,Malay": 0.17045454545454544, "English,Vietnamese,Chinese,Filipino,Spanish": 0.17045454545454544, "English,Vietnamese,Chinese,Filipino,Malay": 0.11931818181818182, "English,Vietnamese,Chinese,Spanish,Malay": 0.1590909090909091, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.1534090909090909, "English,Vietnamese,Indonesian,Filipino,Malay": 0.11931818181818182, "English,Vietnamese,Indonesian,Spanish,Malay": 0.16477272727272727, "English,Vietnamese,Filipino,Spanish,Malay": 0.13068181818181818, "English,Chinese,Indonesian,Filipino,Spanish": 0.16477272727272727, "English,Chinese,Indonesian,Filipino,Malay": 0.14772727272727273, "English,Chinese,Indonesian,Spanish,Malay": 0.18181818181818182, "English,Chinese,Filipino,Spanish,Malay": 0.1590909090909091, "English,Indonesian,Filipino,Spanish,Malay": 0.14772727272727273, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.17613636363636365, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.17045454545454544, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.1875, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.1534090909090909, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.19318181818181818, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.17613636363636365 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13068181818181818, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.10227272727272728, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13636363636363635, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.11363636363636363, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.125, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909 } }, "AC3_2": 0.41166345611253086, "AC3_3": 0.3312009497087612, "AC3_4": 0.2693533318060607, "AC3_5": 0.2205682589562558, "AC3_6": 0.18174868791753823, "AC3_7": 0.15153573777909068 }, "prompt_4": { "overall_acc": 0.35714285714285715, "language_acc": { "English": 0.4318181818181818, "Vietnamese": 0.3465909090909091, "Chinese": 0.32954545454545453, "Indonesian": 0.35795454545454547, "Filipino": 0.29545454545454547, "Spanish": 0.375, "Malay": 0.36363636363636365 }, "consistency_score_2": 0.5267857142857143, "consistency_score_3": 0.3444805194805194, "consistency_score_4": 0.24918831168831163, "consistency_score_5": 0.19182900432900432, "consistency_score_6": 0.1534090909090909, "consistency_score_7": 0.125, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.4318181818181818, "English,Chinese": 0.5568181818181818, "English,Indonesian": 0.5056818181818182, "English,Filipino": 0.42613636363636365, "English,Spanish": 0.5852272727272727, "English,Malay": 0.44886363636363635, "Vietnamese,Chinese": 0.48863636363636365, "Vietnamese,Indonesian": 0.5965909090909091, "Vietnamese,Filipino": 0.44886363636363635, "Vietnamese,Spanish": 0.4943181818181818, "Vietnamese,Malay": 0.5454545454545454, "Chinese,Indonesian": 0.5511363636363636, "Chinese,Filipino": 0.4602272727272727, "Chinese,Spanish": 0.48863636363636365, "Chinese,Malay": 0.5340909090909091, "Indonesian,Filipino": 0.5511363636363636, "Indonesian,Spanish": 0.6193181818181818, "Indonesian,Malay": 0.6818181818181818, "Filipino,Spanish": 0.48295454545454547, "Filipino,Malay": 0.5738636363636364, "Spanish,Malay": 0.5909090909090909 }, "3_combine": { "English,Vietnamese,Chinese": 0.3068181818181818, "English,Vietnamese,Indonesian": 0.3352272727272727, "English,Vietnamese,Filipino": 0.23295454545454544, "English,Vietnamese,Spanish": 0.3181818181818182, "English,Vietnamese,Malay": 0.2784090909090909, "English,Chinese,Indonesian": 0.3693181818181818, "English,Chinese,Filipino": 0.3068181818181818, "English,Chinese,Spanish": 0.36363636363636365, "English,Chinese,Malay": 0.3409090909090909, "English,Indonesian,Filipino": 0.29545454545454547, "English,Indonesian,Spanish": 0.39204545454545453, "English,Indonesian,Malay": 0.375, "English,Filipino,Spanish": 0.2840909090909091, "English,Filipino,Malay": 0.2840909090909091, "English,Spanish,Malay": 0.3465909090909091, "Vietnamese,Chinese,Indonesian": 0.3693181818181818, "Vietnamese,Chinese,Filipino": 0.2556818181818182, "Vietnamese,Chinese,Spanish": 0.30113636363636365, "Vietnamese,Chinese,Malay": 0.3409090909090909, "Vietnamese,Indonesian,Filipino": 0.3522727272727273, "Vietnamese,Indonesian,Spanish": 0.3977272727272727, "Vietnamese,Indonesian,Malay": 0.44886363636363635, "Vietnamese,Filipino,Spanish": 0.2897727272727273, "Vietnamese,Filipino,Malay": 0.3352272727272727, "Vietnamese,Spanish,Malay": 0.36363636363636365, "Chinese,Indonesian,Filipino": 0.3409090909090909, "Chinese,Indonesian,Spanish": 0.375, "Chinese,Indonesian,Malay": 0.4318181818181818, "Chinese,Filipino,Spanish": 0.2840909090909091, "Chinese,Filipino,Malay": 0.3465909090909091, "Chinese,Spanish,Malay": 0.35795454545454547, "Indonesian,Filipino,Spanish": 0.36363636363636365, "Indonesian,Filipino,Malay": 0.4318181818181818, "Indonesian,Spanish,Malay": 0.4772727272727273, "Filipino,Spanish,Malay": 0.36363636363636365 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.26136363636363635, "English,Vietnamese,Chinese,Filipino": 0.18181818181818182, "English,Vietnamese,Chinese,Spanish": 0.2215909090909091, "English,Vietnamese,Chinese,Malay": 0.21022727272727273, "English,Vietnamese,Indonesian,Filipino": 0.2159090909090909, "English,Vietnamese,Indonesian,Spanish": 0.26136363636363635, "English,Vietnamese,Indonesian,Malay": 0.26136363636363635, "English,Vietnamese,Filipino,Spanish": 0.1875, "English,Vietnamese,Filipino,Malay": 0.18181818181818182, "English,Vietnamese,Spanish,Malay": 0.23295454545454544, "English,Chinese,Indonesian,Filipino": 0.23295454545454544, "English,Chinese,Indonesian,Spanish": 0.2840909090909091, "English,Chinese,Indonesian,Malay": 0.2897727272727273, "English,Chinese,Filipino,Spanish": 0.21022727272727273, "English,Chinese,Filipino,Malay": 0.22727272727272727, "English,Chinese,Spanish,Malay": 0.26704545454545453, "English,Indonesian,Filipino,Spanish": 0.23863636363636365, "English,Indonesian,Filipino,Malay": 0.24431818181818182, "English,Indonesian,Spanish,Malay": 0.3125, "English,Filipino,Spanish,Malay": 0.21022727272727273, "Vietnamese,Chinese,Indonesian,Filipino": 0.23295454545454544, "Vietnamese,Chinese,Indonesian,Spanish": 0.2727272727272727, "Vietnamese,Chinese,Indonesian,Malay": 0.29545454545454547, "Vietnamese,Chinese,Filipino,Spanish": 0.1875, "Vietnamese,Chinese,Filipino,Malay": 0.2159090909090909, "Vietnamese,Chinese,Spanish,Malay": 0.24431818181818182, "Vietnamese,Indonesian,Filipino,Spanish": 0.26704545454545453, "Vietnamese,Indonesian,Filipino,Malay": 0.30113636363636365, "Vietnamese,Indonesian,Spanish,Malay": 0.3352272727272727, "Vietnamese,Filipino,Spanish,Malay": 0.24431818181818182, "Chinese,Indonesian,Filipino,Spanish": 0.24431818181818182, "Chinese,Indonesian,Filipino,Malay": 0.2897727272727273, "Chinese,Indonesian,Spanish,Malay": 0.32386363636363635, "Chinese,Filipino,Spanish,Malay": 0.23863636363636365, "Indonesian,Filipino,Spanish,Malay": 0.29545454545454547 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.17045454545454544, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.19886363636363635, "English,Vietnamese,Chinese,Indonesian,Malay": 0.19886363636363635, "English,Vietnamese,Chinese,Filipino,Spanish": 0.14204545454545456, "English,Vietnamese,Chinese,Filipino,Malay": 0.14204545454545456, "English,Vietnamese,Chinese,Spanish,Malay": 0.17613636363636365, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.18181818181818182, "English,Vietnamese,Indonesian,Filipino,Malay": 0.18181818181818182, "English,Vietnamese,Indonesian,Spanish,Malay": 0.2215909090909091, "English,Vietnamese,Filipino,Spanish,Malay": 0.1590909090909091, "English,Chinese,Indonesian,Filipino,Spanish": 0.1875, "English,Chinese,Indonesian,Filipino,Malay": 0.19886363636363635, "English,Chinese,Indonesian,Spanish,Malay": 0.24431818181818182, "English,Chinese,Filipino,Spanish,Malay": 0.17613636363636365, "English,Indonesian,Filipino,Spanish,Malay": 0.19886363636363635, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.1875, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.20454545454545456, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.23863636363636365, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.17045454545454544, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.23295454545454544, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.2159090909090909 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.14204545454545456, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.14204545454545456, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.17045454545454544, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.125, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.1590909090909091, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.125 } }, "AC3_2": 0.4256854256372674, "AC3_3": 0.35069742838923085, "AC3_4": 0.2935551730248303, "AC3_5": 0.24959515590750173, "AC3_6": 0.21462639105494122, "AC3_7": 0.1851851851467764 }, "prompt_5": { "overall_acc": 0.33928571428571425, "language_acc": { "English": 0.39204545454545453, "Vietnamese": 0.3352272727272727, "Chinese": 0.3409090909090909, "Indonesian": 0.3181818181818182, "Filipino": 0.26136363636363635, "Spanish": 0.38636363636363635, "Malay": 0.3409090909090909 }, "consistency_score_2": 0.5159632034632035, "consistency_score_3": 0.33279220779220786, "consistency_score_4": 0.24074675324675324, "consistency_score_5": 0.1877705627705628, "consistency_score_6": 0.15422077922077923, "consistency_score_7": 0.13068181818181818, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.4715909090909091, "English,Chinese": 0.48863636363636365, "English,Indonesian": 0.5227272727272727, "English,Filipino": 0.3977272727272727, "English,Spanish": 0.5909090909090909, "English,Malay": 0.4772727272727273, "Vietnamese,Chinese": 0.48863636363636365, "Vietnamese,Indonesian": 0.5340909090909091, "Vietnamese,Filipino": 0.4602272727272727, "Vietnamese,Spanish": 0.5397727272727273, "Vietnamese,Malay": 0.5511363636363636, "Chinese,Indonesian": 0.5113636363636364, "Chinese,Filipino": 0.48863636363636365, "Chinese,Spanish": 0.4602272727272727, "Chinese,Malay": 0.5454545454545454, "Indonesian,Filipino": 0.5170454545454546, "Indonesian,Spanish": 0.5397727272727273, "Indonesian,Malay": 0.6306818181818182, "Filipino,Spanish": 0.48863636363636365, "Filipino,Malay": 0.5738636363636364, "Spanish,Malay": 0.5568181818181818 }, "3_combine": { "English,Vietnamese,Chinese": 0.2784090909090909, "English,Vietnamese,Indonesian": 0.32386363636363635, "English,Vietnamese,Filipino": 0.23863636363636365, "English,Vietnamese,Spanish": 0.3522727272727273, "English,Vietnamese,Malay": 0.30113636363636365, "English,Chinese,Indonesian": 0.3068181818181818, "English,Chinese,Filipino": 0.2784090909090909, "English,Chinese,Spanish": 0.32386363636363635, "English,Chinese,Malay": 0.3181818181818182, "English,Indonesian,Filipino": 0.2840909090909091, "English,Indonesian,Spanish": 0.36363636363636365, "English,Indonesian,Malay": 0.35795454545454547, "English,Filipino,Spanish": 0.2897727272727273, "English,Filipino,Malay": 0.2840909090909091, "English,Spanish,Malay": 0.36363636363636365, "Vietnamese,Chinese,Indonesian": 0.3409090909090909, "Vietnamese,Chinese,Filipino": 0.2897727272727273, "Vietnamese,Chinese,Spanish": 0.3125, "Vietnamese,Chinese,Malay": 0.35795454545454547, "Vietnamese,Indonesian,Filipino": 0.3181818181818182, "Vietnamese,Indonesian,Spanish": 0.3693181818181818, "Vietnamese,Indonesian,Malay": 0.3977272727272727, "Vietnamese,Filipino,Spanish": 0.3181818181818182, "Vietnamese,Filipino,Malay": 0.3409090909090909, "Vietnamese,Spanish,Malay": 0.38636363636363635, "Chinese,Indonesian,Filipino": 0.32386363636363635, "Chinese,Indonesian,Spanish": 0.3068181818181818, "Chinese,Indonesian,Malay": 0.38636363636363635, "Chinese,Filipino,Spanish": 0.3068181818181818, "Chinese,Filipino,Malay": 0.3693181818181818, "Chinese,Spanish,Malay": 0.35795454545454547, "Indonesian,Filipino,Spanish": 0.3352272727272727, "Indonesian,Filipino,Malay": 0.4034090909090909, "Indonesian,Spanish,Malay": 0.4034090909090909, "Filipino,Spanish,Malay": 0.35795454545454547 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.2215909090909091, "English,Vietnamese,Chinese,Filipino": 0.1875, "English,Vietnamese,Chinese,Spanish": 0.22727272727272727, "English,Vietnamese,Chinese,Malay": 0.21022727272727273, "English,Vietnamese,Indonesian,Filipino": 0.19886363636363635, "English,Vietnamese,Indonesian,Spanish": 0.25, "English,Vietnamese,Indonesian,Malay": 0.24431818181818182, "English,Vietnamese,Filipino,Spanish": 0.20454545454545456, "English,Vietnamese,Filipino,Malay": 0.19886363636363635, "English,Vietnamese,Spanish,Malay": 0.2556818181818182, "English,Chinese,Indonesian,Filipino": 0.20454545454545456, "English,Chinese,Indonesian,Spanish": 0.2215909090909091, "English,Chinese,Indonesian,Malay": 0.23295454545454544, "English,Chinese,Filipino,Spanish": 0.21022727272727273, "English,Chinese,Filipino,Malay": 0.2159090909090909, "English,Chinese,Spanish,Malay": 0.2556818181818182, "English,Indonesian,Filipino,Spanish": 0.2215909090909091, "English,Indonesian,Filipino,Malay": 0.23863636363636365, "English,Indonesian,Spanish,Malay": 0.2784090909090909, "English,Filipino,Spanish,Malay": 0.22727272727272727, "Vietnamese,Chinese,Indonesian,Filipino": 0.22727272727272727, "Vietnamese,Chinese,Indonesian,Spanish": 0.26136363636363635, "Vietnamese,Chinese,Indonesian,Malay": 0.2784090909090909, "Vietnamese,Chinese,Filipino,Spanish": 0.2215909090909091, "Vietnamese,Chinese,Filipino,Malay": 0.24431818181818182, "Vietnamese,Chinese,Spanish,Malay": 0.26704545454545453, "Vietnamese,Indonesian,Filipino,Spanish": 0.26704545454545453, "Vietnamese,Indonesian,Filipino,Malay": 0.2727272727272727, "Vietnamese,Indonesian,Spanish,Malay": 0.3068181818181818, "Vietnamese,Filipino,Spanish,Malay": 0.2784090909090909, "Chinese,Indonesian,Filipino,Spanish": 0.2159090909090909, "Chinese,Indonesian,Filipino,Malay": 0.2784090909090909, "Chinese,Indonesian,Spanish,Malay": 0.26704545454545453, "Chinese,Filipino,Spanish,Malay": 0.2556818181818182, "Indonesian,Filipino,Spanish,Malay": 0.2784090909090909 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.1590909090909091, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.1875, "English,Vietnamese,Chinese,Indonesian,Malay": 0.17613636363636365, "English,Vietnamese,Chinese,Filipino,Spanish": 0.16477272727272727, "English,Vietnamese,Chinese,Filipino,Malay": 0.1590909090909091, "English,Vietnamese,Chinese,Spanish,Malay": 0.19318181818181818, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.17613636363636365, "English,Vietnamese,Indonesian,Filipino,Malay": 0.17613636363636365, "English,Vietnamese,Indonesian,Spanish,Malay": 0.21022727272727273, "English,Vietnamese,Filipino,Spanish,Malay": 0.18181818181818182, "English,Chinese,Indonesian,Filipino,Spanish": 0.1590909090909091, "English,Chinese,Indonesian,Filipino,Malay": 0.17613636363636365, "English,Chinese,Indonesian,Spanish,Malay": 0.19318181818181818, "English,Chinese,Filipino,Spanish,Malay": 0.17613636363636365, "English,Indonesian,Filipino,Spanish,Malay": 0.1875, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.19318181818181818, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.20454545454545456, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.22727272727272727, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.20454545454545456, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.23863636363636365, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.19886363636363635 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.14204545454545456, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.14204545454545456, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.16477272727272727, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.14772727272727273, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.1590909090909091, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.18181818181818182 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818 } }, "AC3_2": 0.40937542364264756, "AC3_3": 0.33600759139237873, "AC3_4": 0.2816460756781233, "AC3_5": 0.2417497799482653, "AC3_6": 0.21205357138560268, "AC3_7": 0.18868739201511867 } }, "sg_eval": { "prompt_1": { "accuracy": 0.4368932038834951 }, "prompt_2": { "accuracy": 0.4077669902912621 }, "prompt_3": { "accuracy": 0.42718446601941745 }, "prompt_4": { "accuracy": 0.4368932038834951 }, "prompt_5": { "accuracy": 0.4368932038834951 } }, "cn_eval": { "prompt_1": { "accuracy": 0.3047619047619048 }, "prompt_2": { "accuracy": 0.2571428571428571 }, "prompt_3": { "accuracy": 0.3333333333333333 }, "prompt_4": { "accuracy": 0.2571428571428571 }, "prompt_5": { "accuracy": 0.3047619047619048 } }, "us_eval": { "prompt_1": { "accuracy": 0.5420560747663551 }, "prompt_2": { "accuracy": 0.4485981308411215 }, "prompt_3": { "accuracy": 0.4766355140186916 }, "prompt_4": { "accuracy": 0.4205607476635514 }, "prompt_5": { "accuracy": 0.5327102803738317 } }, "ph_eval": { "prompt_1": { "accuracy": 0.35, "category_acc": { "brand": 0.3, "demographics": 0.0, "biology": 0.5, "history": 0.3333333333333333, "literature": 0.1, "politics": 0.4, "culture": 0.4, "film": 0.3, "law": 0.4, "geography": 0.6 } }, "prompt_2": { "accuracy": 0.28, "category_acc": { "brand": 0.2, "demographics": 0.2, "biology": 0.3, "history": 0.26666666666666666, "literature": 0.1, "politics": 0.6, "culture": 0.3, "film": 0.1, "law": 0.3, "geography": 0.4 } }, "prompt_3": { "accuracy": 0.32, "category_acc": { "brand": 0.1, "demographics": 0.0, "biology": 0.4, "history": 0.26666666666666666, "literature": 0.3, "politics": 0.5, "culture": 0.2, "film": 0.3, "law": 0.3, "geography": 0.7 } }, "prompt_4": { "accuracy": 0.35, "category_acc": { "brand": 0.2, "demographics": 0.0, "biology": 0.4, "history": 0.26666666666666666, "literature": 0.0, "politics": 0.5, "culture": 0.4, "film": 0.5, "law": 0.3, "geography": 0.8 } }, "prompt_5": { "accuracy": 0.37, "category_acc": { "brand": 0.3, "demographics": 0.2, "biology": 0.5, "history": 0.26666666666666666, "literature": 0.4, "politics": 0.7, "culture": 0.3, "film": 0.2, "law": 0.2, "geography": 0.6 } } }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": { "bleu_score": 0.19373624451910515 }, "prompt_3": { "bleu_score": 0.1964270980872178 }, "prompt_4": { "bleu_score": 0.17192589562871752 }, "prompt_5": { "bleu_score": 0.16258965500852568 } }, "flores_vie2eng": { "prompt_1": { "bleu_score": 0.07611358656798692 }, "prompt_2": { "bleu_score": 0.1505276369962426 }, "prompt_3": { "bleu_score": 0.1472413472807031 }, "prompt_4": { "bleu_score": 0.12943833789303555 }, "prompt_5": { "bleu_score": 0.13666592724690585 } }, "flores_zho2eng": { "prompt_1": { "bleu_score": 0.06335447509619128 }, "prompt_2": { "bleu_score": 0.11606202497694892 }, "prompt_3": { "bleu_score": 0.1132366541993831 }, "prompt_4": { "bleu_score": 0.09974517382489148 }, "prompt_5": { "bleu_score": 0.15550715308466337 } }, "flores_zsm2eng": { "prompt_1": { "bleu_score": 0.09680686879468663 }, "prompt_2": { "bleu_score": 0.1670411439704822 }, "prompt_3": { "bleu_score": 0.16381439862453073 }, "prompt_4": { "bleu_score": 0.148689576132622 }, "prompt_5": { "bleu_score": 0.14101463475586926 } }, "mmlu": { "prompt_1": { "accuracy": 0.44340723453908987 }, "prompt_2": { "accuracy": 0.44340723453908987 }, "prompt_3": { "accuracy": 0.4200700116686114 }, "prompt_4": { "accuracy": 0.4177362893815636 }, "prompt_5": { "accuracy": 0.4224037339556593 } }, "mmlu_full": { "prompt_1": { "accuracy": 0.4365391490883089, "category_acc": { "high_school_european_history": 0.5304878048780488, "business_ethics": 0.47474747474747475, "clinical_knowledge": 0.4772727272727273, "medical_genetics": 0.5050505050505051, "high_school_us_history": 0.5763546798029556, "high_school_physics": 0.30666666666666664, "high_school_world_history": 0.5805084745762712, "virology": 0.4, "high_school_microeconomics": 0.3924050632911392, "econometrics": 0.3008849557522124, "college_computer_science": 0.3333333333333333, "high_school_biology": 0.4854368932038835, "abstract_algebra": 0.31313131313131315, "professional_accounting": 0.33807829181494664, "philosophy": 0.5193548387096775, "professional_medicine": 0.39114391143911437, "nutrition": 0.4524590163934426, "global_facts": 0.43434343434343436, "machine_learning": 0.32432432432432434, "security_studies": 0.36065573770491804, "public_relations": 0.45871559633027525, "professional_psychology": 0.42716857610474634, "prehistory": 0.5139318885448917, "anatomy": 0.47761194029850745, "human_sexuality": 0.43846153846153846, "college_medicine": 0.4127906976744186, "high_school_government_and_politics": 0.5885416666666666, "college_chemistry": 0.2727272727272727, "logical_fallacies": 0.5, "high_school_geography": 0.5583756345177665, "elementary_mathematics": 0.33156498673740054, "human_aging": 0.5045045045045045, "college_mathematics": 0.2222222222222222, "high_school_psychology": 0.6047794117647058, "formal_logic": 0.336, "high_school_statistics": 0.2744186046511628, "international_law": 0.5416666666666666, "high_school_mathematics": 0.23048327137546468, "high_school_computer_science": 0.36363636363636365, "conceptual_physics": 0.39316239316239315, "miscellaneous": 0.6368286445012787, "high_school_chemistry": 0.3069306930693069, "marketing": 0.6223175965665236, "professional_law": 0.3561643835616438, "management": 0.5980392156862745, "college_physics": 0.25742574257425743, "jurisprudence": 0.4485981308411215, "world_religions": 0.6647058823529411, "sociology": 0.59, "us_foreign_policy": 0.6262626262626263, "high_school_macroeconomics": 0.37275064267352187, "computer_security": 0.5858585858585859, "moral_scenarios": 0.25279642058165547, "moral_disputes": 0.45217391304347826, "electrical_engineering": 0.4375, "astronomy": 0.5033112582781457, "college_biology": 0.5174825174825175 } }, "prompt_2": { "accuracy": 0.4371111905613157, "category_acc": { "high_school_european_history": 0.5426829268292683, "business_ethics": 0.46464646464646464, "clinical_knowledge": 0.45454545454545453, "medical_genetics": 0.47474747474747475, "high_school_us_history": 0.5615763546798029, "high_school_physics": 0.28, "high_school_world_history": 0.5889830508474576, "virology": 0.37575757575757573, "high_school_microeconomics": 0.4177215189873418, "econometrics": 0.34513274336283184, "college_computer_science": 0.36363636363636365, "high_school_biology": 0.46601941747572817, "abstract_algebra": 0.3333333333333333, "professional_accounting": 0.35587188612099646, "philosophy": 0.5032258064516129, "professional_medicine": 0.41697416974169743, "nutrition": 0.46557377049180326, "global_facts": 0.40404040404040403, "machine_learning": 0.3333333333333333, "security_studies": 0.4057377049180328, "public_relations": 0.5045871559633027, "professional_psychology": 0.42716857610474634, "prehistory": 0.5294117647058824, "anatomy": 0.44029850746268656, "human_sexuality": 0.4307692307692308, "college_medicine": 0.3953488372093023, "high_school_government_and_politics": 0.6458333333333334, "college_chemistry": 0.25252525252525254, "logical_fallacies": 0.5432098765432098, "high_school_geography": 0.5888324873096447, "elementary_mathematics": 0.3448275862068966, "human_aging": 0.4954954954954955, "college_mathematics": 0.2727272727272727, "high_school_psychology": 0.5735294117647058, "formal_logic": 0.296, "high_school_statistics": 0.28837209302325584, "international_law": 0.5583333333333333, "high_school_mathematics": 0.25650557620817843, "high_school_computer_science": 0.37373737373737376, "conceptual_physics": 0.34615384615384615, "miscellaneous": 0.6432225063938619, "high_school_chemistry": 0.32673267326732675, "marketing": 0.6437768240343348, "professional_law": 0.35290280495759946, "management": 0.5784313725490197, "college_physics": 0.19801980198019803, "jurisprudence": 0.4766355140186916, "world_religions": 0.6352941176470588, "sociology": 0.625, "us_foreign_policy": 0.6262626262626263, "high_school_macroeconomics": 0.40359897172236503, "computer_security": 0.494949494949495, "moral_scenarios": 0.22595078299776286, "moral_disputes": 0.48695652173913045, "electrical_engineering": 0.4236111111111111, "astronomy": 0.45695364238410596, "college_biology": 0.48951048951048953 } }, "prompt_3": { "accuracy": 0.42924562030747226, "category_acc": { "high_school_european_history": 0.5487804878048781, "business_ethics": 0.48484848484848486, "clinical_knowledge": 0.4772727272727273, "medical_genetics": 0.43434343434343436, "high_school_us_history": 0.5665024630541872, "high_school_physics": 0.24, "high_school_world_history": 0.5805084745762712, "virology": 0.44242424242424244, "high_school_microeconomics": 0.3881856540084388, "econometrics": 0.30973451327433627, "college_computer_science": 0.35353535353535354, "high_school_biology": 0.459546925566343, "abstract_algebra": 0.29292929292929293, "professional_accounting": 0.33451957295373663, "philosophy": 0.5225806451612903, "professional_medicine": 0.4059040590405904, "nutrition": 0.46885245901639344, "global_facts": 0.46464646464646464, "machine_learning": 0.32432432432432434, "security_studies": 0.3319672131147541, "public_relations": 0.48623853211009177, "professional_psychology": 0.42225859247135844, "prehistory": 0.4984520123839009, "anatomy": 0.44029850746268656, "human_sexuality": 0.38461538461538464, "college_medicine": 0.4011627906976744, "high_school_government_and_politics": 0.6197916666666666, "college_chemistry": 0.26262626262626265, "logical_fallacies": 0.5061728395061729, "high_school_geography": 0.5329949238578681, "elementary_mathematics": 0.3793103448275862, "human_aging": 0.527027027027027, "college_mathematics": 0.2222222222222222, "high_school_psychology": 0.5386029411764706, "formal_logic": 0.36, "high_school_statistics": 0.2744186046511628, "international_law": 0.5666666666666667, "high_school_mathematics": 0.24907063197026022, "high_school_computer_science": 0.35353535353535354, "conceptual_physics": 0.37606837606837606, "miscellaneous": 0.6304347826086957, "high_school_chemistry": 0.30198019801980197, "marketing": 0.6008583690987125, "professional_law": 0.35551206784083494, "management": 0.5392156862745098, "college_physics": 0.15841584158415842, "jurisprudence": 0.5046728971962616, "world_religions": 0.6647058823529411, "sociology": 0.575, "us_foreign_policy": 0.5656565656565656, "high_school_macroeconomics": 0.38046272493573263, "computer_security": 0.5151515151515151, "moral_scenarios": 0.24496644295302014, "moral_disputes": 0.4811594202898551, "electrical_engineering": 0.3819444444444444, "astronomy": 0.41721854304635764, "college_biology": 0.42657342657342656 } }, "prompt_4": { "accuracy": 0.4173042545584555, "category_acc": { "high_school_european_history": 0.5487804878048781, "business_ethics": 0.43434343434343436, "clinical_knowledge": 0.42424242424242425, "medical_genetics": 0.41414141414141414, "high_school_us_history": 0.5615763546798029, "high_school_physics": 0.26666666666666666, "high_school_world_history": 0.6059322033898306, "virology": 0.38181818181818183, "high_school_microeconomics": 0.38396624472573837, "econometrics": 0.30973451327433627, "college_computer_science": 0.3333333333333333, "high_school_biology": 0.45307443365695793, "abstract_algebra": 0.23232323232323232, "professional_accounting": 0.36298932384341637, "philosophy": 0.4645161290322581, "professional_medicine": 0.36531365313653136, "nutrition": 0.419672131147541, "global_facts": 0.45454545454545453, "machine_learning": 0.3333333333333333, "security_studies": 0.3401639344262295, "public_relations": 0.4954128440366973, "professional_psychology": 0.41407528641571195, "prehistory": 0.47368421052631576, "anatomy": 0.43283582089552236, "human_sexuality": 0.3769230769230769, "college_medicine": 0.3372093023255814, "high_school_government_and_politics": 0.5677083333333334, "college_chemistry": 0.24242424242424243, "logical_fallacies": 0.4691358024691358, "high_school_geography": 0.5380710659898477, "elementary_mathematics": 0.41114058355437666, "human_aging": 0.45495495495495497, "college_mathematics": 0.21212121212121213, "high_school_psychology": 0.5330882352941176, "formal_logic": 0.336, "high_school_statistics": 0.2744186046511628, "international_law": 0.5, "high_school_mathematics": 0.2527881040892193, "high_school_computer_science": 0.3939393939393939, "conceptual_physics": 0.36324786324786323, "miscellaneous": 0.6023017902813299, "high_school_chemistry": 0.2871287128712871, "marketing": 0.5579399141630901, "professional_law": 0.35551206784083494, "management": 0.5588235294117647, "college_physics": 0.3069306930693069, "jurisprudence": 0.4672897196261682, "world_religions": 0.6352941176470588, "sociology": 0.545, "us_foreign_policy": 0.5757575757575758, "high_school_macroeconomics": 0.37017994858611825, "computer_security": 0.5252525252525253, "moral_scenarios": 0.26733780760626397, "moral_disputes": 0.43768115942028984, "electrical_engineering": 0.3333333333333333, "astronomy": 0.3973509933774834, "college_biology": 0.4195804195804196 } }, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": { "accuracy": 0.3046594982078853 }, "prompt_3": { "accuracy": 0.2974910394265233 }, "prompt_4": { "accuracy": 0.3010752688172043 }, "prompt_5": { "accuracy": 0.2867383512544803 } }, "cmmlu_full": { "prompt_1": { "accuracy": 0.3115178725608703, "category_acc": { "agronomy": 0.2958579881656805, "anatomy": 0.27702702702702703, "ancient_chinese": 0.27439024390243905, "arts": 0.31875, "astronomy": 0.3151515151515151, "business_ethics": 0.2535885167464115, "chinese_civil_service_exam": 0.30625, "chinese_driving_rule": 0.3969465648854962, "chinese_food_culture": 0.29411764705882354, "chinese_foreign_policy": 0.34579439252336447, "chinese_history": 0.3684210526315789, "chinese_literature": 0.28431372549019607, "chinese_teacher_qualification": 0.3407821229050279, "clinical_knowledge": 0.26582278481012656, "college_actuarial_science": 0.22641509433962265, "college_education": 0.34579439252336447, "college_engineering_hydrology": 0.3018867924528302, "college_law": 0.25925925925925924, "college_mathematics": 0.19047619047619047, "college_medical_statistics": 0.3584905660377358, "college_medicine": 0.304029304029304, "computer_science": 0.3627450980392157, "computer_security": 0.2807017543859649, "conceptual_physics": 0.3197278911564626, "construction_project_management": 0.26618705035971224, "economics": 0.31446540880503143, "education": 0.27607361963190186, "electrical_engineering": 0.3430232558139535, "elementary_chinese": 0.30952380952380953, "elementary_commonsense": 0.2878787878787879, "elementary_information_and_technology": 0.40336134453781514, "elementary_mathematics": 0.2782608695652174, "ethnology": 0.31851851851851853, "food_science": 0.32867132867132864, "genetics": 0.2556818181818182, "global_facts": 0.3691275167785235, "high_school_biology": 0.2485207100591716, "high_school_chemistry": 0.2878787878787879, "high_school_geography": 0.3474576271186441, "high_school_mathematics": 0.1951219512195122, "high_school_physics": 0.2545454545454545, "high_school_politics": 0.2867132867132867, "human_sexuality": 0.30158730158730157, "international_law": 0.31891891891891894, "journalism": 0.36046511627906974, "jurisprudence": 0.35766423357664234, "legal_and_moral_basis": 0.49065420560747663, "logical": 0.2926829268292683, "machine_learning": 0.18032786885245902, "management": 0.3476190476190476, "marketing": 0.4, "marxist_theory": 0.328042328042328, "modern_chinese": 0.25862068965517243, "nutrition": 0.25517241379310346, "philosophy": 0.45714285714285713, "professional_accounting": 0.29714285714285715, "professional_law": 0.2985781990521327, "professional_medicine": 0.2632978723404255, "professional_psychology": 0.3275862068965517, "public_relations": 0.2988505747126437, "security_study": 0.31851851851851853, "sociology": 0.37168141592920356, "sports_science": 0.3151515151515151, "traditional_chinese_medicine": 0.22702702702702704, "virology": 0.2781065088757396, "world_history": 0.3416149068322981, "world_religions": 0.325 } }, "prompt_2": { "accuracy": 0.31548955275427387, "category_acc": { "agronomy": 0.27218934911242604, "anatomy": 0.25675675675675674, "ancient_chinese": 0.23170731707317074, "arts": 0.34375, "astronomy": 0.3333333333333333, "business_ethics": 0.27751196172248804, "chinese_civil_service_exam": 0.275, "chinese_driving_rule": 0.40458015267175573, "chinese_food_culture": 0.25735294117647056, "chinese_foreign_policy": 0.32710280373831774, "chinese_history": 0.39009287925696595, "chinese_literature": 0.3235294117647059, "chinese_teacher_qualification": 0.3854748603351955, "clinical_knowledge": 0.27848101265822783, "college_actuarial_science": 0.2358490566037736, "college_education": 0.3644859813084112, "college_engineering_hydrology": 0.37735849056603776, "college_law": 0.24074074074074073, "college_mathematics": 0.24761904761904763, "college_medical_statistics": 0.32075471698113206, "college_medicine": 0.3076923076923077, "computer_science": 0.3333333333333333, "computer_security": 0.3567251461988304, "conceptual_physics": 0.36054421768707484, "construction_project_management": 0.26618705035971224, "economics": 0.27044025157232704, "education": 0.32515337423312884, "electrical_engineering": 0.3023255813953488, "elementary_chinese": 0.3253968253968254, "elementary_commonsense": 0.2777777777777778, "elementary_information_and_technology": 0.42016806722689076, "elementary_mathematics": 0.28695652173913044, "ethnology": 0.3037037037037037, "food_science": 0.3706293706293706, "genetics": 0.26704545454545453, "global_facts": 0.3221476510067114, "high_school_biology": 0.30177514792899407, "high_school_chemistry": 0.29545454545454547, "high_school_geography": 0.3389830508474576, "high_school_mathematics": 0.25, "high_school_physics": 0.2818181818181818, "high_school_politics": 0.3356643356643357, "human_sexuality": 0.31746031746031744, "international_law": 0.2918918918918919, "journalism": 0.32558139534883723, "jurisprudence": 0.3381995133819951, "legal_and_moral_basis": 0.5, "logical": 0.23577235772357724, "machine_learning": 0.21311475409836064, "management": 0.29523809523809524, "marketing": 0.40555555555555556, "marxist_theory": 0.4126984126984127, "modern_chinese": 0.21551724137931033, "nutrition": 0.25517241379310346, "philosophy": 0.4, "professional_accounting": 0.33714285714285713, "professional_law": 0.33175355450236965, "professional_medicine": 0.23404255319148937, "professional_psychology": 0.33620689655172414, "public_relations": 0.3045977011494253, "security_study": 0.37037037037037035, "sociology": 0.3407079646017699, "sports_science": 0.30303030303030304, "traditional_chinese_medicine": 0.22702702702702704, "virology": 0.28402366863905326, "world_history": 0.2857142857142857, "world_religions": 0.3625 } }, "prompt_3": { "accuracy": 0.3228285270246935, "category_acc": { "agronomy": 0.34911242603550297, "anatomy": 0.2635135135135135, "ancient_chinese": 0.31097560975609756, "arts": 0.33125, "astronomy": 0.296969696969697, "business_ethics": 0.291866028708134, "chinese_civil_service_exam": 0.2625, "chinese_driving_rule": 0.4122137404580153, "chinese_food_culture": 0.27941176470588236, "chinese_foreign_policy": 0.37383177570093457, "chinese_history": 0.3684210526315789, "chinese_literature": 0.29901960784313725, "chinese_teacher_qualification": 0.37988826815642457, "clinical_knowledge": 0.270042194092827, "college_actuarial_science": 0.1792452830188679, "college_education": 0.4205607476635514, "college_engineering_hydrology": 0.36792452830188677, "college_law": 0.23148148148148148, "college_mathematics": 0.21904761904761905, "college_medical_statistics": 0.32075471698113206, "college_medicine": 0.2967032967032967, "computer_science": 0.3627450980392157, "computer_security": 0.30409356725146197, "conceptual_physics": 0.3877551020408163, "construction_project_management": 0.2805755395683453, "economics": 0.32075471698113206, "education": 0.3558282208588957, "electrical_engineering": 0.32558139534883723, "elementary_chinese": 0.25, "elementary_commonsense": 0.29292929292929293, "elementary_information_and_technology": 0.41596638655462187, "elementary_mathematics": 0.2956521739130435, "ethnology": 0.3037037037037037, "food_science": 0.4125874125874126, "genetics": 0.2727272727272727, "global_facts": 0.3691275167785235, "high_school_biology": 0.2781065088757396, "high_school_chemistry": 0.26515151515151514, "high_school_geography": 0.3135593220338983, "high_school_mathematics": 0.2682926829268293, "high_school_physics": 0.2909090909090909, "high_school_politics": 0.35664335664335667, "human_sexuality": 0.3412698412698413, "international_law": 0.32432432432432434, "journalism": 0.3372093023255814, "jurisprudence": 0.36009732360097324, "legal_and_moral_basis": 0.5607476635514018, "logical": 0.2926829268292683, "machine_learning": 0.23770491803278687, "management": 0.30952380952380953, "marketing": 0.4, "marxist_theory": 0.38095238095238093, "modern_chinese": 0.23275862068965517, "nutrition": 0.32413793103448274, "philosophy": 0.4380952380952381, "professional_accounting": 0.30857142857142855, "professional_law": 0.33649289099526064, "professional_medicine": 0.2074468085106383, "professional_psychology": 0.38362068965517243, "public_relations": 0.3505747126436782, "security_study": 0.2962962962962963, "sociology": 0.34513274336283184, "sports_science": 0.3212121212121212, "traditional_chinese_medicine": 0.2594594594594595, "virology": 0.3136094674556213, "world_history": 0.2795031055900621, "world_religions": 0.3625 } }, "prompt_4": { "accuracy": 0.30694180625107925, "category_acc": { "agronomy": 0.28994082840236685, "anatomy": 0.27702702702702703, "ancient_chinese": 0.2804878048780488, "arts": 0.34375, "astronomy": 0.3333333333333333, "business_ethics": 0.3014354066985646, "chinese_civil_service_exam": 0.25625, "chinese_driving_rule": 0.42748091603053434, "chinese_food_culture": 0.3382352941176471, "chinese_foreign_policy": 0.38317757009345793, "chinese_history": 0.3591331269349845, "chinese_literature": 0.29901960784313725, "chinese_teacher_qualification": 0.36312849162011174, "clinical_knowledge": 0.2489451476793249, "college_actuarial_science": 0.24528301886792453, "college_education": 0.29906542056074764, "college_engineering_hydrology": 0.3113207547169811, "college_law": 0.24074074074074073, "college_mathematics": 0.2, "college_medical_statistics": 0.2169811320754717, "college_medicine": 0.25274725274725274, "computer_science": 0.3137254901960784, "computer_security": 0.30994152046783624, "conceptual_physics": 0.3129251700680272, "construction_project_management": 0.302158273381295, "economics": 0.3270440251572327, "education": 0.294478527607362, "electrical_engineering": 0.29069767441860467, "elementary_chinese": 0.2857142857142857, "elementary_commonsense": 0.30808080808080807, "elementary_information_and_technology": 0.40756302521008403, "elementary_mathematics": 0.2608695652173913, "ethnology": 0.31851851851851853, "food_science": 0.3356643356643357, "genetics": 0.2784090909090909, "global_facts": 0.3288590604026846, "high_school_biology": 0.2603550295857988, "high_school_chemistry": 0.30303030303030304, "high_school_geography": 0.288135593220339, "high_school_mathematics": 0.22560975609756098, "high_school_physics": 0.2909090909090909, "high_school_politics": 0.3356643356643357, "human_sexuality": 0.30952380952380953, "international_law": 0.3081081081081081, "journalism": 0.3546511627906977, "jurisprudence": 0.32116788321167883, "legal_and_moral_basis": 0.514018691588785, "logical": 0.2845528455284553, "machine_learning": 0.20491803278688525, "management": 0.2761904761904762, "marketing": 0.32222222222222224, "marxist_theory": 0.3915343915343915, "modern_chinese": 0.2413793103448276, "nutrition": 0.31724137931034485, "philosophy": 0.42857142857142855, "professional_accounting": 0.29714285714285715, "professional_law": 0.3127962085308057, "professional_medicine": 0.22606382978723405, "professional_psychology": 0.33620689655172414, "public_relations": 0.29310344827586204, "security_study": 0.2518518518518518, "sociology": 0.3141592920353982, "sports_science": 0.3151515151515151, "traditional_chinese_medicine": 0.23783783783783785, "virology": 0.3076923076923077, "world_history": 0.30434782608695654, "world_religions": 0.375 } }, "prompt_5": { "accuracy": 0.31402175790018994, "category_acc": { "agronomy": 0.2958579881656805, "anatomy": 0.2635135135135135, "ancient_chinese": 0.2621951219512195, "arts": 0.3125, "astronomy": 0.296969696969697, "business_ethics": 0.3349282296650718, "chinese_civil_service_exam": 0.30625, "chinese_driving_rule": 0.40458015267175573, "chinese_food_culture": 0.2647058823529412, "chinese_foreign_policy": 0.3925233644859813, "chinese_history": 0.3715170278637771, "chinese_literature": 0.3088235294117647, "chinese_teacher_qualification": 0.37988826815642457, "clinical_knowledge": 0.28270042194092826, "college_actuarial_science": 0.1792452830188679, "college_education": 0.35514018691588783, "college_engineering_hydrology": 0.33962264150943394, "college_law": 0.2222222222222222, "college_mathematics": 0.18095238095238095, "college_medical_statistics": 0.3490566037735849, "college_medicine": 0.315018315018315, "computer_science": 0.3627450980392157, "computer_security": 0.3216374269005848, "conceptual_physics": 0.3877551020408163, "construction_project_management": 0.23741007194244604, "economics": 0.29559748427672955, "education": 0.3006134969325153, "electrical_engineering": 0.32558139534883723, "elementary_chinese": 0.25, "elementary_commonsense": 0.30303030303030304, "elementary_information_and_technology": 0.36554621848739494, "elementary_mathematics": 0.30869565217391304, "ethnology": 0.3037037037037037, "food_science": 0.3706293706293706, "genetics": 0.2727272727272727, "global_facts": 0.38926174496644295, "high_school_biology": 0.28402366863905326, "high_school_chemistry": 0.20454545454545456, "high_school_geography": 0.3220338983050847, "high_school_mathematics": 0.21341463414634146, "high_school_physics": 0.2818181818181818, "high_school_politics": 0.32167832167832167, "human_sexuality": 0.3253968253968254, "international_law": 0.32972972972972975, "journalism": 0.313953488372093, "jurisprudence": 0.35036496350364965, "legal_and_moral_basis": 0.5, "logical": 0.2764227642276423, "machine_learning": 0.2786885245901639, "management": 0.3, "marketing": 0.39444444444444443, "marxist_theory": 0.3862433862433862, "modern_chinese": 0.2672413793103448, "nutrition": 0.3103448275862069, "philosophy": 0.4, "professional_accounting": 0.3028571428571429, "professional_law": 0.3033175355450237, "professional_medicine": 0.2127659574468085, "professional_psychology": 0.33620689655172414, "public_relations": 0.3045977011494253, "security_study": 0.2962962962962963, "sociology": 0.3407079646017699, "sports_science": 0.2787878787878788, "traditional_chinese_medicine": 0.2918918918918919, "virology": 0.2958579881656805, "world_history": 0.32919254658385094, "world_religions": 0.3375 } } }, "zbench": { "prompt_1": { "accuracy": 0.21212121212121213 }, "prompt_2": { "accuracy": 0.24242424242424243 }, "prompt_3": { "accuracy": 0.21212121212121213 }, "prompt_4": { "accuracy": 0.24242424242424243 }, "prompt_5": { "accuracy": 0.24242424242424243 } }, "ind_emotion": { "prompt_1": { "accuracy": 0.3931818181818182 }, "prompt_2": { "accuracy": 0.40454545454545454 }, "prompt_3": { "accuracy": 0.3931818181818182 }, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": { "rouge1": 0.2916103150813641, "rouge2": 0.10525979858778076, "rougeL": 0.21829240386382892, "avg_rouge": 0.20505417251099126 }, "prompt_2": { "rouge1": 0.3196708229238851, "rouge2": 0.12001738828619245, "rougeL": 0.23976427412056603, "avg_rouge": 0.22648416177688122 }, "prompt_3": { "rouge1": 0.3189803205740337, "rouge2": 0.1178512379749291, "rougeL": 0.24077847034893995, "avg_rouge": 0.22587000963263426 }, "prompt_4": { "rouge1": 0.3188029985347647, "rouge2": 0.11961123119155025, "rougeL": 0.2407341727607601, "avg_rouge": 0.22638280082902504 }, "prompt_5": { "rouge1": 0.3259632337286004, "rouge2": 0.1156636436621222, "rougeL": 0.24355755950429206, "avg_rouge": 0.22839481229833822 } }, "dialogsum": { "prompt_1": { "rouge1": 0.2144204431797328, "rouge2": 0.05441713180207946, "rougeL": 0.15257149078102575, "avg_rouge": 0.14046968858761266 }, "prompt_2": { "rouge1": 0.22126376283018281, "rouge2": 0.05519386728126753, "rougeL": 0.1573029997600269, "avg_rouge": 0.14458687662382574 }, "prompt_3": { "rouge1": 0.21960987693146633, "rouge2": 0.054773669944745355, "rougeL": 0.1554536415313014, "avg_rouge": 0.14327906280250435 }, "prompt_4": { "rouge1": 0.21323836243498684, "rouge2": 0.05457124324104029, "rougeL": 0.15066676607418653, "avg_rouge": 0.1394921239167379 }, "prompt_5": { "rouge1": 0.22277568686099017, "rouge2": 0.05661362557889607, "rougeL": 0.16029410846825173, "avg_rouge": 0.14656114030271264 } }, "sst2": { "prompt_1": { "accuracy": 0.5137614678899083 }, "prompt_2": { "accuracy": 0.5860091743119266 }, "prompt_3": { "accuracy": 0.5653669724770642 }, "prompt_4": { "accuracy": 0.5435779816513762 }, "prompt_5": { "accuracy": 0.7626146788990825 } }, "cola": { "prompt_1": { "accuracy": 0.7612655800575263 }, "prompt_2": { "accuracy": 0.6941514860977949 }, "prompt_3": { "accuracy": 0.7219558964525408 }, "prompt_4": { "accuracy": 0.6912751677852349 }, "prompt_5": { "accuracy": 0.7804410354745925 } }, "qqp": { "prompt_1": { "accuracy": 0.49920850853326737 }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": { "overall_acc": 0.2552380952380952, "language_acc": { "Malay": 0.2, "English": 0.26, "Vietnamese": 0.32666666666666666, "Spanish": 0.28, "Indonesian": 0.24666666666666667, "Filipino": 0.25333333333333335, "Chinese": 0.22 }, "consistency_score_2": 0.25047619047619046, "consistency_score_3": 0.06457142857142857, "consistency_score_4": 0.015428571428571423, "consistency_score_5": 0.0028571428571428576, "consistency_score_6": 0.0, "consistency_score_7": 0.0, "detailed_consistency_score": { "2_combine": { "Malay,English": 0.26, "Malay,Vietnamese": 0.30666666666666664, "Malay,Spanish": 0.22666666666666666, "Malay,Indonesian": 0.26666666666666666, "Malay,Filipino": 0.24666666666666667, "Malay,Chinese": 0.2, "English,Vietnamese": 0.22666666666666666, "English,Spanish": 0.2733333333333333, "English,Indonesian": 0.3, "English,Filipino": 0.24, "English,Chinese": 0.23333333333333334, "Vietnamese,Spanish": 0.24, "Vietnamese,Indonesian": 0.21333333333333335, "Vietnamese,Filipino": 0.2866666666666667, "Vietnamese,Chinese": 0.28, "Spanish,Indonesian": 0.24666666666666667, "Spanish,Filipino": 0.29333333333333333, "Spanish,Chinese": 0.24, "Indonesian,Filipino": 0.22, "Indonesian,Chinese": 0.24, "Filipino,Chinese": 0.22 }, "3_combine": { "Malay,English,Vietnamese": 0.08, "Malay,English,Spanish": 0.07333333333333333, "Malay,English,Indonesian": 0.1, "Malay,English,Filipino": 0.08, "Malay,English,Chinese": 0.04, "Malay,Vietnamese,Spanish": 0.07333333333333333, "Malay,Vietnamese,Indonesian": 0.07333333333333333, "Malay,Vietnamese,Filipino": 0.07333333333333333, "Malay,Vietnamese,Chinese": 0.05333333333333334, "Malay,Spanish,Indonesian": 0.07333333333333333, "Malay,Spanish,Filipino": 0.07333333333333333, "Malay,Spanish,Chinese": 0.04666666666666667, "Malay,Indonesian,Filipino": 0.06666666666666667, "Malay,Indonesian,Chinese": 0.04, "Malay,Filipino,Chinese": 0.06, "English,Vietnamese,Spanish": 0.08, "English,Vietnamese,Indonesian": 0.06666666666666667, "English,Vietnamese,Filipino": 0.07333333333333333, "English,Vietnamese,Chinese": 0.04666666666666667, "English,Spanish,Indonesian": 0.08666666666666667, "English,Spanish,Filipino": 0.07333333333333333, "English,Spanish,Chinese": 0.06666666666666667, "English,Indonesian,Filipino": 0.05333333333333334, "English,Indonesian,Chinese": 0.06666666666666667, "English,Filipino,Chinese": 0.04666666666666667, "Vietnamese,Spanish,Indonesian": 0.06666666666666667, "Vietnamese,Spanish,Filipino": 0.07333333333333333, "Vietnamese,Spanish,Chinese": 0.06, "Vietnamese,Indonesian,Filipino": 0.04666666666666667, "Vietnamese,Indonesian,Chinese": 0.05333333333333334, "Vietnamese,Filipino,Chinese": 0.07333333333333333, "Spanish,Indonesian,Filipino": 0.06666666666666667, "Spanish,Indonesian,Chinese": 0.05333333333333334, "Spanish,Filipino,Chinese": 0.06, "Indonesian,Filipino,Chinese": 0.04 }, "4_combine": { "Malay,English,Vietnamese,Spanish": 0.03333333333333333, "Malay,English,Vietnamese,Indonesian": 0.02666666666666667, "Malay,English,Vietnamese,Filipino": 0.02, "Malay,English,Vietnamese,Chinese": 0.006666666666666667, "Malay,English,Spanish,Indonesian": 0.03333333333333333, "Malay,English,Spanish,Filipino": 0.02666666666666667, "Malay,English,Spanish,Chinese": 0.006666666666666667, "Malay,English,Indonesian,Filipino": 0.02, "Malay,English,Indonesian,Chinese": 0.02, "Malay,English,Filipino,Chinese": 0.006666666666666667, "Malay,Vietnamese,Spanish,Indonesian": 0.02, "Malay,Vietnamese,Spanish,Filipino": 0.02, "Malay,Vietnamese,Spanish,Chinese": 0.013333333333333334, "Malay,Vietnamese,Indonesian,Filipino": 0.013333333333333334, "Malay,Vietnamese,Indonesian,Chinese": 0.013333333333333334, "Malay,Vietnamese,Filipino,Chinese": 0.006666666666666667, "Malay,Spanish,Indonesian,Filipino": 0.013333333333333334, "Malay,Spanish,Indonesian,Chinese": 0.006666666666666667, "Malay,Spanish,Filipino,Chinese": 0.013333333333333334, "Malay,Indonesian,Filipino,Chinese": 0.02, "English,Vietnamese,Spanish,Indonesian": 0.02666666666666667, "English,Vietnamese,Spanish,Filipino": 0.02, "English,Vietnamese,Spanish,Chinese": 0.006666666666666667, "English,Vietnamese,Indonesian,Filipino": 0.02, "English,Vietnamese,Indonesian,Chinese": 0.0, "English,Vietnamese,Filipino,Chinese": 0.02, "English,Spanish,Indonesian,Filipino": 0.013333333333333334, "English,Spanish,Indonesian,Chinese": 0.02, "English,Spanish,Filipino,Chinese": 0.02, "English,Indonesian,Filipino,Chinese": 0.006666666666666667, "Vietnamese,Spanish,Indonesian,Filipino": 0.013333333333333334, "Vietnamese,Spanish,Indonesian,Chinese": 0.013333333333333334, "Vietnamese,Spanish,Filipino,Chinese": 0.013333333333333334, "Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, "Spanish,Indonesian,Filipino,Chinese": 0.0 }, "5_combine": { "Malay,English,Vietnamese,Spanish,Indonesian": 0.013333333333333334, "Malay,English,Vietnamese,Spanish,Filipino": 0.006666666666666667, "Malay,English,Vietnamese,Spanish,Chinese": 0.0, "Malay,English,Vietnamese,Indonesian,Filipino": 0.0, "Malay,English,Vietnamese,Indonesian,Chinese": 0.0, "Malay,English,Vietnamese,Filipino,Chinese": 0.0, "Malay,English,Spanish,Indonesian,Filipino": 0.006666666666666667, "Malay,English,Spanish,Indonesian,Chinese": 0.006666666666666667, "Malay,English,Spanish,Filipino,Chinese": 0.0, "Malay,English,Indonesian,Filipino,Chinese": 0.006666666666666667, "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.0, "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.0, "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.0, "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, "Malay,Spanish,Indonesian,Filipino,Chinese": 0.0, "English,Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, "English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, "English,Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, "English,Vietnamese,Indonesian,Filipino,Chinese": 0.0, "English,Spanish,Indonesian,Filipino,Chinese": 0.0, "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 }, "6_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.0, "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.0, "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.0, "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.0, "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0, "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 }, "7_combine": { "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 } }, "AC3_2": 0.25283472329320233, "AC3_3": 0.10306815277982001, "AC3_4": 0.0290982205582652, "AC3_5": 0.005651027936661305, "AC3_6": 0.0, "AC3_7": 0.0 } }, "cross_logiqa": { "prompt_1": { "overall_acc": 0.3181818181818182, "language_acc": { "English": 0.3522727272727273, "Vietnamese": 0.3068181818181818, "Chinese": 0.3125, "Indonesian": 0.32954545454545453, "Filipino": 0.2784090909090909, "Spanish": 0.3409090909090909, "Malay": 0.3068181818181818 }, "consistency_score_2": 0.34145021645021645, "consistency_score_3": 0.15016233766233766, "consistency_score_4": 0.07581168831168832, "consistency_score_5": 0.04220779220779221, "consistency_score_6": 0.025974025974025976, "consistency_score_7": 0.017045454545454544, "detailed_consistency_score": { "2_combine": { "English,Vietnamese": 0.3693181818181818, "English,Chinese": 0.3693181818181818, "English,Indonesian": 0.32386363636363635, "English,Filipino": 0.29545454545454547, "English,Spanish": 0.42613636363636365, "English,Malay": 0.3522727272727273, "Vietnamese,Chinese": 0.3465909090909091, "Vietnamese,Indonesian": 0.3522727272727273, "Vietnamese,Filipino": 0.3125, "Vietnamese,Spanish": 0.3522727272727273, "Vietnamese,Malay": 0.3693181818181818, "Chinese,Indonesian": 0.3522727272727273, "Chinese,Filipino": 0.2727272727272727, "Chinese,Spanish": 0.4090909090909091, "Chinese,Malay": 0.3181818181818182, "Indonesian,Filipino": 0.3352272727272727, "Indonesian,Spanish": 0.36363636363636365, "Indonesian,Malay": 0.3125, "Filipino,Spanish": 0.3068181818181818, "Filipino,Malay": 0.3068181818181818, "Spanish,Malay": 0.32386363636363635 }, "3_combine": { "English,Vietnamese,Chinese": 0.1590909090909091, "English,Vietnamese,Indonesian": 0.1534090909090909, "English,Vietnamese,Filipino": 0.11931818181818182, "English,Vietnamese,Spanish": 0.18181818181818182, "English,Vietnamese,Malay": 0.1590909090909091, "English,Chinese,Indonesian": 0.14772727272727273, "English,Chinese,Filipino": 0.11931818181818182, "English,Chinese,Spanish": 0.2215909090909091, "English,Chinese,Malay": 0.16477272727272727, "English,Indonesian,Filipino": 0.13636363636363635, "English,Indonesian,Spanish": 0.20454545454545456, "English,Indonesian,Malay": 0.13636363636363635, "English,Filipino,Spanish": 0.14772727272727273, "English,Filipino,Malay": 0.14204545454545456, "English,Spanish,Malay": 0.17613636363636365, "Vietnamese,Chinese,Indonesian": 0.14772727272727273, "Vietnamese,Chinese,Filipino": 0.11363636363636363, "Vietnamese,Chinese,Spanish": 0.18181818181818182, "Vietnamese,Chinese,Malay": 0.14772727272727273, "Vietnamese,Indonesian,Filipino": 0.14204545454545456, "Vietnamese,Indonesian,Spanish": 0.16477272727272727, "Vietnamese,Indonesian,Malay": 0.1534090909090909, "Vietnamese,Filipino,Spanish": 0.125, "Vietnamese,Filipino,Malay": 0.13636363636363635, "Vietnamese,Spanish,Malay": 0.1534090909090909, "Chinese,Indonesian,Filipino": 0.125, "Chinese,Indonesian,Spanish": 0.19318181818181818, "Chinese,Indonesian,Malay": 0.13068181818181818, "Chinese,Filipino,Spanish": 0.13636363636363635, "Chinese,Filipino,Malay": 0.11931818181818182, "Chinese,Spanish,Malay": 0.17045454545454544, "Indonesian,Filipino,Spanish": 0.14204545454545456, "Indonesian,Filipino,Malay": 0.13068181818181818, "Indonesian,Spanish,Malay": 0.14772727272727273, "Filipino,Spanish,Malay": 0.125 }, "4_combine": { "English,Vietnamese,Chinese,Indonesian": 0.07954545454545454, "English,Vietnamese,Chinese,Filipino": 0.045454545454545456, "English,Vietnamese,Chinese,Spanish": 0.09659090909090909, "English,Vietnamese,Chinese,Malay": 0.06818181818181818, "English,Vietnamese,Indonesian,Filipino": 0.07386363636363637, "English,Vietnamese,Indonesian,Spanish": 0.09659090909090909, "English,Vietnamese,Indonesian,Malay": 0.07954545454545454, "English,Vietnamese,Filipino,Spanish": 0.07386363636363637, "English,Vietnamese,Filipino,Malay": 0.08522727272727272, "English,Vietnamese,Spanish,Malay": 0.08522727272727272, "English,Chinese,Indonesian,Filipino": 0.0625, "English,Chinese,Indonesian,Spanish": 0.10795454545454546, "English,Chinese,Indonesian,Malay": 0.07386363636363637, "English,Chinese,Filipino,Spanish": 0.08522727272727272, "English,Chinese,Filipino,Malay": 0.07954545454545454, "English,Chinese,Spanish,Malay": 0.11363636363636363, "English,Indonesian,Filipino,Spanish": 0.09090909090909091, "English,Indonesian,Filipino,Malay": 0.07386363636363637, "English,Indonesian,Spanish,Malay": 0.09090909090909091, "English,Filipino,Spanish,Malay": 0.07954545454545454, "Vietnamese,Chinese,Indonesian,Filipino": 0.05113636363636364, "Vietnamese,Chinese,Indonesian,Spanish": 0.07954545454545454, "Vietnamese,Chinese,Indonesian,Malay": 0.07386363636363637, "Vietnamese,Chinese,Filipino,Spanish": 0.056818181818181816, "Vietnamese,Chinese,Filipino,Malay": 0.056818181818181816, "Vietnamese,Chinese,Spanish,Malay": 0.08522727272727272, "Vietnamese,Indonesian,Filipino,Spanish": 0.0625, "Vietnamese,Indonesian,Filipino,Malay": 0.07386363636363637, "Vietnamese,Indonesian,Spanish,Malay": 0.08522727272727272, "Vietnamese,Filipino,Spanish,Malay": 0.05113636363636364, "Chinese,Indonesian,Filipino,Spanish": 0.07386363636363637, "Chinese,Indonesian,Filipino,Malay": 0.056818181818181816, "Chinese,Indonesian,Spanish,Malay": 0.09659090909090909, "Chinese,Filipino,Spanish,Malay": 0.056818181818181816, "Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 }, "5_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino": 0.03409090909090909, "English,Vietnamese,Chinese,Indonesian,Spanish": 0.056818181818181816, "English,Vietnamese,Chinese,Indonesian,Malay": 0.03977272727272727, "English,Vietnamese,Chinese,Filipino,Spanish": 0.03409090909090909, "English,Vietnamese,Chinese,Filipino,Malay": 0.03409090909090909, "English,Vietnamese,Chinese,Spanish,Malay": 0.05113636363636364, "English,Vietnamese,Indonesian,Filipino,Spanish": 0.05113636363636364, "English,Vietnamese,Indonesian,Filipino,Malay": 0.045454545454545456, "English,Vietnamese,Indonesian,Spanish,Malay": 0.05113636363636364, "English,Vietnamese,Filipino,Spanish,Malay": 0.045454545454545456, "English,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, "English,Chinese,Indonesian,Filipino,Malay": 0.03977272727272727, "English,Chinese,Indonesian,Spanish,Malay": 0.0625, "English,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, "English,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.03409090909090909, "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908, "Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 }, "6_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908, "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908, "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 }, "7_combine": { "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 } }, "AC3_2": 0.3294056230390323, "AC3_3": 0.20403340156349362, "AC3_4": 0.12244821512418215, "AC3_5": 0.0745290745083944, "AC3_6": 0.0480274442399042, "AC3_7": 0.03235747302578674 } }, "sg_eval": { "prompt_1": { "accuracy": 0.2524271844660194 } }, "cn_eval": { "prompt_1": { "accuracy": 0.2571428571428571 } }, "us_eval": { "prompt_1": { "accuracy": 0.29906542056074764 } }, "ph_eval": { "prompt_1": { "accuracy": 0.25, "category_acc": { "brand": 0.2, "demographics": 0.4, "biology": 0.4, "history": 0.26666666666666666, "literature": 0.2, "politics": 0.4, "culture": 0.3, "film": 0.2, "law": 0.1, "geography": 0.1 } } }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": { "bleu_score": 0.004215631716446151 } }, "flores_vie2eng": { "prompt_1": { "bleu_score": 0.00472710747035566 } }, "flores_zho2eng": { "prompt_1": { "bleu_score": 0.0071983458563077175 } }, "flores_zsm2eng": { "prompt_1": { "bleu_score": 0.004256927827829785 } }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": { "accuracy": 0.24633535931355025, "category_acc": { "high_school_european_history": 0.2621951219512195, "business_ethics": 0.31313131313131315, "clinical_knowledge": 0.3181818181818182, "medical_genetics": 0.2727272727272727, "high_school_us_history": 0.24630541871921183, "high_school_physics": 0.28, "high_school_world_history": 0.2542372881355932, "virology": 0.2545454545454545, "high_school_microeconomics": 0.23628691983122363, "econometrics": 0.23893805309734514, "college_computer_science": 0.26262626262626265, "high_school_biology": 0.23948220064724918, "abstract_algebra": 0.20202020202020202, "professional_accounting": 0.2313167259786477, "philosophy": 0.23225806451612904, "professional_medicine": 0.23247232472324722, "nutrition": 0.25245901639344265, "global_facts": 0.26262626262626265, "machine_learning": 0.26126126126126126, "security_studies": 0.21721311475409835, "public_relations": 0.29357798165137616, "professional_psychology": 0.23895253682487724, "prehistory": 0.23219814241486067, "anatomy": 0.22388059701492538, "human_sexuality": 0.3, "college_medicine": 0.2441860465116279, "high_school_government_and_politics": 0.23958333333333334, "college_chemistry": 0.15151515151515152, "logical_fallacies": 0.25308641975308643, "high_school_geography": 0.23857868020304568, "elementary_mathematics": 0.22281167108753316, "human_aging": 0.22972972972972974, "college_mathematics": 0.30303030303030304, "high_school_psychology": 0.22794117647058823, "formal_logic": 0.288, "high_school_statistics": 0.23255813953488372, "international_law": 0.325, "high_school_mathematics": 0.20817843866171004, "high_school_computer_science": 0.2727272727272727, "conceptual_physics": 0.27350427350427353, "miscellaneous": 0.18925831202046037, "high_school_chemistry": 0.3118811881188119, "marketing": 0.2703862660944206, "professional_law": 0.2654924983692107, "management": 0.2647058823529412, "college_physics": 0.2079207920792079, "jurisprudence": 0.2803738317757009, "world_religions": 0.25882352941176473, "sociology": 0.29, "us_foreign_policy": 0.26262626262626265, "high_school_macroeconomics": 0.23136246786632392, "computer_security": 0.25252525252525254, "moral_scenarios": 0.2348993288590604, "moral_disputes": 0.2289855072463768, "electrical_engineering": 0.3055555555555556, "astronomy": 0.2913907284768212, "college_biology": 0.17482517482517482 } } }, "c_eval": { "prompt_1": { "accuracy": 0.2451708766716196 } }, "c_eval_full": { "prompt_1": { "accuracy": 0.24097135740971357, "category_acc": { "computer_network": 0.3333333333333333, "operating_system": 0.3333333333333333, "computer_architecture": 0.34615384615384615, "college_programming": 0.19047619047619047, "college_physics": 0.25, "college_chemistry": 0.06896551724137931, "advanced_mathematics": 0.125, "probability_and_statistics": 0.21739130434782608, "discrete_mathematics": 0.19047619047619047, "electrical_engineer": 0.30952380952380953, "metrology_engineer": 0.20689655172413793, "high_school_mathematics": 0.2608695652173913, "high_school_physics": 0.375, "high_school_chemistry": 0.2916666666666667, "high_school_biology": 0.4583333333333333, "middle_school_mathematics": 0.125, "middle_school_biology": 0.2692307692307692, "middle_school_physics": 0.2916666666666667, "middle_school_chemistry": 0.2, "veterinary_medicine": 0.21428571428571427, "college_economics": 0.15, "business_administration": 0.2631578947368421, "marxism": 0.16666666666666666, "mao_zedong_thought": 0.2413793103448276, "education_science": 0.20588235294117646, "teacher_qualification": 0.24489795918367346, "high_school_politics": 0.20833333333333334, "high_school_geography": 0.16666666666666666, "middle_school_politics": 0.2692307692307692, "middle_school_geography": 0.5882352941176471, "modern_chinese_history": 0.2857142857142857, "ideological_and_moral_cultivation": 0.2916666666666667, "logic": 0.07407407407407407, "law": 0.27586206896551724, "chinese_language_and_literature": 0.25, "art_studies": 0.21052631578947367, "professional_tour_guide": 0.23529411764705882, "legal_professional": 0.10714285714285714, "high_school_chinese": 0.3333333333333333, "high_school_history": 0.12, "middle_school_history": 0.18518518518518517, "civil_servant": 0.38461538461538464, "sports_science": 0.2916666666666667, "plant_protection": 0.1111111111111111, "basic_medicine": 0.20833333333333334, "clinical_medicine": 0.2222222222222222, "urban_and_rural_planner": 0.17647058823529413, "accountant": 0.37037037037037035, "fire_engineer": 0.16666666666666666, "environmental_impact_assessment_engineer": 0.19444444444444445, "tax_accountant": 0.25925925925925924, "physician": 0.2777777777777778 } } }, "cmmlu": { "prompt_1": { "accuracy": 0.26523297491039427 } }, "cmmlu_full": { "prompt_1": { "accuracy": 0.2542738732515973, "category_acc": { "agronomy": 0.2485207100591716, "anatomy": 0.22972972972972974, "ancient_chinese": 0.3170731707317073, "arts": 0.16875, "astronomy": 0.24848484848484848, "business_ethics": 0.2535885167464115, "chinese_civil_service_exam": 0.25625, "chinese_driving_rule": 0.3053435114503817, "chinese_food_culture": 0.25735294117647056, "chinese_foreign_policy": 0.29906542056074764, "chinese_history": 0.24458204334365324, "chinese_literature": 0.24019607843137256, "chinese_teacher_qualification": 0.24581005586592178, "clinical_knowledge": 0.2742616033755274, "college_actuarial_science": 0.22641509433962265, "college_education": 0.18691588785046728, "college_engineering_hydrology": 0.2830188679245283, "college_law": 0.25, "college_mathematics": 0.20952380952380953, "college_medical_statistics": 0.20754716981132076, "college_medicine": 0.25274725274725274, "computer_science": 0.28431372549019607, "computer_security": 0.23976608187134502, "conceptual_physics": 0.2585034013605442, "construction_project_management": 0.2949640287769784, "economics": 0.3522012578616352, "education": 0.294478527607362, "electrical_engineering": 0.3023255813953488, "elementary_chinese": 0.29365079365079366, "elementary_commonsense": 0.25252525252525254, "elementary_information_and_technology": 0.2605042016806723, "elementary_mathematics": 0.24782608695652175, "ethnology": 0.3333333333333333, "food_science": 0.25874125874125875, "genetics": 0.1875, "global_facts": 0.28187919463087246, "high_school_biology": 0.28994082840236685, "high_school_chemistry": 0.26515151515151514, "high_school_geography": 0.288135593220339, "high_school_mathematics": 0.22560975609756098, "high_school_physics": 0.2545454545454545, "high_school_politics": 0.18181818181818182, "human_sexuality": 0.23809523809523808, "international_law": 0.2594594594594595, "journalism": 0.23837209302325582, "jurisprudence": 0.2725060827250608, "legal_and_moral_basis": 0.2570093457943925, "logical": 0.22764227642276422, "machine_learning": 0.29508196721311475, "management": 0.23809523809523808, "marketing": 0.24444444444444444, "marxist_theory": 0.23809523809523808, "modern_chinese": 0.20689655172413793, "nutrition": 0.22758620689655173, "philosophy": 0.2857142857142857, "professional_accounting": 0.29714285714285715, "professional_law": 0.2559241706161137, "professional_medicine": 0.25, "professional_psychology": 0.25862068965517243, "public_relations": 0.22988505747126436, "security_study": 0.26666666666666666, "sociology": 0.22566371681415928, "sports_science": 0.20606060606060606, "traditional_chinese_medicine": 0.21081081081081082, "virology": 0.21301775147928995, "world_history": 0.22981366459627328, "world_religions": 0.28125 } } }, "zbench": { "prompt_1": { "accuracy": 0.30303030303030304 } }, "ind_emotion": { "prompt_1": { "accuracy": 0.16818181818181818 } }, "ocnli": { "prompt_1": { "accuracy": 0.34576271186440677 } }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": { "accuracy": 0.3684468397844194 } }, "samsum": { "prompt_1": { "rouge1": 0.09577397760787033, "rouge2": 0.008311756747884803, "rougeL": 0.06895432898784087, "avg_rouge": 0.057680021114532 } }, "dialogsum": { "prompt_1": { "rouge1": 0.08816571110741948, "rouge2": 0.005605736966615868, "rougeL": 0.06613129149097668, "avg_rouge": 0.05330091318833735 } }, "sst2": { "prompt_1": { "accuracy": 0.4873853211009174 } }, "cola": { "prompt_1": { "accuracy": 0.41514860977948226 } }, "qqp": { "prompt_1": { "accuracy": 0.5160029680930003 } }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "llama-2-13b": { "model_size": "13B", "model_link": "https://huggingface.co/meta-llama/Llama-2-13b-hf", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "llama-2-13b-chat": { "model_size": "13B", "model_link": "https://huggingface.co/meta-llama/Llama-2-13b-chat-hf", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "llama-2-70b": { "model_size": "70B", "model_link": "https://huggingface.co/meta-llama/Llama-2-70b-hf", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "llama-2-70b-chat": { "model_size": "70B", "model_link": "https://huggingface.co/meta-llama/Llama-2-70b-chat-hf", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "chatglm-6b": { "model_size": "6B", "model_link": "https://huggingface.co/THUDM/chatglm-6b", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "chatglm2-6b": { "model_size": "6B", "model_link": "https://huggingface.co/THUDM/chatglm2-6b", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "chatglm3-6b": { "model_size": "6B", "model_link": "https://huggingface.co/THUDM/chatglm3-6b", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "baichuan-7b": { "model_size": "7B", "model_link": "https://huggingface.co/baichuan-inc/Baichuan-7B", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "baichuan-13b": { "model_size": "13B", "model_link": "https://huggingface.co/baichuan-inc/Baichuan-13B-Base", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "baichuan-13b-chat": { "model_size": "13B", "model_link": "https://huggingface.co/baichuan-inc/Baichuan-13B-Chat", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "chatgpt": { "model_size": "-1B", "model_link": "https://openai.com/blog/chatgpt", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "chatgpt-1106": { "model_size": "-1B", "model_link": "https://openai.com/blog/chatgpt", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "chatgpt4": { "model_size": "-1B", "model_link": "https://openai.com/blog/chatgpt", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "baichuan-2-7b": { "model_size": "7B", "model_link": "https://huggingface.co/baichuan-inc/Baichuan2-7B-Base", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "baichuan-2-7b-chat": { "model_size": "7B", "model_link": "https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "baichuan-2-13b": { "model_size": "13B", "model_link": "https://huggingface.co/baichuan-inc/Baichuan2-13B-Base", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "baichuan-2-13b-chat": { "model_size": "13B", "model_link": "https://huggingface.co/baichuan-inc/Baichuan2-13B-Chat", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "vicuna-7b-v1.5": { "model_size": "7B", "model_link": "https://huggingface.co/lmsys/vicuna-7b-v1.5", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "vicuna-13b-v1.5": { "model_size": "13B", "model_link": "https://huggingface.co/lmsys/vicuna-13b-v1.5", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "bloomz-7b1": { "model_size": "7.1B", "model_link": "https://huggingface.co/bigscience/bloomz-7b1", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "mt0-xxl": { "model_size": "13B", "model_link": "https://huggingface.co/bigscience/mt0-xxl", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "colossal-llama-2-7b-base": { "model_size": "7B", "model_link": "https://huggingface.co/hpcai-tech/Colossal-LLaMA-2-7b-base", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } }, "fastchat-t5-3b-v1.0": { "model_size": "3B", "model_link": "https://huggingface.co/lmsys/fastchat-t5-3b-v1.0", "zero_shot": { "cross_mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "rte": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 } }, "five_shot": { "cross_mmlu": { "prompt_1": -1 }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { "prompt_1": -1 }, "cn_eval": { "prompt_1": -1 }, "us_eval": { "prompt_1": -1 }, "ph_eval": { "prompt_1": -1 }, "sing2eng": { "prompt_1": -1 }, "flores_ind2eng": { "prompt_1": -1 }, "flores_vie2eng": { "prompt_1": -1 }, "flores_zho2eng": { "prompt_1": -1 }, "flores_zsm2eng": { "prompt_1": -1 }, "mmlu": { "prompt_1": -1 }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { "prompt_1": -1 }, "cmmlu": { "prompt_1": -1 }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { "prompt_1": -1 }, "ind_emotion": { "prompt_1": -1 }, "ocnli": { "prompt_1": -1 }, "c3": { "prompt_1": -1 }, "dream": { "prompt_1": -1 }, "samsum": { "prompt_1": -1 }, "dialogsum": { "prompt_1": -1 }, "sst2": { "prompt_1": -1 }, "cola": { "prompt_1": -1 }, "qqp": { "prompt_1": -1 }, "mnli": { "prompt_1": -1 }, "qnli": { "prompt_1": -1 }, "wnli": { "prompt_1": -1 }, "rte": { "prompt_1": -1 }, "mrpc": { "prompt_1": -1 } } } }