diff --git "a/all_results.json" "b/all_results.json" --- "a/all_results.json" +++ "b/all_results.json" @@ -4,235 +4,3250 @@ "model_link": "https://huggingface.co/spaces/SeaEval/SeaEval_Leaderboard", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.23904761904761904, + "language_acc": { + "Malay": 0.25333333333333335, + "English": 0.2, + "Vietnamese": 0.3, + "Spanish": 0.22, + "Indonesian": 0.24, + "Filipino": 0.24, + "Chinese": 0.22 + }, + "consistency_score_2": 0.2612698412698413, + "consistency_score_3": 0.07238095238095239, + "consistency_score_4": 0.02133333333333333, + "consistency_score_5": 0.005714285714285715, + "consistency_score_6": 0.0009523809523809525, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.23333333333333334, + "Malay,Vietnamese": 0.26666666666666666, + "Malay,Spanish": 0.2733333333333333, + "Malay,Indonesian": 0.3, + "Malay,Filipino": 0.2733333333333333, + "Malay,Chinese": 0.28, + "English,Vietnamese": 0.25333333333333335, + "English,Spanish": 0.2866666666666667, + "English,Indonesian": 0.30666666666666664, + "English,Filipino": 0.2, + "English,Chinese": 0.21333333333333335, + "Vietnamese,Spanish": 0.18, + "Vietnamese,Indonesian": 0.32, + "Vietnamese,Filipino": 0.26666666666666666, + "Vietnamese,Chinese": 0.26, + "Spanish,Indonesian": 0.2733333333333333, + "Spanish,Filipino": 0.26, + "Spanish,Chinese": 0.2866666666666667, + "Indonesian,Filipino": 0.24, + "Indonesian,Chinese": 0.24, + "Filipino,Chinese": 0.2733333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.07333333333333333, + "Malay,English,Spanish": 0.07333333333333333, + "Malay,English,Indonesian": 0.08666666666666667, + "Malay,English,Filipino": 0.06, + "Malay,English,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Spanish": 0.04, + "Malay,Vietnamese,Indonesian": 0.11333333333333333, + "Malay,Vietnamese,Filipino": 0.08, + "Malay,Vietnamese,Chinese": 0.12, + "Malay,Spanish,Indonesian": 0.05333333333333334, + "Malay,Spanish,Filipino": 0.08, + "Malay,Spanish,Chinese": 0.06666666666666667, + "Malay,Indonesian,Filipino": 0.08666666666666667, + "Malay,Indonesian,Chinese": 0.08, + "Malay,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish": 0.04666666666666667, + "English,Vietnamese,Indonesian": 0.10666666666666667, + "English,Vietnamese,Filipino": 0.08, + "English,Vietnamese,Chinese": 0.08, + "English,Spanish,Indonesian": 0.08666666666666667, + "English,Spanish,Filipino": 0.05333333333333334, + "English,Spanish,Chinese": 0.05333333333333334, + "English,Indonesian,Filipino": 0.06666666666666667, + "English,Indonesian,Chinese": 0.07333333333333333, + "English,Filipino,Chinese": 0.04, + "Vietnamese,Spanish,Indonesian": 0.08, + "Vietnamese,Spanish,Filipino": 0.04, + "Vietnamese,Spanish,Chinese": 0.03333333333333333, + "Vietnamese,Indonesian,Filipino": 0.08, + "Vietnamese,Indonesian,Chinese": 0.09333333333333334, + "Vietnamese,Filipino,Chinese": 0.07333333333333333, + "Spanish,Indonesian,Filipino": 0.06666666666666667, + "Spanish,Indonesian,Chinese": 0.06, + "Spanish,Filipino,Chinese": 0.06666666666666667, + "Indonesian,Filipino,Chinese": 0.06666666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.0, + "Malay,English,Vietnamese,Indonesian": 0.04666666666666667, + "Malay,English,Vietnamese,Filipino": 0.02666666666666667, + "Malay,English,Vietnamese,Chinese": 0.04, + "Malay,English,Spanish,Indonesian": 0.013333333333333334, + "Malay,English,Spanish,Filipino": 0.02, + "Malay,English,Spanish,Chinese": 0.013333333333333334, + "Malay,English,Indonesian,Filipino": 0.02666666666666667, + "Malay,English,Indonesian,Chinese": 0.03333333333333333, + "Malay,English,Filipino,Chinese": 0.02, + "Malay,Vietnamese,Spanish,Indonesian": 0.02, + "Malay,Vietnamese,Spanish,Filipino": 0.0, + "Malay,Vietnamese,Spanish,Chinese": 0.006666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.03333333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Filipino,Chinese": 0.04666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.013333333333333334, + "Malay,Spanish,Indonesian,Chinese": 0.013333333333333334, + "Malay,Spanish,Filipino,Chinese": 0.02666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.03333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.02, + "English,Vietnamese,Spanish,Filipino": 0.013333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.006666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.02666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.02666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.02, + "English,Spanish,Indonesian,Filipino": 0.03333333333333333, + "English,Spanish,Indonesian,Chinese": 0.013333333333333334, + "English,Spanish,Filipino,Chinese": 0.006666666666666667, + "English,Indonesian,Filipino,Chinese": 0.013333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino": 0.02, + "Vietnamese,Spanish,Indonesian,Chinese": 0.013333333333333334, + "Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.0, + "Malay,English,Vietnamese,Spanish,Filipino": 0.0, + "Malay,English,Vietnamese,Spanish,Chinese": 0.0, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.013333333333333334, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.02, + "Malay,English,Vietnamese,Filipino,Chinese": 0.006666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.013333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.006666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.006666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.006666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.0, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.006666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.0, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.02, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.0, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.0, + "English,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.0, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.0, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.0, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 + } + }, + "AC3_2": 0.2496652162904419, + "AC3_3": 0.11111693603538478, + "AC3_4": 0.03917093390351274, + "AC3_5": 0.011161756526846088, + "AC3_6": 0.0018972033249842531, + "AC3_7": 0.0 + }, + "prompt_2": { + "overall_acc": 0.22952380952380952, + "language_acc": { + "Malay": 0.23333333333333334, + "English": 0.26666666666666666, + "Vietnamese": 0.16, + "Spanish": 0.22, + "Indonesian": 0.21333333333333335, + "Filipino": 0.2733333333333333, + "Chinese": 0.24 + }, + "consistency_score_2": 0.26, + "consistency_score_3": 0.06647619047619047, + "consistency_score_4": 0.015999999999999997, + "consistency_score_5": 0.0038095238095238095, + "consistency_score_6": 0.0009523809523809525, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.24666666666666667, + "Malay,Vietnamese": 0.26666666666666666, + "Malay,Spanish": 0.3333333333333333, + "Malay,Indonesian": 0.23333333333333334, + "Malay,Filipino": 0.22666666666666666, + "Malay,Chinese": 0.18666666666666668, + "English,Vietnamese": 0.2866666666666667, + "English,Spanish": 0.24, + "English,Indonesian": 0.22666666666666666, + "English,Filipino": 0.26, + "English,Chinese": 0.25333333333333335, + "Vietnamese,Spanish": 0.3, + "Vietnamese,Indonesian": 0.32, + "Vietnamese,Filipino": 0.26, + "Vietnamese,Chinese": 0.3, + "Spanish,Indonesian": 0.2866666666666667, + "Spanish,Filipino": 0.25333333333333335, + "Spanish,Chinese": 0.2866666666666667, + "Indonesian,Filipino": 0.22666666666666666, + "Indonesian,Chinese": 0.22, + "Filipino,Chinese": 0.24666666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.08, + "Malay,English,Spanish": 0.07333333333333333, + "Malay,English,Indonesian": 0.05333333333333334, + "Malay,English,Filipino": 0.06, + "Malay,English,Chinese": 0.03333333333333333, + "Malay,Vietnamese,Spanish": 0.11333333333333333, + "Malay,Vietnamese,Indonesian": 0.06, + "Malay,Vietnamese,Filipino": 0.04666666666666667, + "Malay,Vietnamese,Chinese": 0.04666666666666667, + "Malay,Spanish,Indonesian": 0.06, + "Malay,Spanish,Filipino": 0.08666666666666667, + "Malay,Spanish,Chinese": 0.06, + "Malay,Indonesian,Filipino": 0.04666666666666667, + "Malay,Indonesian,Chinese": 0.03333333333333333, + "Malay,Filipino,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish": 0.08666666666666667, + "English,Vietnamese,Indonesian": 0.07333333333333333, + "English,Vietnamese,Filipino": 0.07333333333333333, + "English,Vietnamese,Chinese": 0.06666666666666667, + "English,Spanish,Indonesian": 0.06, + "English,Spanish,Filipino": 0.04666666666666667, + "English,Spanish,Chinese": 0.05333333333333334, + "English,Indonesian,Filipino": 0.06666666666666667, + "English,Indonesian,Chinese": 0.05333333333333334, + "English,Filipino,Chinese": 0.04666666666666667, + "Vietnamese,Spanish,Indonesian": 0.09333333333333334, + "Vietnamese,Spanish,Filipino": 0.06666666666666667, + "Vietnamese,Spanish,Chinese": 0.08666666666666667, + "Vietnamese,Indonesian,Filipino": 0.07333333333333333, + "Vietnamese,Indonesian,Chinese": 0.10666666666666667, + "Vietnamese,Filipino,Chinese": 0.08666666666666667, + "Spanish,Indonesian,Filipino": 0.06, + "Spanish,Indonesian,Chinese": 0.04666666666666667, + "Spanish,Filipino,Chinese": 0.09333333333333334, + "Indonesian,Filipino,Chinese": 0.08666666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.04, + "Malay,English,Vietnamese,Indonesian": 0.02, + "Malay,English,Vietnamese,Filipino": 0.02, + "Malay,English,Vietnamese,Chinese": 0.02, + "Malay,English,Spanish,Indonesian": 0.006666666666666667, + "Malay,English,Spanish,Filipino": 0.013333333333333334, + "Malay,English,Spanish,Chinese": 0.006666666666666667, + "Malay,English,Indonesian,Filipino": 0.0, + "Malay,English,Indonesian,Chinese": 0.013333333333333334, + "Malay,English,Filipino,Chinese": 0.006666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.013333333333333334, + "Malay,Vietnamese,Spanish,Filipino": 0.02, + "Malay,Vietnamese,Spanish,Chinese": 0.013333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.006666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.0, + "Malay,Vietnamese,Filipino,Chinese": 0.013333333333333334, + "Malay,Spanish,Indonesian,Filipino": 0.02, + "Malay,Spanish,Indonesian,Chinese": 0.0, + "Malay,Spanish,Filipino,Chinese": 0.02666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.006666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.013333333333333334, + "English,Vietnamese,Spanish,Filipino": 0.02, + "English,Vietnamese,Spanish,Chinese": 0.013333333333333334, + "English,Vietnamese,Indonesian,Filipino": 0.02666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.02, + "English,Vietnamese,Filipino,Chinese": 0.02666666666666667, + "English,Spanish,Indonesian,Filipino": 0.006666666666666667, + "English,Spanish,Indonesian,Chinese": 0.0, + "English,Spanish,Filipino,Chinese": 0.02, + "English,Indonesian,Filipino,Chinese": 0.02, + "Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.02666666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.02666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.02 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.006666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.013333333333333334, + "Malay,English,Vietnamese,Spanish,Chinese": 0.006666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.0, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.0, + "Malay,English,Vietnamese,Filipino,Chinese": 0.006666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.0, + "Malay,English,Spanish,Indonesian,Chinese": 0.0, + "Malay,English,Spanish,Filipino,Chinese": 0.006666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.0, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.0, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.0, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.0, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.0, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.0, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.02, + "English,Spanish,Indonesian,Filipino,Chinese": 0.0, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.0, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.0, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.0, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 + } + }, + "AC3_2": 0.24381322952217824, + "AC3_3": 0.10309370591601984, + "AC3_4": 0.0299146625169083, + "AC3_5": 0.007494655001647091, + "AC3_6": 0.0018968909869770477, + "AC3_7": 0.0 + }, + "prompt_3": { + "overall_acc": 0.2619047619047619, + "language_acc": { + "Malay": 0.22666666666666666, + "English": 0.26, + "Vietnamese": 0.29333333333333333, + "Spanish": 0.3, + "Indonesian": 0.3, + "Filipino": 0.21333333333333335, + "Chinese": 0.24 + }, + "consistency_score_2": 0.24349206349206348, + "consistency_score_3": 0.05523809523809523, + "consistency_score_4": 0.010476190476190474, + "consistency_score_5": 0.0012698412698412698, + "consistency_score_6": 0.0, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.24, + "Malay,Vietnamese": 0.24, + "Malay,Spanish": 0.30666666666666664, + "Malay,Indonesian": 0.2733333333333333, + "Malay,Filipino": 0.2733333333333333, + "Malay,Chinese": 0.19333333333333333, + "English,Vietnamese": 0.21333333333333335, + "English,Spanish": 0.20666666666666667, + "English,Indonesian": 0.30666666666666664, + "English,Filipino": 0.22, + "English,Chinese": 0.25333333333333335, + "Vietnamese,Spanish": 0.24, + "Vietnamese,Indonesian": 0.24, + "Vietnamese,Filipino": 0.21333333333333335, + "Vietnamese,Chinese": 0.26666666666666666, + "Spanish,Indonesian": 0.23333333333333334, + "Spanish,Filipino": 0.26, + "Spanish,Chinese": 0.20666666666666667, + "Indonesian,Filipino": 0.21333333333333335, + "Indonesian,Chinese": 0.22, + "Filipino,Chinese": 0.29333333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.05333333333333334, + "Malay,English,Spanish": 0.05333333333333334, + "Malay,English,Indonesian": 0.07333333333333333, + "Malay,English,Filipino": 0.07333333333333333, + "Malay,English,Chinese": 0.04666666666666667, + "Malay,Vietnamese,Spanish": 0.06666666666666667, + "Malay,Vietnamese,Indonesian": 0.07333333333333333, + "Malay,Vietnamese,Filipino": 0.04666666666666667, + "Malay,Vietnamese,Chinese": 0.04666666666666667, + "Malay,Spanish,Indonesian": 0.09333333333333334, + "Malay,Spanish,Filipino": 0.1, + "Malay,Spanish,Chinese": 0.02666666666666667, + "Malay,Indonesian,Filipino": 0.05333333333333334, + "Malay,Indonesian,Chinese": 0.04, + "Malay,Filipino,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish": 0.03333333333333333, + "English,Vietnamese,Indonesian": 0.06, + "English,Vietnamese,Filipino": 0.03333333333333333, + "English,Vietnamese,Chinese": 0.04666666666666667, + "English,Spanish,Indonesian": 0.05333333333333334, + "English,Spanish,Filipino": 0.03333333333333333, + "English,Spanish,Chinese": 0.04, + "English,Indonesian,Filipino": 0.08666666666666667, + "English,Indonesian,Chinese": 0.06, + "English,Filipino,Chinese": 0.06666666666666667, + "Vietnamese,Spanish,Indonesian": 0.05333333333333334, + "Vietnamese,Spanish,Filipino": 0.05333333333333334, + "Vietnamese,Spanish,Chinese": 0.03333333333333333, + "Vietnamese,Indonesian,Filipino": 0.04, + "Vietnamese,Indonesian,Chinese": 0.06666666666666667, + "Vietnamese,Filipino,Chinese": 0.07333333333333333, + "Spanish,Indonesian,Filipino": 0.06666666666666667, + "Spanish,Indonesian,Chinese": 0.04, + "Spanish,Filipino,Chinese": 0.05333333333333334, + "Indonesian,Filipino,Chinese": 0.04666666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.006666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.02, + "Malay,English,Vietnamese,Filipino": 0.006666666666666667, + "Malay,English,Vietnamese,Chinese": 0.0, + "Malay,English,Spanish,Indonesian": 0.013333333333333334, + "Malay,English,Spanish,Filipino": 0.006666666666666667, + "Malay,English,Spanish,Chinese": 0.013333333333333334, + "Malay,English,Indonesian,Filipino": 0.02666666666666667, + "Malay,English,Indonesian,Chinese": 0.0, + "Malay,English,Filipino,Chinese": 0.013333333333333334, + "Malay,Vietnamese,Spanish,Indonesian": 0.013333333333333334, + "Malay,Vietnamese,Spanish,Filipino": 0.02, + "Malay,Vietnamese,Spanish,Chinese": 0.006666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.006666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.013333333333333334, + "Malay,Vietnamese,Filipino,Chinese": 0.006666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.02666666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.0, + "Malay,Spanish,Filipino,Chinese": 0.006666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.0, + "English,Vietnamese,Spanish,Indonesian": 0.013333333333333334, + "English,Vietnamese,Spanish,Filipino": 0.006666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.0, + "English,Vietnamese,Indonesian,Filipino": 0.013333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.013333333333333334, + "English,Vietnamese,Filipino,Chinese": 0.006666666666666667, + "English,Spanish,Indonesian,Filipino": 0.02, + "English,Spanish,Indonesian,Chinese": 0.0, + "English,Spanish,Filipino,Chinese": 0.0, + "English,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.02, + "Vietnamese,Spanish,Indonesian,Chinese": 0.013333333333333334, + "Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.013333333333333334, + "Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.006666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.0, + "Malay,English,Vietnamese,Spanish,Chinese": 0.0, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.006666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.0, + "Malay,English,Vietnamese,Filipino,Chinese": 0.0, + "Malay,English,Spanish,Indonesian,Filipino": 0.0, + "Malay,English,Spanish,Indonesian,Chinese": 0.0, + "Malay,English,Spanish,Filipino,Chinese": 0.0, + "Malay,English,Indonesian,Filipino,Chinese": 0.0, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.0, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.0, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.0, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.0, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.0, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.0, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.0, + "English,Spanish,Indonesian,Filipino,Chinese": 0.0, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.0, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.0, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.0, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.0, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 + } + }, + "AC3_2": 0.2523630054537754, + "AC3_3": 0.09123409120532372, + "AC3_4": 0.020146520139123693, + "AC3_5": 0.0025274283407019965, + "AC3_6": 0.0, + "AC3_7": 0.0 + }, + "prompt_4": { + "overall_acc": 0.22380952380952376, + "language_acc": { + "Malay": 0.22666666666666666, + "English": 0.19333333333333333, + "Vietnamese": 0.25333333333333335, + "Spanish": 0.26, + "Indonesian": 0.2, + "Filipino": 0.20666666666666667, + "Chinese": 0.22666666666666666 + }, + "consistency_score_2": 0.25873015873015864, + "consistency_score_3": 0.06799999999999999, + "consistency_score_4": 0.016571428571428567, + "consistency_score_5": 0.0028571428571428576, + "consistency_score_6": 0.0, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.26666666666666666, + "Malay,Vietnamese": 0.22666666666666666, + "Malay,Spanish": 0.22, + "Malay,Indonesian": 0.2733333333333333, + "Malay,Filipino": 0.2866666666666667, + "Malay,Chinese": 0.30666666666666664, + "English,Vietnamese": 0.21333333333333335, + "English,Spanish": 0.26, + "English,Indonesian": 0.22666666666666666, + "English,Filipino": 0.26, + "English,Chinese": 0.22666666666666666, + "Vietnamese,Spanish": 0.22, + "Vietnamese,Indonesian": 0.2733333333333333, + "Vietnamese,Filipino": 0.35333333333333333, + "Vietnamese,Chinese": 0.24, + "Spanish,Indonesian": 0.3, + "Spanish,Filipino": 0.25333333333333335, + "Spanish,Chinese": 0.29333333333333333, + "Indonesian,Filipino": 0.25333333333333335, + "Indonesian,Chinese": 0.26666666666666666, + "Filipino,Chinese": 0.21333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.06, + "Malay,English,Spanish": 0.06, + "Malay,English,Indonesian": 0.04, + "Malay,English,Filipino": 0.1, + "Malay,English,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Spanish": 0.03333333333333333, + "Malay,Vietnamese,Indonesian": 0.05333333333333334, + "Malay,Vietnamese,Filipino": 0.08666666666666667, + "Malay,Vietnamese,Chinese": 0.06666666666666667, + "Malay,Spanish,Indonesian": 0.04, + "Malay,Spanish,Filipino": 0.07333333333333333, + "Malay,Spanish,Chinese": 0.06666666666666667, + "Malay,Indonesian,Filipino": 0.08, + "Malay,Indonesian,Chinese": 0.10666666666666667, + "Malay,Filipino,Chinese": 0.07333333333333333, + "English,Vietnamese,Spanish": 0.06666666666666667, + "English,Vietnamese,Indonesian": 0.05333333333333334, + "English,Vietnamese,Filipino": 0.06666666666666667, + "English,Vietnamese,Chinese": 0.06, + "English,Spanish,Indonesian": 0.08666666666666667, + "English,Spanish,Filipino": 0.07333333333333333, + "English,Spanish,Chinese": 0.06666666666666667, + "English,Indonesian,Filipino": 0.06, + "English,Indonesian,Chinese": 0.02666666666666667, + "English,Filipino,Chinese": 0.05333333333333334, + "Vietnamese,Spanish,Indonesian": 0.06666666666666667, + "Vietnamese,Spanish,Filipino": 0.07333333333333333, + "Vietnamese,Spanish,Chinese": 0.06666666666666667, + "Vietnamese,Indonesian,Filipino": 0.12, + "Vietnamese,Indonesian,Chinese": 0.06666666666666667, + "Vietnamese,Filipino,Chinese": 0.07333333333333333, + "Spanish,Indonesian,Filipino": 0.07333333333333333, + "Spanish,Indonesian,Chinese": 0.07333333333333333, + "Spanish,Filipino,Chinese": 0.06, + "Indonesian,Filipino,Chinese": 0.08 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.006666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.013333333333333334, + "Malay,English,Vietnamese,Filipino": 0.02666666666666667, + "Malay,English,Vietnamese,Chinese": 0.02, + "Malay,English,Spanish,Indonesian": 0.006666666666666667, + "Malay,English,Spanish,Filipino": 0.013333333333333334, + "Malay,English,Spanish,Chinese": 0.013333333333333334, + "Malay,English,Indonesian,Filipino": 0.006666666666666667, + "Malay,English,Indonesian,Chinese": 0.006666666666666667, + "Malay,English,Filipino,Chinese": 0.04, + "Malay,Vietnamese,Spanish,Indonesian": 0.013333333333333334, + "Malay,Vietnamese,Spanish,Filipino": 0.02, + "Malay,Vietnamese,Spanish,Chinese": 0.0, + "Malay,Vietnamese,Indonesian,Filipino": 0.02, + "Malay,Vietnamese,Indonesian,Chinese": 0.006666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.02, + "Malay,Spanish,Indonesian,Filipino": 0.006666666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.013333333333333334, + "Malay,Spanish,Filipino,Chinese": 0.013333333333333334, + "Malay,Indonesian,Filipino,Chinese": 0.02, + "English,Vietnamese,Spanish,Indonesian": 0.02666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.02, + "English,Vietnamese,Spanish,Chinese": 0.02666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.02, + "English,Vietnamese,Indonesian,Chinese": 0.006666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.013333333333333334, + "English,Spanish,Indonesian,Filipino": 0.02666666666666667, + "English,Spanish,Indonesian,Chinese": 0.013333333333333334, + "English,Spanish,Filipino,Chinese": 0.013333333333333334, + "English,Indonesian,Filipino,Chinese": 0.006666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.02666666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.02, + "Vietnamese,Spanish,Filipino,Chinese": 0.013333333333333334, + "Vietnamese,Indonesian,Filipino,Chinese": 0.03333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.006666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.0, + "Malay,English,Vietnamese,Spanish,Chinese": 0.0, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.0, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.0, + "Malay,English,Vietnamese,Filipino,Chinese": 0.013333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.0, + "Malay,English,Spanish,Indonesian,Chinese": 0.0, + "Malay,English,Spanish,Filipino,Chinese": 0.006666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.0, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.0, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.0, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.0, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.0, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.006666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.0, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.0, + "English,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.0, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.0, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.0, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.0, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.0, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 + } + }, + "AC3_2": 0.24000626561442223, + "AC3_3": 0.10430809395903279, + "AC3_4": 0.030858048436336494, + "AC3_5": 0.0056422569002718736, + "AC3_6": 0.0, + "AC3_7": 0.0 + }, + "prompt_5": { + "overall_acc": 0.24476190476190476, + "language_acc": { + "Malay": 0.24666666666666667, + "English": 0.26666666666666666, + "Vietnamese": 0.21333333333333335, + "Spanish": 0.24, + "Indonesian": 0.25333333333333335, + "Filipino": 0.24, + "Chinese": 0.25333333333333335 + }, + "consistency_score_2": 0.25746031746031744, + "consistency_score_3": 0.07009523809523809, + "consistency_score_4": 0.020952380952380948, + "consistency_score_5": 0.006349206349206349, + "consistency_score_6": 0.001904761904761905, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.30666666666666664, + "Malay,Vietnamese": 0.23333333333333334, + "Malay,Spanish": 0.22, + "Malay,Indonesian": 0.24666666666666667, + "Malay,Filipino": 0.25333333333333335, + "Malay,Chinese": 0.29333333333333333, + "English,Vietnamese": 0.2866666666666667, + "English,Spanish": 0.2866666666666667, + "English,Indonesian": 0.2866666666666667, + "English,Filipino": 0.24, + "English,Chinese": 0.29333333333333333, + "Vietnamese,Spanish": 0.26666666666666666, + "Vietnamese,Indonesian": 0.19333333333333333, + "Vietnamese,Filipino": 0.24666666666666667, + "Vietnamese,Chinese": 0.20666666666666667, + "Spanish,Indonesian": 0.28, + "Spanish,Filipino": 0.2, + "Spanish,Chinese": 0.30666666666666664, + "Indonesian,Filipino": 0.19333333333333333, + "Indonesian,Chinese": 0.29333333333333333, + "Filipino,Chinese": 0.2733333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.08, + "Malay,English,Spanish": 0.08, + "Malay,English,Indonesian": 0.1, + "Malay,English,Filipino": 0.06666666666666667, + "Malay,English,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Spanish": 0.04, + "Malay,Vietnamese,Indonesian": 0.06, + "Malay,Vietnamese,Filipino": 0.06666666666666667, + "Malay,Vietnamese,Chinese": 0.06, + "Malay,Spanish,Indonesian": 0.06666666666666667, + "Malay,Spanish,Filipino": 0.04, + "Malay,Spanish,Chinese": 0.08, + "Malay,Indonesian,Filipino": 0.04, + "Malay,Indonesian,Chinese": 0.07333333333333333, + "Malay,Filipino,Chinese": 0.06666666666666667, + "English,Vietnamese,Spanish": 0.09333333333333334, + "English,Vietnamese,Indonesian": 0.08, + "English,Vietnamese,Filipino": 0.08666666666666667, + "English,Vietnamese,Chinese": 0.09333333333333334, + "English,Spanish,Indonesian": 0.08666666666666667, + "English,Spanish,Filipino": 0.06, + "English,Spanish,Chinese": 0.12666666666666668, + "English,Indonesian,Filipino": 0.03333333333333333, + "English,Indonesian,Chinese": 0.07333333333333333, + "English,Filipino,Chinese": 0.08666666666666667, + "Vietnamese,Spanish,Indonesian": 0.05333333333333334, + "Vietnamese,Spanish,Filipino": 0.04666666666666667, + "Vietnamese,Spanish,Chinese": 0.06666666666666667, + "Vietnamese,Indonesian,Filipino": 0.04666666666666667, + "Vietnamese,Indonesian,Chinese": 0.04666666666666667, + "Vietnamese,Filipino,Chinese": 0.07333333333333333, + "Spanish,Indonesian,Filipino": 0.03333333333333333, + "Spanish,Indonesian,Chinese": 0.12666666666666668, + "Spanish,Filipino,Chinese": 0.08, + "Indonesian,Filipino,Chinese": 0.05333333333333334 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.013333333333333334, + "Malay,English,Vietnamese,Indonesian": 0.02666666666666667, + "Malay,English,Vietnamese,Filipino": 0.02666666666666667, + "Malay,English,Vietnamese,Chinese": 0.02666666666666667, + "Malay,English,Spanish,Indonesian": 0.03333333333333333, + "Malay,English,Spanish,Filipino": 0.013333333333333334, + "Malay,English,Spanish,Chinese": 0.03333333333333333, + "Malay,English,Indonesian,Filipino": 0.006666666666666667, + "Malay,English,Indonesian,Chinese": 0.03333333333333333, + "Malay,English,Filipino,Chinese": 0.02666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.013333333333333334, + "Malay,Vietnamese,Spanish,Filipino": 0.006666666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.013333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.02, + "Malay,Vietnamese,Indonesian,Chinese": 0.013333333333333334, + "Malay,Vietnamese,Filipino,Chinese": 0.02, + "Malay,Spanish,Indonesian,Filipino": 0.0, + "Malay,Spanish,Indonesian,Chinese": 0.03333333333333333, + "Malay,Spanish,Filipino,Chinese": 0.02666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.006666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.02666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.013333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.04, + "English,Vietnamese,Indonesian,Filipino": 0.02, + "English,Vietnamese,Indonesian,Chinese": 0.02666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.04666666666666667, + "English,Spanish,Indonesian,Filipino": 0.006666666666666667, + "English,Spanish,Indonesian,Chinese": 0.04666666666666667, + "English,Spanish,Filipino,Chinese": 0.02666666666666667, + "English,Indonesian,Filipino,Chinese": 0.013333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.02, + "Vietnamese,Spanish,Filipino,Chinese": 0.02, + "Vietnamese,Indonesian,Filipino,Chinese": 0.013333333333333334, + "Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.006666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.0, + "Malay,English,Vietnamese,Spanish,Chinese": 0.006666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.006666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.013333333333333334, + "Malay,English,Vietnamese,Filipino,Chinese": 0.013333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.0, + "Malay,English,Spanish,Indonesian,Chinese": 0.02, + "Malay,English,Spanish,Filipino,Chinese": 0.006666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.006666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.0, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.006666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.0, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.013333333333333334, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.0, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.0, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.006666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.0, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.0, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.0 + } + }, + "AC3_2": 0.2509505748702655, + "AC3_3": 0.10898049718544896, + "AC3_4": 0.038600443747207024, + "AC3_5": 0.012377340311728605, + "AC3_6": 0.003780106635717019, + "AC3_7": 0.0 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.2637987012987013, + "language_acc": { + "English": 0.3181818181818182, + "Vietnamese": 0.26704545454545453, + "Chinese": 0.22727272727272727, + "Indonesian": 0.3125, + "Filipino": 0.21022727272727273, + "Spanish": 0.2727272727272727, + "Malay": 0.23863636363636365 + }, + "consistency_score_2": 0.2521645021645022, + "consistency_score_3": 0.062012987012987, + "consistency_score_4": 0.014123376623376616, + "consistency_score_5": 0.002976190476190476, + "consistency_score_6": 0.0008116883116883117, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.19886363636363635, + "English,Chinese": 0.21022727272727273, + "English,Indonesian": 0.2784090909090909, + "English,Filipino": 0.19318181818181818, + "English,Spanish": 0.24431818181818182, + "English,Malay": 0.2727272727272727, + "Vietnamese,Chinese": 0.2159090909090909, + "Vietnamese,Indonesian": 0.2840909090909091, + "Vietnamese,Filipino": 0.2840909090909091, + "Vietnamese,Spanish": 0.25, + "Vietnamese,Malay": 0.19886363636363635, + "Chinese,Indonesian": 0.19318181818181818, + "Chinese,Filipino": 0.30113636363636365, + "Chinese,Spanish": 0.2897727272727273, + "Chinese,Malay": 0.2897727272727273, + "Indonesian,Filipino": 0.2727272727272727, + "Indonesian,Spanish": 0.19886363636363635, + "Indonesian,Malay": 0.2897727272727273, + "Filipino,Spanish": 0.2840909090909091, + "Filipino,Malay": 0.2727272727272727, + "Spanish,Malay": 0.2727272727272727 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.03409090909090909, + "English,Vietnamese,Indonesian": 0.0625, + "English,Vietnamese,Filipino": 0.045454545454545456, + "English,Vietnamese,Spanish": 0.0625, + "English,Vietnamese,Malay": 0.045454545454545456, + "English,Chinese,Indonesian": 0.022727272727272728, + "English,Chinese,Filipino": 0.022727272727272728, + "English,Chinese,Spanish": 0.05113636363636364, + "English,Chinese,Malay": 0.07386363636363637, + "English,Indonesian,Filipino": 0.056818181818181816, + "English,Indonesian,Spanish": 0.0625, + "English,Indonesian,Malay": 0.09659090909090909, + "English,Filipino,Spanish": 0.028409090909090908, + "English,Filipino,Malay": 0.07386363636363637, + "English,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian": 0.045454545454545456, + "Vietnamese,Chinese,Filipino": 0.09090909090909091, + "Vietnamese,Chinese,Spanish": 0.056818181818181816, + "Vietnamese,Chinese,Malay": 0.0625, + "Vietnamese,Indonesian,Filipino": 0.07954545454545454, + "Vietnamese,Indonesian,Spanish": 0.06818181818181818, + "Vietnamese,Indonesian,Malay": 0.045454545454545456, + "Vietnamese,Filipino,Spanish": 0.08522727272727272, + "Vietnamese,Filipino,Malay": 0.05113636363636364, + "Vietnamese,Spanish,Malay": 0.045454545454545456, + "Chinese,Indonesian,Filipino": 0.07386363636363637, + "Chinese,Indonesian,Spanish": 0.028409090909090908, + "Chinese,Indonesian,Malay": 0.06818181818181818, + "Chinese,Filipino,Spanish": 0.09659090909090909, + "Chinese,Filipino,Malay": 0.09090909090909091, + "Chinese,Spanish,Malay": 0.08522727272727272, + "Indonesian,Filipino,Spanish": 0.07954545454545454, + "Indonesian,Filipino,Malay": 0.09659090909090909, + "Indonesian,Spanish,Malay": 0.045454545454545456, + "Filipino,Spanish,Malay": 0.07954545454545454 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.005681818181818182, + "English,Vietnamese,Chinese,Filipino": 0.0, + "English,Vietnamese,Chinese,Spanish": 0.005681818181818182, + "English,Vietnamese,Chinese,Malay": 0.011363636363636364, + "English,Vietnamese,Indonesian,Filipino": 0.011363636363636364, + "English,Vietnamese,Indonesian,Spanish": 0.017045454545454544, + "English,Vietnamese,Indonesian,Malay": 0.017045454545454544, + "English,Vietnamese,Filipino,Spanish": 0.005681818181818182, + "English,Vietnamese,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Spanish,Malay": 0.017045454545454544, + "English,Chinese,Indonesian,Filipino": 0.0, + "English,Chinese,Indonesian,Spanish": 0.0, + "English,Chinese,Indonesian,Malay": 0.011363636363636364, + "English,Chinese,Filipino,Spanish": 0.005681818181818182, + "English,Chinese,Filipino,Malay": 0.017045454545454544, + "English,Chinese,Spanish,Malay": 0.011363636363636364, + "English,Indonesian,Filipino,Spanish": 0.005681818181818182, + "English,Indonesian,Filipino,Malay": 0.022727272727272728, + "English,Indonesian,Spanish,Malay": 0.011363636363636364, + "English,Filipino,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Indonesian,Filipino": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Spanish": 0.011363636363636364, + "Vietnamese,Chinese,Indonesian,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Filipino,Spanish": 0.03409090909090909, + "Vietnamese,Chinese,Filipino,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Indonesian,Filipino,Spanish": 0.022727272727272728, + "Vietnamese,Indonesian,Filipino,Malay": 0.022727272727272728, + "Vietnamese,Indonesian,Spanish,Malay": 0.011363636363636364, + "Vietnamese,Filipino,Spanish,Malay": 0.022727272727272728, + "Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, + "Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, + "Chinese,Filipino,Spanish,Malay": 0.028409090909090908, + "Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.0, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.0, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.0, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.0, + "English,Vietnamese,Chinese,Filipino,Malay": 0.0, + "English,Vietnamese,Chinese,Spanish,Malay": 0.0, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.0, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Filipino,Spanish,Malay": 0.005681818181818182, + "English,Chinese,Indonesian,Filipino,Spanish": 0.0, + "English,Chinese,Indonesian,Filipino,Malay": 0.0, + "English,Chinese,Indonesian,Spanish,Malay": 0.0, + "English,Chinese,Filipino,Spanish,Malay": 0.0, + "English,Indonesian,Filipino,Spanish,Malay": 0.0, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.011363636363636364, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.011363636363636364, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + } + }, + "AC3_2": 0.25785043478000924, + "AC3_3": 0.10041963514217701, + "AC3_4": 0.026811316594309718, + "AC3_5": 0.005885975077480702, + "AC3_6": 0.001618396939871215, + "AC3_7": 0.0 + }, + "prompt_2": { + "overall_acc": 0.2386363636363636, + "language_acc": { + "English": 0.23863636363636365, + "Vietnamese": 0.23295454545454544, + "Chinese": 0.23295454545454544, + "Indonesian": 0.24431818181818182, + "Filipino": 0.25, + "Spanish": 0.2159090909090909, + "Malay": 0.2556818181818182 + }, + "consistency_score_2": 0.25162337662337664, + "consistency_score_3": 0.06655844155844157, + "consistency_score_4": 0.018506493506493502, + "consistency_score_5": 0.004599567099567099, + "consistency_score_6": 0.0008116883116883117, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.23863636363636365, + "English,Chinese": 0.30113636363636365, + "English,Indonesian": 0.2556818181818182, + "English,Filipino": 0.2556818181818182, + "English,Spanish": 0.2727272727272727, + "English,Malay": 0.22727272727272727, + "Vietnamese,Chinese": 0.26704545454545453, + "Vietnamese,Indonesian": 0.24431818181818182, + "Vietnamese,Filipino": 0.23295454545454544, + "Vietnamese,Spanish": 0.23295454545454544, + "Vietnamese,Malay": 0.2215909090909091, + "Chinese,Indonesian": 0.3068181818181818, + "Chinese,Filipino": 0.26136363636363635, + "Chinese,Spanish": 0.26136363636363635, + "Chinese,Malay": 0.2556818181818182, + "Indonesian,Filipino": 0.29545454545454547, + "Indonesian,Spanish": 0.24431818181818182, + "Indonesian,Malay": 0.29545454545454547, + "Filipino,Spanish": 0.18181818181818182, + "Filipino,Malay": 0.21022727272727273, + "Spanish,Malay": 0.2215909090909091 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.09090909090909091, + "English,Vietnamese,Indonesian": 0.056818181818181816, + "English,Vietnamese,Filipino": 0.0625, + "English,Vietnamese,Spanish": 0.0625, + "English,Vietnamese,Malay": 0.03977272727272727, + "English,Chinese,Indonesian": 0.08522727272727272, + "English,Chinese,Filipino": 0.09090909090909091, + "English,Chinese,Spanish": 0.09090909090909091, + "English,Chinese,Malay": 0.07954545454545454, + "English,Indonesian,Filipino": 0.08522727272727272, + "English,Indonesian,Spanish": 0.0625, + "English,Indonesian,Malay": 0.0625, + "English,Filipino,Spanish": 0.05113636363636364, + "English,Filipino,Malay": 0.056818181818181816, + "English,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian": 0.07954545454545454, + "Vietnamese,Chinese,Filipino": 0.07386363636363637, + "Vietnamese,Chinese,Spanish": 0.07386363636363637, + "Vietnamese,Chinese,Malay": 0.05113636363636364, + "Vietnamese,Indonesian,Filipino": 0.06818181818181818, + "Vietnamese,Indonesian,Spanish": 0.0625, + "Vietnamese,Indonesian,Malay": 0.07954545454545454, + "Vietnamese,Filipino,Spanish": 0.028409090909090908, + "Vietnamese,Filipino,Malay": 0.045454545454545456, + "Vietnamese,Spanish,Malay": 0.03977272727272727, + "Chinese,Indonesian,Filipino": 0.10227272727272728, + "Chinese,Indonesian,Spanish": 0.07954545454545454, + "Chinese,Indonesian,Malay": 0.11363636363636363, + "Chinese,Filipino,Spanish": 0.03977272727272727, + "Chinese,Filipino,Malay": 0.0625, + "Chinese,Spanish,Malay": 0.0625, + "Indonesian,Filipino,Spanish": 0.06818181818181818, + "Indonesian,Filipino,Malay": 0.11363636363636363, + "Indonesian,Spanish,Malay": 0.05113636363636364, + "Filipino,Spanish,Malay": 0.03409090909090909 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.028409090909090908, + "English,Vietnamese,Chinese,Filipino": 0.045454545454545456, + "English,Vietnamese,Chinese,Spanish": 0.028409090909090908, + "English,Vietnamese,Chinese,Malay": 0.011363636363636364, + "English,Vietnamese,Indonesian,Filipino": 0.022727272727272728, + "English,Vietnamese,Indonesian,Spanish": 0.017045454545454544, + "English,Vietnamese,Indonesian,Malay": 0.005681818181818182, + "English,Vietnamese,Filipino,Spanish": 0.011363636363636364, + "English,Vietnamese,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Spanish,Malay": 0.005681818181818182, + "English,Chinese,Indonesian,Filipino": 0.05113636363636364, + "English,Chinese,Indonesian,Spanish": 0.017045454545454544, + "English,Chinese,Indonesian,Malay": 0.03409090909090909, + "English,Chinese,Filipino,Spanish": 0.005681818181818182, + "English,Chinese,Filipino,Malay": 0.028409090909090908, + "English,Chinese,Spanish,Malay": 0.011363636363636364, + "English,Indonesian,Filipino,Spanish": 0.022727272727272728, + "English,Indonesian,Filipino,Malay": 0.03409090909090909, + "English,Indonesian,Spanish,Malay": 0.0, + "English,Filipino,Spanish,Malay": 0.0, + "Vietnamese,Chinese,Indonesian,Filipino": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Spanish": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Filipino,Spanish": 0.005681818181818182, + "Vietnamese,Chinese,Filipino,Malay": 0.011363636363636364, + "Vietnamese,Chinese,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "Vietnamese,Indonesian,Filipino,Malay": 0.028409090909090908, + "Vietnamese,Indonesian,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Filipino,Spanish,Malay": 0.0, + "Chinese,Indonesian,Filipino,Spanish": 0.022727272727272728, + "Chinese,Indonesian,Filipino,Malay": 0.03977272727272727, + "Chinese,Indonesian,Spanish,Malay": 0.022727272727272728, + "Chinese,Filipino,Spanish,Malay": 0.011363636363636364, + "Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.0, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.0, + "English,Vietnamese,Chinese,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Spanish,Malay": 0.0, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.005681818181818182, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.0, + "English,Vietnamese,Filipino,Spanish,Malay": 0.0, + "English,Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, + "English,Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, + "English,Chinese,Indonesian,Spanish,Malay": 0.0, + "English,Chinese,Filipino,Spanish,Malay": 0.0, + "English,Indonesian,Filipino,Spanish,Malay": 0.0, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + } + }, + "AC3_2": 0.24495785666285866, + "AC3_3": 0.10408607346686233, + "AC3_4": 0.03434917354036099, + "AC3_5": 0.00902517948865479, + "AC3_6": 0.001617873651096289, + "AC3_7": 0.0 + }, + "prompt_3": { + "overall_acc": 0.25405844155844154, + "language_acc": { + "English": 0.25, + "Vietnamese": 0.3181818181818182, + "Chinese": 0.22727272727272727, + "Indonesian": 0.29545454545454547, + "Filipino": 0.2556818181818182, + "Spanish": 0.18181818181818182, + "Malay": 0.25 + }, + "consistency_score_2": 0.2527056277056277, + "consistency_score_3": 0.06493506493506493, + "consistency_score_4": 0.017045454545454537, + "consistency_score_5": 0.00514069264069264, + "consistency_score_6": 0.0016233766233766235, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.2840909090909091, + "English,Chinese": 0.2784090909090909, + "English,Indonesian": 0.22727272727272727, + "English,Filipino": 0.23863636363636365, + "English,Spanish": 0.23863636363636365, + "English,Malay": 0.2897727272727273, + "Vietnamese,Chinese": 0.23863636363636365, + "Vietnamese,Indonesian": 0.2215909090909091, + "Vietnamese,Filipino": 0.2784090909090909, + "Vietnamese,Spanish": 0.20454545454545456, + "Vietnamese,Malay": 0.2215909090909091, + "Chinese,Indonesian": 0.22727272727272727, + "Chinese,Filipino": 0.3125, + "Chinese,Spanish": 0.2897727272727273, + "Chinese,Malay": 0.2215909090909091, + "Indonesian,Filipino": 0.1590909090909091, + "Indonesian,Spanish": 0.3181818181818182, + "Indonesian,Malay": 0.2556818181818182, + "Filipino,Spanish": 0.24431818181818182, + "Filipino,Malay": 0.30113636363636365, + "Spanish,Malay": 0.2556818181818182 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.06818181818181818, + "English,Vietnamese,Indonesian": 0.05113636363636364, + "English,Vietnamese,Filipino": 0.08522727272727272, + "English,Vietnamese,Spanish": 0.045454545454545456, + "English,Vietnamese,Malay": 0.07954545454545454, + "English,Chinese,Indonesian": 0.05113636363636364, + "English,Chinese,Filipino": 0.07386363636363637, + "English,Chinese,Spanish": 0.08522727272727272, + "English,Chinese,Malay": 0.07386363636363637, + "English,Indonesian,Filipino": 0.028409090909090908, + "English,Indonesian,Spanish": 0.06818181818181818, + "English,Indonesian,Malay": 0.045454545454545456, + "English,Filipino,Spanish": 0.045454545454545456, + "English,Filipino,Malay": 0.09659090909090909, + "English,Spanish,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian": 0.05113636363636364, + "Vietnamese,Chinese,Filipino": 0.07386363636363637, + "Vietnamese,Chinese,Spanish": 0.0625, + "Vietnamese,Chinese,Malay": 0.05113636363636364, + "Vietnamese,Indonesian,Filipino": 0.028409090909090908, + "Vietnamese,Indonesian,Spanish": 0.06818181818181818, + "Vietnamese,Indonesian,Malay": 0.03409090909090909, + "Vietnamese,Filipino,Spanish": 0.045454545454545456, + "Vietnamese,Filipino,Malay": 0.09090909090909091, + "Vietnamese,Spanish,Malay": 0.05113636363636364, + "Chinese,Indonesian,Filipino": 0.056818181818181816, + "Chinese,Indonesian,Spanish": 0.07954545454545454, + "Chinese,Indonesian,Malay": 0.06818181818181818, + "Chinese,Filipino,Spanish": 0.11363636363636363, + "Chinese,Filipino,Malay": 0.08522727272727272, + "Chinese,Spanish,Malay": 0.06818181818181818, + "Indonesian,Filipino,Spanish": 0.07954545454545454, + "Indonesian,Filipino,Malay": 0.05113636363636364, + "Indonesian,Spanish,Malay": 0.07386363636363637, + "Filipino,Spanish,Malay": 0.0625 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.011363636363636364, + "English,Vietnamese,Chinese,Filipino": 0.017045454545454544, + "English,Vietnamese,Chinese,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Malay": 0.017045454545454544, + "English,Vietnamese,Indonesian,Filipino": 0.0, + "English,Vietnamese,Indonesian,Spanish": 0.005681818181818182, + "English,Vietnamese,Indonesian,Malay": 0.011363636363636364, + "English,Vietnamese,Filipino,Spanish": 0.011363636363636364, + "English,Vietnamese,Filipino,Malay": 0.03409090909090909, + "English,Vietnamese,Spanish,Malay": 0.022727272727272728, + "English,Chinese,Indonesian,Filipino": 0.005681818181818182, + "English,Chinese,Indonesian,Spanish": 0.011363636363636364, + "English,Chinese,Indonesian,Malay": 0.011363636363636364, + "English,Chinese,Filipino,Spanish": 0.022727272727272728, + "English,Chinese,Filipino,Malay": 0.03409090909090909, + "English,Chinese,Spanish,Malay": 0.017045454545454544, + "English,Indonesian,Filipino,Spanish": 0.011363636363636364, + "English,Indonesian,Filipino,Malay": 0.0, + "English,Indonesian,Spanish,Malay": 0.017045454545454544, + "English,Filipino,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Filipino": 0.005681818181818182, + "Vietnamese,Chinese,Indonesian,Spanish": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Filipino,Spanish": 0.017045454545454544, + "Vietnamese,Chinese,Filipino,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "Vietnamese,Indonesian,Filipino,Malay": 0.011363636363636364, + "Vietnamese,Indonesian,Spanish,Malay": 0.011363636363636364, + "Vietnamese,Filipino,Spanish,Malay": 0.017045454545454544, + "Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, + "Chinese,Indonesian,Spanish,Malay": 0.022727272727272728, + "Chinese,Filipino,Spanish,Malay": 0.03409090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.0, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.0, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.0, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.005681818181818182, + "English,Vietnamese,Chinese,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.0, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.0, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, + "English,Chinese,Indonesian,Filipino,Malay": 0.0, + "English,Chinese,Indonesian,Spanish,Malay": 0.0, + "English,Chinese,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Indonesian,Filipino,Spanish,Malay": 0.0, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.011363636363636364, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + } + }, + "AC3_2": 0.25338022890417483, + "AC3_3": 0.10343346217966323, + "AC3_4": 0.03194746868718147, + "AC3_5": 0.010077474713461527, + "AC3_6": 0.003226138939162876, + "AC3_7": 0.0 + }, + "prompt_4": { + "overall_acc": 0.2564935064935065, + "language_acc": { + "English": 0.2784090909090909, + "Vietnamese": 0.24431818181818182, + "Chinese": 0.21022727272727273, + "Indonesian": 0.2897727272727273, + "Filipino": 0.23295454545454544, + "Spanish": 0.26704545454545453, + "Malay": 0.2727272727272727 + }, + "consistency_score_2": 0.24134199134199136, + "consistency_score_3": 0.057142857142857134, + "consistency_score_4": 0.013474025974025966, + "consistency_score_5": 0.003246753246753247, + "consistency_score_6": 0.0, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.24431818181818182, + "English,Chinese": 0.29545454545454547, + "English,Indonesian": 0.16477272727272727, + "English,Filipino": 0.26136363636363635, + "English,Spanish": 0.2556818181818182, + "English,Malay": 0.23295454545454544, + "Vietnamese,Chinese": 0.26136363636363635, + "Vietnamese,Indonesian": 0.2556818181818182, + "Vietnamese,Filipino": 0.23863636363636365, + "Vietnamese,Spanish": 0.1875, + "Vietnamese,Malay": 0.29545454545454547, + "Chinese,Indonesian": 0.25, + "Chinese,Filipino": 0.1875, + "Chinese,Spanish": 0.19318181818181818, + "Chinese,Malay": 0.24431818181818182, + "Indonesian,Filipino": 0.2159090909090909, + "Indonesian,Spanish": 0.30113636363636365, + "Indonesian,Malay": 0.24431818181818182, + "Filipino,Spanish": 0.2840909090909091, + "Filipino,Malay": 0.23295454545454544, + "Spanish,Malay": 0.2215909090909091 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.09090909090909091, + "English,Vietnamese,Indonesian": 0.03409090909090909, + "English,Vietnamese,Filipino": 0.056818181818181816, + "English,Vietnamese,Spanish": 0.05113636363636364, + "English,Vietnamese,Malay": 0.056818181818181816, + "English,Chinese,Indonesian": 0.07386363636363637, + "English,Chinese,Filipino": 0.06818181818181818, + "English,Chinese,Spanish": 0.07386363636363637, + "English,Chinese,Malay": 0.05113636363636364, + "English,Indonesian,Filipino": 0.0625, + "English,Indonesian,Spanish": 0.045454545454545456, + "English,Indonesian,Malay": 0.045454545454545456, + "English,Filipino,Spanish": 0.06818181818181818, + "English,Filipino,Malay": 0.05113636363636364, + "English,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian": 0.056818181818181816, + "Vietnamese,Chinese,Filipino": 0.045454545454545456, + "Vietnamese,Chinese,Spanish": 0.05113636363636364, + "Vietnamese,Chinese,Malay": 0.06818181818181818, + "Vietnamese,Indonesian,Filipino": 0.06818181818181818, + "Vietnamese,Indonesian,Spanish": 0.07386363636363637, + "Vietnamese,Indonesian,Malay": 0.09090909090909091, + "Vietnamese,Filipino,Spanish": 0.03409090909090909, + "Vietnamese,Filipino,Malay": 0.056818181818181816, + "Vietnamese,Spanish,Malay": 0.045454545454545456, + "Chinese,Indonesian,Filipino": 0.056818181818181816, + "Chinese,Indonesian,Spanish": 0.07386363636363637, + "Chinese,Indonesian,Malay": 0.05113636363636364, + "Chinese,Filipino,Spanish": 0.03977272727272727, + "Chinese,Filipino,Malay": 0.05113636363636364, + "Chinese,Spanish,Malay": 0.028409090909090908, + "Indonesian,Filipino,Spanish": 0.056818181818181816, + "Indonesian,Filipino,Malay": 0.03977272727272727, + "Indonesian,Spanish,Malay": 0.0625, + "Filipino,Spanish,Malay": 0.045454545454545456 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.011363636363636364, + "English,Vietnamese,Chinese,Filipino": 0.011363636363636364, + "English,Vietnamese,Chinese,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Malay": 0.017045454545454544, + "English,Vietnamese,Indonesian,Filipino": 0.005681818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.005681818181818182, + "English,Vietnamese,Indonesian,Malay": 0.017045454545454544, + "English,Vietnamese,Filipino,Spanish": 0.011363636363636364, + "English,Vietnamese,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Spanish,Malay": 0.017045454545454544, + "English,Chinese,Indonesian,Filipino": 0.028409090909090908, + "English,Chinese,Indonesian,Spanish": 0.011363636363636364, + "English,Chinese,Indonesian,Malay": 0.017045454545454544, + "English,Chinese,Filipino,Spanish": 0.017045454545454544, + "English,Chinese,Filipino,Malay": 0.011363636363636364, + "English,Chinese,Spanish,Malay": 0.011363636363636364, + "English,Indonesian,Filipino,Spanish": 0.011363636363636364, + "English,Indonesian,Filipino,Malay": 0.011363636363636364, + "English,Indonesian,Spanish,Malay": 0.011363636363636364, + "English,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Spanish": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Filipino,Spanish": 0.017045454545454544, + "Vietnamese,Chinese,Filipino,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Spanish,Malay": 0.011363636363636364, + "Vietnamese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "Vietnamese,Indonesian,Filipino,Malay": 0.011363636363636364, + "Vietnamese,Indonesian,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Filipino,Spanish,Malay": 0.005681818181818182, + "Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, + "Chinese,Filipino,Spanish,Malay": 0.005681818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.0, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.005681818181818182, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.005681818181818182, + "English,Vietnamese,Chinese,Filipino,Malay": 0.0, + "English,Vietnamese,Chinese,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.0, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.0, + "English,Vietnamese,Filipino,Spanish,Malay": 0.005681818181818182, + "English,Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, + "English,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, + "English,Chinese,Indonesian,Spanish,Malay": 0.0, + "English,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, + "English,Indonesian,Filipino,Spanish,Malay": 0.0, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.011363636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + } + }, + "AC3_2": 0.24868718233288087, + "AC3_3": 0.09346347231565996, + "AC3_4": 0.025603080013319637, + "AC3_5": 0.006412337659868914, + "AC3_6": 0.0, + "AC3_7": 0.0 + }, + "prompt_5": { + "overall_acc": 0.24918831168831165, + "language_acc": { + "English": 0.20454545454545456, + "Vietnamese": 0.24431818181818182, + "Chinese": 0.23863636363636365, + "Indonesian": 0.22727272727272727, + "Filipino": 0.2784090909090909, + "Spanish": 0.2897727272727273, + "Malay": 0.26136363636363635 + }, + "consistency_score_2": 0.25027056277056275, + "consistency_score_3": 0.061363636363636356, + "consistency_score_4": 0.014610389610389603, + "consistency_score_5": 0.0024350649350649354, + "consistency_score_6": 0.0, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.20454545454545456, + "English,Chinese": 0.3409090909090909, + "English,Indonesian": 0.23863636363636365, + "English,Filipino": 0.23295454545454544, + "English,Spanish": 0.2556818181818182, + "English,Malay": 0.2556818181818182, + "Vietnamese,Chinese": 0.2840909090909091, + "Vietnamese,Indonesian": 0.24431818181818182, + "Vietnamese,Filipino": 0.21022727272727273, + "Vietnamese,Spanish": 0.20454545454545456, + "Vietnamese,Malay": 0.23863636363636365, + "Chinese,Indonesian": 0.2784090909090909, + "Chinese,Filipino": 0.18181818181818182, + "Chinese,Spanish": 0.22727272727272727, + "Chinese,Malay": 0.26136363636363635, + "Indonesian,Filipino": 0.2215909090909091, + "Indonesian,Spanish": 0.2784090909090909, + "Indonesian,Malay": 0.2897727272727273, + "Filipino,Spanish": 0.24431818181818182, + "Filipino,Malay": 0.3125, + "Spanish,Malay": 0.25 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.07954545454545454, + "English,Vietnamese,Indonesian": 0.03409090909090909, + "English,Vietnamese,Filipino": 0.05113636363636364, + "English,Vietnamese,Spanish": 0.045454545454545456, + "English,Vietnamese,Malay": 0.056818181818181816, + "English,Chinese,Indonesian": 0.10795454545454546, + "English,Chinese,Filipino": 0.05113636363636364, + "English,Chinese,Spanish": 0.09659090909090909, + "English,Chinese,Malay": 0.07954545454545454, + "English,Indonesian,Filipino": 0.045454545454545456, + "English,Indonesian,Spanish": 0.045454545454545456, + "English,Indonesian,Malay": 0.07954545454545454, + "English,Filipino,Spanish": 0.0625, + "English,Filipino,Malay": 0.0625, + "English,Spanish,Malay": 0.0625, + "Vietnamese,Chinese,Indonesian": 0.07386363636363637, + "Vietnamese,Chinese,Filipino": 0.0625, + "Vietnamese,Chinese,Spanish": 0.0625, + "Vietnamese,Chinese,Malay": 0.0625, + "Vietnamese,Indonesian,Filipino": 0.028409090909090908, + "Vietnamese,Indonesian,Spanish": 0.07954545454545454, + "Vietnamese,Indonesian,Malay": 0.056818181818181816, + "Vietnamese,Filipino,Spanish": 0.028409090909090908, + "Vietnamese,Filipino,Malay": 0.056818181818181816, + "Vietnamese,Spanish,Malay": 0.05113636363636364, + "Chinese,Indonesian,Filipino": 0.028409090909090908, + "Chinese,Indonesian,Spanish": 0.06818181818181818, + "Chinese,Indonesian,Malay": 0.06818181818181818, + "Chinese,Filipino,Spanish": 0.03409090909090909, + "Chinese,Filipino,Malay": 0.06818181818181818, + "Chinese,Spanish,Malay": 0.0625, + "Indonesian,Filipino,Spanish": 0.06818181818181818, + "Indonesian,Filipino,Malay": 0.07954545454545454, + "Indonesian,Spanish,Malay": 0.07954545454545454, + "Filipino,Spanish,Malay": 0.06818181818181818 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino": 0.028409090909090908, + "English,Vietnamese,Chinese,Spanish": 0.028409090909090908, + "English,Vietnamese,Chinese,Malay": 0.028409090909090908, + "English,Vietnamese,Indonesian,Filipino": 0.0, + "English,Vietnamese,Indonesian,Spanish": 0.011363636363636364, + "English,Vietnamese,Indonesian,Malay": 0.011363636363636364, + "English,Vietnamese,Filipino,Spanish": 0.011363636363636364, + "English,Vietnamese,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Spanish,Malay": 0.011363636363636364, + "English,Chinese,Indonesian,Filipino": 0.005681818181818182, + "English,Chinese,Indonesian,Spanish": 0.022727272727272728, + "English,Chinese,Indonesian,Malay": 0.022727272727272728, + "English,Chinese,Filipino,Spanish": 0.017045454545454544, + "English,Chinese,Filipino,Malay": 0.005681818181818182, + "English,Chinese,Spanish,Malay": 0.011363636363636364, + "English,Indonesian,Filipino,Spanish": 0.011363636363636364, + "English,Indonesian,Filipino,Malay": 0.017045454545454544, + "English,Indonesian,Spanish,Malay": 0.017045454545454544, + "English,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino": 0.0, + "Vietnamese,Chinese,Indonesian,Spanish": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Malay": 0.011363636363636364, + "Vietnamese,Chinese,Filipino,Spanish": 0.011363636363636364, + "Vietnamese,Chinese,Filipino,Malay": 0.011363636363636364, + "Vietnamese,Chinese,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Indonesian,Filipino,Spanish": 0.005681818181818182, + "Vietnamese,Indonesian,Filipino,Malay": 0.011363636363636364, + "Vietnamese,Indonesian,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Filipino,Spanish,Malay": 0.011363636363636364, + "Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, + "Chinese,Indonesian,Filipino,Malay": 0.017045454545454544, + "Chinese,Indonesian,Spanish,Malay": 0.011363636363636364, + "Chinese,Filipino,Spanish,Malay": 0.017045454545454544, + "Indonesian,Filipino,Spanish,Malay": 0.028409090909090908 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.0, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.005681818181818182, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.011363636363636364, + "English,Vietnamese,Chinese,Filipino,Malay": 0.0, + "English,Vietnamese,Chinese,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.0, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.0, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Filipino,Spanish,Malay": 0.0, + "English,Chinese,Indonesian,Filipino,Spanish": 0.0, + "English,Chinese,Indonesian,Filipino,Malay": 0.0, + "English,Chinese,Indonesian,Spanish,Malay": 0.0, + "English,Chinese,Filipino,Spanish,Malay": 0.0, + "English,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + } + }, + "AC3_2": 0.24972826464305364, + "AC3_3": 0.09847692816293874, + "AC3_4": 0.027602397591934155, + "AC3_5": 0.004822999579147345, + "AC3_6": 0.0, + "AC3_7": 0.0 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2912621359223301 + }, + "prompt_2": { + "accuracy": 0.24271844660194175 + }, + "prompt_3": { + "accuracy": 0.21359223300970873 + }, + "prompt_4": { + "accuracy": 0.24271844660194175 + }, + "prompt_5": { + "accuracy": 0.23300970873786409 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24761904761904763 + }, + "prompt_2": { + "accuracy": 0.22857142857142856 + }, + "prompt_3": { + "accuracy": 0.23809523809523808 + }, + "prompt_4": { + "accuracy": 0.24761904761904763 + }, + "prompt_5": { + "accuracy": 0.20952380952380953 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3364485981308411 + }, + "prompt_2": { + "accuracy": 0.205607476635514 + }, + "prompt_3": { + "accuracy": 0.21495327102803738 + }, + "prompt_4": { + "accuracy": 0.24299065420560748 + }, + "prompt_5": { + "accuracy": 0.2616822429906542 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.26, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.2, + "history": 0.13333333333333333, + "literature": 0.2, + "politics": 0.4, + "culture": 0.5, + "film": 0.4, + "law": 0.1, + "geography": 0.2 + } + }, + "prompt_2": { + "accuracy": 0.31, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.3, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.2, + "culture": 0.5, + "film": 0.3, + "law": 0.3, + "geography": 0.3 + } + }, + "prompt_3": { + "accuracy": 0.3, + "category_acc": { + "brand": 0.1, + "demographics": 0.4, + "biology": 0.6, + "history": 0.26666666666666666, + "literature": 0.3, + "politics": 0.4, + "culture": 0.4, + "film": 0.3, + "law": 0.2, + "geography": 0.1 + } + }, + "prompt_4": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.4, + "history": 0.4666666666666667, + "literature": 0.1, + "politics": 0.3, + "culture": 0.3, + "film": 0.4, + "law": 0.3, + "geography": 0.4 + } + }, + "prompt_5": { + "accuracy": 0.23, + "category_acc": { + "brand": 0.2, + "demographics": 0.0, + "biology": 0.2, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.3, + "culture": 0.3, + "film": 0.2, + "law": 0.2, + "geography": 0.2 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.008508397135511301 + }, + "prompt_2": { + "bleu_score": 0.008508397135511301 + }, + "prompt_3": { + "bleu_score": 0.008508397135511301 + }, + "prompt_4": { + "bleu_score": 0.008508397135511301 + }, + "prompt_5": { + "bleu_score": 0.008508397135511301 + } }, "indommlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.23285933640429934, + "category_acc": { + "History": 0.18875502008032127, + "Geography": 0.19591836734693877, + "Lampungic": 0.2789115646258503, + "Social science": 0.25041736227045075, + "Balinese": 0.2484076433121019, + "Makassarese": 0.25268817204301075, + "Banjarese": 0.2777777777777778, + "Chemistry": 0.1854014598540146, + "Biology": 0.21420118343195266, + "Science": 0.26006191950464397, + "Christian religion": 0.17412935323383086, + "Art": 0.26455906821963393, + "Islam religion": 0.22190611664295876, + "Hindu religion": 0.26666666666666666, + "Madurese": 0.20677966101694914, + "Sport": 0.23648648648648649, + "Indonesian language": 0.2328767123287671, + "Physics": 0.24444444444444444, + "Minangkabau culture": 0.24120603015075376, + "Dayak language": 0.29357798165137616, + "Sociology": 0.21774193548387097, + "Economy": 0.2069672131147541, + "Sundanese": 0.25324114088159033, + "Javanese": 0.24092741935483872, + "Civic education": 0.23891273247496422 + } + }, + "prompt_2": { + "accuracy": 0.23192469457240136, + "category_acc": { + "History": 0.2289156626506024, + "Geography": 0.22448979591836735, + "Lampungic": 0.2925170068027211, + "Social science": 0.3021702838063439, + "Balinese": 0.25902335456475584, + "Makassarese": 0.23655913978494625, + "Banjarese": 0.2708333333333333, + "Chemistry": 0.19124087591240876, + "Biology": 0.2106508875739645, + "Science": 0.22703818369453044, + "Christian religion": 0.2537313432835821, + "Art": 0.2113144758735441, + "Islam religion": 0.2119487908961593, + "Hindu religion": 0.25333333333333335, + "Madurese": 0.23389830508474577, + "Sport": 0.3108108108108108, + "Indonesian language": 0.22789539227895392, + "Physics": 0.26262626262626265, + "Minangkabau culture": 0.21105527638190955, + "Dayak language": 0.2018348623853211, + "Sociology": 0.1935483870967742, + "Economy": 0.19057377049180327, + "Sundanese": 0.2610198789974071, + "Javanese": 0.22580645161290322, + "Civic education": 0.2446351931330472 + } + }, + "prompt_3": { + "accuracy": 0.2318579344415515, + "category_acc": { + "History": 0.19678714859437751, + "Geography": 0.20204081632653062, + "Lampungic": 0.2857142857142857, + "Social science": 0.25041736227045075, + "Balinese": 0.25477707006369427, + "Makassarese": 0.25806451612903225, + "Banjarese": 0.2638888888888889, + "Chemistry": 0.1927007299270073, + "Biology": 0.19171597633136095, + "Science": 0.2528379772961816, + "Christian religion": 0.18407960199004975, + "Art": 0.21797004991680533, + "Islam religion": 0.22475106685633, + "Hindu religion": 0.26666666666666666, + "Madurese": 0.2745762711864407, + "Sport": 0.22297297297297297, + "Indonesian language": 0.24190535491905354, + "Physics": 0.21616161616161617, + "Minangkabau culture": 0.2663316582914573, + "Dayak language": 0.25688073394495414, + "Sociology": 0.22782258064516128, + "Economy": 0.1864754098360656, + "Sundanese": 0.25064822817631804, + "Javanese": 0.23387096774193547, + "Civic education": 0.24034334763948498 + } + }, + "prompt_4": { + "accuracy": 0.2347953801989452, + "category_acc": { + "History": 0.1646586345381526, + "Geography": 0.17755102040816326, + "Lampungic": 0.2789115646258503, + "Social science": 0.2988313856427379, + "Balinese": 0.25902335456475584, + "Makassarese": 0.25268817204301075, + "Banjarese": 0.20833333333333334, + "Chemistry": 0.2029197080291971, + "Biology": 0.19053254437869824, + "Science": 0.2518059855521156, + "Christian religion": 0.23383084577114427, + "Art": 0.23793677204658903, + "Islam religion": 0.2460881934566145, + "Hindu religion": 0.16, + "Madurese": 0.23389830508474577, + "Sport": 0.23648648648648649, + "Indonesian language": 0.25, + "Physics": 0.19595959595959597, + "Minangkabau culture": 0.20100502512562815, + "Dayak language": 0.3211009174311927, + "Sociology": 0.1814516129032258, + "Economy": 0.26024590163934425, + "Sundanese": 0.26534140017286084, + "Javanese": 0.23387096774193547, + "Civic education": 0.23319027181688126 + } + }, + "prompt_5": { + "accuracy": 0.23459509980639562, + "category_acc": { + "History": 0.20481927710843373, + "Geography": 0.19591836734693877, + "Lampungic": 0.2653061224489796, + "Social science": 0.27545909849749584, + "Balinese": 0.2208067940552017, + "Makassarese": 0.25806451612903225, + "Banjarese": 0.2847222222222222, + "Chemistry": 0.19854014598540146, + "Biology": 0.2165680473372781, + "Science": 0.239422084623323, + "Christian religion": 0.19900497512437812, + "Art": 0.27454242928452577, + "Islam religion": 0.24182076813655762, + "Hindu religion": 0.16666666666666666, + "Madurese": 0.29491525423728815, + "Sport": 0.21621621621621623, + "Indonesian language": 0.23567870485678705, + "Physics": 0.21212121212121213, + "Minangkabau culture": 0.24120603015075376, + "Dayak language": 0.22018348623853212, + "Sociology": 0.21169354838709678, + "Economy": 0.1885245901639344, + "Sundanese": 0.2644770959377701, + "Javanese": 0.24495967741935484, + "Civic education": 0.24177396280400573 + } + } }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.008557988557763307 + }, + "prompt_2": { + "bleu_score": 0.008557988557763307 + }, + "prompt_3": { + "bleu_score": 0.008557988557763307 + }, + "prompt_4": { + "bleu_score": 0.008557988557763307 + }, + "prompt_5": { + "bleu_score": 0.008557988557763307 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.008557988557763307 + }, + "prompt_2": { + "bleu_score": 0.008557988557763307 + }, + "prompt_3": { + "bleu_score": 0.008557988557763307 + }, + "prompt_4": { + "bleu_score": 0.008557988557763307 + }, + "prompt_5": { + "bleu_score": 0.008557988557763307 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.008557988557763307 + }, + "prompt_2": { + "bleu_score": 0.008557988557763307 + }, + "prompt_3": { + "bleu_score": 0.008557988557763307 + }, + "prompt_4": { + "bleu_score": 0.008557988557763307 + }, + "prompt_5": { + "bleu_score": 0.008557988557763307 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.008557988557763307 + }, + "prompt_2": { + "bleu_score": 0.008557988557763307 + }, + "prompt_3": { + "bleu_score": 0.008557988557763307 + }, + "prompt_4": { + "bleu_score": 0.008557988557763307 + }, + "prompt_5": { + "bleu_score": 0.008557988557763307 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.22170361726954493 + }, + "prompt_2": { + "accuracy": 0.2543757292882147 + }, + "prompt_3": { + "accuracy": 0.24504084014002334 + }, + "prompt_4": { + "accuracy": 0.2812135355892649 + }, + "prompt_5": { + "accuracy": 0.24037339556592766 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24948158741508758, + "category_acc": { + "high_school_european_history": 0.25609756097560976, + "business_ethics": 0.26262626262626265, + "clinical_knowledge": 0.23106060606060605, + "medical_genetics": 0.25252525252525254, + "high_school_us_history": 0.26108374384236455, + "high_school_physics": 0.24, + "high_school_world_history": 0.2457627118644068, + "virology": 0.17575757575757575, + "high_school_microeconomics": 0.2911392405063291, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.23232323232323232, + "high_school_biology": 0.2459546925566343, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.2277580071174377, + "philosophy": 0.2032258064516129, + "professional_medicine": 0.23985239852398524, + "nutrition": 0.23278688524590163, + "global_facts": 0.24242424242424243, + "machine_learning": 0.15315315315315314, + "security_studies": 0.23770491803278687, + "public_relations": 0.30275229357798167, + "professional_psychology": 0.23240589198036007, + "prehistory": 0.24458204334365324, + "anatomy": 0.30597014925373134, + "human_sexuality": 0.2923076923076923, + "college_medicine": 0.27906976744186046, + "high_school_government_and_politics": 0.28125, + "college_chemistry": 0.31313131313131315, + "logical_fallacies": 0.19753086419753085, + "high_school_geography": 0.233502538071066, + "elementary_mathematics": 0.22281167108753316, + "human_aging": 0.2972972972972973, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.24632352941176472, + "formal_logic": 0.296, + "high_school_statistics": 0.2744186046511628, + "international_law": 0.21666666666666667, + "high_school_mathematics": 0.2527881040892193, + "high_school_computer_science": 0.25252525252525254, + "conceptual_physics": 0.28205128205128205, + "miscellaneous": 0.2710997442455243, + "high_school_chemistry": 0.24257425742574257, + "marketing": 0.2446351931330472, + "professional_law": 0.2720156555772994, + "management": 0.23529411764705882, + "college_physics": 0.24752475247524752, + "jurisprudence": 0.2336448598130841, + "world_religions": 0.22941176470588234, + "sociology": 0.205, + "us_foreign_policy": 0.20202020202020202, + "high_school_macroeconomics": 0.2596401028277635, + "computer_security": 0.18181818181818182, + "moral_scenarios": 0.2371364653243848, + "moral_disputes": 0.25217391304347825, + "electrical_engineering": 0.2222222222222222, + "astronomy": 0.24503311258278146, + "college_biology": 0.2937062937062937 + } + }, + "prompt_2": { + "accuracy": 0.25391490883089024, + "category_acc": { + "high_school_european_history": 0.21341463414634146, + "business_ethics": 0.20202020202020202, + "clinical_knowledge": 0.21212121212121213, + "medical_genetics": 0.31313131313131315, + "high_school_us_history": 0.2561576354679803, + "high_school_physics": 0.3333333333333333, + "high_school_world_history": 0.2754237288135593, + "virology": 0.296969696969697, + "high_school_microeconomics": 0.22784810126582278, + "econometrics": 0.30973451327433627, + "college_computer_science": 0.24242424242424243, + "high_school_biology": 0.29449838187702265, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.298932384341637, + "philosophy": 0.22580645161290322, + "professional_medicine": 0.24354243542435425, + "nutrition": 0.25901639344262295, + "global_facts": 0.23232323232323232, + "machine_learning": 0.25225225225225223, + "security_studies": 0.20491803278688525, + "public_relations": 0.13761467889908258, + "professional_psychology": 0.2569558101472995, + "prehistory": 0.21981424148606812, + "anatomy": 0.26119402985074625, + "human_sexuality": 0.27692307692307694, + "college_medicine": 0.2441860465116279, + "high_school_government_and_politics": 0.2552083333333333, + "college_chemistry": 0.30303030303030304, + "logical_fallacies": 0.2345679012345679, + "high_school_geography": 0.26903553299492383, + "elementary_mathematics": 0.22015915119363394, + "human_aging": 0.2972972972972973, + "college_mathematics": 0.23232323232323232, + "high_school_psychology": 0.26286764705882354, + "formal_logic": 0.24, + "high_school_statistics": 0.21395348837209302, + "international_law": 0.3, + "high_school_mathematics": 0.24535315985130113, + "high_school_computer_science": 0.25252525252525254, + "conceptual_physics": 0.2564102564102564, + "miscellaneous": 0.2531969309462916, + "high_school_chemistry": 0.22772277227722773, + "marketing": 0.27467811158798283, + "professional_law": 0.24983692106979777, + "management": 0.2647058823529412, + "college_physics": 0.2079207920792079, + "jurisprudence": 0.19626168224299065, + "world_religions": 0.27647058823529413, + "sociology": 0.285, + "us_foreign_policy": 0.16161616161616163, + "high_school_macroeconomics": 0.2802056555269923, + "computer_security": 0.24242424242424243, + "moral_scenarios": 0.26733780760626397, + "moral_disputes": 0.2898550724637681, + "electrical_engineering": 0.25, + "astronomy": 0.19205298013245034, + "college_biology": 0.2517482517482518 + } + }, + "prompt_3": { + "accuracy": 0.2468358956024312, + "category_acc": { + "high_school_european_history": 0.23170731707317074, + "business_ethics": 0.23232323232323232, + "clinical_knowledge": 0.2765151515151515, + "medical_genetics": 0.24242424242424243, + "high_school_us_history": 0.27586206896551724, + "high_school_physics": 0.19333333333333333, + "high_school_world_history": 0.2754237288135593, + "virology": 0.21818181818181817, + "high_school_microeconomics": 0.24472573839662448, + "econometrics": 0.20353982300884957, + "college_computer_science": 0.29292929292929293, + "high_school_biology": 0.24919093851132687, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.21708185053380782, + "philosophy": 0.2709677419354839, + "professional_medicine": 0.23616236162361623, + "nutrition": 0.2098360655737705, + "global_facts": 0.21212121212121213, + "machine_learning": 0.2072072072072072, + "security_studies": 0.26229508196721313, + "public_relations": 0.1926605504587156, + "professional_psychology": 0.24877250409165302, + "prehistory": 0.23529411764705882, + "anatomy": 0.21641791044776118, + "human_sexuality": 0.2692307692307692, + "college_medicine": 0.20930232558139536, + "high_school_government_and_politics": 0.2708333333333333, + "college_chemistry": 0.25252525252525254, + "logical_fallacies": 0.25925925925925924, + "high_school_geography": 0.2436548223350254, + "elementary_mathematics": 0.2440318302387268, + "human_aging": 0.22072072072072071, + "college_mathematics": 0.21212121212121213, + "high_school_psychology": 0.28125, + "formal_logic": 0.216, + "high_school_statistics": 0.2651162790697674, + "international_law": 0.2916666666666667, + "high_school_mathematics": 0.2899628252788104, + "high_school_computer_science": 0.2222222222222222, + "conceptual_physics": 0.2777777777777778, + "miscellaneous": 0.23785166240409208, + "high_school_chemistry": 0.25742574257425743, + "marketing": 0.2875536480686695, + "professional_law": 0.23939986953685582, + "management": 0.19607843137254902, + "college_physics": 0.26732673267326734, + "jurisprudence": 0.205607476635514, + "world_religions": 0.21764705882352942, + "sociology": 0.245, + "us_foreign_policy": 0.26262626262626265, + "high_school_macroeconomics": 0.20565552699228792, + "computer_security": 0.3333333333333333, + "moral_scenarios": 0.2684563758389262, + "moral_disputes": 0.2492753623188406, + "electrical_engineering": 0.2638888888888889, + "astronomy": 0.271523178807947, + "college_biology": 0.22377622377622378 + } + }, + "prompt_4": { + "accuracy": 0.24733643189131213, + "category_acc": { + "high_school_european_history": 0.24390243902439024, + "business_ethics": 0.25252525252525254, + "clinical_knowledge": 0.23106060606060605, + "medical_genetics": 0.2828282828282828, + "high_school_us_history": 0.1921182266009852, + "high_school_physics": 0.22, + "high_school_world_history": 0.21610169491525424, + "virology": 0.23636363636363636, + "high_school_microeconomics": 0.2320675105485232, + "econometrics": 0.30973451327433627, + "college_computer_science": 0.23232323232323232, + "high_school_biology": 0.2815533980582524, + "abstract_algebra": 0.1414141414141414, + "professional_accounting": 0.2491103202846975, + "philosophy": 0.23548387096774193, + "professional_medicine": 0.2915129151291513, + "nutrition": 0.24262295081967214, + "global_facts": 0.25252525252525254, + "machine_learning": 0.22522522522522523, + "security_studies": 0.2336065573770492, + "public_relations": 0.23853211009174313, + "professional_psychology": 0.24549918166939444, + "prehistory": 0.2631578947368421, + "anatomy": 0.2537313432835821, + "human_sexuality": 0.2076923076923077, + "college_medicine": 0.23255813953488372, + "high_school_government_and_politics": 0.21875, + "college_chemistry": 0.23232323232323232, + "logical_fallacies": 0.25925925925925924, + "high_school_geography": 0.25380710659898476, + "elementary_mathematics": 0.21750663129973474, + "human_aging": 0.25225225225225223, + "college_mathematics": 0.31313131313131315, + "high_school_psychology": 0.20220588235294118, + "formal_logic": 0.264, + "high_school_statistics": 0.2930232558139535, + "international_law": 0.30833333333333335, + "high_school_mathematics": 0.27137546468401486, + "high_school_computer_science": 0.25252525252525254, + "conceptual_physics": 0.28205128205128205, + "miscellaneous": 0.2340153452685422, + "high_school_chemistry": 0.2376237623762376, + "marketing": 0.24892703862660945, + "professional_law": 0.2583170254403131, + "management": 0.28431372549019607, + "college_physics": 0.2871287128712871, + "jurisprudence": 0.29906542056074764, + "world_religions": 0.24705882352941178, + "sociology": 0.21, + "us_foreign_policy": 0.31313131313131315, + "high_school_macroeconomics": 0.3110539845758355, + "computer_security": 0.15151515151515152, + "moral_scenarios": 0.22371364653243847, + "moral_disputes": 0.2753623188405797, + "electrical_engineering": 0.2847222222222222, + "astronomy": 0.24503311258278146, + "college_biology": 0.22377622377622378 + } + }, + "prompt_5": { + "accuracy": 0.2493385770468359, + "category_acc": { + "high_school_european_history": 0.24390243902439024, + "business_ethics": 0.31313131313131315, + "clinical_knowledge": 0.20833333333333334, + "medical_genetics": 0.30303030303030304, + "high_school_us_history": 0.2561576354679803, + "high_school_physics": 0.30666666666666664, + "high_school_world_history": 0.19915254237288135, + "virology": 0.23030303030303031, + "high_school_microeconomics": 0.18565400843881857, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.2222222222222222, + "high_school_biology": 0.2750809061488673, + "abstract_algebra": 0.20202020202020202, + "professional_accounting": 0.31316725978647686, + "philosophy": 0.24193548387096775, + "professional_medicine": 0.25830258302583026, + "nutrition": 0.2786885245901639, + "global_facts": 0.2828282828282828, + "machine_learning": 0.1891891891891892, + "security_studies": 0.2786885245901639, + "public_relations": 0.22018348623853212, + "professional_psychology": 0.265139116202946, + "prehistory": 0.2724458204334365, + "anatomy": 0.26865671641791045, + "human_sexuality": 0.25384615384615383, + "college_medicine": 0.3023255813953488, + "high_school_government_and_politics": 0.2708333333333333, + "college_chemistry": 0.31313131313131315, + "logical_fallacies": 0.1728395061728395, + "high_school_geography": 0.2639593908629442, + "elementary_mathematics": 0.23607427055702918, + "human_aging": 0.25675675675675674, + "college_mathematics": 0.26262626262626265, + "high_school_psychology": 0.2536764705882353, + "formal_logic": 0.264, + "high_school_statistics": 0.21395348837209302, + "international_law": 0.25, + "high_school_mathematics": 0.20817843866171004, + "high_school_computer_science": 0.23232323232323232, + "conceptual_physics": 0.26495726495726496, + "miscellaneous": 0.25191815856777494, + "high_school_chemistry": 0.1782178217821782, + "marketing": 0.27896995708154504, + "professional_law": 0.2622309197651663, + "management": 0.29411764705882354, + "college_physics": 0.27722772277227725, + "jurisprudence": 0.2336448598130841, + "world_religions": 0.2411764705882353, + "sociology": 0.225, + "us_foreign_policy": 0.2828282828282828, + "high_school_macroeconomics": 0.2159383033419023, + "computer_security": 0.2727272727272727, + "moral_scenarios": 0.22483221476510068, + "moral_disputes": 0.2492753623188406, + "electrical_engineering": 0.25, + "astronomy": 0.2119205298013245, + "college_biology": 0.23776223776223776 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.26151560178306094 + }, + "prompt_2": { + "accuracy": 0.2161961367013373 + }, + "prompt_3": { + "accuracy": 0.26820208023774145 + }, + "prompt_4": { + "accuracy": 0.24145616641901932 + }, + "prompt_5": { + "accuracy": 0.25037147102526003 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24408468244084683, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.23809523809523808, + "college_physics": 0.125, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.2413793103448276, + "high_school_mathematics": 0.13043478260869565, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.4230769230769231, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.12, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.21666666666666667, + "business_administration": 0.34210526315789475, + "marxism": 0.20833333333333334, + "mao_zedong_thought": 0.27586206896551724, + "education_science": 0.11764705882352941, + "teacher_qualification": 0.16326530612244897, + "high_school_politics": 0.20833333333333334, + "high_school_geography": 0.16666666666666666, + "middle_school_politics": 0.23076923076923078, + "middle_school_geography": 0.17647058823529413, + "modern_chinese_history": 0.17857142857142858, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.3333333333333333, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.21052631578947367, + "professional_tour_guide": 0.17647058823529413, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.12, + "middle_school_history": 0.2222222222222222, + "civil_servant": 0.19230769230769232, + "sports_science": 0.20833333333333334, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.18518518518518517, + "urban_and_rural_planner": 0.23529411764705882, + "accountant": 0.2777777777777778, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.1388888888888889, + "tax_accountant": 0.37037037037037035, + "physician": 0.3148148148148148 + } + }, + "prompt_2": { + "accuracy": 0.25280199252801994, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.20833333333333334, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.30952380952380953, + "college_physics": 0.25, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.08695652173913043, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.3448275862068966, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.4230769230769231, + "middle_school_physics": 0.125, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.26666666666666666, + "business_administration": 0.2631578947368421, + "marxism": 0.25, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.17647058823529413, + "teacher_qualification": 0.4489795918367347, + "high_school_politics": 0.25, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.11764705882352941, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.25925925925925924, + "law": 0.1724137931034483, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.39473684210526316, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.25, + "high_school_chinese": 0.125, + "high_school_history": 0.12, + "middle_school_history": 0.14814814814814814, + "civil_servant": 0.15384615384615385, + "sports_science": 0.3333333333333333, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.2549019607843137, + "accountant": 0.12962962962962962, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.18518518518518517, + "physician": 0.2037037037037037 + } + }, + "prompt_3": { + "accuracy": 0.24968866749688667, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.16666666666666666, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.35714285714285715, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.19047619047619047, + "metrology_engineer": 0.10344827586206896, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.125, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.041666666666666664, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.25, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.18333333333333332, + "business_administration": 0.34210526315789475, + "marxism": 0.08333333333333333, + "mao_zedong_thought": 0.20689655172413793, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.30612244897959184, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.25, + "middle_school_politics": 0.23076923076923078, + "middle_school_geography": 0.17647058823529413, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.2222222222222222, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.25, + "art_studies": 0.2894736842105263, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.10714285714285714, + "high_school_chinese": 0.25, + "high_school_history": 0.24, + "middle_school_history": 0.18518518518518517, + "civil_servant": 0.19230769230769232, + "sports_science": 0.2916666666666667, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.25, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.3137254901960784, + "accountant": 0.35185185185185186, + "fire_engineer": 0.3888888888888889, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.3148148148148148, + "physician": 0.25925925925925924 + } + }, + "prompt_4": { + "accuracy": 0.24968866749688667, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.2857142857142857, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.1724137931034483, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.125, + "high_school_chemistry": 0.25, + "high_school_biology": 0.08333333333333333, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.24, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.2833333333333333, + "business_administration": 0.15789473684210525, + "marxism": 0.16666666666666666, + "mao_zedong_thought": 0.27586206896551724, + "education_science": 0.14705882352941177, + "teacher_qualification": 0.3469387755102041, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.16666666666666666, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.10714285714285714, + "ideological_and_moral_cultivation": 0.25, + "logic": 0.2222222222222222, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.14285714285714285, + "art_studies": 0.2631578947368421, + "professional_tour_guide": 0.14705882352941177, + "legal_professional": 0.25, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.24, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.21153846153846154, + "sports_science": 0.3333333333333333, + "plant_protection": 0.18518518518518517, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.19607843137254902, + "accountant": 0.2037037037037037, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.2777777777777778, + "physician": 0.25925925925925924 + } + }, + "prompt_5": { + "accuracy": 0.2546699875466999, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.21428571428571427, + "college_physics": 0.25, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.391304347826087, + "discrete_mathematics": 0.42857142857142855, + "electrical_engineer": 0.42857142857142855, + "metrology_engineer": 0.3103448275862069, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.25, + "high_school_chemistry": 0.25, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.19230769230769232, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.10714285714285714, + "college_economics": 0.2, + "business_administration": 0.3157894736842105, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.2413793103448276, + "education_science": 0.35294117647058826, + "teacher_qualification": 0.22448979591836735, + "high_school_politics": 0.20833333333333334, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.23076923076923078, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.14814814814814814, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.10714285714285714, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.20588235294117646, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.25, + "high_school_history": 0.24, + "middle_school_history": 0.14814814814814814, + "civil_servant": 0.25, + "sports_science": 0.2916666666666667, + "plant_protection": 0.1111111111111111, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.14814814814814814, + "urban_and_rural_planner": 0.21568627450980393, + "accountant": 0.25925925925925924, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.2962962962962963, + "physician": 0.24074074074074073 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.27598566308243727 + }, + "prompt_2": { + "accuracy": 0.25806451612903225 + }, + "prompt_3": { + "accuracy": 0.2616487455197133 + }, + "prompt_4": { + "accuracy": 0.25089605734767023 + }, + "prompt_5": { + "accuracy": 0.23297491039426524 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.253151441892592, + "category_acc": { + "agronomy": 0.2958579881656805, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.27439024390243905, + "arts": 0.25, + "astronomy": 0.23030303030303031, + "business_ethics": 0.2631578947368421, + "chinese_civil_service_exam": 0.2375, + "chinese_driving_rule": 0.20610687022900764, + "chinese_food_culture": 0.22058823529411764, + "chinese_foreign_policy": 0.21495327102803738, + "chinese_history": 0.22291021671826625, + "chinese_literature": 0.28921568627450983, + "chinese_teacher_qualification": 0.2905027932960894, + "clinical_knowledge": 0.2320675105485232, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.27102803738317754, + "college_engineering_hydrology": 0.19811320754716982, + "college_law": 0.2222222222222222, + "college_mathematics": 0.2761904761904762, + "college_medical_statistics": 0.25471698113207547, + "college_medicine": 0.2673992673992674, + "computer_science": 0.2549019607843137, + "computer_security": 0.24561403508771928, + "conceptual_physics": 0.23809523809523808, + "construction_project_management": 0.2517985611510791, + "economics": 0.24528301886792453, + "education": 0.27607361963190186, + "electrical_engineering": 0.2558139534883721, + "elementary_chinese": 0.2698412698412698, + "elementary_commonsense": 0.25252525252525254, + "elementary_information_and_technology": 0.24369747899159663, + "elementary_mathematics": 0.26521739130434785, + "ethnology": 0.21481481481481482, + "food_science": 0.24475524475524477, + "genetics": 0.26704545454545453, + "global_facts": 0.22818791946308725, + "high_school_biology": 0.27218934911242604, + "high_school_chemistry": 0.25, + "high_school_geography": 0.2457627118644068, + "high_school_mathematics": 0.25, + "high_school_physics": 0.32727272727272727, + "high_school_politics": 0.21678321678321677, + "human_sexuality": 0.30158730158730157, + "international_law": 0.25405405405405407, + "journalism": 0.2441860465116279, + "jurisprudence": 0.24817518248175183, + "legal_and_moral_basis": 0.2897196261682243, + "logical": 0.2682926829268293, + "machine_learning": 0.2459016393442623, + "management": 0.23809523809523808, + "marketing": 0.23333333333333334, + "marxist_theory": 0.24338624338624337, + "modern_chinese": 0.22413793103448276, + "nutrition": 0.2482758620689655, + "philosophy": 0.3047619047619048, + "professional_accounting": 0.28, + "professional_law": 0.2890995260663507, + "professional_medicine": 0.26595744680851063, + "professional_psychology": 0.21982758620689655, + "public_relations": 0.1896551724137931, + "security_study": 0.34074074074074073, + "sociology": 0.26991150442477874, + "sports_science": 0.21818181818181817, + "traditional_chinese_medicine": 0.1945945945945946, + "virology": 0.23668639053254437, + "world_history": 0.2795031055900621, + "world_religions": 0.2875 + } + }, + "prompt_2": { + "accuracy": 0.24650319461232947, + "category_acc": { + "agronomy": 0.27218934911242604, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.25609756097560976, + "arts": 0.23125, + "astronomy": 0.28484848484848485, + "business_ethics": 0.21052631578947367, + "chinese_civil_service_exam": 0.25, + "chinese_driving_rule": 0.31297709923664124, + "chinese_food_culture": 0.2647058823529412, + "chinese_foreign_policy": 0.2336448598130841, + "chinese_history": 0.21671826625386997, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.24581005586592178, + "clinical_knowledge": 0.24472573839662448, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.2336448598130841, + "college_engineering_hydrology": 0.1792452830188679, + "college_law": 0.21296296296296297, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.21245421245421245, + "computer_science": 0.23529411764705882, + "computer_security": 0.30994152046783624, + "conceptual_physics": 0.2653061224489796, + "construction_project_management": 0.26618705035971224, + "economics": 0.1761006289308176, + "education": 0.2331288343558282, + "electrical_engineering": 0.3081395348837209, + "elementary_chinese": 0.25396825396825395, + "elementary_commonsense": 0.29797979797979796, + "elementary_information_and_technology": 0.21428571428571427, + "elementary_mathematics": 0.23043478260869565, + "ethnology": 0.1925925925925926, + "food_science": 0.23076923076923078, + "genetics": 0.2840909090909091, + "global_facts": 0.2080536912751678, + "high_school_biology": 0.24260355029585798, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.22033898305084745, + "high_school_mathematics": 0.25, + "high_school_physics": 0.2, + "high_school_politics": 0.2517482517482518, + "human_sexuality": 0.2698412698412698, + "international_law": 0.2702702702702703, + "journalism": 0.20930232558139536, + "jurisprudence": 0.2360097323600973, + "legal_and_moral_basis": 0.27102803738317754, + "logical": 0.2682926829268293, + "machine_learning": 0.26229508196721313, + "management": 0.30952380952380953, + "marketing": 0.23333333333333334, + "marxist_theory": 0.24338624338624337, + "modern_chinese": 0.1896551724137931, + "nutrition": 0.2620689655172414, + "philosophy": 0.2761904761904762, + "professional_accounting": 0.2914285714285714, + "professional_law": 0.2037914691943128, + "professional_medicine": 0.22872340425531915, + "professional_psychology": 0.21120689655172414, + "public_relations": 0.25287356321839083, + "security_study": 0.2074074074074074, + "sociology": 0.2610619469026549, + "sports_science": 0.24848484848484848, + "traditional_chinese_medicine": 0.31891891891891894, + "virology": 0.23668639053254437, + "world_history": 0.2732919254658385, + "world_religions": 0.28125 + } + }, + "prompt_3": { + "accuracy": 0.25090657917458126, + "category_acc": { + "agronomy": 0.26627218934911245, + "anatomy": 0.22972972972972974, + "ancient_chinese": 0.21341463414634146, + "arts": 0.2375, + "astronomy": 0.30303030303030304, + "business_ethics": 0.23923444976076555, + "chinese_civil_service_exam": 0.2125, + "chinese_driving_rule": 0.21374045801526717, + "chinese_food_culture": 0.27205882352941174, + "chinese_foreign_policy": 0.24299065420560748, + "chinese_history": 0.25696594427244585, + "chinese_literature": 0.29901960784313725, + "chinese_teacher_qualification": 0.2681564245810056, + "clinical_knowledge": 0.23628691983122363, + "college_actuarial_science": 0.330188679245283, + "college_education": 0.29906542056074764, + "college_engineering_hydrology": 0.2830188679245283, + "college_law": 0.25925925925925924, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.32075471698113206, + "college_medicine": 0.29304029304029305, + "computer_science": 0.28921568627450983, + "computer_security": 0.2573099415204678, + "conceptual_physics": 0.2789115646258503, + "construction_project_management": 0.17266187050359713, + "economics": 0.23270440251572327, + "education": 0.22085889570552147, + "electrical_engineering": 0.26744186046511625, + "elementary_chinese": 0.24206349206349206, + "elementary_commonsense": 0.25252525252525254, + "elementary_information_and_technology": 0.25630252100840334, + "elementary_mathematics": 0.23043478260869565, + "ethnology": 0.3037037037037037, + "food_science": 0.23076923076923078, + "genetics": 0.2840909090909091, + "global_facts": 0.30201342281879195, + "high_school_biology": 0.24260355029585798, + "high_school_chemistry": 0.17424242424242425, + "high_school_geography": 0.1864406779661017, + "high_school_mathematics": 0.21951219512195122, + "high_school_physics": 0.2818181818181818, + "high_school_politics": 0.2867132867132867, + "human_sexuality": 0.25396825396825395, + "international_law": 0.22702702702702704, + "journalism": 0.27325581395348836, + "jurisprudence": 0.24817518248175183, + "legal_and_moral_basis": 0.2523364485981308, + "logical": 0.2032520325203252, + "machine_learning": 0.20491803278688525, + "management": 0.2571428571428571, + "marketing": 0.22777777777777777, + "marxist_theory": 0.25925925925925924, + "modern_chinese": 0.25862068965517243, + "nutrition": 0.1724137931034483, + "philosophy": 0.24761904761904763, + "professional_accounting": 0.25142857142857145, + "professional_law": 0.24644549763033174, + "professional_medicine": 0.2393617021276596, + "professional_psychology": 0.23275862068965517, + "public_relations": 0.3160919540229885, + "security_study": 0.3111111111111111, + "sociology": 0.23893805309734514, + "sports_science": 0.28484848484848485, + "traditional_chinese_medicine": 0.23783783783783785, + "virology": 0.1952662721893491, + "world_history": 0.2111801242236025, + "world_religions": 0.26875 + } + }, + "prompt_4": { + "accuracy": 0.2540148506302884, + "category_acc": { + "agronomy": 0.21301775147928995, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.25609756097560976, + "arts": 0.24375, + "astronomy": 0.2, + "business_ethics": 0.2631578947368421, + "chinese_civil_service_exam": 0.25625, + "chinese_driving_rule": 0.21374045801526717, + "chinese_food_culture": 0.20588235294117646, + "chinese_foreign_policy": 0.205607476635514, + "chinese_history": 0.26625386996904027, + "chinese_literature": 0.2647058823529412, + "chinese_teacher_qualification": 0.24022346368715083, + "clinical_knowledge": 0.24472573839662448, + "college_actuarial_science": 0.29245283018867924, + "college_education": 0.27102803738317754, + "college_engineering_hydrology": 0.25471698113207547, + "college_law": 0.32407407407407407, + "college_mathematics": 0.3619047619047619, + "college_medical_statistics": 0.2641509433962264, + "college_medicine": 0.2564102564102564, + "computer_science": 0.2696078431372549, + "computer_security": 0.2807017543859649, + "conceptual_physics": 0.23129251700680273, + "construction_project_management": 0.2446043165467626, + "economics": 0.24528301886792453, + "education": 0.2331288343558282, + "electrical_engineering": 0.28488372093023256, + "elementary_chinese": 0.23015873015873015, + "elementary_commonsense": 0.2878787878787879, + "elementary_information_and_technology": 0.2605042016806723, + "elementary_mathematics": 0.2782608695652174, + "ethnology": 0.23703703703703705, + "food_science": 0.27972027972027974, + "genetics": 0.24431818181818182, + "global_facts": 0.2684563758389262, + "high_school_biology": 0.2603550295857988, + "high_school_chemistry": 0.2878787878787879, + "high_school_geography": 0.2796610169491525, + "high_school_mathematics": 0.23780487804878048, + "high_school_physics": 0.3090909090909091, + "high_school_politics": 0.22377622377622378, + "human_sexuality": 0.2777777777777778, + "international_law": 0.2594594594594595, + "journalism": 0.19767441860465115, + "jurisprudence": 0.22384428223844283, + "legal_and_moral_basis": 0.24766355140186916, + "logical": 0.23577235772357724, + "machine_learning": 0.22131147540983606, + "management": 0.2714285714285714, + "marketing": 0.29444444444444445, + "marxist_theory": 0.2275132275132275, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.31724137931034485, + "philosophy": 0.2571428571428571, + "professional_accounting": 0.24571428571428572, + "professional_law": 0.21800947867298578, + "professional_medicine": 0.2579787234042553, + "professional_psychology": 0.21982758620689655, + "public_relations": 0.27011494252873564, + "security_study": 0.2814814814814815, + "sociology": 0.24336283185840707, + "sports_science": 0.24242424242424243, + "traditional_chinese_medicine": 0.23243243243243245, + "virology": 0.24260355029585798, + "world_history": 0.2919254658385093, + "world_religions": 0.25625 + } + }, + "prompt_5": { + "accuracy": 0.25168364703850804, + "category_acc": { + "agronomy": 0.23668639053254437, + "anatomy": 0.22972972972972974, + "ancient_chinese": 0.2804878048780488, + "arts": 0.26875, + "astronomy": 0.24242424242424243, + "business_ethics": 0.28708133971291866, + "chinese_civil_service_exam": 0.275, + "chinese_driving_rule": 0.2824427480916031, + "chinese_food_culture": 0.22058823529411764, + "chinese_foreign_policy": 0.21495327102803738, + "chinese_history": 0.28173374613003094, + "chinese_literature": 0.2647058823529412, + "chinese_teacher_qualification": 0.29608938547486036, + "clinical_knowledge": 0.24472573839662448, + "college_actuarial_science": 0.16037735849056603, + "college_education": 0.27102803738317754, + "college_engineering_hydrology": 0.27358490566037735, + "college_law": 0.17592592592592593, + "college_mathematics": 0.20952380952380953, + "college_medical_statistics": 0.25471698113207547, + "college_medicine": 0.28205128205128205, + "computer_science": 0.25980392156862747, + "computer_security": 0.22807017543859648, + "conceptual_physics": 0.2789115646258503, + "construction_project_management": 0.31654676258992803, + "economics": 0.32075471698113206, + "education": 0.26993865030674846, + "electrical_engineering": 0.3023255813953488, + "elementary_chinese": 0.24603174603174602, + "elementary_commonsense": 0.23737373737373738, + "elementary_information_and_technology": 0.27310924369747897, + "elementary_mathematics": 0.21739130434782608, + "ethnology": 0.2518518518518518, + "food_science": 0.26573426573426573, + "genetics": 0.2840909090909091, + "global_facts": 0.22818791946308725, + "high_school_biology": 0.27218934911242604, + "high_school_chemistry": 0.25757575757575757, + "high_school_geography": 0.1694915254237288, + "high_school_mathematics": 0.21951219512195122, + "high_school_physics": 0.24545454545454545, + "high_school_politics": 0.2727272727272727, + "human_sexuality": 0.2777777777777778, + "international_law": 0.23243243243243245, + "journalism": 0.27906976744186046, + "jurisprudence": 0.2360097323600973, + "legal_and_moral_basis": 0.20093457943925233, + "logical": 0.35772357723577236, + "machine_learning": 0.26229508196721313, + "management": 0.19523809523809524, + "marketing": 0.2222222222222222, + "marxist_theory": 0.2751322751322751, + "modern_chinese": 0.3103448275862069, + "nutrition": 0.22758620689655173, + "philosophy": 0.19047619047619047, + "professional_accounting": 0.24571428571428572, + "professional_law": 0.24644549763033174, + "professional_medicine": 0.27393617021276595, + "professional_psychology": 0.2413793103448276, + "public_relations": 0.2413793103448276, + "security_study": 0.2222222222222222, + "sociology": 0.1902654867256637, + "sports_science": 0.24242424242424243, + "traditional_chinese_medicine": 0.23243243243243245, + "virology": 0.22485207100591717, + "world_history": 0.2360248447204969, + "world_religions": 0.275 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.18181818181818182 + }, + "prompt_2": { + "accuracy": 0.30303030303030304 + }, + "prompt_3": { + "accuracy": 0.30303030303030304 + }, + "prompt_4": { + "accuracy": 0.15151515151515152 + }, + "prompt_5": { + "accuracy": 0.21212121212121213 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.18636363636363637 + }, + "prompt_2": { + "accuracy": 0.15681818181818183 + }, + "prompt_3": { + "accuracy": 0.20909090909090908 + }, + "prompt_4": { + "accuracy": 0.175 + }, + "prompt_5": { + "accuracy": 0.15 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.32915254237288133 + }, + "prompt_2": { + "accuracy": 0.32677966101694916 + }, + "prompt_3": { + "accuracy": 0.32915254237288133 + }, + "prompt_4": { + "accuracy": 0.32949152542372884 + }, + "prompt_5": { + "accuracy": 0.3288135593220339 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.275243081525804 + }, + "prompt_2": { + "accuracy": 0.2681376215407629 + }, + "prompt_3": { + "accuracy": 0.27225130890052357 + }, + "prompt_4": { + "accuracy": 0.2943156320119671 + }, + "prompt_5": { + "accuracy": 0.27299925205684367 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3287604115629593 + }, + "prompt_2": { + "accuracy": 0.3282704556589907 + }, + "prompt_3": { + "accuracy": 0.3160215580597746 + }, + "prompt_4": { + "accuracy": 0.3488486036256737 + }, + "prompt_5": { + "accuracy": 0.34051935325820676 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.030176401898066273, + "rouge2": 0.0, + "rougeL": 0.029301949331489687, + "avg_rouge": 0.019826117076518653 + }, + "prompt_2": { + "rouge1": 0.030176401898066273, + "rouge2": 0.0, + "rougeL": 0.029301949331489687, + "avg_rouge": 0.019826117076518653 + }, + "prompt_3": { + "rouge1": 0.030176401898066273, + "rouge2": 0.0, + "rougeL": 0.029301949331489687, + "avg_rouge": 0.019826117076518653 + }, + "prompt_4": { + "rouge1": 0.030176401898066273, + "rouge2": 0.0, + "rougeL": 0.029301949331489687, + "avg_rouge": 0.019826117076518653 + }, + "prompt_5": { + "rouge1": 0.030176401898066273, + "rouge2": 0.0, + "rougeL": 0.029301949331489687, + "avg_rouge": 0.019826117076518653 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.03339865867499272, + "rouge2": 0.0, + "rougeL": 0.03201375559955033, + "avg_rouge": 0.021804138091514352 + }, + "prompt_2": { + "rouge1": 0.03339865867499272, + "rouge2": 0.0, + "rougeL": 0.03201375559955033, + "avg_rouge": 0.021804138091514352 + }, + "prompt_3": { + "rouge1": 0.03339865867499272, + "rouge2": 0.0, + "rougeL": 0.03201375559955033, + "avg_rouge": 0.021804138091514352 + }, + "prompt_4": { + "rouge1": 0.03339865867499272, + "rouge2": 0.0, + "rougeL": 0.03201375559955033, + "avg_rouge": 0.021804138091514352 + }, + "prompt_5": { + "rouge1": 0.03339865867499272, + "rouge2": 0.0, + "rougeL": 0.03201375559955033, + "avg_rouge": 0.021804138091514352 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49770642201834864 + }, + "prompt_2": { + "accuracy": 0.4954128440366973 + }, + "prompt_3": { + "accuracy": 0.5172018348623854 + }, + "prompt_4": { + "accuracy": 0.5034403669724771 + }, + "prompt_5": { + "accuracy": 0.4782110091743119 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.48897411313518696 + }, + "prompt_2": { + "accuracy": 0.49760306807286675 + }, + "prompt_3": { + "accuracy": 0.5417066155321189 + }, + "prompt_4": { + "accuracy": 0.5167785234899329 + }, + "prompt_5": { + "accuracy": 0.4937679769894535 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.487 + }, + "prompt_2": { + "accuracy": 0.493 + }, + "prompt_3": { + "accuracy": 0.502 + }, + "prompt_4": { + "accuracy": 0.5005 + }, + "prompt_5": { + "accuracy": 0.4925 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.332 + }, + "prompt_2": { + "accuracy": 0.3415 + }, + "prompt_3": { + "accuracy": 0.327 + }, + "prompt_4": { + "accuracy": 0.324 + }, + "prompt_5": { + "accuracy": 0.3045 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5145 + }, + "prompt_2": { + "accuracy": 0.497 + }, + "prompt_3": { + "accuracy": 0.482 + }, + "prompt_4": { + "accuracy": 0.508 + }, + "prompt_5": { + "accuracy": 0.5015 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.43661971830985913 + }, + "prompt_2": { + "accuracy": 0.49295774647887325 + }, + "prompt_3": { + "accuracy": 0.5070422535211268 + }, + "prompt_4": { + "accuracy": 0.4225352112676056 + }, + "prompt_5": { + "accuracy": 0.4507042253521127 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.592057761732852 + }, + "prompt_2": { + "accuracy": 0.516245487364621 + }, + "prompt_3": { + "accuracy": 0.5090252707581228 + }, + "prompt_4": { + "accuracy": 0.5415162454873647 + }, + "prompt_5": { + "accuracy": 0.4981949458483754 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5171568627450981 + }, + "prompt_2": { + "accuracy": 0.4803921568627451 + }, + "prompt_3": { + "accuracy": 0.47794117647058826 + }, + "prompt_4": { + "accuracy": 0.5147058823529411 + }, + "prompt_5": { + "accuracy": 0.5392156862745098 + } } }, "five_shot": { @@ -342,53 +3357,1733 @@ "model_link": "https://github.com/tatsu-lab/stanford_alpaca", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.34285714285714286, + "language_acc": { + "Malay": 0.3333333333333333, + "English": 0.5066666666666667, + "Vietnamese": 0.2866666666666667, + "Spanish": 0.4, + "Indonesian": 0.28, + "Filipino": 0.2733333333333333, + "Chinese": 0.32 + }, + "consistency_score_2": 0.46634920634920635, + "consistency_score_3": 0.2775238095238096, + "consistency_score_4": 0.18495238095238092, + "consistency_score_5": 0.1307936507936508, + "consistency_score_6": 0.09428571428571429, + "consistency_score_7": 0.06666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.4666666666666667, + "Malay,Vietnamese": 0.38, + "Malay,Spanish": 0.5, + "Malay,Indonesian": 0.58, + "Malay,Filipino": 0.4866666666666667, + "Malay,Chinese": 0.41333333333333333, + "English,Vietnamese": 0.36666666666666664, + "English,Spanish": 0.6066666666666667, + "English,Indonesian": 0.46, + "English,Filipino": 0.49333333333333335, + "English,Chinese": 0.5066666666666667, + "Vietnamese,Spanish": 0.31333333333333335, + "Vietnamese,Indonesian": 0.3333333333333333, + "Vietnamese,Filipino": 0.4866666666666667, + "Vietnamese,Chinese": 0.41333333333333333, + "Spanish,Indonesian": 0.5066666666666667, + "Spanish,Filipino": 0.5266666666666666, + "Spanish,Chinese": 0.5133333333333333, + "Indonesian,Filipino": 0.4666666666666667, + "Indonesian,Chinese": 0.4666666666666667, + "Filipino,Chinese": 0.5066666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.22, + "Malay,English,Spanish": 0.35333333333333333, + "Malay,English,Indonesian": 0.3, + "Malay,English,Filipino": 0.3, + "Malay,English,Chinese": 0.26666666666666666, + "Malay,Vietnamese,Spanish": 0.2, + "Malay,Vietnamese,Indonesian": 0.22, + "Malay,Vietnamese,Filipino": 0.25333333333333335, + "Malay,Vietnamese,Chinese": 0.22, + "Malay,Spanish,Indonesian": 0.35333333333333333, + "Malay,Spanish,Filipino": 0.3333333333333333, + "Malay,Spanish,Chinese": 0.2866666666666667, + "Malay,Indonesian,Filipino": 0.31333333333333335, + "Malay,Indonesian,Chinese": 0.28, + "Malay,Filipino,Chinese": 0.28, + "English,Vietnamese,Spanish": 0.22666666666666666, + "English,Vietnamese,Indonesian": 0.19333333333333333, + "English,Vietnamese,Filipino": 0.24666666666666667, + "English,Vietnamese,Chinese": 0.24666666666666667, + "English,Spanish,Indonesian": 0.36, + "English,Spanish,Filipino": 0.36, + "English,Spanish,Chinese": 0.36, + "English,Indonesian,Filipino": 0.29333333333333333, + "English,Indonesian,Chinese": 0.2733333333333333, + "English,Filipino,Chinese": 0.32, + "Vietnamese,Spanish,Indonesian": 0.19333333333333333, + "Vietnamese,Spanish,Filipino": 0.24666666666666667, + "Vietnamese,Spanish,Chinese": 0.22666666666666666, + "Vietnamese,Indonesian,Filipino": 0.22, + "Vietnamese,Indonesian,Chinese": 0.21333333333333335, + "Vietnamese,Filipino,Chinese": 0.2733333333333333, + "Spanish,Indonesian,Filipino": 0.32666666666666666, + "Spanish,Indonesian,Chinese": 0.3, + "Spanish,Filipino,Chinese": 0.3466666666666667, + "Indonesian,Filipino,Chinese": 0.30666666666666664 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.16, + "Malay,English,Vietnamese,Indonesian": 0.14, + "Malay,English,Vietnamese,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Chinese": 0.16, + "Malay,English,Spanish,Indonesian": 0.24666666666666667, + "Malay,English,Spanish,Filipino": 0.24666666666666667, + "Malay,English,Spanish,Chinese": 0.21333333333333335, + "Malay,English,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Indonesian,Chinese": 0.18, + "Malay,English,Filipino,Chinese": 0.2, + "Malay,Vietnamese,Spanish,Indonesian": 0.14, + "Malay,Vietnamese,Spanish,Filipino": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Indonesian,Filipino": 0.15333333333333332, + "Malay,Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.16, + "Malay,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,Spanish,Indonesian,Chinese": 0.20666666666666667, + "Malay,Spanish,Filipino,Chinese": 0.22666666666666666, + "Malay,Indonesian,Filipino,Chinese": 0.2, + "English,Vietnamese,Spanish,Indonesian": 0.14666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.18, + "English,Vietnamese,Spanish,Chinese": 0.18666666666666668, + "English,Vietnamese,Indonesian,Filipino": 0.14, + "English,Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.17333333333333334, + "English,Spanish,Indonesian,Filipino": 0.24, + "English,Spanish,Indonesian,Chinese": 0.22, + "English,Spanish,Filipino,Chinese": 0.25333333333333335, + "English,Indonesian,Filipino,Chinese": 0.20666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Vietnamese,Spanish,Filipino,Chinese": 0.18, + "Vietnamese,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.10666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.13333333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.09333333333333334, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.17333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.12666666666666668, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.12, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.14, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.08666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.08666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667 + } + }, + "AC3_2": 0.3951801826558666, + "AC3_3": 0.3067502960161715, + "AC3_4": 0.2402845800441797, + "AC3_5": 0.18935273837442332, + "AC3_6": 0.1478991596300324, + "AC3_7": 0.11162790694948621 + }, + "prompt_2": { + "overall_acc": 0.34380952380952384, + "language_acc": { + "Malay": 0.31333333333333335, + "English": 0.47333333333333333, + "Vietnamese": 0.26, + "Spanish": 0.43333333333333335, + "Indonesian": 0.32, + "Filipino": 0.26666666666666666, + "Chinese": 0.34 + }, + "consistency_score_2": 0.493015873015873, + "consistency_score_3": 0.3064761904761904, + "consistency_score_4": 0.21600000000000005, + "consistency_score_5": 0.16349206349206352, + "consistency_score_6": 0.1295238095238095, + "consistency_score_7": 0.10666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5, + "Malay,Vietnamese": 0.44, + "Malay,Spanish": 0.52, + "Malay,Indonesian": 0.5933333333333334, + "Malay,Filipino": 0.46, + "Malay,Chinese": 0.49333333333333335, + "English,Vietnamese": 0.38, + "English,Spanish": 0.62, + "English,Indonesian": 0.54, + "English,Filipino": 0.49333333333333335, + "English,Chinese": 0.5733333333333334, + "Vietnamese,Spanish": 0.4, + "Vietnamese,Indonesian": 0.4, + "Vietnamese,Filipino": 0.4866666666666667, + "Vietnamese,Chinese": 0.44666666666666666, + "Spanish,Indonesian": 0.5333333333333333, + "Spanish,Filipino": 0.48, + "Spanish,Chinese": 0.5066666666666667, + "Indonesian,Filipino": 0.5466666666666666, + "Indonesian,Chinese": 0.48, + "Filipino,Chinese": 0.46 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.24666666666666667, + "Malay,English,Spanish": 0.36666666666666664, + "Malay,English,Indonesian": 0.37333333333333335, + "Malay,English,Filipino": 0.2866666666666667, + "Malay,English,Chinese": 0.3466666666666667, + "Malay,Vietnamese,Spanish": 0.26, + "Malay,Vietnamese,Indonesian": 0.2733333333333333, + "Malay,Vietnamese,Filipino": 0.24666666666666667, + "Malay,Vietnamese,Chinese": 0.28, + "Malay,Spanish,Indonesian": 0.38666666666666666, + "Malay,Spanish,Filipino": 0.2866666666666667, + "Malay,Spanish,Chinese": 0.30666666666666664, + "Malay,Indonesian,Filipino": 0.3466666666666667, + "Malay,Indonesian,Chinese": 0.35333333333333333, + "Malay,Filipino,Chinese": 0.29333333333333333, + "English,Vietnamese,Spanish": 0.2733333333333333, + "English,Vietnamese,Indonesian": 0.24, + "English,Vietnamese,Filipino": 0.2733333333333333, + "English,Vietnamese,Chinese": 0.26666666666666666, + "English,Spanish,Indonesian": 0.41333333333333333, + "English,Spanish,Filipino": 0.3466666666666667, + "English,Spanish,Chinese": 0.3933333333333333, + "English,Indonesian,Filipino": 0.3466666666666667, + "English,Indonesian,Chinese": 0.35333333333333333, + "English,Filipino,Chinese": 0.3333333333333333, + "Vietnamese,Spanish,Indonesian": 0.24666666666666667, + "Vietnamese,Spanish,Filipino": 0.26, + "Vietnamese,Spanish,Chinese": 0.25333333333333335, + "Vietnamese,Indonesian,Filipino": 0.2733333333333333, + "Vietnamese,Indonesian,Chinese": 0.24666666666666667, + "Vietnamese,Filipino,Chinese": 0.2733333333333333, + "Spanish,Indonesian,Filipino": 0.34, + "Spanish,Indonesian,Chinese": 0.32666666666666666, + "Spanish,Filipino,Chinese": 0.3, + "Indonesian,Filipino,Chinese": 0.31333333333333335 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.2, + "Malay,English,Vietnamese,Indonesian": 0.18666666666666668, + "Malay,English,Vietnamese,Filipino": 0.17333333333333334, + "Malay,English,Vietnamese,Chinese": 0.18666666666666668, + "Malay,English,Spanish,Indonesian": 0.29333333333333333, + "Malay,English,Spanish,Filipino": 0.22, + "Malay,English,Spanish,Chinese": 0.26, + "Malay,English,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Indonesian,Chinese": 0.26666666666666666, + "Malay,English,Filipino,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Indonesian": 0.2, + "Malay,Vietnamese,Spanish,Filipino": 0.18, + "Malay,Vietnamese,Spanish,Chinese": 0.18, + "Malay,Vietnamese,Indonesian,Filipino": 0.19333333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.18666666666666668, + "Malay,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,Spanish,Indonesian,Chinese": 0.24666666666666667, + "Malay,Spanish,Filipino,Chinese": 0.2, + "Malay,Indonesian,Filipino,Chinese": 0.24666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.2, + "English,Vietnamese,Spanish,Filipino": 0.2, + "English,Vietnamese,Spanish,Chinese": 0.20666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.18666666666666668, + "English,Vietnamese,Indonesian,Chinese": 0.18666666666666668, + "English,Vietnamese,Filipino,Chinese": 0.2, + "English,Spanish,Indonesian,Filipino": 0.2733333333333333, + "English,Spanish,Indonesian,Chinese": 0.2866666666666667, + "English,Spanish,Filipino,Chinese": 0.26, + "English,Indonesian,Filipino,Chinese": 0.24666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Vietnamese,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Vietnamese,Spanish,Filipino,Chinese": 0.18, + "Vietnamese,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Spanish,Indonesian,Filipino,Chinese": 0.24 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.16, + "Malay,English,Vietnamese,Spanish,Filipino": 0.14666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.14666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.14, + "Malay,English,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.21333333333333335, + "Malay,English,Spanish,Filipino,Chinese": 0.18, + "Malay,English,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.16, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.18, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.16, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.16, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.14666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.12, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.12, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.16, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + } + }, + "AC3_2": 0.40511091837055047, + "AC3_3": 0.32407119140272705, + "AC3_4": 0.26531473285495405, + "AC3_5": 0.2216043863917423, + "AC3_6": 0.18816134900690848, + "AC3_7": 0.16281888650367304 + }, + "prompt_3": { + "overall_acc": 0.3180952380952381, + "language_acc": { + "Malay": 0.26, + "English": 0.48, + "Vietnamese": 0.26666666666666666, + "Spanish": 0.4, + "Indonesian": 0.26666666666666666, + "Filipino": 0.25333333333333335, + "Chinese": 0.3 + }, + "consistency_score_2": 0.466984126984127, + "consistency_score_3": 0.2700952380952381, + "consistency_score_4": 0.17523809523809522, + "consistency_score_5": 0.1222222222222222, + "consistency_score_6": 0.08952380952380953, + "consistency_score_7": 0.06666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.42, + "Malay,Vietnamese": 0.4266666666666667, + "Malay,Spanish": 0.4666666666666667, + "Malay,Indonesian": 0.6466666666666666, + "Malay,Filipino": 0.5133333333333333, + "Malay,Chinese": 0.44666666666666666, + "English,Vietnamese": 0.4066666666666667, + "English,Spanish": 0.5733333333333334, + "English,Indonesian": 0.4666666666666667, + "English,Filipino": 0.47333333333333333, + "English,Chinese": 0.46, + "Vietnamese,Spanish": 0.38, + "Vietnamese,Indonesian": 0.4533333333333333, + "Vietnamese,Filipino": 0.5266666666666666, + "Vietnamese,Chinese": 0.42, + "Spanish,Indonesian": 0.5066666666666667, + "Spanish,Filipino": 0.4266666666666667, + "Spanish,Chinese": 0.41333333333333333, + "Indonesian,Filipino": 0.5, + "Indonesian,Chinese": 0.4666666666666667, + "Filipino,Chinese": 0.41333333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.22, + "Malay,English,Spanish": 0.28, + "Malay,English,Indonesian": 0.30666666666666664, + "Malay,English,Filipino": 0.25333333333333335, + "Malay,English,Chinese": 0.24, + "Malay,Vietnamese,Spanish": 0.22666666666666666, + "Malay,Vietnamese,Indonesian": 0.30666666666666664, + "Malay,Vietnamese,Filipino": 0.3, + "Malay,Vietnamese,Chinese": 0.23333333333333334, + "Malay,Spanish,Indonesian": 0.3466666666666667, + "Malay,Spanish,Filipino": 0.26666666666666666, + "Malay,Spanish,Chinese": 0.24, + "Malay,Indonesian,Filipino": 0.36, + "Malay,Indonesian,Chinese": 0.34, + "Malay,Filipino,Chinese": 0.25333333333333335, + "English,Vietnamese,Spanish": 0.25333333333333335, + "English,Vietnamese,Indonesian": 0.25333333333333335, + "English,Vietnamese,Filipino": 0.28, + "English,Vietnamese,Chinese": 0.24, + "English,Spanish,Indonesian": 0.32666666666666666, + "English,Spanish,Filipino": 0.29333333333333333, + "English,Spanish,Chinese": 0.28, + "English,Indonesian,Filipino": 0.28, + "English,Indonesian,Chinese": 0.2733333333333333, + "English,Filipino,Chinese": 0.24, + "Vietnamese,Spanish,Indonesian": 0.25333333333333335, + "Vietnamese,Spanish,Filipino": 0.25333333333333335, + "Vietnamese,Spanish,Chinese": 0.21333333333333335, + "Vietnamese,Indonesian,Filipino": 0.2866666666666667, + "Vietnamese,Indonesian,Chinese": 0.25333333333333335, + "Vietnamese,Filipino,Chinese": 0.25333333333333335, + "Spanish,Indonesian,Filipino": 0.2866666666666667, + "Spanish,Indonesian,Chinese": 0.28, + "Spanish,Filipino,Chinese": 0.22, + "Indonesian,Filipino,Chinese": 0.26 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian": 0.18, + "Malay,English,Vietnamese,Filipino": 0.16, + "Malay,English,Vietnamese,Chinese": 0.14, + "Malay,English,Spanish,Indonesian": 0.21333333333333335, + "Malay,English,Spanish,Filipino": 0.18, + "Malay,English,Spanish,Chinese": 0.16, + "Malay,English,Indonesian,Filipino": 0.19333333333333333, + "Malay,English,Indonesian,Chinese": 0.20666666666666667, + "Malay,English,Filipino,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.18, + "Malay,Vietnamese,Spanish,Filipino": 0.18, + "Malay,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Indonesian,Filipino": 0.20666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.14, + "Malay,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Malay,Spanish,Indonesian,Chinese": 0.2, + "Malay,Spanish,Filipino,Chinese": 0.15333333333333332, + "Malay,Indonesian,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.18, + "English,Vietnamese,Spanish,Filipino": 0.18666666666666668, + "English,Vietnamese,Spanish,Chinese": 0.16, + "English,Vietnamese,Indonesian,Filipino": 0.18, + "English,Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "English,Vietnamese,Filipino,Chinese": 0.18, + "English,Spanish,Indonesian,Filipino": 0.19333333333333333, + "English,Spanish,Indonesian,Chinese": 0.2, + "English,Spanish,Filipino,Chinese": 0.17333333333333334, + "English,Indonesian,Filipino,Chinese": 0.16, + "Vietnamese,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Vietnamese,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.13333333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.16, + "Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.13333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.1, + "Malay,English,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.14, + "Malay,English,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.12, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.14, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.13333333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.12, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.12, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.12666666666666668, + "English,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.1, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.08666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667 + } + }, + "AC3_2": 0.37842142756044794, + "AC3_3": 0.29213668882277316, + "AC3_4": 0.22598271736548153, + "AC3_5": 0.17659216530476335, + "AC3_6": 0.13972407651222835, + "AC3_7": 0.11023102307366109 + }, + "prompt_4": { + "overall_acc": 0.34476190476190477, + "language_acc": { + "Malay": 0.29333333333333333, + "English": 0.5, + "Vietnamese": 0.26666666666666666, + "Spanish": 0.43333333333333335, + "Indonesian": 0.2866666666666667, + "Filipino": 0.28, + "Chinese": 0.35333333333333333 + }, + "consistency_score_2": 0.508888888888889, + "consistency_score_3": 0.3255238095238095, + "consistency_score_4": 0.23409523809523813, + "consistency_score_5": 0.18000000000000002, + "consistency_score_6": 0.14476190476190479, + "consistency_score_7": 0.12, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5, + "Malay,Vietnamese": 0.47333333333333333, + "Malay,Spanish": 0.49333333333333335, + "Malay,Indonesian": 0.6, + "Malay,Filipino": 0.5533333333333333, + "Malay,Chinese": 0.48, + "English,Vietnamese": 0.4666666666666667, + "English,Spanish": 0.6333333333333333, + "English,Indonesian": 0.54, + "English,Filipino": 0.48, + "English,Chinese": 0.58, + "Vietnamese,Spanish": 0.3933333333333333, + "Vietnamese,Indonesian": 0.4066666666666667, + "Vietnamese,Filipino": 0.54, + "Vietnamese,Chinese": 0.47333333333333333, + "Spanish,Indonesian": 0.52, + "Spanish,Filipino": 0.52, + "Spanish,Chinese": 0.5266666666666666, + "Indonesian,Filipino": 0.5066666666666667, + "Indonesian,Chinese": 0.4533333333333333, + "Filipino,Chinese": 0.5466666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.30666666666666664, + "Malay,English,Spanish": 0.36666666666666664, + "Malay,English,Indonesian": 0.36666666666666664, + "Malay,English,Filipino": 0.32, + "Malay,English,Chinese": 0.32666666666666666, + "Malay,Vietnamese,Spanish": 0.28, + "Malay,Vietnamese,Indonesian": 0.2866666666666667, + "Malay,Vietnamese,Filipino": 0.3333333333333333, + "Malay,Vietnamese,Chinese": 0.31333333333333335, + "Malay,Spanish,Indonesian": 0.36, + "Malay,Spanish,Filipino": 0.34, + "Malay,Spanish,Chinese": 0.29333333333333333, + "Malay,Indonesian,Filipino": 0.37333333333333335, + "Malay,Indonesian,Chinese": 0.32666666666666666, + "Malay,Filipino,Chinese": 0.36666666666666664, + "English,Vietnamese,Spanish": 0.30666666666666664, + "English,Vietnamese,Indonesian": 0.2866666666666667, + "English,Vietnamese,Filipino": 0.3, + "English,Vietnamese,Chinese": 0.32, + "English,Spanish,Indonesian": 0.41333333333333333, + "English,Spanish,Filipino": 0.37333333333333335, + "English,Spanish,Chinese": 0.41333333333333333, + "English,Indonesian,Filipino": 0.31333333333333335, + "English,Indonesian,Chinese": 0.32666666666666666, + "English,Filipino,Chinese": 0.37333333333333335, + "Vietnamese,Spanish,Indonesian": 0.25333333333333335, + "Vietnamese,Spanish,Filipino": 0.29333333333333333, + "Vietnamese,Spanish,Chinese": 0.26, + "Vietnamese,Indonesian,Filipino": 0.2733333333333333, + "Vietnamese,Indonesian,Chinese": 0.26, + "Vietnamese,Filipino,Chinese": 0.3333333333333333, + "Spanish,Indonesian,Filipino": 0.32666666666666666, + "Spanish,Indonesian,Chinese": 0.32, + "Spanish,Filipino,Chinese": 0.36, + "Indonesian,Filipino,Chinese": 0.32666666666666666 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.23333333333333334, + "Malay,English,Vietnamese,Indonesian": 0.22, + "Malay,English,Vietnamese,Filipino": 0.22666666666666666, + "Malay,English,Vietnamese,Chinese": 0.22, + "Malay,English,Spanish,Indonesian": 0.2866666666666667, + "Malay,English,Spanish,Filipino": 0.26666666666666666, + "Malay,English,Spanish,Chinese": 0.25333333333333335, + "Malay,English,Indonesian,Filipino": 0.25333333333333335, + "Malay,English,Indonesian,Chinese": 0.24, + "Malay,English,Filipino,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Filipino": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Chinese": 0.2, + "Malay,Vietnamese,Indonesian,Filipino": 0.22, + "Malay,Vietnamese,Indonesian,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Filipino,Chinese": 0.25333333333333335, + "Malay,Spanish,Indonesian,Filipino": 0.25333333333333335, + "Malay,Spanish,Indonesian,Chinese": 0.23333333333333334, + "Malay,Spanish,Filipino,Chinese": 0.23333333333333334, + "Malay,Indonesian,Filipino,Chinese": 0.25333333333333335, + "English,Vietnamese,Spanish,Indonesian": 0.22666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.23333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.22, + "English,Vietnamese,Indonesian,Filipino": 0.20666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.2, + "English,Vietnamese,Filipino,Chinese": 0.23333333333333334, + "English,Spanish,Indonesian,Filipino": 0.26666666666666666, + "English,Spanish,Indonesian,Chinese": 0.26666666666666666, + "English,Spanish,Filipino,Chinese": 0.3, + "English,Indonesian,Filipino,Chinese": 0.24666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Vietnamese,Spanish,Filipino,Chinese": 0.20666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.20666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.25333333333333335 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino": 0.19333333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.17333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.17333333333333334, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.16, + "Malay,English,Vietnamese,Filipino,Chinese": 0.17333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Malay,English,Spanish,Indonesian,Chinese": 0.2, + "Malay,English,Spanish,Filipino,Chinese": 0.20666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.16, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.16, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.18666666666666668, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.18, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.16666666666666666, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.18, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.16666666666666666, + "English,Spanish,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.14, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12 + } + }, + "AC3_2": 0.4110474773288162, + "AC3_3": 0.33486677757896716, + "AC3_4": 0.27884987224691204, + "AC3_5": 0.23651542645220672, + "AC3_6": 0.20390587359184617, + "AC3_7": 0.17803278684693966 + }, + "prompt_5": { + "overall_acc": 0.34380952380952373, + "language_acc": { + "Malay": 0.2866666666666667, + "English": 0.47333333333333333, + "Vietnamese": 0.2733333333333333, + "Spanish": 0.4533333333333333, + "Indonesian": 0.29333333333333333, + "Filipino": 0.2866666666666667, + "Chinese": 0.34 + }, + "consistency_score_2": 0.5028571428571429, + "consistency_score_3": 0.3154285714285715, + "consistency_score_4": 0.22190476190476197, + "consistency_score_5": 0.1657142857142857, + "consistency_score_6": 0.12761904761904763, + "consistency_score_7": 0.1, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.49333333333333335, + "Malay,Vietnamese": 0.46, + "Malay,Spanish": 0.48, + "Malay,Indonesian": 0.6, + "Malay,Filipino": 0.6, + "Malay,Chinese": 0.4666666666666667, + "English,Vietnamese": 0.4533333333333333, + "English,Spanish": 0.5933333333333334, + "English,Indonesian": 0.5266666666666666, + "English,Filipino": 0.48, + "English,Chinese": 0.56, + "Vietnamese,Spanish": 0.43333333333333335, + "Vietnamese,Indonesian": 0.4266666666666667, + "Vietnamese,Filipino": 0.5066666666666667, + "Vietnamese,Chinese": 0.44, + "Spanish,Indonesian": 0.5, + "Spanish,Filipino": 0.5266666666666666, + "Spanish,Chinese": 0.5266666666666666, + "Indonesian,Filipino": 0.5266666666666666, + "Indonesian,Chinese": 0.4533333333333333, + "Filipino,Chinese": 0.5066666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.28, + "Malay,English,Spanish": 0.3466666666666667, + "Malay,English,Indonesian": 0.3466666666666667, + "Malay,English,Filipino": 0.3333333333333333, + "Malay,English,Chinese": 0.31333333333333335, + "Malay,Vietnamese,Spanish": 0.2733333333333333, + "Malay,Vietnamese,Indonesian": 0.30666666666666664, + "Malay,Vietnamese,Filipino": 0.34, + "Malay,Vietnamese,Chinese": 0.2733333333333333, + "Malay,Spanish,Indonesian": 0.32666666666666666, + "Malay,Spanish,Filipino": 0.35333333333333333, + "Malay,Spanish,Chinese": 0.3, + "Malay,Indonesian,Filipino": 0.4, + "Malay,Indonesian,Chinese": 0.30666666666666664, + "Malay,Filipino,Chinese": 0.35333333333333333, + "English,Vietnamese,Spanish": 0.31333333333333335, + "English,Vietnamese,Indonesian": 0.2733333333333333, + "English,Vietnamese,Filipino": 0.2866666666666667, + "English,Vietnamese,Chinese": 0.2733333333333333, + "English,Spanish,Indonesian": 0.36666666666666664, + "English,Spanish,Filipino": 0.36, + "English,Spanish,Chinese": 0.38666666666666666, + "English,Indonesian,Filipino": 0.3466666666666667, + "English,Indonesian,Chinese": 0.32, + "English,Filipino,Chinese": 0.3333333333333333, + "Vietnamese,Spanish,Indonesian": 0.2733333333333333, + "Vietnamese,Spanish,Filipino": 0.3, + "Vietnamese,Spanish,Chinese": 0.26666666666666666, + "Vietnamese,Indonesian,Filipino": 0.2733333333333333, + "Vietnamese,Indonesian,Chinese": 0.22666666666666666, + "Vietnamese,Filipino,Chinese": 0.29333333333333333, + "Spanish,Indonesian,Filipino": 0.3333333333333333, + "Spanish,Indonesian,Chinese": 0.29333333333333333, + "Spanish,Filipino,Chinese": 0.36, + "Indonesian,Filipino,Chinese": 0.30666666666666664 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.21333333333333335, + "Malay,English,Vietnamese,Indonesian": 0.21333333333333335, + "Malay,English,Vietnamese,Filipino": 0.22, + "Malay,English,Vietnamese,Chinese": 0.18, + "Malay,English,Spanish,Indonesian": 0.26, + "Malay,English,Spanish,Filipino": 0.26, + "Malay,English,Spanish,Chinese": 0.23333333333333334, + "Malay,English,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Indonesian,Chinese": 0.23333333333333334, + "Malay,English,Filipino,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Filipino": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Chinese": 0.18, + "Malay,Vietnamese,Indonesian,Filipino": 0.22666666666666666, + "Malay,Vietnamese,Indonesian,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.23333333333333334, + "Malay,Spanish,Indonesian,Filipino": 0.24666666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.21333333333333335, + "Malay,Spanish,Filipino,Chinese": 0.26, + "Malay,Indonesian,Filipino,Chinese": 0.24, + "English,Vietnamese,Spanish,Indonesian": 0.20666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.22666666666666666, + "English,Vietnamese,Spanish,Chinese": 0.2, + "English,Vietnamese,Indonesian,Filipino": 0.20666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "English,Vietnamese,Filipino,Chinese": 0.19333333333333333, + "English,Spanish,Indonesian,Filipino": 0.26666666666666666, + "English,Spanish,Indonesian,Chinese": 0.24, + "English,Spanish,Filipino,Chinese": 0.26, + "English,Indonesian,Filipino,Chinese": 0.24, + "Vietnamese,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Vietnamese,Spanish,Indonesian,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Filipino,Chinese": 0.20666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Spanish,Indonesian,Filipino,Chinese": 0.24 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.16666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino": 0.17333333333333334, + "Malay,English,Vietnamese,Spanish,Chinese": 0.14, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.18, + "Malay,English,Spanish,Filipino,Chinese": 0.2, + "Malay,English,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.18, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.17333333333333334, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.15333333333333332, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.14666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.13333333333333333, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1 + } + }, + "AC3_2": 0.40839466490435955, + "AC3_3": 0.329008131373711, + "AC3_4": 0.26972262300827815, + "AC3_5": 0.22363684908828485, + "AC3_6": 0.1861433381038532, + "AC3_7": 0.15493562228268615 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.29545454545454547, + "language_acc": { + "English": 0.3409090909090909, + "Vietnamese": 0.26704545454545453, + "Chinese": 0.2897727272727273, + "Indonesian": 0.2897727272727273, + "Filipino": 0.3068181818181818, + "Spanish": 0.30113636363636365, + "Malay": 0.2727272727272727 + }, + "consistency_score_2": 0.3977272727272727, + "consistency_score_3": 0.20827922077922073, + "consistency_score_4": 0.1266233766233766, + "consistency_score_5": 0.0854978354978355, + "consistency_score_6": 0.0633116883116883, + "consistency_score_7": 0.05113636363636364, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.32386363636363635, + "English,Chinese": 0.4431818181818182, + "English,Indonesian": 0.4318181818181818, + "English,Filipino": 0.3352272727272727, + "English,Spanish": 0.5056818181818182, + "English,Malay": 0.35795454545454547, + "Vietnamese,Chinese": 0.32386363636363635, + "Vietnamese,Indonesian": 0.3806818181818182, + "Vietnamese,Filipino": 0.375, + "Vietnamese,Spanish": 0.3409090909090909, + "Vietnamese,Malay": 0.3693181818181818, + "Chinese,Indonesian": 0.4602272727272727, + "Chinese,Filipino": 0.36363636363636365, + "Chinese,Spanish": 0.38636363636363635, + "Chinese,Malay": 0.36363636363636365, + "Indonesian,Filipino": 0.45454545454545453, + "Indonesian,Spanish": 0.4659090909090909, + "Indonesian,Malay": 0.5284090909090909, + "Filipino,Spanish": 0.32954545454545453, + "Filipino,Malay": 0.4431818181818182, + "Spanish,Malay": 0.3693181818181818 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.1590909090909091, + "English,Vietnamese,Indonesian": 0.1875, + "English,Vietnamese,Filipino": 0.1590909090909091, + "English,Vietnamese,Spanish": 0.19318181818181818, + "English,Vietnamese,Malay": 0.1590909090909091, + "English,Chinese,Indonesian": 0.24431818181818182, + "English,Chinese,Filipino": 0.19318181818181818, + "English,Chinese,Spanish": 0.2556818181818182, + "English,Chinese,Malay": 0.19886363636363635, + "English,Indonesian,Filipino": 0.21022727272727273, + "English,Indonesian,Spanish": 0.2784090909090909, + "English,Indonesian,Malay": 0.24431818181818182, + "English,Filipino,Spanish": 0.18181818181818182, + "English,Filipino,Malay": 0.1875, + "English,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian": 0.17045454545454544, + "Vietnamese,Chinese,Filipino": 0.16477272727272727, + "Vietnamese,Chinese,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Malay": 0.1590909090909091, + "Vietnamese,Indonesian,Filipino": 0.20454545454545456, + "Vietnamese,Indonesian,Spanish": 0.22727272727272727, + "Vietnamese,Indonesian,Malay": 0.2215909090909091, + "Vietnamese,Filipino,Spanish": 0.17045454545454544, + "Vietnamese,Filipino,Malay": 0.21022727272727273, + "Vietnamese,Spanish,Malay": 0.1875, + "Chinese,Indonesian,Filipino": 0.26136363636363635, + "Chinese,Indonesian,Spanish": 0.23863636363636365, + "Chinese,Indonesian,Malay": 0.25, + "Chinese,Filipino,Spanish": 0.1875, + "Chinese,Filipino,Malay": 0.21022727272727273, + "Chinese,Spanish,Malay": 0.19318181818181818, + "Indonesian,Filipino,Spanish": 0.23863636363636365, + "Indonesian,Filipino,Malay": 0.2840909090909091, + "Indonesian,Spanish,Malay": 0.2784090909090909, + "Filipino,Spanish,Malay": 0.20454545454545456 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.09659090909090909, + "English,Vietnamese,Chinese,Filipino": 0.10227272727272728, + "English,Vietnamese,Chinese,Spanish": 0.10795454545454546, + "English,Vietnamese,Chinese,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Filipino": 0.10227272727272728, + "English,Vietnamese,Indonesian,Spanish": 0.14204545454545456, + "English,Vietnamese,Indonesian,Malay": 0.11363636363636363, + "English,Vietnamese,Filipino,Spanish": 0.10795454545454546, + "English,Vietnamese,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Spanish,Malay": 0.11931818181818182, + "English,Chinese,Indonesian,Filipino": 0.14204545454545456, + "English,Chinese,Indonesian,Spanish": 0.14772727272727273, + "English,Chinese,Indonesian,Malay": 0.14204545454545456, + "English,Chinese,Filipino,Spanish": 0.13068181818181818, + "English,Chinese,Filipino,Malay": 0.11931818181818182, + "English,Chinese,Spanish,Malay": 0.11931818181818182, + "English,Indonesian,Filipino,Spanish": 0.14204545454545456, + "English,Indonesian,Filipino,Malay": 0.13636363636363635, + "English,Indonesian,Spanish,Malay": 0.17045454545454544, + "English,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Chinese,Indonesian,Filipino": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Spanish": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Filipino,Spanish": 0.10795454545454546, + "Vietnamese,Chinese,Filipino,Malay": 0.125, + "Vietnamese,Chinese,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino,Spanish": 0.125, + "Vietnamese,Indonesian,Filipino,Malay": 0.14204545454545456, + "Vietnamese,Indonesian,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Filipino,Spanish,Malay": 0.11931818181818182, + "Chinese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "Chinese,Indonesian,Filipino,Malay": 0.1590909090909091, + "Chinese,Indonesian,Spanish,Malay": 0.14204545454545456, + "Chinese,Filipino,Spanish,Malay": 0.13068181818181818, + "Indonesian,Filipino,Spanish,Malay": 0.16477272727272727 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Chinese,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.07954545454545454, + "English,Chinese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0625, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0625, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + } + }, + "AC3_2": 0.33904619965302585, + "AC3_3": 0.24432367498738644, + "AC3_4": 0.17727272723072726, + "AC3_5": 0.13261880161808012, + "AC3_6": 0.1042780748372444, + "AC3_7": 0.08718330846962938 + }, + "prompt_2": { + "overall_acc": 0.29464285714285715, + "language_acc": { + "English": 0.3465909090909091, + "Vietnamese": 0.2727272727272727, + "Chinese": 0.2784090909090909, + "Indonesian": 0.2784090909090909, + "Filipino": 0.2897727272727273, + "Spanish": 0.3181818181818182, + "Malay": 0.2784090909090909 + }, + "consistency_score_2": 0.435064935064935, + "consistency_score_3": 0.237987012987013, + "consistency_score_4": 0.14545454545454548, + "consistency_score_5": 0.09388528138528138, + "consistency_score_6": 0.06331168831168832, + "consistency_score_7": 0.045454545454545456, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3125, + "English,Chinese": 0.4375, + "English,Indonesian": 0.42613636363636365, + "English,Filipino": 0.44886363636363635, + "English,Spanish": 0.5852272727272727, + "English,Malay": 0.4090909090909091, + "Vietnamese,Chinese": 0.24431818181818182, + "Vietnamese,Indonesian": 0.3977272727272727, + "Vietnamese,Filipino": 0.4090909090909091, + "Vietnamese,Spanish": 0.3409090909090909, + "Vietnamese,Malay": 0.4034090909090909, + "Chinese,Indonesian": 0.48295454545454547, + "Chinese,Filipino": 0.4034090909090909, + "Chinese,Spanish": 0.4431818181818182, + "Chinese,Malay": 0.375, + "Indonesian,Filipino": 0.5340909090909091, + "Indonesian,Spanish": 0.4602272727272727, + "Indonesian,Malay": 0.6477272727272727, + "Filipino,Spanish": 0.4090909090909091, + "Filipino,Malay": 0.5397727272727273, + "Spanish,Malay": 0.42613636363636365 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.13068181818181818, + "English,Vietnamese,Indonesian": 0.17613636363636365, + "English,Vietnamese,Filipino": 0.20454545454545456, + "English,Vietnamese,Spanish": 0.20454545454545456, + "English,Vietnamese,Malay": 0.17045454545454544, + "English,Chinese,Indonesian": 0.2556818181818182, + "English,Chinese,Filipino": 0.21022727272727273, + "English,Chinese,Spanish": 0.2897727272727273, + "English,Chinese,Malay": 0.2159090909090909, + "English,Indonesian,Filipino": 0.2840909090909091, + "English,Indonesian,Spanish": 0.30113636363636365, + "English,Indonesian,Malay": 0.3181818181818182, + "English,Filipino,Spanish": 0.29545454545454547, + "English,Filipino,Malay": 0.2840909090909091, + "English,Spanish,Malay": 0.2840909090909091, + "Vietnamese,Chinese,Indonesian": 0.1534090909090909, + "Vietnamese,Chinese,Filipino": 0.1534090909090909, + "Vietnamese,Chinese,Spanish": 0.125, + "Vietnamese,Chinese,Malay": 0.11931818181818182, + "Vietnamese,Indonesian,Filipino": 0.24431818181818182, + "Vietnamese,Indonesian,Spanish": 0.19886363636363635, + "Vietnamese,Indonesian,Malay": 0.2840909090909091, + "Vietnamese,Filipino,Spanish": 0.18181818181818182, + "Vietnamese,Filipino,Malay": 0.2556818181818182, + "Vietnamese,Spanish,Malay": 0.17613636363636365, + "Chinese,Indonesian,Filipino": 0.29545454545454547, + "Chinese,Indonesian,Spanish": 0.2727272727272727, + "Chinese,Indonesian,Malay": 0.30113636363636365, + "Chinese,Filipino,Spanish": 0.20454545454545456, + "Chinese,Filipino,Malay": 0.23295454545454544, + "Chinese,Spanish,Malay": 0.2159090909090909, + "Indonesian,Filipino,Spanish": 0.2897727272727273, + "Indonesian,Filipino,Malay": 0.3977272727272727, + "Indonesian,Spanish,Malay": 0.3352272727272727, + "Filipino,Spanish,Malay": 0.26704545454545453 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.08522727272727272, + "English,Vietnamese,Chinese,Filipino": 0.10227272727272728, + "English,Vietnamese,Chinese,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino": 0.14772727272727273, + "English,Vietnamese,Indonesian,Spanish": 0.14204545454545456, + "English,Vietnamese,Indonesian,Malay": 0.14204545454545456, + "English,Vietnamese,Filipino,Spanish": 0.14772727272727273, + "English,Vietnamese,Filipino,Malay": 0.14204545454545456, + "English,Vietnamese,Spanish,Malay": 0.13068181818181818, + "English,Chinese,Indonesian,Filipino": 0.1590909090909091, + "English,Chinese,Indonesian,Spanish": 0.18181818181818182, + "English,Chinese,Indonesian,Malay": 0.17613636363636365, + "English,Chinese,Filipino,Spanish": 0.14772727272727273, + "English,Chinese,Filipino,Malay": 0.13636363636363635, + "English,Chinese,Spanish,Malay": 0.14772727272727273, + "English,Indonesian,Filipino,Spanish": 0.21022727272727273, + "English,Indonesian,Filipino,Malay": 0.23295454545454544, + "English,Indonesian,Spanish,Malay": 0.22727272727272727, + "English,Filipino,Spanish,Malay": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian,Filipino": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Spanish": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Filipino,Spanish": 0.07954545454545454, + "Vietnamese,Chinese,Filipino,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Spanish,Malay": 0.0625, + "Vietnamese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "Vietnamese,Indonesian,Filipino,Malay": 0.1875, + "Vietnamese,Indonesian,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.125, + "Chinese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "Chinese,Indonesian,Filipino,Malay": 0.19886363636363635, + "Chinese,Indonesian,Spanish,Malay": 0.18181818181818182, + "Chinese,Filipino,Spanish,Malay": 0.13068181818181818, + "Indonesian,Filipino,Spanish,Malay": 0.22727272727272727 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.0625, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Chinese,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.11363636363636363, + "English,Vietnamese,Filipino,Spanish,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Chinese,Indonesian,Spanish,Malay": 0.125, + "English,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + } + }, + "AC3_2": 0.35134276175067564, + "AC3_3": 0.26330169368505435, + "AC3_4": 0.1947620803688774, + "AC3_5": 0.14239703537918727, + "AC3_6": 0.10422740521869592, + "AC3_7": 0.07875894985751049 + }, + "prompt_3": { + "overall_acc": 0.28733766233766234, + "language_acc": { + "English": 0.3522727272727273, + "Vietnamese": 0.23295454545454544, + "Chinese": 0.26136363636363635, + "Indonesian": 0.26704545454545453, + "Filipino": 0.2897727272727273, + "Spanish": 0.3465909090909091, + "Malay": 0.26136363636363635 + }, + "consistency_score_2": 0.4228896103896103, + "consistency_score_3": 0.2251623376623377, + "consistency_score_4": 0.1368506493506494, + "consistency_score_5": 0.09036796536796535, + "consistency_score_6": 0.06493506493506493, + "consistency_score_7": 0.05113636363636364, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.30113636363636365, + "English,Chinese": 0.42613636363636365, + "English,Indonesian": 0.4147727272727273, + "English,Filipino": 0.3522727272727273, + "English,Spanish": 0.5511363636363636, + "English,Malay": 0.3125, + "Vietnamese,Chinese": 0.2784090909090909, + "Vietnamese,Indonesian": 0.4318181818181818, + "Vietnamese,Filipino": 0.4147727272727273, + "Vietnamese,Spanish": 0.36363636363636365, + "Vietnamese,Malay": 0.4431818181818182, + "Chinese,Indonesian": 0.42613636363636365, + "Chinese,Filipino": 0.39204545454545453, + "Chinese,Spanish": 0.4147727272727273, + "Chinese,Malay": 0.375, + "Indonesian,Filipino": 0.5397727272727273, + "Indonesian,Spanish": 0.4943181818181818, + "Indonesian,Malay": 0.6647727272727273, + "Filipino,Spanish": 0.35795454545454547, + "Filipino,Malay": 0.5113636363636364, + "Spanish,Malay": 0.4147727272727273 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.13636363636363635, + "English,Vietnamese,Indonesian": 0.17613636363636365, + "English,Vietnamese,Filipino": 0.1590909090909091, + "English,Vietnamese,Spanish": 0.1875, + "English,Vietnamese,Malay": 0.13636363636363635, + "English,Chinese,Indonesian": 0.22727272727272727, + "English,Chinese,Filipino": 0.1875, + "English,Chinese,Spanish": 0.26704545454545453, + "English,Chinese,Malay": 0.18181818181818182, + "English,Indonesian,Filipino": 0.25, + "English,Indonesian,Spanish": 0.3068181818181818, + "English,Indonesian,Malay": 0.26704545454545453, + "English,Filipino,Spanish": 0.2159090909090909, + "English,Filipino,Malay": 0.19886363636363635, + "English,Spanish,Malay": 0.23863636363636365, + "Vietnamese,Chinese,Indonesian": 0.1534090909090909, + "Vietnamese,Chinese,Filipino": 0.14772727272727273, + "Vietnamese,Chinese,Spanish": 0.14772727272727273, + "Vietnamese,Chinese,Malay": 0.14204545454545456, + "Vietnamese,Indonesian,Filipino": 0.2784090909090909, + "Vietnamese,Indonesian,Spanish": 0.23295454545454544, + "Vietnamese,Indonesian,Malay": 0.32386363636363635, + "Vietnamese,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Filipino,Malay": 0.2556818181818182, + "Vietnamese,Spanish,Malay": 0.19318181818181818, + "Chinese,Indonesian,Filipino": 0.2727272727272727, + "Chinese,Indonesian,Spanish": 0.23863636363636365, + "Chinese,Indonesian,Malay": 0.2897727272727273, + "Chinese,Filipino,Spanish": 0.19886363636363635, + "Chinese,Filipino,Malay": 0.2215909090909091, + "Chinese,Spanish,Malay": 0.2159090909090909, + "Indonesian,Filipino,Spanish": 0.2784090909090909, + "Indonesian,Filipino,Malay": 0.4034090909090909, + "Indonesian,Spanish,Malay": 0.3465909090909091, + "Filipino,Spanish,Malay": 0.23863636363636365 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino": 0.09090909090909091, + "English,Vietnamese,Chinese,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino": 0.125, + "English,Vietnamese,Indonesian,Spanish": 0.13636363636363635, + "English,Vietnamese,Indonesian,Malay": 0.125, + "English,Vietnamese,Filipino,Spanish": 0.10227272727272728, + "English,Vietnamese,Filipino,Malay": 0.09659090909090909, + "English,Vietnamese,Spanish,Malay": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino": 0.1534090909090909, + "English,Chinese,Indonesian,Spanish": 0.17045454545454544, + "English,Chinese,Indonesian,Malay": 0.1534090909090909, + "English,Chinese,Filipino,Spanish": 0.13636363636363635, + "English,Chinese,Filipino,Malay": 0.11931818181818182, + "English,Chinese,Spanish,Malay": 0.13636363636363635, + "English,Indonesian,Filipino,Spanish": 0.18181818181818182, + "English,Indonesian,Filipino,Malay": 0.17613636363636365, + "English,Indonesian,Spanish,Malay": 0.20454545454545456, + "English,Filipino,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian,Filipino": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Spanish": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Filipino,Spanish": 0.09090909090909091, + "Vietnamese,Chinese,Filipino,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino,Spanish": 0.14772727272727273, + "Vietnamese,Indonesian,Filipino,Malay": 0.22727272727272727, + "Vietnamese,Indonesian,Spanish,Malay": 0.18181818181818182, + "Vietnamese,Filipino,Spanish,Malay": 0.13068181818181818, + "Chinese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "Chinese,Indonesian,Filipino,Malay": 0.19886363636363635, + "Chinese,Indonesian,Spanish,Malay": 0.17045454545454544, + "Chinese,Filipino,Spanish,Malay": 0.13636363636363635, + "Indonesian,Filipino,Spanish,Malay": 0.2159090909090909 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.06818181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.09659090909090909, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.07954545454545454, + "English,Chinese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "English,Chinese,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + } + }, + "AC3_2": 0.3421781075585003, + "AC3_3": 0.25247851604977783, + "AC3_4": 0.18540042043134025, + "AC3_5": 0.1374939530019632, + "AC3_6": 0.10593093539395225, + "AC3_7": 0.08682145190371475 + }, + "prompt_4": { + "overall_acc": 0.3108766233766234, + "language_acc": { + "English": 0.35795454545454547, + "Vietnamese": 0.26704545454545453, + "Chinese": 0.3125, + "Indonesian": 0.30113636363636365, + "Filipino": 0.3181818181818182, + "Spanish": 0.32954545454545453, + "Malay": 0.2897727272727273 + }, + "consistency_score_2": 0.4688852813852815, + "consistency_score_3": 0.27711038961038964, + "consistency_score_4": 0.18327922077922085, + "consistency_score_5": 0.13014069264069264, + "consistency_score_6": 0.0982142857142857, + "consistency_score_7": 0.07954545454545454, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3693181818181818, + "English,Chinese": 0.48295454545454547, + "English,Indonesian": 0.4943181818181818, + "English,Filipino": 0.4602272727272727, + "English,Spanish": 0.6193181818181818, + "English,Malay": 0.45454545454545453, + "Vietnamese,Chinese": 0.3352272727272727, + "Vietnamese,Indonesian": 0.45454545454545453, + "Vietnamese,Filipino": 0.4318181818181818, + "Vietnamese,Spanish": 0.36363636363636365, + "Vietnamese,Malay": 0.4772727272727273, + "Chinese,Indonesian": 0.4715909090909091, + "Chinese,Filipino": 0.42045454545454547, + "Chinese,Spanish": 0.48295454545454547, + "Chinese,Malay": 0.42613636363636365, + "Indonesian,Filipino": 0.5454545454545454, + "Indonesian,Spanish": 0.5113636363636364, + "Indonesian,Malay": 0.625, + "Filipino,Spanish": 0.4318181818181818, + "Filipino,Malay": 0.5284090909090909, + "Spanish,Malay": 0.4602272727272727 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.1875, + "English,Vietnamese,Indonesian": 0.24431818181818182, + "English,Vietnamese,Filipino": 0.22727272727272727, + "English,Vietnamese,Spanish": 0.24431818181818182, + "English,Vietnamese,Malay": 0.25, + "English,Chinese,Indonesian": 0.30113636363636365, + "English,Chinese,Filipino": 0.26136363636363635, + "English,Chinese,Spanish": 0.3352272727272727, + "English,Chinese,Malay": 0.2727272727272727, + "English,Indonesian,Filipino": 0.3125, + "English,Indonesian,Spanish": 0.36363636363636365, + "English,Indonesian,Malay": 0.3465909090909091, + "English,Filipino,Spanish": 0.3125, + "English,Filipino,Malay": 0.30113636363636365, + "English,Spanish,Malay": 0.3181818181818182, + "Vietnamese,Chinese,Indonesian": 0.2159090909090909, + "Vietnamese,Chinese,Filipino": 0.17613636363636365, + "Vietnamese,Chinese,Spanish": 0.17613636363636365, + "Vietnamese,Chinese,Malay": 0.21022727272727273, + "Vietnamese,Indonesian,Filipino": 0.2897727272727273, + "Vietnamese,Indonesian,Spanish": 0.2556818181818182, + "Vietnamese,Indonesian,Malay": 0.3465909090909091, + "Vietnamese,Filipino,Spanish": 0.21022727272727273, + "Vietnamese,Filipino,Malay": 0.2897727272727273, + "Vietnamese,Spanish,Malay": 0.2556818181818182, + "Chinese,Indonesian,Filipino": 0.2784090909090909, + "Chinese,Indonesian,Spanish": 0.29545454545454547, + "Chinese,Indonesian,Malay": 0.3068181818181818, + "Chinese,Filipino,Spanish": 0.2556818181818182, + "Chinese,Filipino,Malay": 0.26704545454545453, + "Chinese,Spanish,Malay": 0.26136363636363635, + "Indonesian,Filipino,Spanish": 0.30113636363636365, + "Indonesian,Filipino,Malay": 0.375, + "Indonesian,Spanish,Malay": 0.375, + "Filipino,Spanish,Malay": 0.2784090909090909 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.14204545454545456, + "English,Vietnamese,Chinese,Filipino": 0.11931818181818182, + "English,Vietnamese,Chinese,Spanish": 0.14772727272727273, + "English,Vietnamese,Chinese,Malay": 0.14772727272727273, + "English,Vietnamese,Indonesian,Filipino": 0.17613636363636365, + "English,Vietnamese,Indonesian,Spanish": 0.17613636363636365, + "English,Vietnamese,Indonesian,Malay": 0.20454545454545456, + "English,Vietnamese,Filipino,Spanish": 0.17045454545454544, + "English,Vietnamese,Filipino,Malay": 0.18181818181818182, + "English,Vietnamese,Spanish,Malay": 0.19318181818181818, + "English,Chinese,Indonesian,Filipino": 0.19886363636363635, + "English,Chinese,Indonesian,Spanish": 0.2159090909090909, + "English,Chinese,Indonesian,Malay": 0.21022727272727273, + "English,Chinese,Filipino,Spanish": 0.1875, + "English,Chinese,Filipino,Malay": 0.17613636363636365, + "English,Chinese,Spanish,Malay": 0.19318181818181818, + "English,Indonesian,Filipino,Spanish": 0.22727272727272727, + "English,Indonesian,Filipino,Malay": 0.22727272727272727, + "English,Indonesian,Spanish,Malay": 0.26704545454545453, + "English,Filipino,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Filipino": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian,Spanish": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Filipino,Spanish": 0.11363636363636363, + "Vietnamese,Chinese,Filipino,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Indonesian,Filipino,Malay": 0.23295454545454544, + "Vietnamese,Indonesian,Spanish,Malay": 0.2215909090909091, + "Vietnamese,Filipino,Spanish,Malay": 0.17045454545454544, + "Chinese,Indonesian,Filipino,Spanish": 0.1875, + "Chinese,Indonesian,Filipino,Malay": 0.19886363636363635, + "Chinese,Indonesian,Spanish,Malay": 0.21022727272727273, + "Chinese,Filipino,Spanish,Malay": 0.16477272727272727, + "Indonesian,Filipino,Spanish,Malay": 0.23295454545454544 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.10795454545454546, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Chinese,Spanish,Malay": 0.125, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.13068181818181818, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.1534090909090909, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.16477272727272727, + "English,Vietnamese,Filipino,Spanish,Malay": 0.14204545454545456, + "English,Chinese,Indonesian,Filipino,Spanish": 0.14772727272727273, + "English,Chinese,Indonesian,Filipino,Malay": 0.13636363636363635, + "English,Chinese,Indonesian,Spanish,Malay": 0.1590909090909091, + "English,Chinese,Filipino,Spanish,Malay": 0.13068181818181818, + "English,Indonesian,Filipino,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.125, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454 + } + }, + "AC3_2": 0.37387174756599373, + "AC3_3": 0.2930239624592186, + "AC3_4": 0.23060427576950182, + "AC3_5": 0.18347442430705493, + "AC3_6": 0.14927012468006504, + "AC3_7": 0.12667737664493042 + }, + "prompt_5": { + "overall_acc": 0.30844155844155846, + "language_acc": { + "English": 0.35795454545454547, + "Vietnamese": 0.2840909090909091, + "Chinese": 0.30113636363636365, + "Indonesian": 0.2897727272727273, + "Filipino": 0.3352272727272727, + "Spanish": 0.2897727272727273, + "Malay": 0.30113636363636365 + }, + "consistency_score_2": 0.4759199134199134, + "consistency_score_3": 0.2897727272727273, + "consistency_score_4": 0.1987012987012987, + "consistency_score_5": 0.145021645021645, + "consistency_score_6": 0.10957792207792208, + "consistency_score_7": 0.08522727272727272, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.38636363636363635, + "English,Chinese": 0.5056818181818182, + "English,Indonesian": 0.48295454545454547, + "English,Filipino": 0.4602272727272727, + "English,Spanish": 0.5965909090909091, + "English,Malay": 0.4659090909090909, + "Vietnamese,Chinese": 0.36363636363636365, + "Vietnamese,Indonesian": 0.48295454545454547, + "Vietnamese,Filipino": 0.4659090909090909, + "Vietnamese,Spanish": 0.375, + "Vietnamese,Malay": 0.4715909090909091, + "Chinese,Indonesian": 0.44886363636363635, + "Chinese,Filipino": 0.3977272727272727, + "Chinese,Spanish": 0.48863636363636365, + "Chinese,Malay": 0.38636363636363635, + "Indonesian,Filipino": 0.5284090909090909, + "Indonesian,Spanish": 0.5681818181818182, + "Indonesian,Malay": 0.6761363636363636, + "Filipino,Spanish": 0.4375, + "Filipino,Malay": 0.5227272727272727, + "Spanish,Malay": 0.48295454545454547 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2159090909090909, + "English,Vietnamese,Indonesian": 0.26704545454545453, + "English,Vietnamese,Filipino": 0.24431818181818182, + "English,Vietnamese,Spanish": 0.2556818181818182, + "English,Vietnamese,Malay": 0.26136363636363635, + "English,Chinese,Indonesian": 0.29545454545454547, + "English,Chinese,Filipino": 0.2784090909090909, + "English,Chinese,Spanish": 0.3465909090909091, + "English,Chinese,Malay": 0.2556818181818182, + "English,Indonesian,Filipino": 0.2784090909090909, + "English,Indonesian,Spanish": 0.375, + "English,Indonesian,Malay": 0.35795454545454547, + "English,Filipino,Spanish": 0.3125, + "English,Filipino,Malay": 0.3068181818181818, + "English,Spanish,Malay": 0.3465909090909091, + "Vietnamese,Chinese,Indonesian": 0.2159090909090909, + "Vietnamese,Chinese,Filipino": 0.19886363636363635, + "Vietnamese,Chinese,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Malay": 0.20454545454545456, + "Vietnamese,Indonesian,Filipino": 0.3125, + "Vietnamese,Indonesian,Spanish": 0.2897727272727273, + "Vietnamese,Indonesian,Malay": 0.3806818181818182, + "Vietnamese,Filipino,Spanish": 0.23863636363636365, + "Vietnamese,Filipino,Malay": 0.3068181818181818, + "Vietnamese,Spanish,Malay": 0.2727272727272727, + "Chinese,Indonesian,Filipino": 0.26136363636363635, + "Chinese,Indonesian,Spanish": 0.3125, + "Chinese,Indonesian,Malay": 0.3068181818181818, + "Chinese,Filipino,Spanish": 0.26704545454545453, + "Chinese,Filipino,Malay": 0.25, + "Chinese,Spanish,Malay": 0.2727272727272727, + "Indonesian,Filipino,Spanish": 0.3181818181818182, + "Indonesian,Filipino,Malay": 0.3977272727272727, + "Indonesian,Spanish,Malay": 0.4318181818181818, + "Filipino,Spanish,Malay": 0.3125 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.1590909090909091, + "English,Vietnamese,Chinese,Filipino": 0.14772727272727273, + "English,Vietnamese,Chinese,Spanish": 0.1534090909090909, + "English,Vietnamese,Chinese,Malay": 0.14772727272727273, + "English,Vietnamese,Indonesian,Filipino": 0.18181818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.21022727272727273, + "English,Vietnamese,Indonesian,Malay": 0.22727272727272727, + "English,Vietnamese,Filipino,Spanish": 0.18181818181818182, + "English,Vietnamese,Filipino,Malay": 0.19318181818181818, + "English,Vietnamese,Spanish,Malay": 0.21022727272727273, + "English,Chinese,Indonesian,Filipino": 0.1875, + "English,Chinese,Indonesian,Spanish": 0.22727272727272727, + "English,Chinese,Indonesian,Malay": 0.20454545454545456, + "English,Chinese,Filipino,Spanish": 0.20454545454545456, + "English,Chinese,Filipino,Malay": 0.1875, + "English,Chinese,Spanish,Malay": 0.19886363636363635, + "English,Indonesian,Filipino,Spanish": 0.2159090909090909, + "English,Indonesian,Filipino,Malay": 0.23295454545454544, + "English,Indonesian,Spanish,Malay": 0.29545454545454547, + "English,Filipino,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Filipino,Spanish": 0.13636363636363635, + "Vietnamese,Chinese,Filipino,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Indonesian,Filipino,Spanish": 0.19318181818181818, + "Vietnamese,Indonesian,Filipino,Malay": 0.26136363636363635, + "Vietnamese,Indonesian,Spanish,Malay": 0.25, + "Vietnamese,Filipino,Spanish,Malay": 0.20454545454545456, + "Chinese,Indonesian,Filipino,Spanish": 0.19886363636363635, + "Chinese,Indonesian,Filipino,Malay": 0.21022727272727273, + "Chinese,Indonesian,Spanish,Malay": 0.24431818181818182, + "Chinese,Filipino,Spanish,Malay": 0.19886363636363635, + "Indonesian,Filipino,Spanish,Malay": 0.2727272727272727 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.11363636363636363, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.125, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.13068181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.11363636363636363, + "English,Vietnamese,Chinese,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Spanish,Malay": 0.125, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.16477272727272727, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.1875, + "English,Vietnamese,Filipino,Spanish,Malay": 0.1590909090909091, + "English,Chinese,Indonesian,Filipino,Spanish": 0.14772727272727273, + "English,Chinese,Indonesian,Filipino,Malay": 0.14772727272727273, + "English,Chinese,Indonesian,Spanish,Malay": 0.17045454545454544, + "English,Chinese,Filipino,Spanish,Malay": 0.14772727272727273, + "English,Indonesian,Filipino,Spanish,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.18181818181818182, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.10795454545454546, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.10227272727272728, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272 + } + }, + "AC3_2": 0.37430058725735116, + "AC3_3": 0.29881583811459783, + "AC3_4": 0.2416981225723589, + "AC3_5": 0.19728481537973397, + "AC3_6": 0.16170722477533678, + "AC3_7": 0.133552014961389 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49514563106796117 + }, + "prompt_2": { + "accuracy": 0.47572815533980584 + }, + "prompt_3": { + "accuracy": 0.39805825242718446 + }, + "prompt_4": { + "accuracy": 0.4563106796116505 + }, + "prompt_5": { + "accuracy": 0.47572815533980584 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.26666666666666666 + }, + "prompt_2": { + "accuracy": 0.29523809523809524 + }, + "prompt_3": { + "accuracy": 0.3333333333333333 + }, + "prompt_4": { + "accuracy": 0.2571428571428571 + }, + "prompt_5": { + "accuracy": 0.24761904761904763 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.48598130841121495 + }, + "prompt_2": { + "accuracy": 0.42990654205607476 + }, + "prompt_3": { + "accuracy": 0.4485981308411215 + }, + "prompt_4": { + "accuracy": 0.48598130841121495 + }, + "prompt_5": { + "accuracy": 0.48598130841121495 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.35, + "category_acc": { + "brand": 0.4, + "demographics": 0.0, + "biology": 0.3, + "history": 0.3333333333333333, + "literature": 0.1, + "politics": 0.5, + "culture": 0.5, + "film": 0.4, + "law": 0.3, + "geography": 0.5 + } + }, + "prompt_2": { + "accuracy": 0.36, + "category_acc": { + "brand": 0.4, + "demographics": 0.0, + "biology": 0.2, + "history": 0.4, + "literature": 0.1, + "politics": 0.7, + "culture": 0.6, + "film": 0.5, + "law": 0.3, + "geography": 0.2 + } + }, + "prompt_3": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.4, + "demographics": 0.0, + "biology": 0.1, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.4, + "culture": 0.4, + "film": 0.4, + "law": 0.4, + "geography": 0.4 + } + }, + "prompt_4": { + "accuracy": 0.36, + "category_acc": { + "brand": 0.4, + "demographics": 0.0, + "biology": 0.3, + "history": 0.3333333333333333, + "literature": 0.1, + "politics": 0.6, + "culture": 0.5, + "film": 0.4, + "law": 0.4, + "geography": 0.4 + } + }, + "prompt_5": { + "accuracy": 0.35, + "category_acc": { + "brand": 0.4, + "demographics": 0.2, + "biology": 0.3, + "history": 0.3333333333333333, + "literature": 0.1, + "politics": 0.4, + "culture": 0.5, + "film": 0.4, + "law": 0.4, + "geography": 0.4 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.2016390629013497 + }, + "prompt_2": { + "bleu_score": 0.20583596210028363 + }, + "prompt_3": { + "bleu_score": 0.2067612347639634 + }, + "prompt_4": { + "bleu_score": 0.20054980677768708 + }, + "prompt_5": { + "bleu_score": 0.17948250071369648 + } }, "indommlu": { "prompt_1": -1, @@ -398,179 +5093,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.2199955256351759 + }, + "prompt_2": { + "bleu_score": 0.22265744397561796 + }, + "prompt_3": { + "bleu_score": 0.22148322088042713 + }, + "prompt_4": { + "bleu_score": 0.2216544737604712 + }, + "prompt_5": { + "bleu_score": 0.20567610296725278 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.07162891878353948 + }, + "prompt_2": { + "bleu_score": 0.0765066876408635 + }, + "prompt_3": { + "bleu_score": 0.07792801404642842 + }, + "prompt_4": { + "bleu_score": 0.07799458904611967 + }, + "prompt_5": { + "bleu_score": 0.062442537549600385 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.14415940425908821 + }, + "prompt_2": { + "bleu_score": 0.1500303826303978 + }, + "prompt_3": { + "bleu_score": 0.1476337503575094 + }, + "prompt_4": { + "bleu_score": 0.14600062790741067 + }, + "prompt_5": { + "bleu_score": 0.14380237924030906 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.1976372171264548 + }, + "prompt_2": { + "bleu_score": 0.19904345385667802 + }, + "prompt_3": { + "bleu_score": 0.1973773844653644 + }, + "prompt_4": { + "bleu_score": 0.20233368429739337 + }, + "prompt_5": { + "bleu_score": 0.19144307682581632 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.41890315052508753 + }, + "prompt_2": { + "accuracy": 0.4002333722287048 + }, + "prompt_3": { + "accuracy": 0.4130688448074679 + }, + "prompt_4": { + "accuracy": 0.411901983663944 + }, + "prompt_5": { + "accuracy": 0.40606767794632437 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.41022524132999644, + "category_acc": { + "high_school_european_history": 0.5304878048780488, + "business_ethics": 0.3939393939393939, + "clinical_knowledge": 0.4090909090909091, + "medical_genetics": 0.5151515151515151, + "high_school_us_history": 0.5714285714285714, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.5296610169491526, + "virology": 0.4, + "high_school_microeconomics": 0.38396624472573837, + "econometrics": 0.23008849557522124, + "college_computer_science": 0.41414141414141414, + "high_school_biology": 0.47896440129449835, + "abstract_algebra": 0.35353535353535354, + "professional_accounting": 0.2918149466192171, + "philosophy": 0.4290322580645161, + "professional_medicine": 0.4022140221402214, + "nutrition": 0.4360655737704918, + "global_facts": 0.32323232323232326, + "machine_learning": 0.22522522522522523, + "security_studies": 0.4098360655737705, + "public_relations": 0.44954128440366975, + "professional_psychology": 0.3911620294599018, + "prehistory": 0.47058823529411764, + "anatomy": 0.40298507462686567, + "human_sexuality": 0.5, + "college_medicine": 0.3546511627906977, + "high_school_government_and_politics": 0.5677083333333334, + "college_chemistry": 0.25252525252525254, + "logical_fallacies": 0.5123456790123457, + "high_school_geography": 0.49746192893401014, + "elementary_mathematics": 0.23342175066312998, + "human_aging": 0.42342342342342343, + "college_mathematics": 0.31313131313131315, + "high_school_psychology": 0.5441176470588235, + "formal_logic": 0.28, + "high_school_statistics": 0.3023255813953488, + "international_law": 0.55, + "high_school_mathematics": 0.20074349442379183, + "high_school_computer_science": 0.41414141414141414, + "conceptual_physics": 0.3717948717948718, + "miscellaneous": 0.5869565217391305, + "high_school_chemistry": 0.30198019801980197, + "marketing": 0.6523605150214592, + "professional_law": 0.33202870189171557, + "management": 0.5588235294117647, + "college_physics": 0.22772277227722773, + "jurisprudence": 0.4766355140186916, + "world_religions": 0.6176470588235294, + "sociology": 0.625, + "us_foreign_policy": 0.6464646464646465, + "high_school_macroeconomics": 0.35989717223650386, + "computer_security": 0.5555555555555556, + "moral_scenarios": 0.24720357941834453, + "moral_disputes": 0.4318840579710145, + "electrical_engineering": 0.4097222222222222, + "astronomy": 0.44370860927152317, + "college_biology": 0.40559440559440557 + } + }, + "prompt_2": { + "accuracy": 0.4029317125491598, + "category_acc": { + "high_school_european_history": 0.5121951219512195, + "business_ethics": 0.36363636363636365, + "clinical_knowledge": 0.4053030303030303, + "medical_genetics": 0.47474747474747475, + "high_school_us_history": 0.5073891625615764, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.5296610169491526, + "virology": 0.37575757575757573, + "high_school_microeconomics": 0.4050632911392405, + "econometrics": 0.23008849557522124, + "college_computer_science": 0.3939393939393939, + "high_school_biology": 0.44660194174757284, + "abstract_algebra": 0.37373737373737376, + "professional_accounting": 0.298932384341637, + "philosophy": 0.432258064516129, + "professional_medicine": 0.39114391143911437, + "nutrition": 0.4524590163934426, + "global_facts": 0.3333333333333333, + "machine_learning": 0.24324324324324326, + "security_studies": 0.4262295081967213, + "public_relations": 0.3944954128440367, + "professional_psychology": 0.3911620294599018, + "prehistory": 0.47678018575851394, + "anatomy": 0.41044776119402987, + "human_sexuality": 0.46923076923076923, + "college_medicine": 0.3313953488372093, + "high_school_government_and_politics": 0.5572916666666666, + "college_chemistry": 0.3333333333333333, + "logical_fallacies": 0.5246913580246914, + "high_school_geography": 0.4873096446700508, + "elementary_mathematics": 0.23342175066312998, + "human_aging": 0.4009009009009009, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.5349264705882353, + "formal_logic": 0.264, + "high_school_statistics": 0.3116279069767442, + "international_law": 0.575, + "high_school_mathematics": 0.21561338289962825, + "high_school_computer_science": 0.46464646464646464, + "conceptual_physics": 0.3247863247863248, + "miscellaneous": 0.5601023017902813, + "high_school_chemistry": 0.32673267326732675, + "marketing": 0.6051502145922747, + "professional_law": 0.33268101761252444, + "management": 0.43137254901960786, + "college_physics": 0.2871287128712871, + "jurisprudence": 0.4205607476635514, + "world_religions": 0.611764705882353, + "sociology": 0.57, + "us_foreign_policy": 0.6161616161616161, + "high_school_macroeconomics": 0.3676092544987147, + "computer_security": 0.5252525252525253, + "moral_scenarios": 0.24720357941834453, + "moral_disputes": 0.42318840579710143, + "electrical_engineering": 0.375, + "astronomy": 0.4304635761589404, + "college_biology": 0.4125874125874126 + } + }, + "prompt_3": { + "accuracy": 0.39964247407937076, + "category_acc": { + "high_school_european_history": 0.5, + "business_ethics": 0.3838383838383838, + "clinical_knowledge": 0.4090909090909091, + "medical_genetics": 0.46464646464646464, + "high_school_us_history": 0.5172413793103449, + "high_school_physics": 0.29333333333333333, + "high_school_world_history": 0.559322033898305, + "virology": 0.3575757575757576, + "high_school_microeconomics": 0.3924050632911392, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.40404040404040403, + "high_school_biology": 0.45307443365695793, + "abstract_algebra": 0.35353535353535354, + "professional_accounting": 0.3274021352313167, + "philosophy": 0.4258064516129032, + "professional_medicine": 0.3874538745387454, + "nutrition": 0.4459016393442623, + "global_facts": 0.3939393939393939, + "machine_learning": 0.21621621621621623, + "security_studies": 0.430327868852459, + "public_relations": 0.3853211009174312, + "professional_psychology": 0.397708674304419, + "prehistory": 0.4674922600619195, + "anatomy": 0.417910447761194, + "human_sexuality": 0.46923076923076923, + "college_medicine": 0.3430232558139535, + "high_school_government_and_politics": 0.515625, + "college_chemistry": 0.32323232323232326, + "logical_fallacies": 0.5, + "high_school_geography": 0.4720812182741117, + "elementary_mathematics": 0.2493368700265252, + "human_aging": 0.3918918918918919, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.5294117647058824, + "formal_logic": 0.256, + "high_school_statistics": 0.29767441860465116, + "international_law": 0.5416666666666666, + "high_school_mathematics": 0.23048327137546468, + "high_school_computer_science": 0.42424242424242425, + "conceptual_physics": 0.3504273504273504, + "miscellaneous": 0.5575447570332481, + "high_school_chemistry": 0.2871287128712871, + "marketing": 0.5836909871244635, + "professional_law": 0.31833007175472927, + "management": 0.4117647058823529, + "college_physics": 0.2376237623762376, + "jurisprudence": 0.4392523364485981, + "world_religions": 0.6058823529411764, + "sociology": 0.565, + "us_foreign_policy": 0.6565656565656566, + "high_school_macroeconomics": 0.34704370179948585, + "computer_security": 0.5353535353535354, + "moral_scenarios": 0.24720357941834453, + "moral_disputes": 0.4260869565217391, + "electrical_engineering": 0.4097222222222222, + "astronomy": 0.423841059602649, + "college_biology": 0.3986013986013986 + } + }, + "prompt_4": { + "accuracy": 0.409009653199857, + "category_acc": { + "high_school_european_history": 0.49390243902439024, + "business_ethics": 0.37373737373737376, + "clinical_knowledge": 0.4431818181818182, + "medical_genetics": 0.47474747474747475, + "high_school_us_history": 0.5615763546798029, + "high_school_physics": 0.2733333333333333, + "high_school_world_history": 0.5423728813559322, + "virology": 0.38181818181818183, + "high_school_microeconomics": 0.37130801687763715, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.3434343434343434, + "high_school_biology": 0.47896440129449835, + "abstract_algebra": 0.3434343434343434, + "professional_accounting": 0.3309608540925267, + "philosophy": 0.41935483870967744, + "professional_medicine": 0.41697416974169743, + "nutrition": 0.4426229508196721, + "global_facts": 0.32323232323232326, + "machine_learning": 0.22522522522522523, + "security_studies": 0.45901639344262296, + "public_relations": 0.47706422018348627, + "professional_psychology": 0.37315875613747956, + "prehistory": 0.48606811145510836, + "anatomy": 0.3805970149253731, + "human_sexuality": 0.5, + "college_medicine": 0.37209302325581395, + "high_school_government_and_politics": 0.5625, + "college_chemistry": 0.32323232323232326, + "logical_fallacies": 0.48148148148148145, + "high_school_geography": 0.5025380710659898, + "elementary_mathematics": 0.2519893899204244, + "human_aging": 0.4099099099099099, + "college_mathematics": 0.30303030303030304, + "high_school_psychology": 0.5386029411764706, + "formal_logic": 0.256, + "high_school_statistics": 0.3395348837209302, + "international_law": 0.575, + "high_school_mathematics": 0.20074349442379183, + "high_school_computer_science": 0.43434343434343436, + "conceptual_physics": 0.3803418803418803, + "miscellaneous": 0.571611253196931, + "high_school_chemistry": 0.3118811881188119, + "marketing": 0.6008583690987125, + "professional_law": 0.34442270058708413, + "management": 0.5196078431372549, + "college_physics": 0.24752475247524752, + "jurisprudence": 0.4485981308411215, + "world_religions": 0.6, + "sociology": 0.585, + "us_foreign_policy": 0.5959595959595959, + "high_school_macroeconomics": 0.36503856041131105, + "computer_security": 0.494949494949495, + "moral_scenarios": 0.24720357941834453, + "moral_disputes": 0.41739130434782606, + "electrical_engineering": 0.3819444444444444, + "astronomy": 0.46357615894039733, + "college_biology": 0.3776223776223776 + } + }, + "prompt_5": { + "accuracy": 0.4057204147300679, + "category_acc": { + "high_school_european_history": 0.5121951219512195, + "business_ethics": 0.3838383838383838, + "clinical_knowledge": 0.42803030303030304, + "medical_genetics": 0.5151515151515151, + "high_school_us_history": 0.5714285714285714, + "high_school_physics": 0.28, + "high_school_world_history": 0.5338983050847458, + "virology": 0.3939393939393939, + "high_school_microeconomics": 0.3670886075949367, + "econometrics": 0.2920353982300885, + "college_computer_science": 0.37373737373737376, + "high_school_biology": 0.47249190938511326, + "abstract_algebra": 0.36363636363636365, + "professional_accounting": 0.3096085409252669, + "philosophy": 0.45161290322580644, + "professional_medicine": 0.3874538745387454, + "nutrition": 0.43934426229508194, + "global_facts": 0.3434343434343434, + "machine_learning": 0.21621621621621623, + "security_studies": 0.45081967213114754, + "public_relations": 0.44954128440366975, + "professional_psychology": 0.37479541734860883, + "prehistory": 0.47058823529411764, + "anatomy": 0.3880597014925373, + "human_sexuality": 0.46923076923076923, + "college_medicine": 0.3430232558139535, + "high_school_government_and_politics": 0.5364583333333334, + "college_chemistry": 0.29292929292929293, + "logical_fallacies": 0.47530864197530864, + "high_school_geography": 0.4873096446700508, + "elementary_mathematics": 0.23872679045092837, + "human_aging": 0.4189189189189189, + "college_mathematics": 0.32323232323232326, + "high_school_psychology": 0.5330882352941176, + "formal_logic": 0.264, + "high_school_statistics": 0.3116279069767442, + "international_law": 0.5833333333333334, + "high_school_mathematics": 0.1970260223048327, + "high_school_computer_science": 0.41414141414141414, + "conceptual_physics": 0.36324786324786323, + "miscellaneous": 0.5741687979539642, + "high_school_chemistry": 0.3217821782178218, + "marketing": 0.5836909871244635, + "professional_law": 0.33529028049575993, + "management": 0.5, + "college_physics": 0.2376237623762376, + "jurisprudence": 0.4485981308411215, + "world_religions": 0.5882352941176471, + "sociology": 0.585, + "us_foreign_policy": 0.5959595959595959, + "high_school_macroeconomics": 0.38303341902313626, + "computer_security": 0.48484848484848486, + "moral_scenarios": 0.24720357941834453, + "moral_disputes": 0.4405797101449275, + "electrical_engineering": 0.3819444444444444, + "astronomy": 0.4304635761589404, + "college_biology": 0.36363636363636365 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3016344725111441 + }, + "prompt_2": { + "accuracy": 0.30089153046062406 + }, + "prompt_3": { + "accuracy": 0.3016344725111441 + }, + "prompt_4": { + "accuracy": 0.29940564635958394 + }, + "prompt_5": { + "accuracy": 0.2949479940564636 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3138231631382316, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.25, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.2857142857142857, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.41379310344827586, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.3448275862068966, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.25, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.2692307692307692, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.23333333333333334, + "business_administration": 0.3684210526315789, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.27586206896551724, + "education_science": 0.3235294117647059, + "teacher_qualification": 0.3469387755102041, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.125, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.48148148148148145, + "law": 0.4827586206896552, + "chinese_language_and_literature": 0.21428571428571427, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.24, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.38461538461538464, + "sports_science": 0.375, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.5416666666666666, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.25925925925925924, + "fire_engineer": 0.3888888888888889, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.3148148148148148, + "physician": 0.3333333333333333 + } + }, + "prompt_2": { + "accuracy": 0.31880448318804483, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.30952380952380953, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.4827586206896552, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.35714285714285715, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.4166666666666667, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.23076923076923078, + "middle_school_physics": 0.5833333333333334, + "middle_school_chemistry": 0.24, + "veterinary_medicine": 0.25, + "college_economics": 0.3, + "business_administration": 0.3684210526315789, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.38235294117647056, + "teacher_qualification": 0.30612244897959184, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.37037037037037035, + "law": 0.3793103448275862, + "chinese_language_and_literature": 0.25, + "art_studies": 0.23684210526315788, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.24, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.34615384615384615, + "sports_science": 0.4166666666666667, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.5, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.3333333333333333, + "accountant": 0.2222222222222222, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.3333333333333333, + "physician": 0.37037037037037035 + } + }, + "prompt_3": { + "accuracy": 0.31320049813200496, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.3333333333333333, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.4827586206896552, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.3448275862068966, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.4166666666666667, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.19230769230769232, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.31666666666666665, + "business_administration": 0.3684210526315789, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.4411764705882353, + "teacher_qualification": 0.30612244897959184, + "high_school_politics": 0.5833333333333334, + "high_school_geography": 0.125, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.25, + "logic": 0.3333333333333333, + "law": 0.3793103448275862, + "chinese_language_and_literature": 0.25, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.16666666666666666, + "high_school_history": 0.32, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.3076923076923077, + "sports_science": 0.4166666666666667, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.3333333333333333, + "accountant": 0.2222222222222222, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.3333333333333333, + "physician": 0.35185185185185186 + } + }, + "prompt_4": { + "accuracy": 0.32378580323785805, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.25, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.3333333333333333, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.41379310344827586, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.391304347826087, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.40476190476190477, + "metrology_engineer": 0.3448275862068966, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.25, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.125, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.5416666666666666, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.25, + "college_economics": 0.2833333333333333, + "business_administration": 0.3157894736842105, + "marxism": 0.5, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.35294117647058826, + "teacher_qualification": 0.40816326530612246, + "high_school_politics": 0.375, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.5, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.48148148148148145, + "law": 0.4482758620689655, + "chinese_language_and_literature": 0.25, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.25, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.2, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.38461538461538464, + "sports_science": 0.5, + "plant_protection": 0.14814814814814814, + "basic_medicine": 0.5, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.3333333333333333, + "accountant": 0.2962962962962963, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.3148148148148148, + "physician": 0.2962962962962963 + } + }, + "prompt_5": { + "accuracy": 0.32378580323785805, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.35714285714285715, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.4482758620689655, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.391304347826087, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.3103448275862069, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.25, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.2692307692307692, + "middle_school_physics": 0.5416666666666666, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.25, + "college_economics": 0.26666666666666666, + "business_administration": 0.3684210526315789, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.3235294117647059, + "teacher_qualification": 0.3877551020408163, + "high_school_politics": 0.5, + "high_school_geography": 0.16666666666666666, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.5, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.48148148148148145, + "law": 0.4482758620689655, + "chinese_language_and_literature": 0.25, + "art_studies": 0.23684210526315788, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.25, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.2, + "middle_school_history": 0.2222222222222222, + "civil_servant": 0.36538461538461536, + "sports_science": 0.4166666666666667, + "plant_protection": 0.18518518518518517, + "basic_medicine": 0.5, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.37254901960784315, + "accountant": 0.2777777777777778, + "fire_engineer": 0.3888888888888889, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.2777777777777778, + "physician": 0.3333333333333333 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2867383512544803 + }, + "prompt_2": { + "accuracy": 0.3118279569892473 + }, + "prompt_3": { + "accuracy": 0.2939068100358423 + }, + "prompt_4": { + "accuracy": 0.26881720430107525 + }, + "prompt_5": { + "accuracy": 0.2616487455197133 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.29554481091348644, + "category_acc": { + "agronomy": 0.28402366863905326, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.2621951219512195, + "arts": 0.24375, + "astronomy": 0.2606060606060606, + "business_ethics": 0.3062200956937799, + "chinese_civil_service_exam": 0.30625, + "chinese_driving_rule": 0.31297709923664124, + "chinese_food_culture": 0.3235294117647059, + "chinese_foreign_policy": 0.2616822429906542, + "chinese_history": 0.25077399380804954, + "chinese_literature": 0.30392156862745096, + "chinese_teacher_qualification": 0.29608938547486036, + "clinical_knowledge": 0.2320675105485232, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.2803738317757009, + "college_engineering_hydrology": 0.3018867924528302, + "college_law": 0.2962962962962963, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.2830188679245283, + "college_medicine": 0.27472527472527475, + "computer_science": 0.37254901960784315, + "computer_security": 0.3391812865497076, + "conceptual_physics": 0.30612244897959184, + "construction_project_management": 0.3597122302158273, + "economics": 0.3018867924528302, + "education": 0.3067484662576687, + "electrical_engineering": 0.3430232558139535, + "elementary_chinese": 0.2619047619047619, + "elementary_commonsense": 0.26262626262626265, + "elementary_information_and_technology": 0.3697478991596639, + "elementary_mathematics": 0.21739130434782608, + "ethnology": 0.2962962962962963, + "food_science": 0.32867132867132864, + "genetics": 0.29545454545454547, + "global_facts": 0.2953020134228188, + "high_school_biology": 0.3076923076923077, + "high_school_chemistry": 0.25, + "high_school_geography": 0.2542372881355932, + "high_school_mathematics": 0.21341463414634146, + "high_school_physics": 0.24545454545454545, + "high_school_politics": 0.2727272727272727, + "human_sexuality": 0.30158730158730157, + "international_law": 0.3081081081081081, + "journalism": 0.3023255813953488, + "jurisprudence": 0.30413625304136255, + "legal_and_moral_basis": 0.45794392523364486, + "logical": 0.21951219512195122, + "machine_learning": 0.27049180327868855, + "management": 0.29523809523809524, + "marketing": 0.3611111111111111, + "marxist_theory": 0.3333333333333333, + "modern_chinese": 0.25, + "nutrition": 0.31724137931034485, + "philosophy": 0.3142857142857143, + "professional_accounting": 0.28, + "professional_law": 0.3127962085308057, + "professional_medicine": 0.26595744680851063, + "professional_psychology": 0.33189655172413796, + "public_relations": 0.3448275862068966, + "security_study": 0.25925925925925924, + "sociology": 0.3407079646017699, + "sports_science": 0.38181818181818183, + "traditional_chinese_medicine": 0.2648648648648649, + "virology": 0.2781065088757396, + "world_history": 0.2795031055900621, + "world_religions": 0.2875 + } + }, + "prompt_2": { + "accuracy": 0.29554481091348644, + "category_acc": { + "agronomy": 0.26627218934911245, + "anatomy": 0.25, + "ancient_chinese": 0.24390243902439024, + "arts": 0.2375, + "astronomy": 0.26666666666666666, + "business_ethics": 0.3397129186602871, + "chinese_civil_service_exam": 0.29375, + "chinese_driving_rule": 0.32061068702290074, + "chinese_food_culture": 0.3382352941176471, + "chinese_foreign_policy": 0.2803738317757009, + "chinese_history": 0.23529411764705882, + "chinese_literature": 0.36764705882352944, + "chinese_teacher_qualification": 0.2849162011173184, + "clinical_knowledge": 0.2489451476793249, + "college_actuarial_science": 0.20754716981132076, + "college_education": 0.29906542056074764, + "college_engineering_hydrology": 0.27358490566037735, + "college_law": 0.25925925925925924, + "college_mathematics": 0.2, + "college_medical_statistics": 0.25471698113207547, + "college_medicine": 0.326007326007326, + "computer_science": 0.35294117647058826, + "computer_security": 0.34502923976608185, + "conceptual_physics": 0.3129251700680272, + "construction_project_management": 0.3597122302158273, + "economics": 0.27672955974842767, + "education": 0.3006134969325153, + "electrical_engineering": 0.3313953488372093, + "elementary_chinese": 0.2698412698412698, + "elementary_commonsense": 0.2474747474747475, + "elementary_information_and_technology": 0.3487394957983193, + "elementary_mathematics": 0.25217391304347825, + "ethnology": 0.3037037037037037, + "food_science": 0.2867132867132867, + "genetics": 0.25, + "global_facts": 0.28859060402684567, + "high_school_biology": 0.31952662721893493, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.2796610169491525, + "high_school_mathematics": 0.20121951219512196, + "high_school_physics": 0.2636363636363636, + "high_school_politics": 0.3006993006993007, + "human_sexuality": 0.29365079365079366, + "international_law": 0.3675675675675676, + "journalism": 0.313953488372093, + "jurisprudence": 0.3236009732360097, + "legal_and_moral_basis": 0.42990654205607476, + "logical": 0.23577235772357724, + "machine_learning": 0.2459016393442623, + "management": 0.30952380952380953, + "marketing": 0.38333333333333336, + "marxist_theory": 0.3544973544973545, + "modern_chinese": 0.25862068965517243, + "nutrition": 0.31724137931034485, + "philosophy": 0.3238095238095238, + "professional_accounting": 0.2742857142857143, + "professional_law": 0.3175355450236967, + "professional_medicine": 0.25, + "professional_psychology": 0.3017241379310345, + "public_relations": 0.3275862068965517, + "security_study": 0.2740740740740741, + "sociology": 0.33185840707964603, + "sports_science": 0.34545454545454546, + "traditional_chinese_medicine": 0.24864864864864866, + "virology": 0.3254437869822485, + "world_history": 0.2732919254658385, + "world_religions": 0.275 + } + }, + "prompt_3": { + "accuracy": 0.2962355379036436, + "category_acc": { + "agronomy": 0.28994082840236685, + "anatomy": 0.2905405405405405, + "ancient_chinese": 0.22560975609756098, + "arts": 0.2375, + "astronomy": 0.296969696969697, + "business_ethics": 0.31100478468899523, + "chinese_civil_service_exam": 0.2875, + "chinese_driving_rule": 0.32061068702290074, + "chinese_food_culture": 0.3088235294117647, + "chinese_foreign_policy": 0.308411214953271, + "chinese_history": 0.25386996904024767, + "chinese_literature": 0.35784313725490197, + "chinese_teacher_qualification": 0.29608938547486036, + "clinical_knowledge": 0.21940928270042195, + "college_actuarial_science": 0.20754716981132076, + "college_education": 0.32710280373831774, + "college_engineering_hydrology": 0.3018867924528302, + "college_law": 0.23148148148148148, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.3333333333333333, + "computer_science": 0.37254901960784315, + "computer_security": 0.34502923976608185, + "conceptual_physics": 0.3197278911564626, + "construction_project_management": 0.3381294964028777, + "economics": 0.29559748427672955, + "education": 0.26380368098159507, + "electrical_engineering": 0.3546511627906977, + "elementary_chinese": 0.26587301587301587, + "elementary_commonsense": 0.2727272727272727, + "elementary_information_and_technology": 0.36134453781512604, + "elementary_mathematics": 0.2565217391304348, + "ethnology": 0.2740740740740741, + "food_science": 0.2867132867132867, + "genetics": 0.26136363636363635, + "global_facts": 0.2953020134228188, + "high_school_biology": 0.2781065088757396, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.2711864406779661, + "high_school_mathematics": 0.20121951219512196, + "high_school_physics": 0.24545454545454545, + "high_school_politics": 0.3076923076923077, + "human_sexuality": 0.30952380952380953, + "international_law": 0.3567567567567568, + "journalism": 0.29069767441860467, + "jurisprudence": 0.3309002433090024, + "legal_and_moral_basis": 0.4392523364485981, + "logical": 0.23577235772357724, + "machine_learning": 0.26229508196721313, + "management": 0.3, + "marketing": 0.3888888888888889, + "marxist_theory": 0.37037037037037035, + "modern_chinese": 0.2413793103448276, + "nutrition": 0.2896551724137931, + "philosophy": 0.3238095238095238, + "professional_accounting": 0.2857142857142857, + "professional_law": 0.2843601895734597, + "professional_medicine": 0.2579787234042553, + "professional_psychology": 0.2974137931034483, + "public_relations": 0.3275862068965517, + "security_study": 0.23703703703703705, + "sociology": 0.336283185840708, + "sports_science": 0.36363636363636365, + "traditional_chinese_medicine": 0.24864864864864866, + "virology": 0.3136094674556213, + "world_history": 0.2670807453416149, + "world_religions": 0.28125 + } + }, + "prompt_4": { + "accuracy": 0.2889829045069936, + "category_acc": { + "agronomy": 0.3076923076923077, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.24390243902439024, + "arts": 0.25625, + "astronomy": 0.22424242424242424, + "business_ethics": 0.32057416267942584, + "chinese_civil_service_exam": 0.26875, + "chinese_driving_rule": 0.2900763358778626, + "chinese_food_culture": 0.2867647058823529, + "chinese_foreign_policy": 0.308411214953271, + "chinese_history": 0.26006191950464397, + "chinese_literature": 0.3235294117647059, + "chinese_teacher_qualification": 0.2905027932960894, + "clinical_knowledge": 0.24472573839662448, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.2616822429906542, + "college_engineering_hydrology": 0.2830188679245283, + "college_law": 0.26851851851851855, + "college_mathematics": 0.2761904761904762, + "college_medical_statistics": 0.25471698113207547, + "college_medicine": 0.27472527472527475, + "computer_science": 0.37254901960784315, + "computer_security": 0.3216374269005848, + "conceptual_physics": 0.3333333333333333, + "construction_project_management": 0.31654676258992803, + "economics": 0.32075471698113206, + "education": 0.2883435582822086, + "electrical_engineering": 0.3430232558139535, + "elementary_chinese": 0.25793650793650796, + "elementary_commonsense": 0.2222222222222222, + "elementary_information_and_technology": 0.3445378151260504, + "elementary_mathematics": 0.21739130434782608, + "ethnology": 0.3333333333333333, + "food_science": 0.32167832167832167, + "genetics": 0.26136363636363635, + "global_facts": 0.2550335570469799, + "high_school_biology": 0.3076923076923077, + "high_school_chemistry": 0.21212121212121213, + "high_school_geography": 0.2627118644067797, + "high_school_mathematics": 0.2073170731707317, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.2727272727272727, + "human_sexuality": 0.30952380952380953, + "international_law": 0.2918918918918919, + "journalism": 0.27325581395348836, + "jurisprudence": 0.2871046228710462, + "legal_and_moral_basis": 0.4205607476635514, + "logical": 0.2032520325203252, + "machine_learning": 0.26229508196721313, + "management": 0.2904761904761905, + "marketing": 0.31666666666666665, + "marxist_theory": 0.328042328042328, + "modern_chinese": 0.3103448275862069, + "nutrition": 0.2896551724137931, + "philosophy": 0.3238095238095238, + "professional_accounting": 0.2571428571428571, + "professional_law": 0.3175355450236967, + "professional_medicine": 0.27925531914893614, + "professional_psychology": 0.31896551724137934, + "public_relations": 0.3333333333333333, + "security_study": 0.2962962962962963, + "sociology": 0.30973451327433627, + "sports_science": 0.32727272727272727, + "traditional_chinese_medicine": 0.2648648648648649, + "virology": 0.28402366863905326, + "world_history": 0.2732919254658385, + "world_religions": 0.31875 + } + }, + "prompt_5": { + "accuracy": 0.29148678984631327, + "category_acc": { + "agronomy": 0.31952662721893493, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.24390243902439024, + "arts": 0.25625, + "astronomy": 0.23636363636363636, + "business_ethics": 0.3062200956937799, + "chinese_civil_service_exam": 0.275, + "chinese_driving_rule": 0.2748091603053435, + "chinese_food_culture": 0.3161764705882353, + "chinese_foreign_policy": 0.2616822429906542, + "chinese_history": 0.2631578947368421, + "chinese_literature": 0.3137254901960784, + "chinese_teacher_qualification": 0.2905027932960894, + "clinical_knowledge": 0.2616033755274262, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.2616822429906542, + "college_engineering_hydrology": 0.29245283018867924, + "college_law": 0.2777777777777778, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.2673992673992674, + "computer_science": 0.35294117647058826, + "computer_security": 0.30994152046783624, + "conceptual_physics": 0.32653061224489793, + "construction_project_management": 0.3237410071942446, + "economics": 0.2893081761006289, + "education": 0.3006134969325153, + "electrical_engineering": 0.3313953488372093, + "elementary_chinese": 0.24603174603174602, + "elementary_commonsense": 0.2474747474747475, + "elementary_information_and_technology": 0.3445378151260504, + "elementary_mathematics": 0.24347826086956523, + "ethnology": 0.35555555555555557, + "food_science": 0.32867132867132864, + "genetics": 0.2727272727272727, + "global_facts": 0.2751677852348993, + "high_school_biology": 0.34911242603550297, + "high_school_chemistry": 0.25, + "high_school_geography": 0.2796610169491525, + "high_school_mathematics": 0.21341463414634146, + "high_school_physics": 0.24545454545454545, + "high_school_politics": 0.2727272727272727, + "human_sexuality": 0.30952380952380953, + "international_law": 0.2918918918918919, + "journalism": 0.3081395348837209, + "jurisprudence": 0.2944038929440389, + "legal_and_moral_basis": 0.4205607476635514, + "logical": 0.2032520325203252, + "machine_learning": 0.28688524590163933, + "management": 0.2904761904761905, + "marketing": 0.3055555555555556, + "marxist_theory": 0.3492063492063492, + "modern_chinese": 0.25862068965517243, + "nutrition": 0.30344827586206896, + "philosophy": 0.3142857142857143, + "professional_accounting": 0.28, + "professional_law": 0.3222748815165877, + "professional_medicine": 0.2579787234042553, + "professional_psychology": 0.28879310344827586, + "public_relations": 0.3448275862068966, + "security_study": 0.3111111111111111, + "sociology": 0.30973451327433627, + "sports_science": 0.3393939393939394, + "traditional_chinese_medicine": 0.2864864864864865, + "virology": 0.28402366863905326, + "world_history": 0.2732919254658385, + "world_religions": 0.30625 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.21212121212121213 + }, + "prompt_2": { + "accuracy": 0.21212121212121213 + }, + "prompt_3": { + "accuracy": 0.2727272727272727 + }, + "prompt_4": { + "accuracy": 0.2727272727272727 + }, + "prompt_5": { + "accuracy": 0.24242424242424243 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.29545454545454547 + }, + "prompt_2": { + "accuracy": 0.2681818181818182 + }, + "prompt_3": { + "accuracy": 0.19772727272727272 + }, + "prompt_4": { + "accuracy": 0.30227272727272725 + }, + "prompt_5": { + "accuracy": 0.31136363636363634 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3369491525423729 + }, + "prompt_2": { + "accuracy": 0.35322033898305083 + }, + "prompt_3": { + "accuracy": 0.35627118644067796 + }, + "prompt_4": { + "accuracy": 0.3284745762711864 + }, + "prompt_5": { + "accuracy": 0.3335593220338983 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49214659685863876 + }, + "prompt_2": { + "accuracy": 0.48653702318623787 + }, + "prompt_3": { + "accuracy": 0.4925205684367988 + }, + "prompt_4": { + "accuracy": 0.47643979057591623 + }, + "prompt_5": { + "accuracy": 0.4648466716529544 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6805487506124449 + }, + "prompt_2": { + "accuracy": 0.6756491915727585 + }, + "prompt_3": { + "accuracy": 0.6820186183243508 + }, + "prompt_4": { + "accuracy": 0.6751592356687898 + }, + "prompt_5": { + "accuracy": 0.6780989710926016 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.3691198162586926, + "rouge2": 0.14736683177001716, + "rougeL": 0.28657506918662473, + "avg_rouge": 0.26768723907177816 + }, + "prompt_2": { + "rouge1": 0.3748377890636475, + "rouge2": 0.14943605082778966, + "rougeL": 0.29076004722816995, + "avg_rouge": 0.27167796237320235 + }, + "prompt_3": { + "rouge1": 0.3587826072709965, + "rouge2": 0.1409981204937326, + "rougeL": 0.2766079915665392, + "avg_rouge": 0.2587962397770894 + }, + "prompt_4": { + "rouge1": 0.3634327519726892, + "rouge2": 0.13864011955478023, + "rougeL": 0.2807269060631162, + "avg_rouge": 0.26093325919686183 + }, + "prompt_5": { + "rouge1": 0.37175607230831326, + "rouge2": 0.14778753816175283, + "rougeL": 0.28977534560321966, + "avg_rouge": 0.26977298535776195 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2359319623476865, + "rouge2": 0.06465979168450203, + "rougeL": 0.17518825056507217, + "avg_rouge": 0.15859333486575355 + }, + "prompt_2": { + "rouge1": 0.23568362116268263, + "rouge2": 0.0636226761304983, + "rougeL": 0.1751394146474463, + "avg_rouge": 0.15814857064687574 + }, + "prompt_3": { + "rouge1": 0.23693035201164248, + "rouge2": 0.06481439300997853, + "rougeL": 0.176383865559464, + "avg_rouge": 0.15937620352702833 + }, + "prompt_4": { + "rouge1": 0.2345783490334219, + "rouge2": 0.0622284933637625, + "rougeL": 0.17300916318210757, + "avg_rouge": 0.15660533519309733 + }, + "prompt_5": { + "rouge1": 0.21064224871895745, + "rouge2": 0.059712177055403735, + "rougeL": 0.17036244193317573, + "avg_rouge": 0.14690562256917897 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.75 + }, + "prompt_2": { + "accuracy": 0.7339449541284404 + }, + "prompt_3": { + "accuracy": 0.7637614678899083 + }, + "prompt_4": { + "accuracy": 0.8027522935779816 + }, + "prompt_5": { + "accuracy": 0.9128440366972477 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.62607861936721 + }, + "prompt_2": { + "accuracy": 0.5877277085330777 + }, + "prompt_3": { + "accuracy": 0.6500479386385427 + }, + "prompt_4": { + "accuracy": 0.6586768935762224 + }, + "prompt_5": { + "accuracy": 0.6510067114093959 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5015 + }, + "prompt_2": { + "accuracy": 0.448 + }, + "prompt_3": { + "accuracy": 0.4905 + }, + "prompt_4": { + "accuracy": 0.4785 + }, + "prompt_5": { + "accuracy": 0.42 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3645 + }, + "prompt_2": { + "accuracy": 0.377 + }, + "prompt_3": { + "accuracy": 0.377 + }, + "prompt_4": { + "accuracy": 0.36 + }, + "prompt_5": { + "accuracy": 0.37 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5105 + }, + "prompt_2": { + "accuracy": 0.5085 + }, + "prompt_3": { + "accuracy": 0.5115 + }, + "prompt_4": { + "accuracy": 0.514 + }, + "prompt_5": { + "accuracy": 0.514 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5492957746478874 + }, + "prompt_2": { + "accuracy": 0.5070422535211268 + }, + "prompt_3": { + "accuracy": 0.4507042253521127 + }, + "prompt_4": { + "accuracy": 0.43661971830985913 + }, + "prompt_5": { + "accuracy": 0.43661971830985913 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5234657039711191 + }, + "prompt_2": { + "accuracy": 0.5342960288808665 + }, + "prompt_3": { + "accuracy": 0.5306859205776173 + }, + "prompt_4": { + "accuracy": 0.5270758122743683 + }, + "prompt_5": { + "accuracy": 0.5270758122743683 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.46568627450980393 + }, + "prompt_2": { + "accuracy": 0.47549019607843135 + }, + "prompt_3": { + "accuracy": 0.4681372549019608 + }, + "prompt_4": { + "accuracy": 0.4950980392156863 + }, + "prompt_5": { + "accuracy": 0.47794117647058826 + } } }, "five_shot": { @@ -680,53 +6565,1733 @@ "model_link": "https://huggingface.co/google/flan-t5-small", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.26952380952380955, + "language_acc": { + "Malay": 0.23333333333333334, + "English": 0.26666666666666666, + "Vietnamese": 0.3, + "Spanish": 0.3, + "Indonesian": 0.26, + "Filipino": 0.24666666666666667, + "Chinese": 0.28 + }, + "consistency_score_2": 0.4498412698412699, + "consistency_score_3": 0.2584761904761904, + "consistency_score_4": 0.17142857142857143, + "consistency_score_5": 0.12539682539682537, + "consistency_score_6": 0.09809523809523811, + "consistency_score_7": 0.08, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.4266666666666667, + "Malay,Vietnamese": 0.5, + "Malay,Spanish": 0.46, + "Malay,Indonesian": 0.62, + "Malay,Filipino": 0.4666666666666667, + "Malay,Chinese": 0.42, + "English,Vietnamese": 0.4, + "English,Spanish": 0.4666666666666667, + "English,Indonesian": 0.4066666666666667, + "English,Filipino": 0.4666666666666667, + "English,Chinese": 0.43333333333333335, + "Vietnamese,Spanish": 0.4266666666666667, + "Vietnamese,Indonesian": 0.5066666666666667, + "Vietnamese,Filipino": 0.4266666666666667, + "Vietnamese,Chinese": 0.54, + "Spanish,Indonesian": 0.38666666666666666, + "Spanish,Filipino": 0.44666666666666666, + "Spanish,Chinese": 0.4066666666666667, + "Indonesian,Filipino": 0.43333333333333335, + "Indonesian,Chinese": 0.42, + "Filipino,Chinese": 0.38666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.22666666666666666, + "Malay,English,Spanish": 0.26, + "Malay,English,Indonesian": 0.29333333333333333, + "Malay,English,Filipino": 0.26666666666666666, + "Malay,English,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Spanish": 0.2733333333333333, + "Malay,Vietnamese,Indonesian": 0.36666666666666664, + "Malay,Vietnamese,Filipino": 0.28, + "Malay,Vietnamese,Chinese": 0.3, + "Malay,Spanish,Indonesian": 0.30666666666666664, + "Malay,Spanish,Filipino": 0.26, + "Malay,Spanish,Chinese": 0.22, + "Malay,Indonesian,Filipino": 0.32666666666666666, + "Malay,Indonesian,Chinese": 0.3, + "Malay,Filipino,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish": 0.26, + "English,Vietnamese,Indonesian": 0.22, + "English,Vietnamese,Filipino": 0.25333333333333335, + "English,Vietnamese,Chinese": 0.25333333333333335, + "English,Spanish,Indonesian": 0.24, + "English,Spanish,Filipino": 0.2733333333333333, + "English,Spanish,Chinese": 0.25333333333333335, + "English,Indonesian,Filipino": 0.24666666666666667, + "English,Indonesian,Chinese": 0.22, + "English,Filipino,Chinese": 0.24, + "Vietnamese,Spanish,Indonesian": 0.24, + "Vietnamese,Spanish,Filipino": 0.24, + "Vietnamese,Spanish,Chinese": 0.26666666666666666, + "Vietnamese,Indonesian,Filipino": 0.28, + "Vietnamese,Indonesian,Chinese": 0.29333333333333333, + "Vietnamese,Filipino,Chinese": 0.26666666666666666, + "Spanish,Indonesian,Filipino": 0.22666666666666666, + "Spanish,Indonesian,Chinese": 0.21333333333333335, + "Spanish,Filipino,Chinese": 0.21333333333333335, + "Indonesian,Filipino,Chinese": 0.22 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.16666666666666666, + "Malay,English,Vietnamese,Indonesian": 0.16666666666666666, + "Malay,English,Vietnamese,Filipino": 0.16666666666666666, + "Malay,English,Vietnamese,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Indonesian": 0.2, + "Malay,English,Spanish,Filipino": 0.18, + "Malay,English,Spanish,Chinese": 0.15333333333333332, + "Malay,English,Indonesian,Filipino": 0.19333333333333333, + "Malay,English,Indonesian,Chinese": 0.16, + "Malay,English,Filipino,Chinese": 0.16, + "Malay,Vietnamese,Spanish,Indonesian": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.22666666666666666, + "Malay,Vietnamese,Indonesian,Chinese": 0.22, + "Malay,Vietnamese,Filipino,Chinese": 0.18666666666666668, + "Malay,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.16666666666666666, + "Malay,Spanish,Filipino,Chinese": 0.14666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.18, + "English,Vietnamese,Spanish,Indonesian": 0.15333333333333332, + "English,Vietnamese,Spanish,Filipino": 0.18, + "English,Vietnamese,Spanish,Chinese": 0.18, + "English,Vietnamese,Indonesian,Filipino": 0.16666666666666666, + "English,Vietnamese,Indonesian,Chinese": 0.16, + "English,Vietnamese,Filipino,Chinese": 0.17333333333333334, + "English,Spanish,Indonesian,Filipino": 0.16666666666666666, + "English,Spanish,Indonesian,Chinese": 0.14666666666666667, + "English,Spanish,Filipino,Chinese": 0.16, + "English,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Vietnamese,Spanish,Filipino,Chinese": 0.16666666666666666, + "Vietnamese,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.13333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Chinese": 0.12, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.13333333333333333, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.12, + "Malay,English,Vietnamese,Filipino,Chinese": 0.12666666666666668, + "Malay,English,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,English,Spanish,Filipino,Chinese": 0.12, + "Malay,English,Indonesian,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.14, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.12, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.12, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.13333333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.12666666666666668, + "English,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.10666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.1, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08 + } + }, + "AC3_2": 0.33708317567709406, + "AC3_3": 0.26388442240587284, + "AC3_4": 0.20956494904226158, + "AC3_5": 0.17116061854493078, + "AC3_6": 0.14383913146839808, + "AC3_7": 0.12337874655870636 + }, + "prompt_2": { + "overall_acc": 0.25999999999999995, + "language_acc": { + "Malay": 0.22, + "English": 0.30666666666666664, + "Vietnamese": 0.26, + "Spanish": 0.2866666666666667, + "Indonesian": 0.24666666666666667, + "Filipino": 0.24666666666666667, + "Chinese": 0.25333333333333335 + }, + "consistency_score_2": 0.4380952380952382, + "consistency_score_3": 0.25542857142857134, + "consistency_score_4": 0.1782857142857143, + "consistency_score_5": 0.13936507936507936, + "consistency_score_6": 0.1180952380952381, + "consistency_score_7": 0.10666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.44, + "Malay,Vietnamese": 0.4866666666666667, + "Malay,Spanish": 0.4866666666666667, + "Malay,Indonesian": 0.6733333333333333, + "Malay,Filipino": 0.4666666666666667, + "Malay,Chinese": 0.35333333333333333, + "English,Vietnamese": 0.38, + "English,Spanish": 0.54, + "English,Indonesian": 0.4, + "English,Filipino": 0.4533333333333333, + "English,Chinese": 0.3466666666666667, + "Vietnamese,Spanish": 0.44666666666666666, + "Vietnamese,Indonesian": 0.5, + "Vietnamese,Filipino": 0.5133333333333333, + "Vietnamese,Chinese": 0.3333333333333333, + "Spanish,Indonesian": 0.49333333333333335, + "Spanish,Filipino": 0.5066666666666667, + "Spanish,Chinese": 0.24, + "Indonesian,Filipino": 0.49333333333333335, + "Indonesian,Chinese": 0.38, + "Filipino,Chinese": 0.26666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.24, + "Malay,English,Spanish": 0.31333333333333335, + "Malay,English,Indonesian": 0.3333333333333333, + "Malay,English,Filipino": 0.2733333333333333, + "Malay,English,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Spanish": 0.29333333333333333, + "Malay,Vietnamese,Indonesian": 0.38, + "Malay,Vietnamese,Filipino": 0.3, + "Malay,Vietnamese,Chinese": 0.20666666666666667, + "Malay,Spanish,Indonesian": 0.37333333333333335, + "Malay,Spanish,Filipino": 0.30666666666666664, + "Malay,Spanish,Chinese": 0.17333333333333334, + "Malay,Indonesian,Filipino": 0.36, + "Malay,Indonesian,Chinese": 0.26666666666666666, + "Malay,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish": 0.28, + "English,Vietnamese,Indonesian": 0.24, + "English,Vietnamese,Filipino": 0.2866666666666667, + "English,Vietnamese,Chinese": 0.17333333333333334, + "English,Spanish,Indonesian": 0.3, + "English,Spanish,Filipino": 0.3333333333333333, + "English,Spanish,Chinese": 0.18666666666666668, + "English,Indonesian,Filipino": 0.26666666666666666, + "English,Indonesian,Chinese": 0.18666666666666668, + "English,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Indonesian": 0.3, + "Vietnamese,Spanish,Filipino": 0.31333333333333335, + "Vietnamese,Spanish,Chinese": 0.16, + "Vietnamese,Indonesian,Filipino": 0.3466666666666667, + "Vietnamese,Indonesian,Chinese": 0.21333333333333335, + "Vietnamese,Filipino,Chinese": 0.18666666666666668, + "Spanish,Indonesian,Filipino": 0.31333333333333335, + "Spanish,Indonesian,Chinese": 0.16666666666666666, + "Spanish,Filipino,Chinese": 0.16666666666666666, + "Indonesian,Filipino,Chinese": 0.18 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.2, + "Malay,English,Vietnamese,Indonesian": 0.21333333333333335, + "Malay,English,Vietnamese,Filipino": 0.2, + "Malay,English,Vietnamese,Chinese": 0.13333333333333333, + "Malay,English,Spanish,Indonesian": 0.26, + "Malay,English,Spanish,Filipino": 0.23333333333333334, + "Malay,English,Spanish,Chinese": 0.14, + "Malay,English,Indonesian,Filipino": 0.22666666666666666, + "Malay,English,Indonesian,Chinese": 0.16666666666666666, + "Malay,English,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Filipino": 0.22, + "Malay,Vietnamese,Spanish,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "Malay,Vietnamese,Indonesian,Chinese": 0.18, + "Malay,Vietnamese,Filipino,Chinese": 0.13333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.25333333333333335, + "Malay,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,Indonesian,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish,Indonesian": 0.2, + "English,Vietnamese,Spanish,Filipino": 0.23333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "English,Vietnamese,Indonesian,Filipino": 0.2, + "English,Vietnamese,Indonesian,Chinese": 0.12666666666666668, + "English,Vietnamese,Filipino,Chinese": 0.13333333333333333, + "English,Spanish,Indonesian,Filipino": 0.22666666666666666, + "English,Spanish,Indonesian,Chinese": 0.14, + "English,Spanish,Filipino,Chinese": 0.14666666666666667, + "English,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian,Chinese": 0.14, + "Vietnamese,Spanish,Filipino,Chinese": 0.14, + "Vietnamese,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.14 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.18, + "Malay,English,Vietnamese,Spanish,Filipino": 0.18, + "Malay,English,Vietnamese,Spanish,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.18, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.12666666666666668, + "Malay,English,Vietnamese,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.2, + "Malay,English,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.12, + "Malay,English,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.17333333333333334, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.12, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + } + }, + "AC3_2": 0.32633015002146704, + "AC3_3": 0.2576940132537732, + "AC3_4": 0.21152542368055155, + "AC3_5": 0.18146263906426013, + "AC3_6": 0.1624181359771942, + "AC3_7": 0.15127272723147106 + }, + "prompt_3": { + "overall_acc": 0.2714285714285714, + "language_acc": { + "Malay": 0.2733333333333333, + "English": 0.2733333333333333, + "Vietnamese": 0.29333333333333333, + "Spanish": 0.2866666666666667, + "Indonesian": 0.25333333333333335, + "Filipino": 0.2733333333333333, + "Chinese": 0.24666666666666667 + }, + "consistency_score_2": 0.392063492063492, + "consistency_score_3": 0.20495238095238094, + "consistency_score_4": 0.13219047619047616, + "consistency_score_5": 0.09619047619047622, + "consistency_score_6": 0.07523809523809523, + "consistency_score_7": 0.06, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.37333333333333335, + "Malay,Vietnamese": 0.36666666666666664, + "Malay,Spanish": 0.4533333333333333, + "Malay,Indonesian": 0.6466666666666666, + "Malay,Filipino": 0.4666666666666667, + "Malay,Chinese": 0.3, + "English,Vietnamese": 0.2733333333333333, + "English,Spanish": 0.46, + "English,Indonesian": 0.42, + "English,Filipino": 0.38666666666666666, + "English,Chinese": 0.3, + "Vietnamese,Spanish": 0.36, + "Vietnamese,Indonesian": 0.3466666666666667, + "Vietnamese,Filipino": 0.34, + "Vietnamese,Chinese": 0.31333333333333335, + "Spanish,Indonesian": 0.4666666666666667, + "Spanish,Filipino": 0.48, + "Spanish,Chinese": 0.29333333333333333, + "Indonesian,Filipino": 0.54, + "Indonesian,Chinese": 0.3333333333333333, + "Filipino,Chinese": 0.31333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.14, + "Malay,English,Spanish": 0.25333333333333335, + "Malay,English,Indonesian": 0.2733333333333333, + "Malay,English,Filipino": 0.2, + "Malay,English,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish": 0.2, + "Malay,Vietnamese,Indonesian": 0.26666666666666666, + "Malay,Vietnamese,Filipino": 0.20666666666666667, + "Malay,Vietnamese,Chinese": 0.14, + "Malay,Spanish,Indonesian": 0.34, + "Malay,Spanish,Filipino": 0.2733333333333333, + "Malay,Spanish,Chinese": 0.19333333333333333, + "Malay,Indonesian,Filipino": 0.37333333333333335, + "Malay,Indonesian,Chinese": 0.22666666666666666, + "Malay,Filipino,Chinese": 0.18666666666666668, + "English,Vietnamese,Spanish": 0.16666666666666666, + "English,Vietnamese,Indonesian": 0.14666666666666667, + "English,Vietnamese,Filipino": 0.15333333333333332, + "English,Vietnamese,Chinese": 0.11333333333333333, + "English,Spanish,Indonesian": 0.26, + "English,Spanish,Filipino": 0.24, + "English,Spanish,Chinese": 0.16666666666666666, + "English,Indonesian,Filipino": 0.24666666666666667, + "English,Indonesian,Chinese": 0.17333333333333334, + "English,Filipino,Chinese": 0.15333333333333332, + "Vietnamese,Spanish,Indonesian": 0.2, + "Vietnamese,Spanish,Filipino": 0.18666666666666668, + "Vietnamese,Spanish,Chinese": 0.14, + "Vietnamese,Indonesian,Filipino": 0.21333333333333335, + "Vietnamese,Indonesian,Chinese": 0.13333333333333333, + "Vietnamese,Filipino,Chinese": 0.14, + "Spanish,Indonesian,Filipino": 0.32666666666666666, + "Spanish,Indonesian,Chinese": 0.18666666666666668, + "Spanish,Filipino,Chinese": 0.19333333333333333, + "Indonesian,Filipino,Chinese": 0.20666666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian": 0.12666666666666668, + "Malay,English,Vietnamese,Filipino": 0.11333333333333333, + "Malay,English,Vietnamese,Chinese": 0.08666666666666667, + "Malay,English,Spanish,Indonesian": 0.20666666666666667, + "Malay,English,Spanish,Filipino": 0.16, + "Malay,English,Spanish,Chinese": 0.12666666666666668, + "Malay,English,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Indonesian,Chinese": 0.12, + "Malay,English,Filipino,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Filipino": 0.14, + "Malay,Vietnamese,Spanish,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.19333333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.12, + "Malay,Vietnamese,Filipino,Chinese": 0.10666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,Spanish,Filipino,Chinese": 0.15333333333333332, + "Malay,Indonesian,Filipino,Chinese": 0.16666666666666666, + "English,Vietnamese,Spanish,Indonesian": 0.11333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.12, + "English,Vietnamese,Spanish,Chinese": 0.08, + "English,Vietnamese,Indonesian,Filipino": 0.11333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.08, + "English,Vietnamese,Filipino,Chinese": 0.08, + "English,Spanish,Indonesian,Filipino": 0.18666666666666668, + "English,Spanish,Indonesian,Chinese": 0.12, + "English,Spanish,Filipino,Chinese": 0.12, + "English,Indonesian,Filipino,Chinese": 0.12, + "Vietnamese,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Vietnamese,Indonesian,Filipino,Chinese": 0.1, + "Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.10666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.1, + "Malay,English,Vietnamese,Spanish,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.10666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.14, + "Malay,English,Spanish,Indonesian,Chinese": 0.1, + "Malay,English,Spanish,Filipino,Chinese": 0.1, + "Malay,English,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.1, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.1, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.06666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.06666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.06666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.09333333333333334, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.06666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.06666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + } + }, + "AC3_2": 0.32077922073087367, + "AC3_3": 0.23355229331934055, + "AC3_4": 0.17779275934380176, + "AC3_5": 0.14204293112346353, + "AC3_6": 0.11781789635533901, + "AC3_7": 0.09827586203931332 + }, + "prompt_4": { + "overall_acc": 0.26666666666666666, + "language_acc": { + "Malay": 0.21333333333333335, + "English": 0.3, + "Vietnamese": 0.3, + "Spanish": 0.2866666666666667, + "Indonesian": 0.26, + "Filipino": 0.26666666666666666, + "Chinese": 0.24 + }, + "consistency_score_2": 0.42857142857142855, + "consistency_score_3": 0.24304761904761907, + "consistency_score_4": 0.16666666666666669, + "consistency_score_5": 0.1292063492063492, + "consistency_score_6": 0.10952380952380954, + "consistency_score_7": 0.1, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.43333333333333335, + "Malay,Vietnamese": 0.49333333333333335, + "Malay,Spanish": 0.4266666666666667, + "Malay,Indonesian": 0.6533333333333333, + "Malay,Filipino": 0.4266666666666667, + "Malay,Chinese": 0.36, + "English,Vietnamese": 0.38666666666666666, + "English,Spanish": 0.5, + "English,Indonesian": 0.4266666666666667, + "English,Filipino": 0.5133333333333333, + "English,Chinese": 0.32666666666666666, + "Vietnamese,Spanish": 0.44666666666666666, + "Vietnamese,Indonesian": 0.49333333333333335, + "Vietnamese,Filipino": 0.4666666666666667, + "Vietnamese,Chinese": 0.3466666666666667, + "Spanish,Indonesian": 0.44666666666666666, + "Spanish,Filipino": 0.4533333333333333, + "Spanish,Chinese": 0.24666666666666667, + "Indonesian,Filipino": 0.48, + "Indonesian,Chinese": 0.38666666666666666, + "Filipino,Chinese": 0.2866666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.26, + "Malay,English,Spanish": 0.26, + "Malay,English,Indonesian": 0.3333333333333333, + "Malay,English,Filipino": 0.29333333333333333, + "Malay,English,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Spanish": 0.28, + "Malay,Vietnamese,Indonesian": 0.37333333333333335, + "Malay,Vietnamese,Filipino": 0.2733333333333333, + "Malay,Vietnamese,Chinese": 0.20666666666666667, + "Malay,Spanish,Indonesian": 0.31333333333333335, + "Malay,Spanish,Filipino": 0.23333333333333334, + "Malay,Spanish,Chinese": 0.15333333333333332, + "Malay,Indonesian,Filipino": 0.3466666666666667, + "Malay,Indonesian,Chinese": 0.26, + "Malay,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish": 0.2733333333333333, + "English,Vietnamese,Indonesian": 0.26, + "English,Vietnamese,Filipino": 0.29333333333333333, + "English,Vietnamese,Chinese": 0.16666666666666666, + "English,Spanish,Indonesian": 0.26666666666666666, + "English,Spanish,Filipino": 0.32666666666666666, + "English,Spanish,Chinese": 0.16, + "English,Indonesian,Filipino": 0.3, + "English,Indonesian,Chinese": 0.18666666666666668, + "English,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Indonesian": 0.2866666666666667, + "Vietnamese,Spanish,Filipino": 0.2733333333333333, + "Vietnamese,Spanish,Chinese": 0.16, + "Vietnamese,Indonesian,Filipino": 0.32, + "Vietnamese,Indonesian,Chinese": 0.22, + "Vietnamese,Filipino,Chinese": 0.16, + "Spanish,Indonesian,Filipino": 0.26, + "Spanish,Indonesian,Chinese": 0.16, + "Spanish,Filipino,Chinese": 0.15333333333333332, + "Indonesian,Filipino,Chinese": 0.18666666666666668 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.18, + "Malay,English,Vietnamese,Indonesian": 0.22666666666666666, + "Malay,English,Vietnamese,Filipino": 0.21333333333333335, + "Malay,English,Vietnamese,Chinese": 0.13333333333333333, + "Malay,English,Spanish,Indonesian": 0.21333333333333335, + "Malay,English,Spanish,Filipino": 0.2, + "Malay,English,Spanish,Chinese": 0.12, + "Malay,English,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Indonesian,Chinese": 0.16, + "Malay,English,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian": 0.22, + "Malay,Vietnamese,Spanish,Filipino": 0.18, + "Malay,Vietnamese,Spanish,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.24666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Filipino,Chinese": 0.11333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Malay,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.19333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.23333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "English,Vietnamese,Indonesian,Filipino": 0.22, + "English,Vietnamese,Indonesian,Chinese": 0.13333333333333333, + "English,Vietnamese,Filipino,Chinese": 0.12, + "English,Spanish,Indonesian,Filipino": 0.21333333333333335, + "English,Spanish,Indonesian,Chinese": 0.12666666666666668, + "English,Spanish,Filipino,Chinese": 0.12666666666666668, + "English,Indonesian,Filipino,Chinese": 0.14, + "Vietnamese,Spanish,Indonesian,Filipino": 0.2, + "Vietnamese,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, + "Vietnamese,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.16, + "Malay,English,Vietnamese,Spanish,Filipino": 0.16, + "Malay,English,Vietnamese,Spanish,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.19333333333333333, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.12666666666666668, + "Malay,English,Vietnamese,Filipino,Chinese": 0.1, + "Malay,English,Spanish,Indonesian,Filipino": 0.17333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.1, + "Malay,English,Indonesian,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.16666666666666666, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.12, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.14, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.1, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1 + } + }, + "AC3_2": 0.3287671232403828, + "AC3_3": 0.2543099152465697, + "AC3_4": 0.20512820508086785, + "AC3_5": 0.1740711039389208, + "AC3_6": 0.15527426156210009, + "AC3_7": 0.14545454541487604 + }, + "prompt_5": { + "overall_acc": 0.2761904761904762, + "language_acc": { + "Malay": 0.28, + "English": 0.29333333333333333, + "Vietnamese": 0.29333333333333333, + "Spanish": 0.26666666666666666, + "Indonesian": 0.23333333333333334, + "Filipino": 0.2866666666666667, + "Chinese": 0.28 + }, + "consistency_score_2": 0.4495238095238096, + "consistency_score_3": 0.25504761904761913, + "consistency_score_4": 0.16952380952380958, + "consistency_score_5": 0.12666666666666665, + "consistency_score_6": 0.10190476190476193, + "consistency_score_7": 0.08666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.43333333333333335, + "Malay,Vietnamese": 0.5, + "Malay,Spanish": 0.5, + "Malay,Indonesian": 0.6933333333333334, + "Malay,Filipino": 0.4866666666666667, + "Malay,Chinese": 0.3333333333333333, + "English,Vietnamese": 0.3933333333333333, + "English,Spanish": 0.5066666666666667, + "English,Indonesian": 0.4533333333333333, + "English,Filipino": 0.48, + "English,Chinese": 0.34, + "Vietnamese,Spanish": 0.42, + "Vietnamese,Indonesian": 0.43333333333333335, + "Vietnamese,Filipino": 0.44, + "Vietnamese,Chinese": 0.5133333333333333, + "Spanish,Indonesian": 0.49333333333333335, + "Spanish,Filipino": 0.4866666666666667, + "Spanish,Chinese": 0.36666666666666664, + "Indonesian,Filipino": 0.5066666666666667, + "Indonesian,Chinese": 0.36, + "Filipino,Chinese": 0.3 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.25333333333333335, + "Malay,English,Spanish": 0.29333333333333333, + "Malay,English,Indonesian": 0.34, + "Malay,English,Filipino": 0.29333333333333333, + "Malay,English,Chinese": 0.16, + "Malay,Vietnamese,Spanish": 0.2866666666666667, + "Malay,Vietnamese,Indonesian": 0.36, + "Malay,Vietnamese,Filipino": 0.2733333333333333, + "Malay,Vietnamese,Chinese": 0.24, + "Malay,Spanish,Indonesian": 0.3933333333333333, + "Malay,Spanish,Filipino": 0.29333333333333333, + "Malay,Spanish,Chinese": 0.19333333333333333, + "Malay,Indonesian,Filipino": 0.38, + "Malay,Indonesian,Chinese": 0.25333333333333335, + "Malay,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish": 0.25333333333333335, + "English,Vietnamese,Indonesian": 0.22666666666666666, + "English,Vietnamese,Filipino": 0.24666666666666667, + "English,Vietnamese,Chinese": 0.20666666666666667, + "English,Spanish,Indonesian": 0.32666666666666666, + "English,Spanish,Filipino": 0.32, + "English,Spanish,Chinese": 0.19333333333333333, + "English,Indonesian,Filipino": 0.30666666666666664, + "English,Indonesian,Chinese": 0.17333333333333334, + "English,Filipino,Chinese": 0.18, + "Vietnamese,Spanish,Indonesian": 0.28, + "Vietnamese,Spanish,Filipino": 0.2733333333333333, + "Vietnamese,Spanish,Chinese": 0.24666666666666667, + "Vietnamese,Indonesian,Filipino": 0.24, + "Vietnamese,Indonesian,Chinese": 0.23333333333333334, + "Vietnamese,Filipino,Chinese": 0.20666666666666667, + "Spanish,Indonesian,Filipino": 0.30666666666666664, + "Spanish,Indonesian,Chinese": 0.20666666666666667, + "Spanish,Filipino,Chinese": 0.18666666666666668, + "Indonesian,Filipino,Chinese": 0.15333333333333332 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.19333333333333333, + "Malay,English,Vietnamese,Indonesian": 0.2, + "Malay,English,Vietnamese,Filipino": 0.18666666666666668, + "Malay,English,Vietnamese,Chinese": 0.12666666666666668, + "Malay,English,Spanish,Indonesian": 0.26, + "Malay,English,Spanish,Filipino": 0.22666666666666666, + "Malay,English,Spanish,Chinese": 0.11333333333333333, + "Malay,English,Indonesian,Filipino": 0.25333333333333335, + "Malay,English,Indonesian,Chinese": 0.13333333333333333, + "Malay,English,Filipino,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.24, + "Malay,Vietnamese,Spanish,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Indonesian,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Indonesian,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.12, + "Malay,Spanish,Indonesian,Filipino": 0.25333333333333335, + "Malay,Spanish,Indonesian,Chinese": 0.16, + "Malay,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,Indonesian,Filipino,Chinese": 0.12666666666666668, + "English,Vietnamese,Spanish,Indonesian": 0.2, + "English,Vietnamese,Spanish,Filipino": 0.19333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.13333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.18, + "English,Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "English,Vietnamese,Filipino,Chinese": 0.13333333333333333, + "English,Spanish,Indonesian,Filipino": 0.24666666666666667, + "English,Spanish,Indonesian,Chinese": 0.13333333333333333, + "English,Spanish,Filipino,Chinese": 0.12666666666666668, + "English,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.18, + "Malay,English,Vietnamese,Spanish,Filipino": 0.16, + "Malay,English,Vietnamese,Spanish,Chinese": 0.1, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.20666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.18, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.1, + "English,Spanish,Indonesian,Filipino,Chinese": 0.1, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.08666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + } + }, + "AC3_2": 0.34215723029905915, + "AC3_3": 0.2651983130508664, + "AC3_4": 0.21009361004647376, + "AC3_5": 0.17368006299865307, + "AC3_6": 0.14887849342350043, + "AC3_7": 0.13193350827510145 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.2767857142857143, + "language_acc": { + "English": 0.2897727272727273, + "Vietnamese": 0.29545454545454547, + "Chinese": 0.24431818181818182, + "Indonesian": 0.26704545454545453, + "Filipino": 0.2727272727272727, + "Spanish": 0.2897727272727273, + "Malay": 0.2784090909090909 + }, + "consistency_score_2": 0.39150432900432897, + "consistency_score_3": 0.19740259740259747, + "consistency_score_4": 0.11493506493506493, + "consistency_score_5": 0.0727813852813853, + "consistency_score_6": 0.048701298701298704, + "consistency_score_7": 0.03409090909090909, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.36363636363636365, + "English,Chinese": 0.26136363636363635, + "English,Indonesian": 0.39204545454545453, + "English,Filipino": 0.42613636363636365, + "English,Spanish": 0.5454545454545454, + "English,Malay": 0.4318181818181818, + "Vietnamese,Chinese": 0.3806818181818182, + "Vietnamese,Indonesian": 0.3409090909090909, + "Vietnamese,Filipino": 0.375, + "Vietnamese,Spanish": 0.3465909090909091, + "Vietnamese,Malay": 0.35795454545454547, + "Chinese,Indonesian": 0.24431818181818182, + "Chinese,Filipino": 0.3181818181818182, + "Chinese,Spanish": 0.3181818181818182, + "Chinese,Malay": 0.30113636363636365, + "Indonesian,Filipino": 0.4715909090909091, + "Indonesian,Spanish": 0.4090909090909091, + "Indonesian,Malay": 0.5511363636363636, + "Filipino,Spanish": 0.4715909090909091, + "Filipino,Malay": 0.48863636363636365, + "Spanish,Malay": 0.42613636363636365 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.1590909090909091, + "English,Vietnamese,Indonesian": 0.14772727272727273, + "English,Vietnamese,Filipino": 0.1875, + "English,Vietnamese,Spanish": 0.21022727272727273, + "English,Vietnamese,Malay": 0.1875, + "English,Chinese,Indonesian": 0.11363636363636363, + "English,Chinese,Filipino": 0.14772727272727273, + "English,Chinese,Spanish": 0.17045454545454544, + "English,Chinese,Malay": 0.13636363636363635, + "English,Indonesian,Filipino": 0.25, + "English,Indonesian,Spanish": 0.25, + "English,Indonesian,Malay": 0.2727272727272727, + "English,Filipino,Spanish": 0.29545454545454547, + "English,Filipino,Malay": 0.2727272727272727, + "English,Spanish,Malay": 0.2784090909090909, + "Vietnamese,Chinese,Indonesian": 0.13636363636363635, + "Vietnamese,Chinese,Filipino": 0.17613636363636365, + "Vietnamese,Chinese,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Malay": 0.1534090909090909, + "Vietnamese,Indonesian,Filipino": 0.17613636363636365, + "Vietnamese,Indonesian,Spanish": 0.1534090909090909, + "Vietnamese,Indonesian,Malay": 0.2215909090909091, + "Vietnamese,Filipino,Spanish": 0.1875, + "Vietnamese,Filipino,Malay": 0.19886363636363635, + "Vietnamese,Spanish,Malay": 0.17045454545454544, + "Chinese,Indonesian,Filipino": 0.14772727272727273, + "Chinese,Indonesian,Spanish": 0.13068181818181818, + "Chinese,Indonesian,Malay": 0.16477272727272727, + "Chinese,Filipino,Spanish": 0.1534090909090909, + "Chinese,Filipino,Malay": 0.17045454545454544, + "Chinese,Spanish,Malay": 0.1534090909090909, + "Indonesian,Filipino,Spanish": 0.26136363636363635, + "Indonesian,Filipino,Malay": 0.32386363636363635, + "Indonesian,Spanish,Malay": 0.29545454545454547, + "Filipino,Spanish,Malay": 0.2897727272727273 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino": 0.09090909090909091, + "English,Vietnamese,Chinese,Spanish": 0.11931818181818182, + "English,Vietnamese,Chinese,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Filipino": 0.10227272727272728, + "English,Vietnamese,Indonesian,Spanish": 0.09659090909090909, + "English,Vietnamese,Indonesian,Malay": 0.10795454545454546, + "English,Vietnamese,Filipino,Spanish": 0.13068181818181818, + "English,Vietnamese,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Spanish,Malay": 0.11931818181818182, + "English,Chinese,Indonesian,Filipino": 0.09090909090909091, + "English,Chinese,Indonesian,Spanish": 0.09090909090909091, + "English,Chinese,Indonesian,Malay": 0.07954545454545454, + "English,Chinese,Filipino,Spanish": 0.10795454545454546, + "English,Chinese,Filipino,Malay": 0.09090909090909091, + "English,Chinese,Spanish,Malay": 0.10795454545454546, + "English,Indonesian,Filipino,Spanish": 0.17045454545454544, + "English,Indonesian,Filipino,Malay": 0.19886363636363635, + "English,Indonesian,Spanish,Malay": 0.19318181818181818, + "English,Filipino,Spanish,Malay": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Filipino": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Spanish": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Filipino,Spanish": 0.09090909090909091, + "Vietnamese,Chinese,Filipino,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "Vietnamese,Indonesian,Filipino,Malay": 0.14204545454545456, + "Vietnamese,Indonesian,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Filipino,Spanish,Malay": 0.11931818181818182, + "Chinese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "Chinese,Indonesian,Filipino,Malay": 0.10227272727272728, + "Chinese,Indonesian,Spanish,Malay": 0.10227272727272728, + "Chinese,Filipino,Spanish,Malay": 0.10795454545454546, + "Indonesian,Filipino,Spanish,Malay": 0.21022727272727273 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.07386363636363637, + "English,Vietnamese,Chinese,Filipino,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.07954545454545454, + "English,Vietnamese,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "English,Chinese,Indonesian,Filipino,Malay": 0.0625, + "English,Chinese,Indonesian,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0625, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0625, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0625, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + } + }, + "AC3_2": 0.32429872753967465, + "AC3_3": 0.23044945463913058, + "AC3_4": 0.1624237759339073, + "AC3_5": 0.11525597077640534, + "AC3_6": 0.08282864265356914, + "AC3_7": 0.06070496081598212 + }, + "prompt_2": { + "overall_acc": 0.2800324675324675, + "language_acc": { + "English": 0.29545454545454547, + "Vietnamese": 0.26704545454545453, + "Chinese": 0.2897727272727273, + "Indonesian": 0.2897727272727273, + "Filipino": 0.2556818181818182, + "Spanish": 0.2784090909090909, + "Malay": 0.2840909090909091 + }, + "consistency_score_2": 0.3858225108225108, + "consistency_score_3": 0.18116883116883115, + "consistency_score_4": 0.09237012987012985, + "consistency_score_5": 0.04924242424242423, + "consistency_score_6": 0.027597402597402596, + "consistency_score_7": 0.017045454545454544, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.375, + "English,Chinese": 0.2784090909090909, + "English,Indonesian": 0.3806818181818182, + "English,Filipino": 0.44886363636363635, + "English,Spanish": 0.5056818181818182, + "English,Malay": 0.42613636363636365, + "Vietnamese,Chinese": 0.2215909090909091, + "Vietnamese,Indonesian": 0.3693181818181818, + "Vietnamese,Filipino": 0.39204545454545453, + "Vietnamese,Spanish": 0.32954545454545453, + "Vietnamese,Malay": 0.35795454545454547, + "Chinese,Indonesian": 0.2840909090909091, + "Chinese,Filipino": 0.17613636363636365, + "Chinese,Spanish": 0.23863636363636365, + "Chinese,Malay": 0.2840909090909091, + "Indonesian,Filipino": 0.5340909090909091, + "Indonesian,Spanish": 0.4090909090909091, + "Indonesian,Malay": 0.6022727272727273, + "Filipino,Spanish": 0.45454545454545453, + "Filipino,Malay": 0.5568181818181818, + "Spanish,Malay": 0.4772727272727273 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.08522727272727272, + "English,Vietnamese,Indonesian": 0.17613636363636365, + "English,Vietnamese,Filipino": 0.20454545454545456, + "English,Vietnamese,Spanish": 0.1875, + "English,Vietnamese,Malay": 0.17613636363636365, + "English,Chinese,Indonesian": 0.11363636363636363, + "English,Chinese,Filipino": 0.10795454545454546, + "English,Chinese,Spanish": 0.125, + "English,Chinese,Malay": 0.14204545454545456, + "English,Indonesian,Filipino": 0.2556818181818182, + "English,Indonesian,Spanish": 0.22727272727272727, + "English,Indonesian,Malay": 0.26704545454545453, + "English,Filipino,Spanish": 0.2556818181818182, + "English,Filipino,Malay": 0.26704545454545453, + "English,Spanish,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Indonesian": 0.09090909090909091, + "Vietnamese,Chinese,Filipino": 0.0625, + "Vietnamese,Chinese,Spanish": 0.07386363636363637, + "Vietnamese,Chinese,Malay": 0.07954545454545454, + "Vietnamese,Indonesian,Filipino": 0.22727272727272727, + "Vietnamese,Indonesian,Spanish": 0.1590909090909091, + "Vietnamese,Indonesian,Malay": 0.26136363636363635, + "Vietnamese,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Filipino,Malay": 0.2215909090909091, + "Vietnamese,Spanish,Malay": 0.1875, + "Chinese,Indonesian,Filipino": 0.11363636363636363, + "Chinese,Indonesian,Spanish": 0.10227272727272728, + "Chinese,Indonesian,Malay": 0.17613636363636365, + "Chinese,Filipino,Spanish": 0.07954545454545454, + "Chinese,Filipino,Malay": 0.10795454545454546, + "Chinese,Spanish,Malay": 0.11931818181818182, + "Indonesian,Filipino,Spanish": 0.26136363636363635, + "Indonesian,Filipino,Malay": 0.3806818181818182, + "Indonesian,Spanish,Malay": 0.3181818181818182, + "Filipino,Spanish,Malay": 0.30113636363636365 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino": 0.03977272727272727, + "English,Vietnamese,Chinese,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Malay": 0.03977272727272727, + "English,Vietnamese,Indonesian,Filipino": 0.125, + "English,Vietnamese,Indonesian,Spanish": 0.09659090909090909, + "English,Vietnamese,Indonesian,Malay": 0.13068181818181818, + "English,Vietnamese,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Spanish,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Filipino": 0.06818181818181818, + "English,Chinese,Indonesian,Spanish": 0.056818181818181816, + "English,Chinese,Indonesian,Malay": 0.09090909090909091, + "English,Chinese,Filipino,Spanish": 0.05113636363636364, + "English,Chinese,Filipino,Malay": 0.07386363636363637, + "English,Chinese,Spanish,Malay": 0.08522727272727272, + "English,Indonesian,Filipino,Spanish": 0.1534090909090909, + "English,Indonesian,Filipino,Malay": 0.1875, + "English,Indonesian,Spanish,Malay": 0.17045454545454544, + "English,Filipino,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Spanish": 0.03409090909090909, + "Vietnamese,Chinese,Indonesian,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Filipino,Spanish": 0.028409090909090908, + "Vietnamese,Chinese,Filipino,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "Vietnamese,Indonesian,Filipino,Malay": 0.17045454545454544, + "Vietnamese,Indonesian,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Filipino,Spanish,Malay": 0.11931818181818182, + "Chinese,Indonesian,Filipino,Spanish": 0.0625, + "Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "Indonesian,Filipino,Spanish,Malay": 0.19886363636363635 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.03409090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino,Malay": 0.022727272727272728, + "English,Vietnamese,Chinese,Spanish,Malay": 0.028409090909090908, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.07954545454545454, + "English,Vietnamese,Filipino,Spanish,Malay": 0.0625, + "English,Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "English,Chinese,Indonesian,Filipino,Malay": 0.0625, + "English,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + } + }, + "AC3_2": 0.3245236071851497, + "AC3_3": 0.2200043883832963, + "AC3_4": 0.1389175884770596, + "AC3_5": 0.08375662954056015, + "AC3_6": 0.05024329231139266, + "AC3_7": 0.03213487331258093 + }, + "prompt_3": { + "overall_acc": 0.2840909090909091, + "language_acc": { + "English": 0.30113636363636365, + "Vietnamese": 0.2727272727272727, + "Chinese": 0.29545454545454547, + "Indonesian": 0.3068181818181818, + "Filipino": 0.25, + "Spanish": 0.2840909090909091, + "Malay": 0.2784090909090909 + }, + "consistency_score_2": 0.4932359307359308, + "consistency_score_3": 0.3107142857142858, + "consistency_score_4": 0.21428571428571425, + "consistency_score_5": 0.15530303030303033, + "consistency_score_6": 0.11688311688311688, + "consistency_score_7": 0.09090909090909091, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3977272727272727, + "English,Chinese": 0.3181818181818182, + "English,Indonesian": 0.45454545454545453, + "English,Filipino": 0.4715909090909091, + "English,Spanish": 0.5511363636363636, + "English,Malay": 0.4715909090909091, + "Vietnamese,Chinese": 0.4034090909090909, + "Vietnamese,Indonesian": 0.5227272727272727, + "Vietnamese,Filipino": 0.4943181818181818, + "Vietnamese,Spanish": 0.4431818181818182, + "Vietnamese,Malay": 0.5113636363636364, + "Chinese,Indonesian": 0.42045454545454547, + "Chinese,Filipino": 0.3409090909090909, + "Chinese,Spanish": 0.3125, + "Chinese,Malay": 0.42613636363636365, + "Indonesian,Filipino": 0.6704545454545454, + "Indonesian,Spanish": 0.5454545454545454, + "Indonesian,Malay": 0.7102272727272727, + "Filipino,Spanish": 0.5852272727272727, + "Filipino,Malay": 0.7045454545454546, + "Spanish,Malay": 0.6022727272727273 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.17045454545454544, + "English,Vietnamese,Indonesian": 0.26704545454545453, + "English,Vietnamese,Filipino": 0.2784090909090909, + "English,Vietnamese,Spanish": 0.2727272727272727, + "English,Vietnamese,Malay": 0.26136363636363635, + "English,Chinese,Indonesian": 0.20454545454545456, + "English,Chinese,Filipino": 0.17613636363636365, + "English,Chinese,Spanish": 0.18181818181818182, + "English,Chinese,Malay": 0.19886363636363635, + "English,Indonesian,Filipino": 0.3522727272727273, + "English,Indonesian,Spanish": 0.3465909090909091, + "English,Indonesian,Malay": 0.38636363636363635, + "English,Filipino,Spanish": 0.3693181818181818, + "English,Filipino,Malay": 0.3806818181818182, + "English,Spanish,Malay": 0.38636363636363635, + "Vietnamese,Chinese,Indonesian": 0.2556818181818182, + "Vietnamese,Chinese,Filipino": 0.2215909090909091, + "Vietnamese,Chinese,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Malay": 0.26136363636363635, + "Vietnamese,Indonesian,Filipino": 0.375, + "Vietnamese,Indonesian,Spanish": 0.3181818181818182, + "Vietnamese,Indonesian,Malay": 0.4034090909090909, + "Vietnamese,Filipino,Spanish": 0.32954545454545453, + "Vietnamese,Filipino,Malay": 0.4034090909090909, + "Vietnamese,Spanish,Malay": 0.3352272727272727, + "Chinese,Indonesian,Filipino": 0.2897727272727273, + "Chinese,Indonesian,Spanish": 0.23295454545454544, + "Chinese,Indonesian,Malay": 0.3352272727272727, + "Chinese,Filipino,Spanish": 0.21022727272727273, + "Chinese,Filipino,Malay": 0.2897727272727273, + "Chinese,Spanish,Malay": 0.2556818181818182, + "Indonesian,Filipino,Spanish": 0.42613636363636365, + "Indonesian,Filipino,Malay": 0.5568181818181818, + "Indonesian,Spanish,Malay": 0.4659090909090909, + "Filipino,Spanish,Malay": 0.48295454545454547 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.13636363636363635, + "English,Vietnamese,Chinese,Filipino": 0.13068181818181818, + "English,Vietnamese,Chinese,Spanish": 0.11363636363636363, + "English,Vietnamese,Chinese,Malay": 0.13636363636363635, + "English,Vietnamese,Indonesian,Filipino": 0.2215909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.19886363636363635, + "English,Vietnamese,Indonesian,Malay": 0.22727272727272727, + "English,Vietnamese,Filipino,Spanish": 0.2159090909090909, + "English,Vietnamese,Filipino,Malay": 0.23295454545454544, + "English,Vietnamese,Spanish,Malay": 0.2159090909090909, + "English,Chinese,Indonesian,Filipino": 0.1590909090909091, + "English,Chinese,Indonesian,Spanish": 0.1534090909090909, + "English,Chinese,Indonesian,Malay": 0.18181818181818182, + "English,Chinese,Filipino,Spanish": 0.13636363636363635, + "English,Chinese,Filipino,Malay": 0.1534090909090909, + "English,Chinese,Spanish,Malay": 0.1590909090909091, + "English,Indonesian,Filipino,Spanish": 0.29545454545454547, + "English,Indonesian,Filipino,Malay": 0.32386363636363635, + "English,Indonesian,Spanish,Malay": 0.3181818181818182, + "English,Filipino,Spanish,Malay": 0.3352272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.1875, + "Vietnamese,Chinese,Indonesian,Spanish": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Malay": 0.2215909090909091, + "Vietnamese,Chinese,Filipino,Spanish": 0.14772727272727273, + "Vietnamese,Chinese,Filipino,Malay": 0.19886363636363635, + "Vietnamese,Chinese,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Indonesian,Filipino,Spanish": 0.2556818181818182, + "Vietnamese,Indonesian,Filipino,Malay": 0.32954545454545453, + "Vietnamese,Indonesian,Spanish,Malay": 0.2784090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.2840909090909091, + "Chinese,Indonesian,Filipino,Spanish": 0.17613636363636365, + "Chinese,Indonesian,Filipino,Malay": 0.25, + "Chinese,Indonesian,Spanish,Malay": 0.21022727272727273, + "Chinese,Filipino,Spanish,Malay": 0.19318181818181818, + "Indonesian,Filipino,Spanish,Malay": 0.39204545454545453 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.11931818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.125, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Spanish,Malay": 0.10795454545454546, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.17613636363636365, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.20454545454545456, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.1875, + "English,Vietnamese,Filipino,Spanish,Malay": 0.19886363636363635, + "English,Chinese,Indonesian,Filipino,Spanish": 0.125, + "English,Chinese,Indonesian,Filipino,Malay": 0.14204545454545456, + "English,Chinese,Indonesian,Spanish,Malay": 0.14204545454545456, + "English,Chinese,Filipino,Spanish,Malay": 0.13068181818181818, + "English,Indonesian,Filipino,Spanish,Malay": 0.2897727272727273, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.125, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.17613636363636365, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.23863636363636365, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.17613636363636365, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.125 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091 + } + }, + "AC3_2": 0.3605274815218252, + "AC3_3": 0.29680676850905213, + "AC3_4": 0.24429967421808185, + "AC3_5": 0.2008228839668347, + "AC3_6": 0.1656238497930708, + "AC3_7": 0.137741046795225 + }, + "prompt_4": { + "overall_acc": 0.28490259740259744, + "language_acc": { + "English": 0.32386363636363635, + "Vietnamese": 0.26136363636363635, + "Chinese": 0.2727272727272727, + "Indonesian": 0.2727272727272727, + "Filipino": 0.25, + "Spanish": 0.32954545454545453, + "Malay": 0.2840909090909091 + }, + "consistency_score_2": 0.39880952380952384, + "consistency_score_3": 0.19269480519480522, + "consistency_score_4": 0.099512987012987, + "consistency_score_5": 0.051677489177489176, + "consistency_score_6": 0.02516233766233766, + "consistency_score_7": 0.011363636363636364, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3352272727272727, + "English,Chinese": 0.3068181818181818, + "English,Indonesian": 0.36363636363636365, + "English,Filipino": 0.4147727272727273, + "English,Spanish": 0.5056818181818182, + "English,Malay": 0.42045454545454547, + "Vietnamese,Chinese": 0.22727272727272727, + "Vietnamese,Indonesian": 0.4375, + "Vietnamese,Filipino": 0.4090909090909091, + "Vietnamese,Spanish": 0.35795454545454547, + "Vietnamese,Malay": 0.44886363636363635, + "Chinese,Indonesian": 0.26136363636363635, + "Chinese,Filipino": 0.16477272727272727, + "Chinese,Spanish": 0.23295454545454544, + "Chinese,Malay": 0.23863636363636365, + "Indonesian,Filipino": 0.5795454545454546, + "Indonesian,Spanish": 0.4034090909090909, + "Indonesian,Malay": 0.6931818181818182, + "Filipino,Spanish": 0.4943181818181818, + "Filipino,Malay": 0.6136363636363636, + "Spanish,Malay": 0.4659090909090909 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.07954545454545454, + "English,Vietnamese,Indonesian": 0.17613636363636365, + "English,Vietnamese,Filipino": 0.1875, + "English,Vietnamese,Spanish": 0.19886363636363635, + "English,Vietnamese,Malay": 0.1875, + "English,Chinese,Indonesian": 0.10795454545454546, + "English,Chinese,Filipino": 0.09090909090909091, + "English,Chinese,Spanish": 0.13068181818181818, + "English,Chinese,Malay": 0.11931818181818182, + "English,Indonesian,Filipino": 0.23863636363636365, + "English,Indonesian,Spanish": 0.2159090909090909, + "English,Indonesian,Malay": 0.2897727272727273, + "English,Filipino,Spanish": 0.26704545454545453, + "English,Filipino,Malay": 0.2727272727272727, + "English,Spanish,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Indonesian": 0.09090909090909091, + "Vietnamese,Chinese,Filipino": 0.05113636363636364, + "Vietnamese,Chinese,Spanish": 0.07954545454545454, + "Vietnamese,Chinese,Malay": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino": 0.2840909090909091, + "Vietnamese,Indonesian,Spanish": 0.1875, + "Vietnamese,Indonesian,Malay": 0.3409090909090909, + "Vietnamese,Filipino,Spanish": 0.2159090909090909, + "Vietnamese,Filipino,Malay": 0.29545454545454547, + "Vietnamese,Spanish,Malay": 0.2215909090909091, + "Chinese,Indonesian,Filipino": 0.09659090909090909, + "Chinese,Indonesian,Spanish": 0.10795454545454546, + "Chinese,Indonesian,Malay": 0.16477272727272727, + "Chinese,Filipino,Spanish": 0.07954545454545454, + "Chinese,Filipino,Malay": 0.08522727272727272, + "Chinese,Spanish,Malay": 0.10795454545454546, + "Indonesian,Filipino,Spanish": 0.2897727272727273, + "Indonesian,Filipino,Malay": 0.4659090909090909, + "Indonesian,Spanish,Malay": 0.32954545454545453, + "Filipino,Spanish,Malay": 0.3409090909090909 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino": 0.028409090909090908, + "English,Vietnamese,Chinese,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Malay": 0.03977272727272727, + "English,Vietnamese,Indonesian,Filipino": 0.13636363636363635, + "English,Vietnamese,Indonesian,Spanish": 0.10227272727272728, + "English,Vietnamese,Indonesian,Malay": 0.14204545454545456, + "English,Vietnamese,Filipino,Spanish": 0.11931818181818182, + "English,Vietnamese,Filipino,Malay": 0.14204545454545456, + "English,Vietnamese,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino": 0.05113636363636364, + "English,Chinese,Indonesian,Spanish": 0.0625, + "English,Chinese,Indonesian,Malay": 0.09090909090909091, + "English,Chinese,Filipino,Spanish": 0.045454545454545456, + "English,Chinese,Filipino,Malay": 0.03977272727272727, + "English,Chinese,Spanish,Malay": 0.06818181818181818, + "English,Indonesian,Filipino,Spanish": 0.1534090909090909, + "English,Indonesian,Filipino,Malay": 0.20454545454545456, + "English,Indonesian,Spanish,Malay": 0.18181818181818182, + "English,Filipino,Spanish,Malay": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Filipino": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Spanish": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Malay": 0.0625, + "Vietnamese,Chinese,Filipino,Spanish": 0.028409090909090908, + "Vietnamese,Chinese,Filipino,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Indonesian,Filipino,Spanish": 0.1590909090909091, + "Vietnamese,Indonesian,Filipino,Malay": 0.24431818181818182, + "Vietnamese,Indonesian,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Filipino,Spanish,Malay": 0.18181818181818182, + "Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "Chinese,Indonesian,Filipino,Malay": 0.07386363636363637, + "Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "Indonesian,Filipino,Spanish,Malay": 0.24431818181818182 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.03409090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Filipino,Spanish,Malay": 0.09659090909090909, + "English,Chinese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "English,Chinese,Indonesian,Filipino,Malay": 0.03977272727272727, + "English,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364 + } + }, + "AC3_2": 0.3323675730193961, + "AC3_3": 0.22989760914077137, + "AC3_4": 0.14750446973172365, + "AC3_5": 0.08748616734361521, + "AC3_6": 0.046240735689173244, + "AC3_7": 0.021855541711178424 + }, + "prompt_5": { + "overall_acc": 0.2905844155844156, + "language_acc": { + "English": 0.3465909090909091, + "Vietnamese": 0.29545454545454547, + "Chinese": 0.2727272727272727, + "Indonesian": 0.3181818181818182, + "Filipino": 0.24431818181818182, + "Spanish": 0.2897727272727273, + "Malay": 0.26704545454545453 + }, + "consistency_score_2": 0.3568722943722944, + "consistency_score_3": 0.16233766233766234, + "consistency_score_4": 0.0878246753246753, + "consistency_score_5": 0.05303030303030302, + "consistency_score_6": 0.034090909090909095, + "consistency_score_7": 0.022727272727272728, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.35795454545454547, + "English,Chinese": 0.2784090909090909, + "English,Indonesian": 0.4034090909090909, + "English,Filipino": 0.4602272727272727, + "English,Spanish": 0.5511363636363636, + "English,Malay": 0.38636363636363635, + "Vietnamese,Chinese": 0.42613636363636365, + "Vietnamese,Indonesian": 0.32954545454545453, + "Vietnamese,Filipino": 0.30113636363636365, + "Vietnamese,Spanish": 0.3465909090909091, + "Vietnamese,Malay": 0.3068181818181818, + "Chinese,Indonesian": 0.19318181818181818, + "Chinese,Filipino": 0.125, + "Chinese,Spanish": 0.1590909090909091, + "Chinese,Malay": 0.14204545454545456, + "Indonesian,Filipino": 0.42045454545454547, + "Indonesian,Spanish": 0.42045454545454547, + "Indonesian,Malay": 0.5568181818181818, + "Filipino,Spanish": 0.4375, + "Filipino,Malay": 0.4602272727272727, + "Spanish,Malay": 0.4318181818181818 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.14204545454545456, + "English,Vietnamese,Indonesian": 0.17613636363636365, + "English,Vietnamese,Filipino": 0.19318181818181818, + "English,Vietnamese,Spanish": 0.21022727272727273, + "English,Vietnamese,Malay": 0.1590909090909091, + "English,Chinese,Indonesian": 0.09659090909090909, + "English,Chinese,Filipino": 0.08522727272727272, + "English,Chinese,Spanish": 0.11931818181818182, + "English,Chinese,Malay": 0.08522727272727272, + "English,Indonesian,Filipino": 0.23295454545454544, + "English,Indonesian,Spanish": 0.2727272727272727, + "English,Indonesian,Malay": 0.2556818181818182, + "English,Filipino,Spanish": 0.2897727272727273, + "English,Filipino,Malay": 0.23295454545454544, + "English,Spanish,Malay": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian": 0.09090909090909091, + "Vietnamese,Chinese,Filipino": 0.0625, + "Vietnamese,Chinese,Spanish": 0.08522727272727272, + "Vietnamese,Chinese,Malay": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino": 0.17613636363636365, + "Vietnamese,Indonesian,Spanish": 0.17613636363636365, + "Vietnamese,Indonesian,Malay": 0.2159090909090909, + "Vietnamese,Filipino,Spanish": 0.1875, + "Vietnamese,Filipino,Malay": 0.17045454545454544, + "Vietnamese,Spanish,Malay": 0.17045454545454544, + "Chinese,Indonesian,Filipino": 0.056818181818181816, + "Chinese,Indonesian,Spanish": 0.0625, + "Chinese,Indonesian,Malay": 0.09659090909090909, + "Chinese,Filipino,Spanish": 0.056818181818181816, + "Chinese,Filipino,Malay": 0.056818181818181816, + "Chinese,Spanish,Malay": 0.056818181818181816, + "Indonesian,Filipino,Spanish": 0.23863636363636365, + "Indonesian,Filipino,Malay": 0.2840909090909091, + "Indonesian,Spanish,Malay": 0.30113636363636365, + "Filipino,Spanish,Malay": 0.24431818181818182 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino": 0.03977272727272727, + "English,Vietnamese,Chinese,Spanish": 0.06818181818181818, + "English,Vietnamese,Chinese,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino": 0.125, + "English,Vietnamese,Indonesian,Spanish": 0.125, + "English,Vietnamese,Indonesian,Malay": 0.13068181818181818, + "English,Vietnamese,Filipino,Spanish": 0.14204545454545456, + "English,Vietnamese,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Spanish,Malay": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino": 0.05113636363636364, + "English,Chinese,Indonesian,Spanish": 0.05113636363636364, + "English,Chinese,Indonesian,Malay": 0.06818181818181818, + "English,Chinese,Filipino,Spanish": 0.05113636363636364, + "English,Chinese,Filipino,Malay": 0.045454545454545456, + "English,Chinese,Spanish,Malay": 0.056818181818181816, + "English,Indonesian,Filipino,Spanish": 0.17613636363636365, + "English,Indonesian,Filipino,Malay": 0.16477272727272727, + "English,Indonesian,Spanish,Malay": 0.19318181818181818, + "English,Filipino,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.03409090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Malay": 0.0625, + "Vietnamese,Chinese,Filipino,Spanish": 0.028409090909090908, + "Vietnamese,Chinese,Filipino,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Indonesian,Filipino,Spanish": 0.13068181818181818, + "Vietnamese,Indonesian,Filipino,Malay": 0.13636363636363635, + "Vietnamese,Indonesian,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Filipino,Spanish,Malay": 0.11363636363636363, + "Chinese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "Chinese,Indonesian,Filipino,Malay": 0.045454545454545456, + "Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, + "Chinese,Filipino,Spanish,Malay": 0.03409090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.1875 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.028409090909090908, + "English,Vietnamese,Chinese,Filipino,Malay": 0.028409090909090908, + "English,Vietnamese,Chinese,Spanish,Malay": 0.03977272727272727, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.09659090909090909, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.07954545454545454, + "English,Chinese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "English,Chinese,Indonesian,Filipino,Malay": 0.03977272727272727, + "English,Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Filipino,Spanish,Malay": 0.03409090909090909, + "English,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + } + }, + "AC3_2": 0.3203350138709956, + "AC3_3": 0.20830424051631388, + "AC3_4": 0.13488302770452582, + "AC3_5": 0.08969219754011074, + "AC3_6": 0.06102272725393228, + "AC3_7": 0.04215732452728884 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3592233009708738 + }, + "prompt_2": { + "accuracy": 0.36893203883495146 + }, + "prompt_3": { + "accuracy": 0.33980582524271846 + }, + "prompt_4": { + "accuracy": 0.3300970873786408 + }, + "prompt_5": { + "accuracy": 0.34951456310679613 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2571428571428571 + }, + "prompt_2": { + "accuracy": 0.2571428571428571 + }, + "prompt_3": { + "accuracy": 0.2571428571428571 + }, + "prompt_4": { + "accuracy": 0.2571428571428571 + }, + "prompt_5": { + "accuracy": 0.2571428571428571 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.27102803738317754 + }, + "prompt_2": { + "accuracy": 0.2803738317757009 + }, + "prompt_3": { + "accuracy": 0.2523364485981308 + }, + "prompt_4": { + "accuracy": 0.27102803738317754 + }, + "prompt_5": { + "accuracy": 0.2523364485981308 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.28, + "category_acc": { + "brand": 0.4, + "demographics": 0.8, + "biology": 0.1, + "history": 0.4666666666666667, + "literature": 0.2, + "politics": 0.2, + "culture": 0.2, + "film": 0.2, + "law": 0.2, + "geography": 0.2 + } + }, + "prompt_2": { + "accuracy": 0.28, + "category_acc": { + "brand": 0.4, + "demographics": 0.6, + "biology": 0.1, + "history": 0.4666666666666667, + "literature": 0.2, + "politics": 0.3, + "culture": 0.2, + "film": 0.2, + "law": 0.2, + "geography": 0.2 + } + }, + "prompt_3": { + "accuracy": 0.23, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.2, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.2, + "culture": 0.2, + "film": 0.1, + "law": 0.1, + "geography": 0.3 + } + }, + "prompt_4": { + "accuracy": 0.31, + "category_acc": { + "brand": 0.4, + "demographics": 0.8, + "biology": 0.2, + "history": 0.4666666666666667, + "literature": 0.3, + "politics": 0.2, + "culture": 0.2, + "film": 0.2, + "law": 0.3, + "geography": 0.2 + } + }, + "prompt_5": { + "accuracy": 0.28, + "category_acc": { + "brand": 0.3, + "demographics": 0.8, + "biology": 0.3, + "history": 0.3333333333333333, + "literature": 0.3, + "politics": 0.2, + "culture": 0.2, + "film": 0.2, + "law": 0.2, + "geography": 0.2 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.17810358145699634 + }, + "prompt_2": { + "bleu_score": 0.175853365660639 + }, + "prompt_3": { + "bleu_score": 0.1801644960912337 + }, + "prompt_4": { + "bleu_score": 0.1753047228176966 + }, + "prompt_5": { + "bleu_score": 0.158706447391941 + } }, "indommlu": { "prompt_1": -1, @@ -736,179 +8301,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.033767259812539335 + }, + "prompt_2": { + "bleu_score": 0.033528715283644796 + }, + "prompt_3": { + "bleu_score": 0.03048363282056134 + }, + "prompt_4": { + "bleu_score": 0.033356071322697016 + }, + "prompt_5": { + "bleu_score": 0.03405930793924453 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.02359603189585223 + }, + "prompt_2": { + "bleu_score": 0.023072025673752772 + }, + "prompt_3": { + "bleu_score": 0.020975755183025524 + }, + "prompt_4": { + "bleu_score": 0.024071487802262984 + }, + "prompt_5": { + "bleu_score": 0.023872960053763232 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.0061232164241256165 + }, + "prompt_2": { + "bleu_score": 0.008002461847711183 + }, + "prompt_3": { + "bleu_score": 0.008729631564916076 + }, + "prompt_4": { + "bleu_score": 0.009572599450896885 + }, + "prompt_5": { + "bleu_score": 0.008871463696435702 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.028752553781304813 + }, + "prompt_2": { + "bleu_score": 0.031333239489516315 + }, + "prompt_3": { + "bleu_score": 0.030155058541882915 + }, + "prompt_4": { + "bleu_score": 0.02986152962236264 + }, + "prompt_5": { + "bleu_score": 0.03051794760947034 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.29871645274212366 + }, + "prompt_2": { + "accuracy": 0.29638273045507585 + }, + "prompt_3": { + "accuracy": 0.28354725787631274 + }, + "prompt_4": { + "accuracy": 0.294049008168028 + }, + "prompt_5": { + "accuracy": 0.2823803967327888 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.29002502681444403, + "category_acc": { + "high_school_european_history": 0.3170731707317073, + "business_ethics": 0.36363636363636365, + "clinical_knowledge": 0.2916666666666667, + "medical_genetics": 0.20202020202020202, + "high_school_us_history": 0.3497536945812808, + "high_school_physics": 0.28, + "high_school_world_history": 0.3177966101694915, + "virology": 0.26666666666666666, + "high_school_microeconomics": 0.3333333333333333, + "econometrics": 0.26548672566371684, + "college_computer_science": 0.23232323232323232, + "high_school_biology": 0.2912621359223301, + "abstract_algebra": 0.29292929292929293, + "professional_accounting": 0.26334519572953735, + "philosophy": 0.3, + "professional_medicine": 0.3726937269372694, + "nutrition": 0.3442622950819672, + "global_facts": 0.18181818181818182, + "machine_learning": 0.16216216216216217, + "security_studies": 0.24180327868852458, + "public_relations": 0.3394495412844037, + "professional_psychology": 0.27168576104746317, + "prehistory": 0.29102167182662536, + "anatomy": 0.23134328358208955, + "human_sexuality": 0.34615384615384615, + "college_medicine": 0.2616279069767442, + "high_school_government_and_politics": 0.3333333333333333, + "college_chemistry": 0.30303030303030304, + "logical_fallacies": 0.345679012345679, + "high_school_geography": 0.27411167512690354, + "elementary_mathematics": 0.2546419098143236, + "human_aging": 0.24774774774774774, + "college_mathematics": 0.24242424242424243, + "high_school_psychology": 0.3180147058823529, + "formal_logic": 0.312, + "high_school_statistics": 0.25116279069767444, + "international_law": 0.2833333333333333, + "high_school_mathematics": 0.2825278810408922, + "high_school_computer_science": 0.2727272727272727, + "conceptual_physics": 0.3034188034188034, + "miscellaneous": 0.26982097186700765, + "high_school_chemistry": 0.24752475247524752, + "marketing": 0.4248927038626609, + "professional_law": 0.3039791258969341, + "management": 0.38235294117647056, + "college_physics": 0.18811881188118812, + "jurisprudence": 0.308411214953271, + "world_religions": 0.24705882352941178, + "sociology": 0.325, + "us_foreign_policy": 0.3434343434343434, + "high_school_macroeconomics": 0.30077120822622105, + "computer_security": 0.1919191919191919, + "moral_scenarios": 0.24608501118568232, + "moral_disputes": 0.28695652173913044, + "electrical_engineering": 0.3055555555555556, + "astronomy": 0.33774834437086093, + "college_biology": 0.2867132867132867 + } + }, + "prompt_2": { + "accuracy": 0.2936002860207365, + "category_acc": { + "high_school_european_history": 0.32926829268292684, + "business_ethics": 0.32323232323232326, + "clinical_knowledge": 0.32575757575757575, + "medical_genetics": 0.1919191919191919, + "high_school_us_history": 0.3399014778325123, + "high_school_physics": 0.3, + "high_school_world_history": 0.3347457627118644, + "virology": 0.2606060606060606, + "high_school_microeconomics": 0.27848101265822783, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.2727272727272727, + "high_school_biology": 0.28802588996763756, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.25622775800711745, + "philosophy": 0.3225806451612903, + "professional_medicine": 0.33579335793357934, + "nutrition": 0.32131147540983607, + "global_facts": 0.29292929292929293, + "machine_learning": 0.27927927927927926, + "security_studies": 0.22950819672131148, + "public_relations": 0.3119266055045872, + "professional_psychology": 0.2765957446808511, + "prehistory": 0.30340557275541796, + "anatomy": 0.22388059701492538, + "human_sexuality": 0.3076923076923077, + "college_medicine": 0.3023255813953488, + "high_school_government_and_politics": 0.3072916666666667, + "college_chemistry": 0.26262626262626265, + "logical_fallacies": 0.3395061728395062, + "high_school_geography": 0.28426395939086296, + "elementary_mathematics": 0.23076923076923078, + "human_aging": 0.3153153153153153, + "college_mathematics": 0.31313131313131315, + "high_school_psychology": 0.3235294117647059, + "formal_logic": 0.312, + "high_school_statistics": 0.24651162790697675, + "international_law": 0.3333333333333333, + "high_school_mathematics": 0.2825278810408922, + "high_school_computer_science": 0.32323232323232326, + "conceptual_physics": 0.2948717948717949, + "miscellaneous": 0.3069053708439898, + "high_school_chemistry": 0.22277227722772278, + "marketing": 0.44635193133047213, + "professional_law": 0.294194390084801, + "management": 0.3235294117647059, + "college_physics": 0.2079207920792079, + "jurisprudence": 0.32710280373831774, + "world_religions": 0.2529411764705882, + "sociology": 0.35, + "us_foreign_policy": 0.30303030303030304, + "high_school_macroeconomics": 0.29562982005141386, + "computer_security": 0.31313131313131315, + "moral_scenarios": 0.24720357941834453, + "moral_disputes": 0.32463768115942027, + "electrical_engineering": 0.3263888888888889, + "astronomy": 0.2847682119205298, + "college_biology": 0.26573426573426573 + } + }, + "prompt_3": { + "accuracy": 0.285305684662138, + "category_acc": { + "high_school_european_history": 0.3231707317073171, + "business_ethics": 0.30303030303030304, + "clinical_knowledge": 0.29924242424242425, + "medical_genetics": 0.16161616161616163, + "high_school_us_history": 0.3399014778325123, + "high_school_physics": 0.29333333333333333, + "high_school_world_history": 0.3305084745762712, + "virology": 0.3090909090909091, + "high_school_microeconomics": 0.29535864978902954, + "econometrics": 0.1504424778761062, + "college_computer_science": 0.24242424242424243, + "high_school_biology": 0.3106796116504854, + "abstract_algebra": 0.21212121212121213, + "professional_accounting": 0.2313167259786477, + "philosophy": 0.27741935483870966, + "professional_medicine": 0.35793357933579334, + "nutrition": 0.29180327868852457, + "global_facts": 0.2222222222222222, + "machine_learning": 0.2702702702702703, + "security_studies": 0.2581967213114754, + "public_relations": 0.3211009174311927, + "professional_psychology": 0.2635024549918167, + "prehistory": 0.28792569659442724, + "anatomy": 0.21641791044776118, + "human_sexuality": 0.27692307692307694, + "college_medicine": 0.3023255813953488, + "high_school_government_and_politics": 0.3541666666666667, + "college_chemistry": 0.2727272727272727, + "logical_fallacies": 0.3333333333333333, + "high_school_geography": 0.27918781725888325, + "elementary_mathematics": 0.23076923076923078, + "human_aging": 0.2702702702702703, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.33639705882352944, + "formal_logic": 0.272, + "high_school_statistics": 0.20465116279069767, + "international_law": 0.25, + "high_school_mathematics": 0.24535315985130113, + "high_school_computer_science": 0.23232323232323232, + "conceptual_physics": 0.29914529914529914, + "miscellaneous": 0.27621483375959077, + "high_school_chemistry": 0.19306930693069307, + "marketing": 0.43776824034334766, + "professional_law": 0.2857142857142857, + "management": 0.3431372549019608, + "college_physics": 0.18811881188118812, + "jurisprudence": 0.3364485981308411, + "world_religions": 0.24705882352941178, + "sociology": 0.355, + "us_foreign_policy": 0.30303030303030304, + "high_school_macroeconomics": 0.30848329048843187, + "computer_security": 0.25252525252525254, + "moral_scenarios": 0.25279642058165547, + "moral_disputes": 0.33043478260869563, + "electrical_engineering": 0.3402777777777778, + "astronomy": 0.3576158940397351, + "college_biology": 0.27972027972027974 + } + }, + "prompt_4": { + "accuracy": 0.2908830890239542, + "category_acc": { + "high_school_european_history": 0.34146341463414637, + "business_ethics": 0.29292929292929293, + "clinical_knowledge": 0.2689393939393939, + "medical_genetics": 0.1919191919191919, + "high_school_us_history": 0.37438423645320196, + "high_school_physics": 0.26666666666666666, + "high_school_world_history": 0.3008474576271186, + "virology": 0.26666666666666666, + "high_school_microeconomics": 0.2869198312236287, + "econometrics": 0.2743362831858407, + "college_computer_science": 0.2727272727272727, + "high_school_biology": 0.30097087378640774, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.23843416370106763, + "philosophy": 0.2903225806451613, + "professional_medicine": 0.35793357933579334, + "nutrition": 0.32131147540983607, + "global_facts": 0.20202020202020202, + "machine_learning": 0.27927927927927926, + "security_studies": 0.29098360655737704, + "public_relations": 0.3211009174311927, + "professional_psychology": 0.2896890343698854, + "prehistory": 0.29411764705882354, + "anatomy": 0.2835820895522388, + "human_sexuality": 0.3230769230769231, + "college_medicine": 0.27906976744186046, + "high_school_government_and_politics": 0.3072916666666667, + "college_chemistry": 0.26262626262626265, + "logical_fallacies": 0.35802469135802467, + "high_school_geography": 0.2893401015228426, + "elementary_mathematics": 0.27055702917771884, + "human_aging": 0.26576576576576577, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.31433823529411764, + "formal_logic": 0.272, + "high_school_statistics": 0.2744186046511628, + "international_law": 0.275, + "high_school_mathematics": 0.22304832713754646, + "high_school_computer_science": 0.29292929292929293, + "conceptual_physics": 0.29914529914529914, + "miscellaneous": 0.29411764705882354, + "high_school_chemistry": 0.22772277227722773, + "marketing": 0.3948497854077253, + "professional_law": 0.29093281148075667, + "management": 0.3431372549019608, + "college_physics": 0.2079207920792079, + "jurisprudence": 0.3364485981308411, + "world_religions": 0.24705882352941178, + "sociology": 0.34, + "us_foreign_policy": 0.30303030303030304, + "high_school_macroeconomics": 0.2930591259640103, + "computer_security": 0.29292929292929293, + "moral_scenarios": 0.24720357941834453, + "moral_disputes": 0.3188405797101449, + "electrical_engineering": 0.3333333333333333, + "astronomy": 0.3576158940397351, + "college_biology": 0.3076923076923077 + } + }, + "prompt_5": { + "accuracy": 0.29846263854129423, + "category_acc": { + "high_school_european_history": 0.3902439024390244, + "business_ethics": 0.2828282828282828, + "clinical_knowledge": 0.3143939393939394, + "medical_genetics": 0.25252525252525254, + "high_school_us_history": 0.3645320197044335, + "high_school_physics": 0.22666666666666666, + "high_school_world_history": 0.3135593220338983, + "virology": 0.3090909090909091, + "high_school_microeconomics": 0.2911392405063291, + "econometrics": 0.2743362831858407, + "college_computer_science": 0.25252525252525254, + "high_school_biology": 0.32038834951456313, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.298932384341637, + "philosophy": 0.3193548387096774, + "professional_medicine": 0.3062730627306273, + "nutrition": 0.33114754098360655, + "global_facts": 0.3434343434343434, + "machine_learning": 0.3333333333333333, + "security_studies": 0.2459016393442623, + "public_relations": 0.3577981651376147, + "professional_psychology": 0.28477905073649756, + "prehistory": 0.3219814241486068, + "anatomy": 0.26119402985074625, + "human_sexuality": 0.2846153846153846, + "college_medicine": 0.29651162790697677, + "high_school_government_and_politics": 0.2916666666666667, + "college_chemistry": 0.21212121212121213, + "logical_fallacies": 0.36419753086419754, + "high_school_geography": 0.3096446700507614, + "elementary_mathematics": 0.23076923076923078, + "human_aging": 0.2882882882882883, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.32536764705882354, + "formal_logic": 0.304, + "high_school_statistics": 0.21395348837209302, + "international_law": 0.38333333333333336, + "high_school_mathematics": 0.2527881040892193, + "high_school_computer_science": 0.29292929292929293, + "conceptual_physics": 0.33760683760683763, + "miscellaneous": 0.3145780051150895, + "high_school_chemistry": 0.28217821782178215, + "marketing": 0.44206008583690987, + "professional_law": 0.29093281148075667, + "management": 0.23529411764705882, + "college_physics": 0.16831683168316833, + "jurisprudence": 0.35514018691588783, + "world_religions": 0.27647058823529413, + "sociology": 0.365, + "us_foreign_policy": 0.36363636363636365, + "high_school_macroeconomics": 0.29048843187660667, + "computer_security": 0.31313131313131315, + "moral_scenarios": 0.24720357941834453, + "moral_disputes": 0.3130434782608696, + "electrical_engineering": 0.3333333333333333, + "astronomy": 0.33112582781456956, + "college_biology": 0.2727272727272727 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2473997028231798 + }, + "prompt_2": { + "accuracy": 0.24665676077265974 + }, + "prompt_3": { + "accuracy": 0.24665676077265974 + }, + "prompt_4": { + "accuracy": 0.2436849925705795 + }, + "prompt_5": { + "accuracy": 0.24665676077265974 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.25840597758405975, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.23809523809523808, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.375, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.2, + "business_administration": 0.13157894736842105, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.20408163265306123, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.37037037037037035, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.08, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.23076923076923078, + "sports_science": 0.2916666666666667, + "plant_protection": 0.14814814814814814, + "basic_medicine": 0.375, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.3333333333333333, + "fire_engineer": 0.16666666666666666, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.2777777777777778, + "physician": 0.2962962962962963 + } + }, + "prompt_2": { + "accuracy": 0.25840597758405975, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.23809523809523808, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.375, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.2, + "business_administration": 0.13157894736842105, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.20408163265306123, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.37037037037037035, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.08, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.23076923076923078, + "sports_science": 0.2916666666666667, + "plant_protection": 0.14814814814814814, + "basic_medicine": 0.375, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.3333333333333333, + "fire_engineer": 0.16666666666666666, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.2777777777777778, + "physician": 0.2962962962962963 + } + }, + "prompt_3": { + "accuracy": 0.25840597758405975, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.23809523809523808, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.375, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.2, + "business_administration": 0.13157894736842105, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.20408163265306123, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.37037037037037035, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.08, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.23076923076923078, + "sports_science": 0.2916666666666667, + "plant_protection": 0.14814814814814814, + "basic_medicine": 0.375, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.3333333333333333, + "fire_engineer": 0.16666666666666666, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.2777777777777778, + "physician": 0.2962962962962963 + } + }, + "prompt_4": { + "accuracy": 0.2590286425902864, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.23809523809523808, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.25, + "high_school_chemistry": 0.375, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.2, + "business_administration": 0.15789473684210525, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.20408163265306123, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.37037037037037035, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.08, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.23076923076923078, + "sports_science": 0.2916666666666667, + "plant_protection": 0.14814814814814814, + "basic_medicine": 0.375, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.3333333333333333, + "fire_engineer": 0.16666666666666666, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.2777777777777778, + "physician": 0.2962962962962963 + } + }, + "prompt_5": { + "accuracy": 0.2577833125778331, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.21428571428571427, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.375, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.2, + "business_administration": 0.13157894736842105, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.20408163265306123, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.37037037037037035, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.08, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.23076923076923078, + "sports_science": 0.2916666666666667, + "plant_protection": 0.14814814814814814, + "basic_medicine": 0.375, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.3333333333333333, + "fire_engineer": 0.16666666666666666, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.2777777777777778, + "physician": 0.2962962962962963 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2078853046594982 + }, + "prompt_2": { + "accuracy": 0.2078853046594982 + }, + "prompt_3": { + "accuracy": 0.2078853046594982 + }, + "prompt_4": { + "accuracy": 0.21505376344086022 + }, + "prompt_5": { + "accuracy": 0.2078853046594982 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24866171645657054, + "category_acc": { + "agronomy": 0.23668639053254437, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.23780487804878048, + "arts": 0.25, + "astronomy": 0.2727272727272727, + "business_ethics": 0.2631578947368421, + "chinese_civil_service_exam": 0.26875, + "chinese_driving_rule": 0.21374045801526717, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.22429906542056074, + "chinese_history": 0.2476780185758514, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.2569832402234637, + "clinical_knowledge": 0.24472573839662448, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.14953271028037382, + "college_engineering_hydrology": 0.27358490566037735, + "college_law": 0.32407407407407407, + "college_mathematics": 0.2761904761904762, + "college_medical_statistics": 0.20754716981132076, + "college_medicine": 0.2271062271062271, + "computer_science": 0.23529411764705882, + "computer_security": 0.24561403508771928, + "conceptual_physics": 0.23129251700680273, + "construction_project_management": 0.2446043165467626, + "economics": 0.2578616352201258, + "education": 0.25153374233128833, + "electrical_engineering": 0.25, + "elementary_chinese": 0.20634920634920634, + "elementary_commonsense": 0.2727272727272727, + "elementary_information_and_technology": 0.2184873949579832, + "elementary_mathematics": 0.24347826086956523, + "ethnology": 0.2518518518518518, + "food_science": 0.2727272727272727, + "genetics": 0.2727272727272727, + "global_facts": 0.26174496644295303, + "high_school_biology": 0.28402366863905326, + "high_school_chemistry": 0.1893939393939394, + "high_school_geography": 0.23728813559322035, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.2636363636363636, + "high_school_politics": 0.24475524475524477, + "human_sexuality": 0.2698412698412698, + "international_law": 0.25405405405405407, + "journalism": 0.2558139534883721, + "jurisprudence": 0.26277372262773724, + "legal_and_moral_basis": 0.2336448598130841, + "logical": 0.23577235772357724, + "machine_learning": 0.2540983606557377, + "management": 0.24761904761904763, + "marketing": 0.2388888888888889, + "marxist_theory": 0.23809523809523808, + "modern_chinese": 0.25, + "nutrition": 0.27586206896551724, + "philosophy": 0.24761904761904763, + "professional_accounting": 0.25142857142857145, + "professional_law": 0.2559241706161137, + "professional_medicine": 0.25, + "professional_psychology": 0.2543103448275862, + "public_relations": 0.23563218390804597, + "security_study": 0.2222222222222222, + "sociology": 0.25663716814159293, + "sports_science": 0.2727272727272727, + "traditional_chinese_medicine": 0.24864864864864866, + "virology": 0.23076923076923078, + "world_history": 0.2608695652173913, + "world_religions": 0.26875 + } + }, + "prompt_2": { + "accuracy": 0.24883439820410982, + "category_acc": { + "agronomy": 0.23668639053254437, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.23780487804878048, + "arts": 0.25, + "astronomy": 0.2727272727272727, + "business_ethics": 0.2631578947368421, + "chinese_civil_service_exam": 0.26875, + "chinese_driving_rule": 0.21374045801526717, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.22429906542056074, + "chinese_history": 0.25077399380804954, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.2569832402234637, + "clinical_knowledge": 0.24472573839662448, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.14953271028037382, + "college_engineering_hydrology": 0.27358490566037735, + "college_law": 0.32407407407407407, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.20754716981132076, + "college_medicine": 0.2271062271062271, + "computer_science": 0.24019607843137256, + "computer_security": 0.24561403508771928, + "conceptual_physics": 0.23129251700680273, + "construction_project_management": 0.2446043165467626, + "economics": 0.2578616352201258, + "education": 0.25153374233128833, + "electrical_engineering": 0.25, + "elementary_chinese": 0.20634920634920634, + "elementary_commonsense": 0.2727272727272727, + "elementary_information_and_technology": 0.22268907563025211, + "elementary_mathematics": 0.24347826086956523, + "ethnology": 0.2518518518518518, + "food_science": 0.26573426573426573, + "genetics": 0.26704545454545453, + "global_facts": 0.26174496644295303, + "high_school_biology": 0.2781065088757396, + "high_school_chemistry": 0.19696969696969696, + "high_school_geography": 0.23728813559322035, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.2636363636363636, + "high_school_politics": 0.24475524475524477, + "human_sexuality": 0.2698412698412698, + "international_law": 0.25405405405405407, + "journalism": 0.2558139534883721, + "jurisprudence": 0.26277372262773724, + "legal_and_moral_basis": 0.2336448598130841, + "logical": 0.23577235772357724, + "machine_learning": 0.2540983606557377, + "management": 0.24761904761904763, + "marketing": 0.2388888888888889, + "marxist_theory": 0.23809523809523808, + "modern_chinese": 0.25, + "nutrition": 0.27586206896551724, + "philosophy": 0.24761904761904763, + "professional_accounting": 0.25142857142857145, + "professional_law": 0.2559241706161137, + "professional_medicine": 0.25, + "professional_psychology": 0.2543103448275862, + "public_relations": 0.23563218390804597, + "security_study": 0.22962962962962963, + "sociology": 0.25663716814159293, + "sports_science": 0.2727272727272727, + "traditional_chinese_medicine": 0.24864864864864866, + "virology": 0.23076923076923078, + "world_history": 0.2608695652173913, + "world_religions": 0.26875 + } + }, + "prompt_3": { + "accuracy": 0.2490070799516491, + "category_acc": { + "agronomy": 0.23668639053254437, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.23780487804878048, + "arts": 0.25, + "astronomy": 0.2727272727272727, + "business_ethics": 0.2631578947368421, + "chinese_civil_service_exam": 0.26875, + "chinese_driving_rule": 0.21374045801526717, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.22429906542056074, + "chinese_history": 0.25077399380804954, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.2569832402234637, + "clinical_knowledge": 0.24472573839662448, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.14953271028037382, + "college_engineering_hydrology": 0.27358490566037735, + "college_law": 0.32407407407407407, + "college_mathematics": 0.29523809523809524, + "college_medical_statistics": 0.20754716981132076, + "college_medicine": 0.2271062271062271, + "computer_science": 0.23529411764705882, + "computer_security": 0.25146198830409355, + "conceptual_physics": 0.23129251700680273, + "construction_project_management": 0.2446043165467626, + "economics": 0.2578616352201258, + "education": 0.25153374233128833, + "electrical_engineering": 0.25, + "elementary_chinese": 0.20634920634920634, + "elementary_commonsense": 0.2727272727272727, + "elementary_information_and_technology": 0.22268907563025211, + "elementary_mathematics": 0.24347826086956523, + "ethnology": 0.2518518518518518, + "food_science": 0.2727272727272727, + "genetics": 0.2727272727272727, + "global_facts": 0.26174496644295303, + "high_school_biology": 0.28402366863905326, + "high_school_chemistry": 0.1893939393939394, + "high_school_geography": 0.23728813559322035, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.2636363636363636, + "high_school_politics": 0.24475524475524477, + "human_sexuality": 0.2698412698412698, + "international_law": 0.25405405405405407, + "journalism": 0.2558139534883721, + "jurisprudence": 0.26277372262773724, + "legal_and_moral_basis": 0.2336448598130841, + "logical": 0.23577235772357724, + "machine_learning": 0.2540983606557377, + "management": 0.24761904761904763, + "marketing": 0.2388888888888889, + "marxist_theory": 0.23809523809523808, + "modern_chinese": 0.25, + "nutrition": 0.27586206896551724, + "philosophy": 0.24761904761904763, + "professional_accounting": 0.25142857142857145, + "professional_law": 0.2559241706161137, + "professional_medicine": 0.25, + "professional_psychology": 0.2543103448275862, + "public_relations": 0.23563218390804597, + "security_study": 0.2222222222222222, + "sociology": 0.25663716814159293, + "sports_science": 0.2727272727272727, + "traditional_chinese_medicine": 0.24864864864864866, + "virology": 0.23076923076923078, + "world_history": 0.2608695652173913, + "world_religions": 0.26875 + } + }, + "prompt_4": { + "accuracy": 0.24866171645657054, + "category_acc": { + "agronomy": 0.23668639053254437, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.23780487804878048, + "arts": 0.25, + "astronomy": 0.26666666666666666, + "business_ethics": 0.2679425837320574, + "chinese_civil_service_exam": 0.275, + "chinese_driving_rule": 0.22137404580152673, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.22429906542056074, + "chinese_history": 0.2476780185758514, + "chinese_literature": 0.2549019607843137, + "chinese_teacher_qualification": 0.24581005586592178, + "clinical_knowledge": 0.2489451476793249, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.14953271028037382, + "college_engineering_hydrology": 0.27358490566037735, + "college_law": 0.32407407407407407, + "college_mathematics": 0.2761904761904762, + "college_medical_statistics": 0.2169811320754717, + "college_medicine": 0.2271062271062271, + "computer_science": 0.22549019607843138, + "computer_security": 0.25146198830409355, + "conceptual_physics": 0.23129251700680273, + "construction_project_management": 0.2446043165467626, + "economics": 0.2578616352201258, + "education": 0.25153374233128833, + "electrical_engineering": 0.23837209302325582, + "elementary_chinese": 0.20238095238095238, + "elementary_commonsense": 0.2727272727272727, + "elementary_information_and_technology": 0.22268907563025211, + "elementary_mathematics": 0.25217391304347825, + "ethnology": 0.24444444444444444, + "food_science": 0.25874125874125875, + "genetics": 0.26704545454545453, + "global_facts": 0.26174496644295303, + "high_school_biology": 0.26627218934911245, + "high_school_chemistry": 0.2196969696969697, + "high_school_geography": 0.2457627118644068, + "high_school_mathematics": 0.24390243902439024, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.24475524475524477, + "human_sexuality": 0.2698412698412698, + "international_law": 0.2594594594594595, + "journalism": 0.2558139534883721, + "jurisprudence": 0.26034063260340634, + "legal_and_moral_basis": 0.22897196261682243, + "logical": 0.23577235772357724, + "machine_learning": 0.2459016393442623, + "management": 0.2523809523809524, + "marketing": 0.2388888888888889, + "marxist_theory": 0.23809523809523808, + "modern_chinese": 0.25, + "nutrition": 0.2827586206896552, + "philosophy": 0.2571428571428571, + "professional_accounting": 0.24571428571428572, + "professional_law": 0.24644549763033174, + "professional_medicine": 0.24202127659574468, + "professional_psychology": 0.25, + "public_relations": 0.2413793103448276, + "security_study": 0.23703703703703705, + "sociology": 0.25663716814159293, + "sports_science": 0.2727272727272727, + "traditional_chinese_medicine": 0.24864864864864866, + "virology": 0.25443786982248523, + "world_history": 0.2608695652173913, + "world_religions": 0.26875 + } + }, + "prompt_5": { + "accuracy": 0.24892073907787945, + "category_acc": { + "agronomy": 0.23668639053254437, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.23780487804878048, + "arts": 0.25, + "astronomy": 0.2727272727272727, + "business_ethics": 0.2631578947368421, + "chinese_civil_service_exam": 0.26875, + "chinese_driving_rule": 0.21374045801526717, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.22429906542056074, + "chinese_history": 0.2476780185758514, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.2569832402234637, + "clinical_knowledge": 0.24472573839662448, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.14953271028037382, + "college_engineering_hydrology": 0.27358490566037735, + "college_law": 0.32407407407407407, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.20754716981132076, + "college_medicine": 0.2271062271062271, + "computer_science": 0.24019607843137256, + "computer_security": 0.25146198830409355, + "conceptual_physics": 0.23129251700680273, + "construction_project_management": 0.2446043165467626, + "economics": 0.2578616352201258, + "education": 0.25153374233128833, + "electrical_engineering": 0.2558139534883721, + "elementary_chinese": 0.20634920634920634, + "elementary_commonsense": 0.2727272727272727, + "elementary_information_and_technology": 0.22268907563025211, + "elementary_mathematics": 0.24347826086956523, + "ethnology": 0.2518518518518518, + "food_science": 0.2727272727272727, + "genetics": 0.26704545454545453, + "global_facts": 0.26174496644295303, + "high_school_biology": 0.2781065088757396, + "high_school_chemistry": 0.19696969696969696, + "high_school_geography": 0.23728813559322035, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.2636363636363636, + "high_school_politics": 0.24475524475524477, + "human_sexuality": 0.2698412698412698, + "international_law": 0.25405405405405407, + "journalism": 0.2558139534883721, + "jurisprudence": 0.26277372262773724, + "legal_and_moral_basis": 0.2336448598130841, + "logical": 0.23577235772357724, + "machine_learning": 0.2540983606557377, + "management": 0.24761904761904763, + "marketing": 0.2388888888888889, + "marxist_theory": 0.23809523809523808, + "modern_chinese": 0.25, + "nutrition": 0.27586206896551724, + "philosophy": 0.24761904761904763, + "professional_accounting": 0.25142857142857145, + "professional_law": 0.2559241706161137, + "professional_medicine": 0.25, + "professional_psychology": 0.2543103448275862, + "public_relations": 0.23563218390804597, + "security_study": 0.2222222222222222, + "sociology": 0.25663716814159293, + "sports_science": 0.2727272727272727, + "traditional_chinese_medicine": 0.24864864864864866, + "virology": 0.23076923076923078, + "world_history": 0.2608695652173913, + "world_religions": 0.26875 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3939393939393939 + }, + "prompt_2": { + "accuracy": 0.3939393939393939 + }, + "prompt_3": { + "accuracy": 0.3939393939393939 + }, + "prompt_4": { + "accuracy": 0.42424242424242425 + }, + "prompt_5": { + "accuracy": 0.3939393939393939 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.19318181818181818 + }, + "prompt_2": { + "accuracy": 0.20909090909090908 + }, + "prompt_3": { + "accuracy": 0.20454545454545456 + }, + "prompt_4": { + "accuracy": 0.24772727272727274 + }, + "prompt_5": { + "accuracy": 0.2409090909090909 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3447457627118644 + }, + "prompt_2": { + "accuracy": 0.3247457627118644 + }, + "prompt_3": { + "accuracy": 0.32305084745762713 + }, + "prompt_4": { + "accuracy": 0.3352542372881356 + }, + "prompt_5": { + "accuracy": 0.3352542372881356 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2830964846671653 + }, + "prompt_2": { + "accuracy": 0.27973074046372476 + }, + "prompt_3": { + "accuracy": 0.2801047120418848 + }, + "prompt_4": { + "accuracy": 0.28272251308900526 + }, + "prompt_5": { + "accuracy": 0.27973074046372476 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5110240078392945 + }, + "prompt_2": { + "accuracy": 0.5193532582067614 + }, + "prompt_3": { + "accuracy": 0.514453699167075 + }, + "prompt_4": { + "accuracy": 0.5149436550710436 + }, + "prompt_5": { + "accuracy": 0.5085742283194512 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.43450453690012114, + "rouge2": 0.18942558216936187, + "rougeL": 0.35498599298997957, + "avg_rouge": 0.3263053706864875 + }, + "prompt_2": { + "rouge1": 0.43316188995128135, + "rouge2": 0.1859387460379173, + "rougeL": 0.35283300861368344, + "avg_rouge": 0.323977881534294 + }, + "prompt_3": { + "rouge1": 0.4305864054637615, + "rouge2": 0.18463150575706694, + "rougeL": 0.35001602147764865, + "avg_rouge": 0.3217446442328257 + }, + "prompt_4": { + "rouge1": 0.4308126196532795, + "rouge2": 0.18564091697548368, + "rougeL": 0.3526767158387147, + "avg_rouge": 0.32304341748915927 + }, + "prompt_5": { + "rouge1": 0.43068054743349354, + "rouge2": 0.18547624892916834, + "rougeL": 0.3527324872938922, + "avg_rouge": 0.3229630945521847 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.23422070387031205, + "rouge2": 0.06710442971197686, + "rougeL": 0.1777099524618065, + "avg_rouge": 0.15967836201469846 + }, + "prompt_2": { + "rouge1": 0.2490774442492666, + "rouge2": 0.07508282935067671, + "rougeL": 0.19008917364613892, + "avg_rouge": 0.17141648241536075 + }, + "prompt_3": { + "rouge1": 0.23788823335724005, + "rouge2": 0.07055380047877649, + "rougeL": 0.1814952716605705, + "avg_rouge": 0.163312435165529 + }, + "prompt_4": { + "rouge1": 0.2551843098593104, + "rouge2": 0.07677293900653273, + "rougeL": 0.19966609396283927, + "avg_rouge": 0.17720778094289413 + }, + "prompt_5": { + "rouge1": 0.22551405106901598, + "rouge2": 0.063700826548528, + "rougeL": 0.16823416395970445, + "avg_rouge": 0.1524830138590828 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.856651376146789 + }, + "prompt_2": { + "accuracy": 0.8486238532110092 + }, + "prompt_3": { + "accuracy": 0.8474770642201835 + }, + "prompt_4": { + "accuracy": 0.8577981651376146 + }, + "prompt_5": { + "accuracy": 0.8692660550458715 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6720997123681688 + }, + "prompt_2": { + "accuracy": 0.6663470757430489 + }, + "prompt_3": { + "accuracy": 0.6816874400767018 + }, + "prompt_4": { + "accuracy": 0.6644295302013423 + }, + "prompt_5": { + "accuracy": 0.6519654841802492 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4745 + }, + "prompt_2": { + "accuracy": 0.4775 + }, + "prompt_3": { + "accuracy": 0.447 + }, + "prompt_4": { + "accuracy": 0.499 + }, + "prompt_5": { + "accuracy": 0.4935 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.361 + }, + "prompt_2": { + "accuracy": 0.358 + }, + "prompt_3": { + "accuracy": 0.3615 + }, + "prompt_4": { + "accuracy": 0.36 + }, + "prompt_5": { + "accuracy": 0.356 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4215 + }, + "prompt_2": { + "accuracy": 0.4605 + }, + "prompt_3": { + "accuracy": 0.4245 + }, + "prompt_4": { + "accuracy": 0.437 + }, + "prompt_5": { + "accuracy": 0.434 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5774647887323944 + }, + "prompt_2": { + "accuracy": 0.6197183098591549 + }, + "prompt_3": { + "accuracy": 0.5492957746478874 + }, + "prompt_4": { + "accuracy": 0.5633802816901409 + }, + "prompt_5": { + "accuracy": 0.5633802816901409 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4657039711191336 + }, + "prompt_2": { + "accuracy": 0.49097472924187724 + }, + "prompt_3": { + "accuracy": 0.4259927797833935 + }, + "prompt_4": { + "accuracy": 0.45126353790613716 + }, + "prompt_5": { + "accuracy": 0.4584837545126354 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6740196078431373 + }, + "prompt_2": { + "accuracy": 0.6666666666666666 + }, + "prompt_3": { + "accuracy": 0.6813725490196079 + }, + "prompt_4": { + "accuracy": 0.48284313725490197 + }, + "prompt_5": { + "accuracy": 0.5906862745098039 + } } }, "five_shot": { @@ -1018,55 +9773,1735 @@ "model_link": "https://huggingface.co/google/flan-t5-base", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.31333333333333335, + "language_acc": { + "Malay": 0.2866666666666667, + "English": 0.41333333333333333, + "Vietnamese": 0.29333333333333333, + "Spanish": 0.34, + "Indonesian": 0.26666666666666666, + "Filipino": 0.2866666666666667, + "Chinese": 0.30666666666666664 + }, + "consistency_score_2": 0.42920634920634915, + "consistency_score_3": 0.24209523809523809, + "consistency_score_4": 0.15885714285714284, + "consistency_score_5": 0.1123809523809524, + "consistency_score_6": 0.08190476190476191, + "consistency_score_7": 0.06, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.42, + "Malay,Vietnamese": 0.4666666666666667, + "Malay,Spanish": 0.41333333333333333, + "Malay,Indonesian": 0.5866666666666667, + "Malay,Filipino": 0.5933333333333334, + "Malay,Chinese": 0.37333333333333335, + "English,Vietnamese": 0.37333333333333335, + "English,Spanish": 0.48, + "English,Indonesian": 0.4266666666666667, + "English,Filipino": 0.52, + "English,Chinese": 0.32666666666666666, + "Vietnamese,Spanish": 0.44666666666666666, + "Vietnamese,Indonesian": 0.42, + "Vietnamese,Filipino": 0.44666666666666666, + "Vietnamese,Chinese": 0.36666666666666664, + "Spanish,Indonesian": 0.44666666666666666, + "Spanish,Filipino": 0.43333333333333335, + "Spanish,Chinese": 0.25333333333333335, + "Indonesian,Filipino": 0.5066666666666667, + "Indonesian,Chinese": 0.32666666666666666, + "Filipino,Chinese": 0.38666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.24666666666666667, + "Malay,English,Spanish": 0.26, + "Malay,English,Indonesian": 0.3, + "Malay,English,Filipino": 0.3333333333333333, + "Malay,English,Chinese": 0.16, + "Malay,Vietnamese,Spanish": 0.28, + "Malay,Vietnamese,Indonesian": 0.30666666666666664, + "Malay,Vietnamese,Filipino": 0.3333333333333333, + "Malay,Vietnamese,Chinese": 0.21333333333333335, + "Malay,Spanish,Indonesian": 0.3, + "Malay,Spanish,Filipino": 0.30666666666666664, + "Malay,Spanish,Chinese": 0.14, + "Malay,Indonesian,Filipino": 0.3933333333333333, + "Malay,Indonesian,Chinese": 0.22, + "Malay,Filipino,Chinese": 0.25333333333333335, + "English,Vietnamese,Spanish": 0.26666666666666666, + "English,Vietnamese,Indonesian": 0.22666666666666666, + "English,Vietnamese,Filipino": 0.2733333333333333, + "English,Vietnamese,Chinese": 0.14666666666666667, + "English,Spanish,Indonesian": 0.26666666666666666, + "English,Spanish,Filipino": 0.30666666666666664, + "English,Spanish,Chinese": 0.14, + "English,Indonesian,Filipino": 0.3, + "English,Indonesian,Chinese": 0.15333333333333332, + "English,Filipino,Chinese": 0.19333333333333333, + "Vietnamese,Spanish,Indonesian": 0.28, + "Vietnamese,Spanish,Filipino": 0.2866666666666667, + "Vietnamese,Spanish,Chinese": 0.17333333333333334, + "Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "Vietnamese,Indonesian,Chinese": 0.18, + "Vietnamese,Filipino,Chinese": 0.20666666666666667, + "Spanish,Indonesian,Filipino": 0.2733333333333333, + "Spanish,Indonesian,Chinese": 0.14, + "Spanish,Filipino,Chinese": 0.14666666666666667, + "Indonesian,Filipino,Chinese": 0.2 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.20666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.19333333333333333, + "Malay,English,Vietnamese,Filipino": 0.21333333333333335, + "Malay,English,Vietnamese,Chinese": 0.11333333333333333, + "Malay,English,Spanish,Indonesian": 0.2, + "Malay,English,Spanish,Filipino": 0.22, + "Malay,English,Spanish,Chinese": 0.09333333333333334, + "Malay,English,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Filipino,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Indonesian": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Filipino": 0.22, + "Malay,Vietnamese,Spanish,Chinese": 0.12, + "Malay,Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "Malay,Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.16666666666666666, + "Malay,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Malay,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Malay,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.15333333333333332, + "English,Vietnamese,Spanish,Indonesian": 0.2, + "English,Vietnamese,Spanish,Filipino": 0.21333333333333335, + "English,Vietnamese,Spanish,Chinese": 0.10666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.18666666666666668, + "English,Vietnamese,Indonesian,Chinese": 0.1, + "English,Vietnamese,Filipino,Chinese": 0.12, + "English,Spanish,Indonesian,Filipino": 0.2, + "English,Spanish,Indonesian,Chinese": 0.09333333333333334, + "English,Spanish,Filipino,Chinese": 0.1, + "English,Indonesian,Filipino,Chinese": 0.12, + "Vietnamese,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Vietnamese,Spanish,Indonesian,Chinese": 0.12, + "Vietnamese,Spanish,Filipino,Chinese": 0.12, + "Vietnamese,Indonesian,Filipino,Chinese": 0.12, + "Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.17333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino": 0.17333333333333334, + "Malay,English,Vietnamese,Spanish,Chinese": 0.08666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.08666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.1, + "Malay,English,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Spanish,Indonesian,Chinese": 0.07333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.08, + "Malay,English,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.08, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.16666666666666666, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.08666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.08666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.08, + "English,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.06, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + } + }, + "AC3_2": 0.36222887268885473, + "AC3_3": 0.27314586186211653, + "AC3_4": 0.2108269463046857, + "AC3_5": 0.16542878445032808, + "AC3_6": 0.12986345378240402, + "AC3_7": 0.10071428568730867 + }, + "prompt_2": { + "overall_acc": 0.31333333333333335, + "language_acc": { + "Malay": 0.30666666666666664, + "English": 0.41333333333333333, + "Vietnamese": 0.3, + "Spanish": 0.3333333333333333, + "Indonesian": 0.29333333333333333, + "Filipino": 0.3, + "Chinese": 0.24666666666666667 + }, + "consistency_score_2": 0.41555555555555557, + "consistency_score_3": 0.22704761904761905, + "consistency_score_4": 0.144952380952381, + "consistency_score_5": 0.10253968253968254, + "consistency_score_6": 0.07714285714285715, + "consistency_score_7": 0.06, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.38666666666666666, + "Malay,Vietnamese": 0.5066666666666667, + "Malay,Spanish": 0.44666666666666666, + "Malay,Indonesian": 0.6333333333333333, + "Malay,Filipino": 0.52, + "Malay,Chinese": 0.34, + "English,Vietnamese": 0.36666666666666664, + "English,Spanish": 0.4666666666666667, + "English,Indonesian": 0.38, + "English,Filipino": 0.49333333333333335, + "English,Chinese": 0.25333333333333335, + "Vietnamese,Spanish": 0.44666666666666666, + "Vietnamese,Indonesian": 0.44, + "Vietnamese,Filipino": 0.43333333333333335, + "Vietnamese,Chinese": 0.28, + "Spanish,Indonesian": 0.4866666666666667, + "Spanish,Filipino": 0.4666666666666667, + "Spanish,Chinese": 0.29333333333333333, + "Indonesian,Filipino": 0.46, + "Indonesian,Chinese": 0.32666666666666666, + "Filipino,Chinese": 0.3 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.24, + "Malay,English,Spanish": 0.25333333333333335, + "Malay,English,Indonesian": 0.2866666666666667, + "Malay,English,Filipino": 0.2866666666666667, + "Malay,English,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish": 0.30666666666666664, + "Malay,Vietnamese,Indonesian": 0.3466666666666667, + "Malay,Vietnamese,Filipino": 0.31333333333333335, + "Malay,Vietnamese,Chinese": 0.18666666666666668, + "Malay,Spanish,Indonesian": 0.34, + "Malay,Spanish,Filipino": 0.2866666666666667, + "Malay,Spanish,Chinese": 0.14666666666666667, + "Malay,Indonesian,Filipino": 0.36, + "Malay,Indonesian,Chinese": 0.21333333333333335, + "Malay,Filipino,Chinese": 0.18, + "English,Vietnamese,Spanish": 0.26666666666666666, + "English,Vietnamese,Indonesian": 0.22, + "English,Vietnamese,Filipino": 0.24, + "English,Vietnamese,Chinese": 0.09333333333333334, + "English,Spanish,Indonesian": 0.25333333333333335, + "English,Spanish,Filipino": 0.30666666666666664, + "English,Spanish,Chinese": 0.14, + "English,Indonesian,Filipino": 0.26666666666666666, + "English,Indonesian,Chinese": 0.12, + "English,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Indonesian": 0.2866666666666667, + "Vietnamese,Spanish,Filipino": 0.26666666666666666, + "Vietnamese,Spanish,Chinese": 0.13333333333333333, + "Vietnamese,Indonesian,Filipino": 0.24666666666666667, + "Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "Vietnamese,Filipino,Chinese": 0.12666666666666668, + "Spanish,Indonesian,Filipino": 0.2866666666666667, + "Spanish,Indonesian,Chinese": 0.17333333333333334, + "Spanish,Filipino,Chinese": 0.15333333333333332, + "Indonesian,Filipino,Chinese": 0.17333333333333334 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.2, + "Malay,English,Vietnamese,Indonesian": 0.19333333333333333, + "Malay,English,Vietnamese,Filipino": 0.2, + "Malay,English,Vietnamese,Chinese": 0.08666666666666667, + "Malay,English,Spanish,Indonesian": 0.22, + "Malay,English,Spanish,Filipino": 0.19333333333333333, + "Malay,English,Spanish,Chinese": 0.08, + "Malay,English,Indonesian,Filipino": 0.21333333333333335, + "Malay,English,Indonesian,Chinese": 0.1, + "Malay,English,Filipino,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.24666666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.22, + "Malay,Vietnamese,Indonesian,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.10666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,Spanish,Filipino,Chinese": 0.1, + "Malay,Indonesian,Filipino,Chinese": 0.14, + "English,Vietnamese,Spanish,Indonesian": 0.18, + "English,Vietnamese,Spanish,Filipino": 0.19333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.07333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.17333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.08666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.07333333333333333, + "English,Spanish,Indonesian,Filipino": 0.19333333333333333, + "English,Spanish,Indonesian,Chinese": 0.09333333333333334, + "English,Spanish,Filipino,Chinese": 0.08666666666666667, + "English,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino": 0.2, + "Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.08666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Spanish,Indonesian,Filipino,Chinese": 0.12 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.17333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino": 0.16, + "Malay,English,Vietnamese,Spanish,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.16, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.08, + "Malay,English,Vietnamese,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Spanish,Indonesian,Chinese": 0.08, + "Malay,English,Spanish,Filipino,Chinese": 0.06666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.07333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.06, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.06666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.14, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.06, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.06666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + } + }, + "AC3_2": 0.35727642271521104, + "AC3_3": 0.26330160963286336, + "AC3_4": 0.19821003043610688, + "AC3_5": 0.15451399487378736, + "AC3_6": 0.12380487801707436, + "AC3_7": 0.10071428568730867 + }, + "prompt_3": { + "overall_acc": 0.32571428571428573, + "language_acc": { + "Malay": 0.32666666666666666, + "English": 0.4066666666666667, + "Vietnamese": 0.32666666666666666, + "Spanish": 0.3466666666666667, + "Indonesian": 0.29333333333333333, + "Filipino": 0.3, + "Chinese": 0.28 + }, + "consistency_score_2": 0.4438095238095238, + "consistency_score_3": 0.2624761904761905, + "consistency_score_4": 0.17923809523809522, + "consistency_score_5": 0.1323809523809524, + "consistency_score_6": 0.10190476190476191, + "consistency_score_7": 0.08, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.46, + "Malay,Vietnamese": 0.5333333333333333, + "Malay,Spanish": 0.4266666666666667, + "Malay,Indonesian": 0.6466666666666666, + "Malay,Filipino": 0.5266666666666666, + "Malay,Chinese": 0.3933333333333333, + "English,Vietnamese": 0.38, + "English,Spanish": 0.5133333333333333, + "English,Indonesian": 0.43333333333333335, + "English,Filipino": 0.54, + "English,Chinese": 0.3466666666666667, + "Vietnamese,Spanish": 0.4666666666666667, + "Vietnamese,Indonesian": 0.4533333333333333, + "Vietnamese,Filipino": 0.41333333333333333, + "Vietnamese,Chinese": 0.32666666666666666, + "Spanish,Indonesian": 0.44666666666666666, + "Spanish,Filipino": 0.4666666666666667, + "Spanish,Chinese": 0.3466666666666667, + "Indonesian,Filipino": 0.48, + "Indonesian,Chinese": 0.36666666666666664, + "Filipino,Chinese": 0.35333333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.2866666666666667, + "Malay,English,Spanish": 0.2733333333333333, + "Malay,English,Indonesian": 0.34, + "Malay,English,Filipino": 0.34, + "Malay,English,Chinese": 0.22, + "Malay,Vietnamese,Spanish": 0.31333333333333335, + "Malay,Vietnamese,Indonesian": 0.36666666666666664, + "Malay,Vietnamese,Filipino": 0.32666666666666666, + "Malay,Vietnamese,Chinese": 0.21333333333333335, + "Malay,Spanish,Indonesian": 0.3333333333333333, + "Malay,Spanish,Filipino": 0.3, + "Malay,Spanish,Chinese": 0.2, + "Malay,Indonesian,Filipino": 0.38666666666666666, + "Malay,Indonesian,Chinese": 0.24666666666666667, + "Malay,Filipino,Chinese": 0.22, + "English,Vietnamese,Spanish": 0.29333333333333333, + "English,Vietnamese,Indonesian": 0.25333333333333335, + "English,Vietnamese,Filipino": 0.28, + "English,Vietnamese,Chinese": 0.16666666666666666, + "English,Spanish,Indonesian": 0.28, + "English,Spanish,Filipino": 0.3466666666666667, + "English,Spanish,Chinese": 0.22666666666666666, + "English,Indonesian,Filipino": 0.30666666666666664, + "English,Indonesian,Chinese": 0.18666666666666668, + "English,Filipino,Chinese": 0.23333333333333334, + "Vietnamese,Spanish,Indonesian": 0.3, + "Vietnamese,Spanish,Filipino": 0.2866666666666667, + "Vietnamese,Spanish,Chinese": 0.16666666666666666, + "Vietnamese,Indonesian,Filipino": 0.26, + "Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "Vietnamese,Filipino,Chinese": 0.16666666666666666, + "Spanish,Indonesian,Filipino": 0.2866666666666667, + "Spanish,Indonesian,Chinese": 0.2, + "Spanish,Filipino,Chinese": 0.20666666666666667, + "Indonesian,Filipino,Chinese": 0.20666666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.22, + "Malay,English,Vietnamese,Indonesian": 0.22666666666666666, + "Malay,English,Vietnamese,Filipino": 0.24, + "Malay,English,Vietnamese,Chinese": 0.14, + "Malay,English,Spanish,Indonesian": 0.22, + "Malay,English,Spanish,Filipino": 0.23333333333333334, + "Malay,English,Spanish,Chinese": 0.14666666666666667, + "Malay,English,Indonesian,Filipino": 0.26666666666666666, + "Malay,English,Indonesian,Chinese": 0.15333333333333332, + "Malay,English,Filipino,Chinese": 0.18, + "Malay,Vietnamese,Spanish,Indonesian": 0.26, + "Malay,Vietnamese,Spanish,Filipino": 0.24, + "Malay,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Indonesian,Filipino": 0.24666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.14, + "Malay,Spanish,Indonesian,Filipino": 0.25333333333333335, + "Malay,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,Spanish,Filipino,Chinese": 0.14666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.17333333333333334, + "English,Vietnamese,Spanish,Indonesian": 0.21333333333333335, + "English,Vietnamese,Spanish,Filipino": 0.22, + "English,Vietnamese,Spanish,Chinese": 0.12, + "English,Vietnamese,Indonesian,Filipino": 0.18666666666666668, + "English,Vietnamese,Indonesian,Chinese": 0.12, + "English,Vietnamese,Filipino,Chinese": 0.14, + "English,Spanish,Indonesian,Filipino": 0.21333333333333335, + "English,Spanish,Indonesian,Chinese": 0.13333333333333333, + "English,Spanish,Filipino,Chinese": 0.16666666666666666, + "English,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.12, + "Spanish,Indonesian,Filipino,Chinese": 0.14 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Chinese": 0.1, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.12666666666666668, + "Malay,English,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.13333333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.12, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.1, + "English,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.08666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.1, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08 + } + }, + "AC3_2": 0.37570014139389335, + "AC3_3": 0.29069578085361275, + "AC3_4": 0.23123134122704236, + "AC3_5": 0.18825066820957406, + "AC3_6": 0.1552402163174987, + "AC3_7": 0.12845070419369173 + }, + "prompt_4": { + "overall_acc": 0.3304761904761905, + "language_acc": { + "Malay": 0.30666666666666664, + "English": 0.4, + "Vietnamese": 0.3333333333333333, + "Spanish": 0.3466666666666667, + "Indonesian": 0.30666666666666664, + "Filipino": 0.31333333333333335, + "Chinese": 0.30666666666666664 + }, + "consistency_score_2": 0.4622222222222221, + "consistency_score_3": 0.2811428571428571, + "consistency_score_4": 0.2005714285714286, + "consistency_score_5": 0.15777777777777777, + "consistency_score_6": 0.13142857142857142, + "consistency_score_7": 0.11333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.42, + "Malay,Vietnamese": 0.5333333333333333, + "Malay,Spanish": 0.44, + "Malay,Indonesian": 0.6666666666666666, + "Malay,Filipino": 0.6, + "Malay,Chinese": 0.4533333333333333, + "English,Vietnamese": 0.38666666666666666, + "English,Spanish": 0.5066666666666667, + "English,Indonesian": 0.4266666666666667, + "English,Filipino": 0.52, + "English,Chinese": 0.36666666666666664, + "Vietnamese,Spanish": 0.4533333333333333, + "Vietnamese,Indonesian": 0.4866666666666667, + "Vietnamese,Filipino": 0.4533333333333333, + "Vietnamese,Chinese": 0.42, + "Spanish,Indonesian": 0.49333333333333335, + "Spanish,Filipino": 0.4666666666666667, + "Spanish,Chinese": 0.30666666666666664, + "Indonesian,Filipino": 0.5066666666666667, + "Indonesian,Chinese": 0.4066666666666667, + "Filipino,Chinese": 0.3933333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.26666666666666666, + "Malay,English,Spanish": 0.29333333333333333, + "Malay,English,Indonesian": 0.3333333333333333, + "Malay,English,Filipino": 0.3333333333333333, + "Malay,English,Chinese": 0.2, + "Malay,Vietnamese,Spanish": 0.32, + "Malay,Vietnamese,Indonesian": 0.4, + "Malay,Vietnamese,Filipino": 0.36666666666666664, + "Malay,Vietnamese,Chinese": 0.3, + "Malay,Spanish,Indonesian": 0.36, + "Malay,Spanish,Filipino": 0.31333333333333335, + "Malay,Spanish,Chinese": 0.2, + "Malay,Indonesian,Filipino": 0.4266666666666667, + "Malay,Indonesian,Chinese": 0.31333333333333335, + "Malay,Filipino,Chinese": 0.3, + "English,Vietnamese,Spanish": 0.2866666666666667, + "English,Vietnamese,Indonesian": 0.24666666666666667, + "English,Vietnamese,Filipino": 0.26, + "English,Vietnamese,Chinese": 0.18666666666666668, + "English,Spanish,Indonesian": 0.29333333333333333, + "English,Spanish,Filipino": 0.34, + "English,Spanish,Chinese": 0.19333333333333333, + "English,Indonesian,Filipino": 0.30666666666666664, + "English,Indonesian,Chinese": 0.21333333333333335, + "English,Filipino,Chinese": 0.22, + "Vietnamese,Spanish,Indonesian": 0.31333333333333335, + "Vietnamese,Spanish,Filipino": 0.2866666666666667, + "Vietnamese,Spanish,Chinese": 0.21333333333333335, + "Vietnamese,Indonesian,Filipino": 0.31333333333333335, + "Vietnamese,Indonesian,Chinese": 0.25333333333333335, + "Vietnamese,Filipino,Chinese": 0.23333333333333334, + "Spanish,Indonesian,Filipino": 0.31333333333333335, + "Spanish,Indonesian,Chinese": 0.20666666666666667, + "Spanish,Filipino,Chinese": 0.18666666666666668, + "Indonesian,Filipino,Chinese": 0.24666666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.22666666666666666, + "Malay,English,Vietnamese,Indonesian": 0.22, + "Malay,English,Vietnamese,Filipino": 0.23333333333333334, + "Malay,English,Vietnamese,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Indonesian": 0.25333333333333335, + "Malay,English,Spanish,Filipino": 0.25333333333333335, + "Malay,English,Spanish,Chinese": 0.15333333333333332, + "Malay,English,Indonesian,Filipino": 0.2733333333333333, + "Malay,English,Indonesian,Chinese": 0.18, + "Malay,English,Filipino,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Indonesian": 0.2733333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.24, + "Malay,Vietnamese,Spanish,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.2866666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Filipino,Chinese": 0.21333333333333335, + "Malay,Spanish,Indonesian,Filipino": 0.26666666666666666, + "Malay,Spanish,Indonesian,Chinese": 0.18, + "Malay,Spanish,Filipino,Chinese": 0.15333333333333332, + "Malay,Indonesian,Filipino,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish,Indonesian": 0.22666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.22, + "English,Vietnamese,Spanish,Chinese": 0.14666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.21333333333333335, + "English,Vietnamese,Indonesian,Chinese": 0.13333333333333333, + "English,Vietnamese,Filipino,Chinese": 0.14, + "English,Spanish,Indonesian,Filipino": 0.24, + "English,Spanish,Indonesian,Chinese": 0.15333333333333332, + "English,Spanish,Filipino,Chinese": 0.14666666666666667, + "English,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Vietnamese,Spanish,Indonesian,Chinese": 0.16, + "Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.20666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.2, + "Malay,English,Vietnamese,Spanish,Chinese": 0.13333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.2, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.13333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.13333333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.22, + "Malay,English,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.13333333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.16, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.22, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.19333333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.12, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.12, + "English,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.12, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + } + }, + "AC3_2": 0.3854011479957009, + "AC3_3": 0.30381990474189, + "AC3_4": 0.2496351710946193, + "AC3_5": 0.21358474204554884, + "AC3_6": 0.18806480113748816, + "AC3_7": 0.16878397707212664 + }, + "prompt_5": { + "overall_acc": 0.3019047619047619, + "language_acc": { + "Malay": 0.29333333333333333, + "English": 0.36666666666666664, + "Vietnamese": 0.2866666666666667, + "Spanish": 0.3, + "Indonesian": 0.26666666666666666, + "Filipino": 0.3, + "Chinese": 0.3 + }, + "consistency_score_2": 0.44095238095238093, + "consistency_score_3": 0.2571428571428571, + "consistency_score_4": 0.17333333333333334, + "consistency_score_5": 0.126031746031746, + "consistency_score_6": 0.09523809523809523, + "consistency_score_7": 0.07333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.42, + "Malay,Vietnamese": 0.5066666666666667, + "Malay,Spanish": 0.46, + "Malay,Indonesian": 0.6533333333333333, + "Malay,Filipino": 0.5133333333333333, + "Malay,Chinese": 0.44, + "English,Vietnamese": 0.34, + "English,Spanish": 0.5066666666666667, + "English,Indonesian": 0.38666666666666666, + "English,Filipino": 0.4666666666666667, + "English,Chinese": 0.26666666666666666, + "Vietnamese,Spanish": 0.3933333333333333, + "Vietnamese,Indonesian": 0.49333333333333335, + "Vietnamese,Filipino": 0.44666666666666666, + "Vietnamese,Chinese": 0.46, + "Spanish,Indonesian": 0.48, + "Spanish,Filipino": 0.4533333333333333, + "Spanish,Chinese": 0.31333333333333335, + "Indonesian,Filipino": 0.49333333333333335, + "Indonesian,Chinese": 0.4, + "Filipino,Chinese": 0.36666666666666664 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.22666666666666666, + "Malay,English,Spanish": 0.3, + "Malay,English,Indonesian": 0.31333333333333335, + "Malay,English,Filipino": 0.29333333333333333, + "Malay,English,Chinese": 0.18, + "Malay,Vietnamese,Spanish": 0.28, + "Malay,Vietnamese,Indonesian": 0.38, + "Malay,Vietnamese,Filipino": 0.32666666666666666, + "Malay,Vietnamese,Chinese": 0.2866666666666667, + "Malay,Spanish,Indonesian": 0.36, + "Malay,Spanish,Filipino": 0.29333333333333333, + "Malay,Spanish,Chinese": 0.2, + "Malay,Indonesian,Filipino": 0.36, + "Malay,Indonesian,Chinese": 0.30666666666666664, + "Malay,Filipino,Chinese": 0.24, + "English,Vietnamese,Spanish": 0.22666666666666666, + "English,Vietnamese,Indonesian": 0.22, + "English,Vietnamese,Filipino": 0.24666666666666667, + "English,Vietnamese,Chinese": 0.15333333333333332, + "English,Spanish,Indonesian": 0.26666666666666666, + "English,Spanish,Filipino": 0.2866666666666667, + "English,Spanish,Chinese": 0.16, + "English,Indonesian,Filipino": 0.26, + "English,Indonesian,Chinese": 0.17333333333333334, + "English,Filipino,Chinese": 0.19333333333333333, + "Vietnamese,Spanish,Indonesian": 0.26666666666666666, + "Vietnamese,Spanish,Filipino": 0.26, + "Vietnamese,Spanish,Chinese": 0.20666666666666667, + "Vietnamese,Indonesian,Filipino": 0.31333333333333335, + "Vietnamese,Indonesian,Chinese": 0.26666666666666666, + "Vietnamese,Filipino,Chinese": 0.22666666666666666, + "Spanish,Indonesian,Filipino": 0.3, + "Spanish,Indonesian,Chinese": 0.21333333333333335, + "Spanish,Filipino,Chinese": 0.18, + "Indonesian,Filipino,Chinese": 0.23333333333333334 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.18, + "Malay,English,Vietnamese,Indonesian": 0.19333333333333333, + "Malay,English,Vietnamese,Filipino": 0.19333333333333333, + "Malay,English,Vietnamese,Chinese": 0.13333333333333333, + "Malay,English,Spanish,Indonesian": 0.23333333333333334, + "Malay,English,Spanish,Filipino": 0.22666666666666666, + "Malay,English,Spanish,Chinese": 0.12, + "Malay,English,Indonesian,Filipino": 0.21333333333333335, + "Malay,English,Indonesian,Chinese": 0.15333333333333332, + "Malay,English,Filipino,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.23333333333333334, + "Malay,Vietnamese,Spanish,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "Malay,Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.18, + "Malay,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Malay,Spanish,Indonesian,Chinese": 0.16666666666666666, + "Malay,Spanish,Filipino,Chinese": 0.13333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.18, + "English,Vietnamese,Spanish,Indonesian": 0.16666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.18666666666666668, + "English,Vietnamese,Spanish,Chinese": 0.11333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.18, + "English,Vietnamese,Indonesian,Chinese": 0.12666666666666668, + "English,Vietnamese,Filipino,Chinese": 0.13333333333333333, + "English,Spanish,Indonesian,Filipino": 0.18666666666666668, + "English,Spanish,Indonesian,Chinese": 0.12, + "English,Spanish,Filipino,Chinese": 0.13333333333333333, + "English,Indonesian,Filipino,Chinese": 0.14, + "Vietnamese,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Vietnamese,Spanish,Filipino,Chinese": 0.14, + "Vietnamese,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.15333333333333332, + "Malay,English,Vietnamese,Spanish,Filipino": 0.16, + "Malay,English,Vietnamese,Spanish,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.16, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Spanish,Indonesian,Chinese": 0.1, + "Malay,English,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.14, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.13333333333333333, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.08, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.08666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333 + } + }, + "AC3_2": 0.35841514036689215, + "AC3_3": 0.2777318081761662, + "AC3_4": 0.22022712086214327, + "AC3_5": 0.1778281757391432, + "AC3_6": 0.14479844692026028, + "AC3_7": 0.11800338406330704 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.27516233766233766, + "language_acc": { + "English": 0.42045454545454547, + "Vietnamese": 0.23295454545454544, + "Chinese": 0.23295454545454544, + "Indonesian": 0.24431818181818182, + "Filipino": 0.26704545454545453, + "Spanish": 0.2897727272727273, + "Malay": 0.23863636363636365 + }, + "consistency_score_2": 0.351461038961039, + "consistency_score_3": 0.1529220779220779, + "consistency_score_4": 0.07499999999999998, + "consistency_score_5": 0.03896103896103895, + "consistency_score_6": 0.022727272727272728, + "consistency_score_7": 0.017045454545454544, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.23863636363636365, + "English,Chinese": 0.25, + "English,Indonesian": 0.26136363636363635, + "English,Filipino": 0.3522727272727273, + "English,Spanish": 0.3693181818181818, + "English,Malay": 0.29545454545454547, + "Vietnamese,Chinese": 0.1875, + "Vietnamese,Indonesian": 0.4375, + "Vietnamese,Filipino": 0.45454545454545453, + "Vietnamese,Spanish": 0.3068181818181818, + "Vietnamese,Malay": 0.4659090909090909, + "Chinese,Indonesian": 0.26704545454545453, + "Chinese,Filipino": 0.26136363636363635, + "Chinese,Spanish": 0.2556818181818182, + "Chinese,Malay": 0.24431818181818182, + "Indonesian,Filipino": 0.48295454545454547, + "Indonesian,Spanish": 0.42613636363636365, + "Indonesian,Malay": 0.5340909090909091, + "Filipino,Spanish": 0.4431818181818182, + "Filipino,Malay": 0.4772727272727273, + "Spanish,Malay": 0.3693181818181818 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.05113636363636364, + "English,Vietnamese,Indonesian": 0.10227272727272728, + "English,Vietnamese,Filipino": 0.13636363636363635, + "English,Vietnamese,Spanish": 0.09659090909090909, + "English,Vietnamese,Malay": 0.125, + "English,Chinese,Indonesian": 0.07386363636363637, + "English,Chinese,Filipino": 0.10227272727272728, + "English,Chinese,Spanish": 0.08522727272727272, + "English,Chinese,Malay": 0.09659090909090909, + "English,Indonesian,Filipino": 0.1534090909090909, + "English,Indonesian,Spanish": 0.1534090909090909, + "English,Indonesian,Malay": 0.14772727272727273, + "English,Filipino,Spanish": 0.19886363636363635, + "English,Filipino,Malay": 0.17613636363636365, + "English,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian": 0.07954545454545454, + "Vietnamese,Chinese,Filipino": 0.09090909090909091, + "Vietnamese,Chinese,Spanish": 0.07954545454545454, + "Vietnamese,Chinese,Malay": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino": 0.26136363636363635, + "Vietnamese,Indonesian,Spanish": 0.1875, + "Vietnamese,Indonesian,Malay": 0.2840909090909091, + "Vietnamese,Filipino,Spanish": 0.20454545454545456, + "Vietnamese,Filipino,Malay": 0.2727272727272727, + "Vietnamese,Spanish,Malay": 0.17613636363636365, + "Chinese,Indonesian,Filipino": 0.13636363636363635, + "Chinese,Indonesian,Spanish": 0.11931818181818182, + "Chinese,Indonesian,Malay": 0.1534090909090909, + "Chinese,Filipino,Spanish": 0.11363636363636363, + "Chinese,Filipino,Malay": 0.13068181818181818, + "Chinese,Spanish,Malay": 0.10227272727272728, + "Indonesian,Filipino,Spanish": 0.26704545454545453, + "Indonesian,Filipino,Malay": 0.30113636363636365, + "Indonesian,Spanish,Malay": 0.24431818181818182, + "Filipino,Spanish,Malay": 0.2215909090909091 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino": 0.03409090909090909, + "English,Vietnamese,Chinese,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Malay": 0.028409090909090908, + "English,Vietnamese,Indonesian,Filipino": 0.07386363636363637, + "English,Vietnamese,Indonesian,Spanish": 0.0625, + "English,Vietnamese,Indonesian,Malay": 0.06818181818181818, + "English,Vietnamese,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Indonesian,Filipino": 0.045454545454545456, + "English,Chinese,Indonesian,Spanish": 0.045454545454545456, + "English,Chinese,Indonesian,Malay": 0.05113636363636364, + "English,Chinese,Filipino,Spanish": 0.045454545454545456, + "English,Chinese,Filipino,Malay": 0.0625, + "English,Chinese,Spanish,Malay": 0.045454545454545456, + "English,Indonesian,Filipino,Spanish": 0.10227272727272728, + "English,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Indonesian,Spanish,Malay": 0.07954545454545454, + "English,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Filipino": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Spanish": 0.05113636363636364, + "Vietnamese,Chinese,Indonesian,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Filipino,Spanish": 0.056818181818181816, + "Vietnamese,Chinese,Filipino,Malay": 0.0625, + "Vietnamese,Chinese,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "Vietnamese,Indonesian,Filipino,Malay": 0.18181818181818182, + "Vietnamese,Indonesian,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Filipino,Spanish,Malay": 0.13068181818181818, + "Chinese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "Chinese,Indonesian,Spanish,Malay": 0.09090909090909091, + "Chinese,Filipino,Spanish,Malay": 0.0625, + "Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino,Malay": 0.022727272727272728, + "English,Vietnamese,Chinese,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "English,Chinese,Indonesian,Filipino,Malay": 0.03409090909090909, + "English,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "English,Chinese,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.05113636363636364, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + } + }, + "AC3_2": 0.3086665600745553, + "AC3_3": 0.19658924688462084, + "AC3_4": 0.11787204447259655, + "AC3_5": 0.06825732404954808, + "AC3_6": 0.04198662371639825, + "AC3_7": 0.032102272716286614 + }, + "prompt_2": { + "overall_acc": 0.27029220779220775, + "language_acc": { + "English": 0.4147727272727273, + "Vietnamese": 0.23295454545454544, + "Chinese": 0.25, + "Indonesian": 0.22727272727272727, + "Filipino": 0.2556818181818182, + "Spanish": 0.2784090909090909, + "Malay": 0.23295454545454544 + }, + "consistency_score_2": 0.35984848484848486, + "consistency_score_3": 0.15925324675324679, + "consistency_score_4": 0.07808441558441559, + "consistency_score_5": 0.04004329004329005, + "consistency_score_6": 0.021103896103896104, + "consistency_score_7": 0.011363636363636364, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.25, + "English,Chinese": 0.26136363636363635, + "English,Indonesian": 0.2897727272727273, + "English,Filipino": 0.36363636363636365, + "English,Spanish": 0.3693181818181818, + "English,Malay": 0.26704545454545453, + "Vietnamese,Chinese": 0.19318181818181818, + "Vietnamese,Indonesian": 0.4715909090909091, + "Vietnamese,Filipino": 0.5625, + "Vietnamese,Spanish": 0.3352272727272727, + "Vietnamese,Malay": 0.5113636363636364, + "Chinese,Indonesian": 0.2215909090909091, + "Chinese,Filipino": 0.20454545454545456, + "Chinese,Spanish": 0.2556818181818182, + "Chinese,Malay": 0.19318181818181818, + "Indonesian,Filipino": 0.48295454545454547, + "Indonesian,Spanish": 0.4659090909090909, + "Indonesian,Malay": 0.5909090909090909, + "Filipino,Spanish": 0.4090909090909091, + "Filipino,Malay": 0.4715909090909091, + "Spanish,Malay": 0.38636363636363635 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.056818181818181816, + "English,Vietnamese,Indonesian": 0.11931818181818182, + "English,Vietnamese,Filipino": 0.17045454545454544, + "English,Vietnamese,Spanish": 0.11931818181818182, + "English,Vietnamese,Malay": 0.125, + "English,Chinese,Indonesian": 0.0625, + "English,Chinese,Filipino": 0.09090909090909091, + "English,Chinese,Spanish": 0.09090909090909091, + "English,Chinese,Malay": 0.0625, + "English,Indonesian,Filipino": 0.17613636363636365, + "English,Indonesian,Spanish": 0.18181818181818182, + "English,Indonesian,Malay": 0.1590909090909091, + "English,Filipino,Spanish": 0.17613636363636365, + "English,Filipino,Malay": 0.1590909090909091, + "English,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian": 0.09659090909090909, + "Vietnamese,Chinese,Filipino": 0.10227272727272728, + "Vietnamese,Chinese,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Malay": 0.07954545454545454, + "Vietnamese,Indonesian,Filipino": 0.3125, + "Vietnamese,Indonesian,Spanish": 0.21022727272727273, + "Vietnamese,Indonesian,Malay": 0.3352272727272727, + "Vietnamese,Filipino,Spanish": 0.23295454545454544, + "Vietnamese,Filipino,Malay": 0.3352272727272727, + "Vietnamese,Spanish,Malay": 0.19886363636363635, + "Chinese,Indonesian,Filipino": 0.09659090909090909, + "Chinese,Indonesian,Spanish": 0.125, + "Chinese,Indonesian,Malay": 0.10795454545454546, + "Chinese,Filipino,Spanish": 0.10795454545454546, + "Chinese,Filipino,Malay": 0.07386363636363637, + "Chinese,Spanish,Malay": 0.09090909090909091, + "Indonesian,Filipino,Spanish": 0.2556818181818182, + "Indonesian,Filipino,Malay": 0.32386363636363635, + "Indonesian,Spanish,Malay": 0.2840909090909091, + "Filipino,Spanish,Malay": 0.2159090909090909 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.028409090909090908, + "English,Vietnamese,Chinese,Filipino": 0.03977272727272727, + "English,Vietnamese,Chinese,Spanish": 0.03977272727272727, + "English,Vietnamese,Chinese,Malay": 0.017045454545454544, + "English,Vietnamese,Indonesian,Filipino": 0.10227272727272728, + "English,Vietnamese,Indonesian,Spanish": 0.08522727272727272, + "English,Vietnamese,Indonesian,Malay": 0.07386363636363637, + "English,Vietnamese,Filipino,Spanish": 0.10227272727272728, + "English,Vietnamese,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino": 0.03977272727272727, + "English,Chinese,Indonesian,Spanish": 0.045454545454545456, + "English,Chinese,Indonesian,Malay": 0.022727272727272728, + "English,Chinese,Filipino,Spanish": 0.056818181818181816, + "English,Chinese,Filipino,Malay": 0.03409090909090909, + "English,Chinese,Spanish,Malay": 0.022727272727272728, + "English,Indonesian,Filipino,Spanish": 0.11363636363636363, + "English,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Indonesian,Spanish,Malay": 0.10227272727272728, + "English,Filipino,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Filipino": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Spanish": 0.0625, + "Vietnamese,Chinese,Indonesian,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Filipino,Spanish": 0.0625, + "Vietnamese,Chinese,Filipino,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Spanish,Malay": 0.05113636363636364, + "Vietnamese,Indonesian,Filipino,Spanish": 0.1590909090909091, + "Vietnamese,Indonesian,Filipino,Malay": 0.23295454545454544, + "Vietnamese,Indonesian,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Filipino,Spanish,Malay": 0.14772727272727273, + "Chinese,Indonesian,Filipino,Spanish": 0.0625, + "Chinese,Indonesian,Filipino,Malay": 0.05113636363636364, + "Chinese,Indonesian,Spanish,Malay": 0.0625, + "Chinese,Filipino,Spanish,Malay": 0.03409090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.03409090909090909, + "English,Vietnamese,Chinese,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "English,Chinese,Indonesian,Filipino,Malay": 0.017045454545454544, + "English,Chinese,Indonesian,Spanish,Malay": 0.011363636363636364, + "English,Chinese,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364 + } + }, + "AC3_2": 0.3087064287733683, + "AC3_3": 0.2004207526700123, + "AC3_4": 0.12116547242379588, + "AC3_5": 0.06975282779486737, + "AC3_6": 0.03915096044671701, + "AC3_7": 0.021810322234855295 + }, + "prompt_3": { + "overall_acc": 0.27191558441558444, + "language_acc": { + "English": 0.4034090909090909, + "Vietnamese": 0.24431818181818182, + "Chinese": 0.23295454545454544, + "Indonesian": 0.2215909090909091, + "Filipino": 0.26136363636363635, + "Spanish": 0.30113636363636365, + "Malay": 0.23863636363636365 + }, + "consistency_score_2": 0.35714285714285715, + "consistency_score_3": 0.15633116883116882, + "consistency_score_4": 0.075, + "consistency_score_5": 0.036525974025974024, + "consistency_score_6": 0.017045454545454548, + "consistency_score_7": 0.005681818181818182, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.23863636363636365, + "English,Chinese": 0.22727272727272727, + "English,Indonesian": 0.29545454545454547, + "English,Filipino": 0.3409090909090909, + "English,Spanish": 0.4147727272727273, + "English,Malay": 0.2784090909090909, + "Vietnamese,Chinese": 0.21022727272727273, + "Vietnamese,Indonesian": 0.5056818181818182, + "Vietnamese,Filipino": 0.5340909090909091, + "Vietnamese,Spanish": 0.3181818181818182, + "Vietnamese,Malay": 0.5340909090909091, + "Chinese,Indonesian": 0.18181818181818182, + "Chinese,Filipino": 0.24431818181818182, + "Chinese,Spanish": 0.2556818181818182, + "Chinese,Malay": 0.18181818181818182, + "Indonesian,Filipino": 0.4715909090909091, + "Indonesian,Spanish": 0.4318181818181818, + "Indonesian,Malay": 0.5738636363636364, + "Filipino,Spanish": 0.4090909090909091, + "Filipino,Malay": 0.48863636363636365, + "Spanish,Malay": 0.36363636363636365 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.045454545454545456, + "English,Vietnamese,Indonesian": 0.13636363636363635, + "English,Vietnamese,Filipino": 0.16477272727272727, + "English,Vietnamese,Spanish": 0.125, + "English,Vietnamese,Malay": 0.13636363636363635, + "English,Chinese,Indonesian": 0.03977272727272727, + "English,Chinese,Filipino": 0.07386363636363637, + "English,Chinese,Spanish": 0.09090909090909091, + "English,Chinese,Malay": 0.056818181818181816, + "English,Indonesian,Filipino": 0.18181818181818182, + "English,Indonesian,Spanish": 0.18181818181818182, + "English,Indonesian,Malay": 0.17613636363636365, + "English,Filipino,Spanish": 0.1875, + "English,Filipino,Malay": 0.16477272727272727, + "English,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian": 0.08522727272727272, + "Vietnamese,Chinese,Filipino": 0.11363636363636363, + "Vietnamese,Chinese,Spanish": 0.08522727272727272, + "Vietnamese,Chinese,Malay": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino": 0.3125, + "Vietnamese,Indonesian,Spanish": 0.21022727272727273, + "Vietnamese,Indonesian,Malay": 0.3522727272727273, + "Vietnamese,Filipino,Spanish": 0.2159090909090909, + "Vietnamese,Filipino,Malay": 0.3409090909090909, + "Vietnamese,Spanish,Malay": 0.19886363636363635, + "Chinese,Indonesian,Filipino": 0.08522727272727272, + "Chinese,Indonesian,Spanish": 0.07954545454545454, + "Chinese,Indonesian,Malay": 0.07386363636363637, + "Chinese,Filipino,Spanish": 0.11931818181818182, + "Chinese,Filipino,Malay": 0.09090909090909091, + "Chinese,Spanish,Malay": 0.0625, + "Indonesian,Filipino,Spanish": 0.24431818181818182, + "Indonesian,Filipino,Malay": 0.32954545454545453, + "Indonesian,Spanish,Malay": 0.2556818181818182, + "Filipino,Spanish,Malay": 0.21022727272727273 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino": 0.03409090909090909, + "English,Vietnamese,Chinese,Spanish": 0.028409090909090908, + "English,Vietnamese,Chinese,Malay": 0.017045454545454544, + "English,Vietnamese,Indonesian,Filipino": 0.10795454545454546, + "English,Vietnamese,Indonesian,Spanish": 0.08522727272727272, + "English,Vietnamese,Indonesian,Malay": 0.09090909090909091, + "English,Vietnamese,Filipino,Spanish": 0.10795454545454546, + "English,Vietnamese,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Indonesian,Filipino": 0.028409090909090908, + "English,Chinese,Indonesian,Spanish": 0.022727272727272728, + "English,Chinese,Indonesian,Malay": 0.011363636363636364, + "English,Chinese,Filipino,Spanish": 0.03977272727272727, + "English,Chinese,Filipino,Malay": 0.028409090909090908, + "English,Chinese,Spanish,Malay": 0.022727272727272728, + "English,Indonesian,Filipino,Spanish": 0.125, + "English,Indonesian,Filipino,Malay": 0.11363636363636363, + "English,Indonesian,Spanish,Malay": 0.10795454545454546, + "English,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Spanish": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Filipino,Spanish": 0.0625, + "Vietnamese,Chinese,Filipino,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "Vietnamese,Indonesian,Filipino,Malay": 0.23863636363636365, + "Vietnamese,Indonesian,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Filipino,Spanish,Malay": 0.13636363636363635, + "Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "Chinese,Indonesian,Filipino,Malay": 0.05113636363636364, + "Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "Chinese,Filipino,Spanish,Malay": 0.03409090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Spanish,Malay": 0.011363636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Filipino,Spanish,Malay": 0.07954545454545454, + "English,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "English,Chinese,Indonesian,Filipino,Malay": 0.011363636363636364, + "English,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, + "English,Chinese,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 + } + }, + "AC3_2": 0.3087557603195814, + "AC3_3": 0.19852517644508114, + "AC3_4": 0.11757136168814974, + "AC3_5": 0.06440105944596944, + "AC3_6": 0.03207992848736601, + "AC3_7": 0.01113104731124806 + }, + "prompt_4": { + "overall_acc": 0.273538961038961, + "language_acc": { + "English": 0.4090909090909091, + "Vietnamese": 0.22727272727272727, + "Chinese": 0.24431818181818182, + "Indonesian": 0.25, + "Filipino": 0.25, + "Spanish": 0.2840909090909091, + "Malay": 0.25 + }, + "consistency_score_2": 0.3609307359307359, + "consistency_score_3": 0.15925324675324673, + "consistency_score_4": 0.07499999999999998, + "consistency_score_5": 0.0349025974025974, + "consistency_score_6": 0.01542207792207792, + "consistency_score_7": 0.005681818181818182, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.25, + "English,Chinese": 0.26704545454545453, + "English,Indonesian": 0.30113636363636365, + "English,Filipino": 0.35795454545454547, + "English,Spanish": 0.38636363636363635, + "English,Malay": 0.2727272727272727, + "Vietnamese,Chinese": 0.11931818181818182, + "Vietnamese,Indonesian": 0.48863636363636365, + "Vietnamese,Filipino": 0.5113636363636364, + "Vietnamese,Spanish": 0.3522727272727273, + "Vietnamese,Malay": 0.48863636363636365, + "Chinese,Indonesian": 0.26704545454545453, + "Chinese,Filipino": 0.17613636363636365, + "Chinese,Spanish": 0.25, + "Chinese,Malay": 0.20454545454545456, + "Indonesian,Filipino": 0.4943181818181818, + "Indonesian,Spanish": 0.5, + "Indonesian,Malay": 0.5511363636363636, + "Filipino,Spanish": 0.4090909090909091, + "Filipino,Malay": 0.4943181818181818, + "Spanish,Malay": 0.4375 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.03977272727272727, + "English,Vietnamese,Indonesian": 0.125, + "English,Vietnamese,Filipino": 0.14204545454545456, + "English,Vietnamese,Spanish": 0.11931818181818182, + "English,Vietnamese,Malay": 0.11363636363636363, + "English,Chinese,Indonesian": 0.10227272727272728, + "English,Chinese,Filipino": 0.08522727272727272, + "English,Chinese,Spanish": 0.09659090909090909, + "English,Chinese,Malay": 0.07386363636363637, + "English,Indonesian,Filipino": 0.17045454545454544, + "English,Indonesian,Spanish": 0.19318181818181818, + "English,Indonesian,Malay": 0.1534090909090909, + "English,Filipino,Spanish": 0.17613636363636365, + "English,Filipino,Malay": 0.16477272727272727, + "English,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian": 0.06818181818181818, + "Vietnamese,Chinese,Filipino": 0.0625, + "Vietnamese,Chinese,Spanish": 0.07386363636363637, + "Vietnamese,Chinese,Malay": 0.045454545454545456, + "Vietnamese,Indonesian,Filipino": 0.3068181818181818, + "Vietnamese,Indonesian,Spanish": 0.23863636363636365, + "Vietnamese,Indonesian,Malay": 0.3181818181818182, + "Vietnamese,Filipino,Spanish": 0.22727272727272727, + "Vietnamese,Filipino,Malay": 0.30113636363636365, + "Vietnamese,Spanish,Malay": 0.2215909090909091, + "Chinese,Indonesian,Filipino": 0.09659090909090909, + "Chinese,Indonesian,Spanish": 0.14772727272727273, + "Chinese,Indonesian,Malay": 0.13068181818181818, + "Chinese,Filipino,Spanish": 0.07386363636363637, + "Chinese,Filipino,Malay": 0.07386363636363637, + "Chinese,Spanish,Malay": 0.11363636363636363, + "Indonesian,Filipino,Spanish": 0.2784090909090909, + "Indonesian,Filipino,Malay": 0.32954545454545453, + "Indonesian,Spanish,Malay": 0.3068181818181818, + "Filipino,Spanish,Malay": 0.24431818181818182 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino": 0.017045454545454544, + "English,Vietnamese,Chinese,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Malay": 0.011363636363636364, + "English,Vietnamese,Indonesian,Filipino": 0.08522727272727272, + "English,Vietnamese,Indonesian,Spanish": 0.07954545454545454, + "English,Vietnamese,Indonesian,Malay": 0.07386363636363637, + "English,Vietnamese,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Spanish,Malay": 0.06818181818181818, + "English,Chinese,Indonesian,Filipino": 0.05113636363636364, + "English,Chinese,Indonesian,Spanish": 0.06818181818181818, + "English,Chinese,Indonesian,Malay": 0.045454545454545456, + "English,Chinese,Filipino,Spanish": 0.03409090909090909, + "English,Chinese,Filipino,Malay": 0.03977272727272727, + "English,Chinese,Spanish,Malay": 0.03977272727272727, + "English,Indonesian,Filipino,Spanish": 0.11363636363636363, + "English,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Indonesian,Spanish,Malay": 0.10227272727272728, + "English,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Filipino": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Spanish": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Filipino,Spanish": 0.045454545454545456, + "Vietnamese,Chinese,Filipino,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "Vietnamese,Indonesian,Filipino,Malay": 0.21022727272727273, + "Vietnamese,Indonesian,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Filipino,Spanish,Malay": 0.14204545454545456, + "Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "Chinese,Indonesian,Filipino,Malay": 0.0625, + "Chinese,Indonesian,Spanish,Malay": 0.09090909090909091, + "Chinese,Filipino,Spanish,Malay": 0.03977272727272727, + "Indonesian,Filipino,Spanish,Malay": 0.1875 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Spanish,Malay": 0.011363636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.045454545454545456, + "English,Vietnamese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "English,Chinese,Indonesian,Filipino,Malay": 0.028409090909090908, + "English,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "English,Chinese,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 + } + }, + "AC3_2": 0.3112161824890927, + "AC3_3": 0.2013066171922118, + "AC3_4": 0.11772240331974056, + "AC3_5": 0.06190618589927317, + "AC3_6": 0.02919797897720035, + "AC3_7": 0.011132399573180067 + }, + "prompt_5": { + "overall_acc": 0.28409090909090906, + "language_acc": { + "English": 0.3977272727272727, + "Vietnamese": 0.2556818181818182, + "Chinese": 0.2727272727272727, + "Indonesian": 0.25, + "Filipino": 0.2727272727272727, + "Spanish": 0.26704545454545453, + "Malay": 0.2727272727272727 + }, + "consistency_score_2": 0.37662337662337664, + "consistency_score_3": 0.1798701298701299, + "consistency_score_4": 0.09837662337662338, + "consistency_score_5": 0.05871212121212121, + "consistency_score_6": 0.036525974025974024, + "consistency_score_7": 0.022727272727272728, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.26704545454545453, + "English,Chinese": 0.2840909090909091, + "English,Indonesian": 0.2897727272727273, + "English,Filipino": 0.3125, + "English,Spanish": 0.4034090909090909, + "English,Malay": 0.26136363636363635, + "Vietnamese,Chinese": 0.3522727272727273, + "Vietnamese,Indonesian": 0.42613636363636365, + "Vietnamese,Filipino": 0.4431818181818182, + "Vietnamese,Spanish": 0.3522727272727273, + "Vietnamese,Malay": 0.4659090909090909, + "Chinese,Indonesian": 0.2897727272727273, + "Chinese,Filipino": 0.2556818181818182, + "Chinese,Spanish": 0.2727272727272727, + "Chinese,Malay": 0.30113636363636365, + "Indonesian,Filipino": 0.4659090909090909, + "Indonesian,Spanish": 0.4375, + "Indonesian,Malay": 0.5681818181818182, + "Filipino,Spanish": 0.48295454545454547, + "Filipino,Malay": 0.5568181818181818, + "Spanish,Malay": 0.42045454545454547 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.10227272727272728, + "English,Vietnamese,Indonesian": 0.125, + "English,Vietnamese,Filipino": 0.14772727272727273, + "English,Vietnamese,Spanish": 0.14772727272727273, + "English,Vietnamese,Malay": 0.13636363636363635, + "English,Chinese,Indonesian": 0.08522727272727272, + "English,Chinese,Filipino": 0.09090909090909091, + "English,Chinese,Spanish": 0.11931818181818182, + "English,Chinese,Malay": 0.11931818181818182, + "English,Indonesian,Filipino": 0.1590909090909091, + "English,Indonesian,Spanish": 0.1875, + "English,Indonesian,Malay": 0.1590909090909091, + "English,Filipino,Spanish": 0.20454545454545456, + "English,Filipino,Malay": 0.17045454545454544, + "English,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian": 0.1534090909090909, + "Vietnamese,Chinese,Filipino": 0.17045454545454544, + "Vietnamese,Chinese,Spanish": 0.14204545454545456, + "Vietnamese,Chinese,Malay": 0.19318181818181818, + "Vietnamese,Indonesian,Filipino": 0.2556818181818182, + "Vietnamese,Indonesian,Spanish": 0.20454545454545456, + "Vietnamese,Indonesian,Malay": 0.29545454545454547, + "Vietnamese,Filipino,Spanish": 0.22727272727272727, + "Vietnamese,Filipino,Malay": 0.3125, + "Vietnamese,Spanish,Malay": 0.19886363636363635, + "Chinese,Indonesian,Filipino": 0.125, + "Chinese,Indonesian,Spanish": 0.10227272727272728, + "Chinese,Indonesian,Malay": 0.17045454545454544, + "Chinese,Filipino,Spanish": 0.1534090909090909, + "Chinese,Filipino,Malay": 0.17045454545454544, + "Chinese,Spanish,Malay": 0.13636363636363635, + "Indonesian,Filipino,Spanish": 0.26136363636363635, + "Indonesian,Filipino,Malay": 0.32954545454545453, + "Indonesian,Spanish,Malay": 0.26704545454545453, + "Filipino,Spanish,Malay": 0.30113636363636365 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino": 0.06818181818181818, + "English,Vietnamese,Chinese,Spanish": 0.06818181818181818, + "English,Vietnamese,Chinese,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino": 0.07954545454545454, + "English,Vietnamese,Indonesian,Spanish": 0.07954545454545454, + "English,Vietnamese,Indonesian,Malay": 0.08522727272727272, + "English,Vietnamese,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino": 0.03977272727272727, + "English,Chinese,Indonesian,Spanish": 0.05113636363636364, + "English,Chinese,Indonesian,Malay": 0.056818181818181816, + "English,Chinese,Filipino,Spanish": 0.06818181818181818, + "English,Chinese,Filipino,Malay": 0.07954545454545454, + "English,Chinese,Spanish,Malay": 0.07386363636363637, + "English,Indonesian,Filipino,Spanish": 0.10227272727272728, + "English,Indonesian,Filipino,Malay": 0.09659090909090909, + "English,Indonesian,Spanish,Malay": 0.10795454545454546, + "English,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Chinese,Indonesian,Filipino": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Spanish": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Malay": 0.125, + "Vietnamese,Chinese,Filipino,Spanish": 0.10227272727272728, + "Vietnamese,Chinese,Filipino,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "Vietnamese,Indonesian,Filipino,Malay": 0.21022727272727273, + "Vietnamese,Indonesian,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Filipino,Spanish,Malay": 0.16477272727272727, + "Chinese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "Chinese,Indonesian,Filipino,Malay": 0.10795454545454546, + "Chinese,Indonesian,Spanish,Malay": 0.07954545454545454, + "Chinese,Filipino,Spanish,Malay": 0.10795454545454546, + "Indonesian,Filipino,Spanish,Malay": 0.1875 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.03977272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino,Malay": 0.0625, + "English,Vietnamese,Chinese,Spanish,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.0625, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Filipino,Spanish,Malay": 0.0625, + "English,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "English,Chinese,Indonesian,Filipino,Malay": 0.03977272727272727, + "English,Chinese,Indonesian,Spanish,Malay": 0.03977272727272727, + "English,Chinese,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.125, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.028409090909090908, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.03977272727272727, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + } + }, + "AC3_2": 0.32387759655585, + "AC3_3": 0.2202748265954392, + "AC3_4": 0.14614523842451457, + "AC3_5": 0.09731290805800132, + "AC3_6": 0.06472957420305603, + "AC3_7": 0.04208754207382467 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4174757281553398 + }, + "prompt_2": { + "accuracy": 0.4174757281553398 + }, + "prompt_3": { + "accuracy": 0.39805825242718446 + }, + "prompt_4": { + "accuracy": 0.3883495145631068 + }, + "prompt_5": { + "accuracy": 0.4077669902912621 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2571428571428571 + }, + "prompt_2": { + "accuracy": 0.24761904761904763 + }, + "prompt_3": { + "accuracy": 0.24761904761904763 + }, + "prompt_4": { + "accuracy": 0.23809523809523808 + }, + "prompt_5": { + "accuracy": 0.2571428571428571 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 - }, - "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 - }, - "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 - }, - "indommlu": { + "prompt_1": { + "accuracy": 0.27102803738317754 + }, + "prompt_2": { + "accuracy": 0.29906542056074764 + }, + "prompt_3": { + "accuracy": 0.2897196261682243 + }, + "prompt_4": { + "accuracy": 0.29906542056074764 + }, + "prompt_5": { + "accuracy": 0.308411214953271 + } + }, + "ph_eval": { + "prompt_1": { + "accuracy": 0.18, + "category_acc": { + "brand": 0.1, + "demographics": 0.0, + "biology": 0.1, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.2, + "culture": 0.2, + "film": 0.2, + "law": 0.1, + "geography": 0.2 + } + }, + "prompt_2": { + "accuracy": 0.2, + "category_acc": { + "brand": 0.1, + "demographics": 0.0, + "biology": 0.2, + "history": 0.2, + "literature": 0.2, + "politics": 0.2, + "culture": 0.2, + "film": 0.3, + "law": 0.1, + "geography": 0.4 + } + }, + "prompt_3": { + "accuracy": 0.22, + "category_acc": { + "brand": 0.1, + "demographics": 0.2, + "biology": 0.2, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.2, + "culture": 0.2, + "film": 0.2, + "law": 0.2, + "geography": 0.4 + } + }, + "prompt_4": { + "accuracy": 0.2, + "category_acc": { + "brand": 0.1, + "demographics": 0.0, + "biology": 0.1, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.3, + "culture": 0.3, + "film": 0.2, + "law": 0.1, + "geography": 0.2 + } + }, + "prompt_5": { + "accuracy": 0.22, + "category_acc": { + "brand": 0.1, + "demographics": 0.4, + "biology": 0.1, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.2, + "culture": 0.2, + "film": 0.2, + "law": 0.2, + "geography": 0.4 + } + } + }, + "sing2eng": { + "prompt_1": { + "bleu_score": 0.17665683450197942 + }, + "prompt_2": { + "bleu_score": 0.17735413604061173 + }, + "prompt_3": { + "bleu_score": 0.17959304270770785 + }, + "prompt_4": { + "bleu_score": 0.1838917713887228 + }, + "prompt_5": { + "bleu_score": 0.16309215405916494 + } + }, + "indommlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, @@ -1074,179 +11509,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.06418602733996755 + }, + "prompt_2": { + "bleu_score": 0.06590060307955009 + }, + "prompt_3": { + "bleu_score": 0.06365931440582745 + }, + "prompt_4": { + "bleu_score": 0.06507896924645015 + }, + "prompt_5": { + "bleu_score": 0.060435287929413294 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.03221763747040638 + }, + "prompt_2": { + "bleu_score": 0.031204686027514566 + }, + "prompt_3": { + "bleu_score": 0.030777421177723783 + }, + "prompt_4": { + "bleu_score": 0.033936232510601766 + }, + "prompt_5": { + "bleu_score": 0.03126160261863766 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.00830523133588563 + }, + "prompt_2": { + "bleu_score": 0.014016667920214354 + }, + "prompt_3": { + "bleu_score": 0.013614130767628666 + }, + "prompt_4": { + "bleu_score": 0.012608727089008016 + }, + "prompt_5": { + "bleu_score": 0.01184024542485417 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.057301951844793024 + }, + "prompt_2": { + "bleu_score": 0.057879430015902456 + }, + "prompt_3": { + "bleu_score": 0.056389292384742225 + }, + "prompt_4": { + "bleu_score": 0.06082482099306333 + }, + "prompt_5": { + "bleu_score": 0.056897436148833094 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.338389731621937 + }, + "prompt_2": { + "accuracy": 0.3278879813302217 + }, + "prompt_3": { + "accuracy": 0.33022170361726955 + }, + "prompt_4": { + "accuracy": 0.33255542590431736 + }, + "prompt_5": { + "accuracy": 0.34072345390898484 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3506614229531641, + "category_acc": { + "high_school_european_history": 0.4878048780487805, + "business_ethics": 0.3939393939393939, + "clinical_knowledge": 0.3787878787878788, + "medical_genetics": 0.45454545454545453, + "high_school_us_history": 0.42857142857142855, + "high_school_physics": 0.25333333333333335, + "high_school_world_history": 0.4576271186440678, + "virology": 0.36363636363636365, + "high_school_microeconomics": 0.379746835443038, + "econometrics": 0.26548672566371684, + "college_computer_science": 0.30303030303030304, + "high_school_biology": 0.31715210355987056, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.33451957295373663, + "philosophy": 0.34838709677419355, + "professional_medicine": 0.3025830258302583, + "nutrition": 0.3377049180327869, + "global_facts": 0.30303030303030304, + "machine_learning": 0.26126126126126126, + "security_studies": 0.36475409836065575, + "public_relations": 0.41284403669724773, + "professional_psychology": 0.353518821603928, + "prehistory": 0.35294117647058826, + "anatomy": 0.31343283582089554, + "human_sexuality": 0.3769230769230769, + "college_medicine": 0.32558139534883723, + "high_school_government_and_politics": 0.4166666666666667, + "college_chemistry": 0.21212121212121213, + "logical_fallacies": 0.41975308641975306, + "high_school_geography": 0.41116751269035534, + "elementary_mathematics": 0.26525198938992045, + "human_aging": 0.3783783783783784, + "college_mathematics": 0.21212121212121213, + "high_school_psychology": 0.4485294117647059, + "formal_logic": 0.32, + "high_school_statistics": 0.26046511627906976, + "international_law": 0.525, + "high_school_mathematics": 0.241635687732342, + "high_school_computer_science": 0.3333333333333333, + "conceptual_physics": 0.2863247863247863, + "miscellaneous": 0.4079283887468031, + "high_school_chemistry": 0.2623762376237624, + "marketing": 0.6008583690987125, + "professional_law": 0.33268101761252444, + "management": 0.4411764705882353, + "college_physics": 0.25742574257425743, + "jurisprudence": 0.3925233644859813, + "world_religions": 0.29411764705882354, + "sociology": 0.535, + "us_foreign_policy": 0.5050505050505051, + "high_school_macroeconomics": 0.32904884318766064, + "computer_security": 0.43434343434343436, + "moral_scenarios": 0.23825503355704697, + "moral_disputes": 0.3884057971014493, + "electrical_engineering": 0.3194444444444444, + "astronomy": 0.304635761589404, + "college_biology": 0.2727272727272727 + } + }, + "prompt_2": { + "accuracy": 0.35030389703253484, + "category_acc": { + "high_school_european_history": 0.5, + "business_ethics": 0.37373737373737376, + "clinical_knowledge": 0.3977272727272727, + "medical_genetics": 0.3939393939393939, + "high_school_us_history": 0.4236453201970443, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.4491525423728814, + "virology": 0.3515151515151515, + "high_school_microeconomics": 0.3881856540084388, + "econometrics": 0.2831858407079646, + "college_computer_science": 0.35353535353535354, + "high_school_biology": 0.2977346278317152, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.3416370106761566, + "philosophy": 0.35161290322580646, + "professional_medicine": 0.31365313653136534, + "nutrition": 0.34098360655737703, + "global_facts": 0.2222222222222222, + "machine_learning": 0.32432432432432434, + "security_studies": 0.3770491803278688, + "public_relations": 0.3761467889908257, + "professional_psychology": 0.36661211129296234, + "prehistory": 0.33436532507739936, + "anatomy": 0.30597014925373134, + "human_sexuality": 0.36153846153846153, + "college_medicine": 0.37790697674418605, + "high_school_government_and_politics": 0.4322916666666667, + "college_chemistry": 0.23232323232323232, + "logical_fallacies": 0.41975308641975306, + "high_school_geography": 0.39593908629441626, + "elementary_mathematics": 0.27055702917771884, + "human_aging": 0.3783783783783784, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.44485294117647056, + "formal_logic": 0.28, + "high_school_statistics": 0.26976744186046514, + "international_law": 0.5166666666666667, + "high_school_mathematics": 0.2527881040892193, + "high_school_computer_science": 0.3434343434343434, + "conceptual_physics": 0.2777777777777778, + "miscellaneous": 0.391304347826087, + "high_school_chemistry": 0.25742574257425743, + "marketing": 0.5665236051502146, + "professional_law": 0.33659491193737767, + "management": 0.47058823529411764, + "college_physics": 0.297029702970297, + "jurisprudence": 0.37383177570093457, + "world_religions": 0.29411764705882354, + "sociology": 0.5, + "us_foreign_policy": 0.46464646464646464, + "high_school_macroeconomics": 0.33676092544987146, + "computer_security": 0.40404040404040403, + "moral_scenarios": 0.23825503355704697, + "moral_disputes": 0.3739130434782609, + "electrical_engineering": 0.2916666666666667, + "astronomy": 0.32450331125827814, + "college_biology": 0.2867132867132867 + } + }, + "prompt_3": { + "accuracy": 0.3378619949946371, + "category_acc": { + "high_school_european_history": 0.4878048780487805, + "business_ethics": 0.35353535353535354, + "clinical_knowledge": 0.3712121212121212, + "medical_genetics": 0.37373737373737376, + "high_school_us_history": 0.4088669950738916, + "high_school_physics": 0.23333333333333334, + "high_school_world_history": 0.4533898305084746, + "virology": 0.36363636363636365, + "high_school_microeconomics": 0.350210970464135, + "econometrics": 0.3008849557522124, + "college_computer_science": 0.35353535353535354, + "high_school_biology": 0.3042071197411003, + "abstract_algebra": 0.20202020202020202, + "professional_accounting": 0.3202846975088968, + "philosophy": 0.34838709677419355, + "professional_medicine": 0.2952029520295203, + "nutrition": 0.36721311475409835, + "global_facts": 0.26262626262626265, + "machine_learning": 0.24324324324324326, + "security_studies": 0.35655737704918034, + "public_relations": 0.41284403669724773, + "professional_psychology": 0.3502454991816694, + "prehistory": 0.33746130030959753, + "anatomy": 0.31343283582089554, + "human_sexuality": 0.3153846153846154, + "college_medicine": 0.32558139534883723, + "high_school_government_and_politics": 0.4114583333333333, + "college_chemistry": 0.20202020202020202, + "logical_fallacies": 0.41358024691358025, + "high_school_geography": 0.38071065989847713, + "elementary_mathematics": 0.22811671087533156, + "human_aging": 0.36936936936936937, + "college_mathematics": 0.23232323232323232, + "high_school_psychology": 0.42463235294117646, + "formal_logic": 0.28, + "high_school_statistics": 0.2372093023255814, + "international_law": 0.5083333333333333, + "high_school_mathematics": 0.19330855018587362, + "high_school_computer_science": 0.37373737373737376, + "conceptual_physics": 0.2564102564102564, + "miscellaneous": 0.3836317135549872, + "high_school_chemistry": 0.26732673267326734, + "marketing": 0.5278969957081545, + "professional_law": 0.33529028049575993, + "management": 0.4215686274509804, + "college_physics": 0.2871287128712871, + "jurisprudence": 0.3644859813084112, + "world_religions": 0.2823529411764706, + "sociology": 0.5, + "us_foreign_policy": 0.45454545454545453, + "high_school_macroeconomics": 0.32390745501285345, + "computer_security": 0.35353535353535354, + "moral_scenarios": 0.23825503355704697, + "moral_disputes": 0.3710144927536232, + "electrical_engineering": 0.2708333333333333, + "astronomy": 0.32450331125827814, + "college_biology": 0.3006993006993007 + } + }, + "prompt_4": { + "accuracy": 0.3485877726135145, + "category_acc": { + "high_school_european_history": 0.49390243902439024, + "business_ethics": 0.35353535353535354, + "clinical_knowledge": 0.38636363636363635, + "medical_genetics": 0.40404040404040403, + "high_school_us_history": 0.4236453201970443, + "high_school_physics": 0.26666666666666666, + "high_school_world_history": 0.4703389830508475, + "virology": 0.3515151515151515, + "high_school_microeconomics": 0.379746835443038, + "econometrics": 0.3008849557522124, + "college_computer_science": 0.3333333333333333, + "high_school_biology": 0.3106796116504854, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.3416370106761566, + "philosophy": 0.34838709677419355, + "professional_medicine": 0.3173431734317343, + "nutrition": 0.3540983606557377, + "global_facts": 0.21212121212121213, + "machine_learning": 0.2702702702702703, + "security_studies": 0.36475409836065575, + "public_relations": 0.42201834862385323, + "professional_psychology": 0.353518821603928, + "prehistory": 0.34055727554179566, + "anatomy": 0.27611940298507465, + "human_sexuality": 0.35384615384615387, + "college_medicine": 0.3488372093023256, + "high_school_government_and_politics": 0.4166666666666667, + "college_chemistry": 0.2222222222222222, + "logical_fallacies": 0.4074074074074074, + "high_school_geography": 0.4010152284263959, + "elementary_mathematics": 0.26790450928381965, + "human_aging": 0.3918918918918919, + "college_mathematics": 0.26262626262626265, + "high_school_psychology": 0.43566176470588236, + "formal_logic": 0.32, + "high_school_statistics": 0.26976744186046514, + "international_law": 0.5, + "high_school_mathematics": 0.24907063197026022, + "high_school_computer_science": 0.3333333333333333, + "conceptual_physics": 0.3034188034188034, + "miscellaneous": 0.39258312020460356, + "high_school_chemistry": 0.27722772277227725, + "marketing": 0.5836909871244635, + "professional_law": 0.33202870189171557, + "management": 0.45098039215686275, + "college_physics": 0.25742574257425743, + "jurisprudence": 0.3644859813084112, + "world_religions": 0.27058823529411763, + "sociology": 0.54, + "us_foreign_policy": 0.47474747474747475, + "high_school_macroeconomics": 0.3393316195372751, + "computer_security": 0.41414141414141414, + "moral_scenarios": 0.23825503355704697, + "moral_disputes": 0.35942028985507246, + "electrical_engineering": 0.3194444444444444, + "astronomy": 0.32450331125827814, + "college_biology": 0.27972027972027974 + } + }, + "prompt_5": { + "accuracy": 0.34486950303897035, + "category_acc": { + "high_school_european_history": 0.47560975609756095, + "business_ethics": 0.37373737373737376, + "clinical_knowledge": 0.36363636363636365, + "medical_genetics": 0.3838383838383838, + "high_school_us_history": 0.43349753694581283, + "high_school_physics": 0.22666666666666666, + "high_school_world_history": 0.4703389830508475, + "virology": 0.3333333333333333, + "high_school_microeconomics": 0.34177215189873417, + "econometrics": 0.3008849557522124, + "college_computer_science": 0.29292929292929293, + "high_school_biology": 0.3106796116504854, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.35231316725978645, + "philosophy": 0.3387096774193548, + "professional_medicine": 0.31365313653136534, + "nutrition": 0.3442622950819672, + "global_facts": 0.30303030303030304, + "machine_learning": 0.24324324324324326, + "security_studies": 0.36475409836065575, + "public_relations": 0.3669724770642202, + "professional_psychology": 0.353518821603928, + "prehistory": 0.34365325077399383, + "anatomy": 0.29850746268656714, + "human_sexuality": 0.38461538461538464, + "college_medicine": 0.31976744186046513, + "high_school_government_and_politics": 0.4166666666666667, + "college_chemistry": 0.21212121212121213, + "logical_fallacies": 0.41975308641975306, + "high_school_geography": 0.4010152284263959, + "elementary_mathematics": 0.2572944297082228, + "human_aging": 0.4009009009009009, + "college_mathematics": 0.2222222222222222, + "high_school_psychology": 0.4264705882352941, + "formal_logic": 0.336, + "high_school_statistics": 0.2744186046511628, + "international_law": 0.5083333333333333, + "high_school_mathematics": 0.2379182156133829, + "high_school_computer_science": 0.3333333333333333, + "conceptual_physics": 0.28205128205128205, + "miscellaneous": 0.3938618925831202, + "high_school_chemistry": 0.25742574257425743, + "marketing": 0.592274678111588, + "professional_law": 0.3268101761252446, + "management": 0.45098039215686275, + "college_physics": 0.27722772277227725, + "jurisprudence": 0.3925233644859813, + "world_religions": 0.27058823529411763, + "sociology": 0.53, + "us_foreign_policy": 0.46464646464646464, + "high_school_macroeconomics": 0.3393316195372751, + "computer_security": 0.40404040404040403, + "moral_scenarios": 0.23825503355704697, + "moral_disputes": 0.3710144927536232, + "electrical_engineering": 0.2986111111111111, + "astronomy": 0.31125827814569534, + "college_biology": 0.2937062937062937 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24294205052005943 + }, + "prompt_2": { + "accuracy": 0.24294205052005943 + }, + "prompt_3": { + "accuracy": 0.24294205052005943 + }, + "prompt_4": { + "accuracy": 0.24294205052005943 + }, + "prompt_5": { + "accuracy": 0.24665676077265974 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.26214196762141967, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.14285714285714285, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.38095238095238093, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.2413793103448276, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.21666666666666667, + "business_administration": 0.13157894736842105, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.22448979591836735, + "high_school_politics": 0.0, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.2962962962962963, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.25, + "high_school_history": 0.08, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.3076923076923077, + "sports_science": 0.2916666666666667, + "plant_protection": 0.14814814814814814, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.2549019607843137, + "accountant": 0.3148148148148148, + "fire_engineer": 0.19444444444444445, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.3148148148148148, + "physician": 0.2962962962962963 + } + }, + "prompt_2": { + "accuracy": 0.26214196762141967, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.14285714285714285, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.38095238095238093, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.2413793103448276, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.21666666666666667, + "business_administration": 0.13157894736842105, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.22448979591836735, + "high_school_politics": 0.0, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.2962962962962963, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.25, + "high_school_history": 0.08, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.3076923076923077, + "sports_science": 0.2916666666666667, + "plant_protection": 0.14814814814814814, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.2549019607843137, + "accountant": 0.3148148148148148, + "fire_engineer": 0.19444444444444445, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.3148148148148148, + "physician": 0.2962962962962963 + } + }, + "prompt_3": { + "accuracy": 0.261519302615193, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.14285714285714285, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.38095238095238093, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.2413793103448276, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.21666666666666667, + "business_administration": 0.13157894736842105, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.22448979591836735, + "high_school_politics": 0.0, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.2962962962962963, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.25, + "high_school_history": 0.08, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.3076923076923077, + "sports_science": 0.2916666666666667, + "plant_protection": 0.14814814814814814, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.2549019607843137, + "accountant": 0.3148148148148148, + "fire_engineer": 0.19444444444444445, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.3148148148148148, + "physician": 0.2962962962962963 + } + }, + "prompt_4": { + "accuracy": 0.2640099626400996, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.11904761904761904, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.38095238095238093, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.2413793103448276, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.375, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.21666666666666667, + "business_administration": 0.13157894736842105, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.22448979591836735, + "high_school_politics": 0.0, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.3333333333333333, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.25, + "high_school_history": 0.04, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.3076923076923077, + "sports_science": 0.2916666666666667, + "plant_protection": 0.14814814814814814, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.2549019607843137, + "accountant": 0.3148148148148148, + "fire_engineer": 0.19444444444444445, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.3148148148148148, + "physician": 0.2962962962962963 + } + }, + "prompt_5": { + "accuracy": 0.2627646326276463, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.14285714285714285, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.38095238095238093, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.2413793103448276, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.21666666666666667, + "business_administration": 0.13157894736842105, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.22448979591836735, + "high_school_politics": 0.0, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.2962962962962963, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.25, + "high_school_history": 0.08, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.3076923076923077, + "sports_science": 0.2916666666666667, + "plant_protection": 0.14814814814814814, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.2549019607843137, + "accountant": 0.3148148148148148, + "fire_engineer": 0.19444444444444445, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.3148148148148148, + "physician": 0.2962962962962963 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.22580645161290322 + }, + "prompt_2": { + "accuracy": 0.22580645161290322 + }, + "prompt_3": { + "accuracy": 0.21505376344086022 + }, + "prompt_4": { + "accuracy": 0.25448028673835127 + }, + "prompt_5": { + "accuracy": 0.22580645161290322 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.25289241927128303, + "category_acc": { + "agronomy": 0.2485207100591716, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.2682926829268293, + "arts": 0.275, + "astronomy": 0.2, + "business_ethics": 0.24880382775119617, + "chinese_civil_service_exam": 0.2375, + "chinese_driving_rule": 0.2824427480916031, + "chinese_food_culture": 0.25, + "chinese_foreign_policy": 0.308411214953271, + "chinese_history": 0.23839009287925697, + "chinese_literature": 0.2549019607843137, + "chinese_teacher_qualification": 0.30726256983240224, + "clinical_knowledge": 0.2742616033755274, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.16822429906542055, + "college_engineering_hydrology": 0.25471698113207547, + "college_law": 0.24074074074074073, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.24175824175824176, + "computer_science": 0.2107843137254902, + "computer_security": 0.22807017543859648, + "conceptual_physics": 0.2789115646258503, + "construction_project_management": 0.2302158273381295, + "economics": 0.27044025157232704, + "education": 0.25766871165644173, + "electrical_engineering": 0.22674418604651161, + "elementary_chinese": 0.2261904761904762, + "elementary_commonsense": 0.2828282828282828, + "elementary_information_and_technology": 0.25210084033613445, + "elementary_mathematics": 0.23043478260869565, + "ethnology": 0.25925925925925924, + "food_science": 0.23776223776223776, + "genetics": 0.26704545454545453, + "global_facts": 0.2751677852348993, + "high_school_biology": 0.24260355029585798, + "high_school_chemistry": 0.25, + "high_school_geography": 0.2711864406779661, + "high_school_mathematics": 0.3231707317073171, + "high_school_physics": 0.18181818181818182, + "high_school_politics": 0.32167832167832167, + "human_sexuality": 0.2619047619047619, + "international_law": 0.2648648648648649, + "journalism": 0.22674418604651161, + "jurisprudence": 0.24817518248175183, + "legal_and_moral_basis": 0.2803738317757009, + "logical": 0.2601626016260163, + "machine_learning": 0.21311475409836064, + "management": 0.26666666666666666, + "marketing": 0.2611111111111111, + "marxist_theory": 0.24867724867724866, + "modern_chinese": 0.23275862068965517, + "nutrition": 0.25517241379310346, + "philosophy": 0.23809523809523808, + "professional_accounting": 0.28, + "professional_law": 0.22748815165876776, + "professional_medicine": 0.2473404255319149, + "professional_psychology": 0.23275862068965517, + "public_relations": 0.26436781609195403, + "security_study": 0.25925925925925924, + "sociology": 0.2831858407079646, + "sports_science": 0.2545454545454545, + "traditional_chinese_medicine": 0.23243243243243245, + "virology": 0.2485207100591716, + "world_history": 0.2484472049689441, + "world_religions": 0.28125 + } + }, + "prompt_2": { + "accuracy": 0.2532377827663616, + "category_acc": { + "agronomy": 0.24260355029585798, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.2621951219512195, + "arts": 0.275, + "astronomy": 0.21212121212121213, + "business_ethics": 0.24880382775119617, + "chinese_civil_service_exam": 0.25, + "chinese_driving_rule": 0.2824427480916031, + "chinese_food_culture": 0.25735294117647056, + "chinese_foreign_policy": 0.3177570093457944, + "chinese_history": 0.23529411764705882, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.3128491620111732, + "clinical_knowledge": 0.2742616033755274, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.16822429906542055, + "college_engineering_hydrology": 0.24528301886792453, + "college_law": 0.25, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.2641509433962264, + "college_medicine": 0.23076923076923078, + "computer_science": 0.20098039215686275, + "computer_security": 0.2222222222222222, + "conceptual_physics": 0.272108843537415, + "construction_project_management": 0.22302158273381295, + "economics": 0.27044025157232704, + "education": 0.25766871165644173, + "electrical_engineering": 0.23837209302325582, + "elementary_chinese": 0.2261904761904762, + "elementary_commonsense": 0.29292929292929293, + "elementary_information_and_technology": 0.25210084033613445, + "elementary_mathematics": 0.2391304347826087, + "ethnology": 0.25925925925925924, + "food_science": 0.24475524475524477, + "genetics": 0.2727272727272727, + "global_facts": 0.2751677852348993, + "high_school_biology": 0.24260355029585798, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.2796610169491525, + "high_school_mathematics": 0.31097560975609756, + "high_school_physics": 0.20909090909090908, + "high_school_politics": 0.32167832167832167, + "human_sexuality": 0.2619047619047619, + "international_law": 0.2648648648648649, + "journalism": 0.22674418604651161, + "jurisprudence": 0.2384428223844282, + "legal_and_moral_basis": 0.2803738317757009, + "logical": 0.2601626016260163, + "machine_learning": 0.21311475409836064, + "management": 0.26666666666666666, + "marketing": 0.2611111111111111, + "marxist_theory": 0.24867724867724866, + "modern_chinese": 0.2672413793103448, + "nutrition": 0.2620689655172414, + "philosophy": 0.24761904761904763, + "professional_accounting": 0.28, + "professional_law": 0.23222748815165878, + "professional_medicine": 0.2473404255319149, + "professional_psychology": 0.23706896551724138, + "public_relations": 0.26436781609195403, + "security_study": 0.25925925925925924, + "sociology": 0.2831858407079646, + "sports_science": 0.2545454545454545, + "traditional_chinese_medicine": 0.22702702702702704, + "virology": 0.25443786982248523, + "world_history": 0.2422360248447205, + "world_religions": 0.275 + } + }, + "prompt_3": { + "accuracy": 0.2523743740286652, + "category_acc": { + "agronomy": 0.24260355029585798, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.25609756097560976, + "arts": 0.275, + "astronomy": 0.19393939393939394, + "business_ethics": 0.24401913875598086, + "chinese_civil_service_exam": 0.24375, + "chinese_driving_rule": 0.2824427480916031, + "chinese_food_culture": 0.2647058823529412, + "chinese_foreign_policy": 0.3177570093457944, + "chinese_history": 0.23529411764705882, + "chinese_literature": 0.24509803921568626, + "chinese_teacher_qualification": 0.30726256983240224, + "clinical_knowledge": 0.2742616033755274, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.16822429906542055, + "college_engineering_hydrology": 0.25471698113207547, + "college_law": 0.24074074074074073, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.23076923076923078, + "computer_science": 0.2107843137254902, + "computer_security": 0.22807017543859648, + "conceptual_physics": 0.25170068027210885, + "construction_project_management": 0.22302158273381295, + "economics": 0.27044025157232704, + "education": 0.25766871165644173, + "electrical_engineering": 0.23255813953488372, + "elementary_chinese": 0.23015873015873015, + "elementary_commonsense": 0.2828282828282828, + "elementary_information_and_technology": 0.24789915966386555, + "elementary_mathematics": 0.23478260869565218, + "ethnology": 0.25925925925925924, + "food_science": 0.23776223776223776, + "genetics": 0.26704545454545453, + "global_facts": 0.2751677852348993, + "high_school_biology": 0.21301775147928995, + "high_school_chemistry": 0.26515151515151514, + "high_school_geography": 0.2796610169491525, + "high_school_mathematics": 0.3170731707317073, + "high_school_physics": 0.18181818181818182, + "high_school_politics": 0.32167832167832167, + "human_sexuality": 0.2619047619047619, + "international_law": 0.2702702702702703, + "journalism": 0.22674418604651161, + "jurisprudence": 0.24087591240875914, + "legal_and_moral_basis": 0.2803738317757009, + "logical": 0.2601626016260163, + "machine_learning": 0.22950819672131148, + "management": 0.26666666666666666, + "marketing": 0.2611111111111111, + "marxist_theory": 0.24867724867724866, + "modern_chinese": 0.2413793103448276, + "nutrition": 0.2482758620689655, + "philosophy": 0.24761904761904763, + "professional_accounting": 0.28, + "professional_law": 0.24644549763033174, + "professional_medicine": 0.2473404255319149, + "professional_psychology": 0.23275862068965517, + "public_relations": 0.26436781609195403, + "security_study": 0.25925925925925924, + "sociology": 0.2831858407079646, + "sports_science": 0.2545454545454545, + "traditional_chinese_medicine": 0.22702702702702704, + "virology": 0.25443786982248523, + "world_history": 0.2422360248447205, + "world_religions": 0.26875 + } + }, + "prompt_4": { + "accuracy": 0.25047487480573305, + "category_acc": { + "agronomy": 0.22485207100591717, + "anatomy": 0.23648648648648649, + "ancient_chinese": 0.2621951219512195, + "arts": 0.275, + "astronomy": 0.21818181818181817, + "business_ethics": 0.24401913875598086, + "chinese_civil_service_exam": 0.25, + "chinese_driving_rule": 0.33587786259541985, + "chinese_food_culture": 0.2867647058823529, + "chinese_foreign_policy": 0.2897196261682243, + "chinese_history": 0.23529411764705882, + "chinese_literature": 0.24509803921568626, + "chinese_teacher_qualification": 0.26256983240223464, + "clinical_knowledge": 0.2911392405063291, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.1588785046728972, + "college_engineering_hydrology": 0.25471698113207547, + "college_law": 0.25, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.2358490566037736, + "college_medicine": 0.23443223443223443, + "computer_science": 0.22058823529411764, + "computer_security": 0.2222222222222222, + "conceptual_physics": 0.29931972789115646, + "construction_project_management": 0.22302158273381295, + "economics": 0.27044025157232704, + "education": 0.2085889570552147, + "electrical_engineering": 0.21511627906976744, + "elementary_chinese": 0.21825396825396826, + "elementary_commonsense": 0.26262626262626265, + "elementary_information_and_technology": 0.2689075630252101, + "elementary_mathematics": 0.24347826086956523, + "ethnology": 0.3037037037037037, + "food_science": 0.2867132867132867, + "genetics": 0.2556818181818182, + "global_facts": 0.28187919463087246, + "high_school_biology": 0.23668639053254437, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.2542372881355932, + "high_school_mathematics": 0.3231707317073171, + "high_school_physics": 0.18181818181818182, + "high_school_politics": 0.27972027972027974, + "human_sexuality": 0.23015873015873015, + "international_law": 0.2756756756756757, + "journalism": 0.2441860465116279, + "jurisprudence": 0.25304136253041365, + "legal_and_moral_basis": 0.2523364485981308, + "logical": 0.2764227642276423, + "machine_learning": 0.1885245901639344, + "management": 0.21904761904761905, + "marketing": 0.25555555555555554, + "marxist_theory": 0.25396825396825395, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.2827586206896552, + "philosophy": 0.24761904761904763, + "professional_accounting": 0.22857142857142856, + "professional_law": 0.22274881516587677, + "professional_medicine": 0.23138297872340424, + "professional_psychology": 0.2629310344827586, + "public_relations": 0.25862068965517243, + "security_study": 0.2740740740740741, + "sociology": 0.252212389380531, + "sports_science": 0.24242424242424243, + "traditional_chinese_medicine": 0.25405405405405407, + "virology": 0.26627218934911245, + "world_history": 0.2546583850931677, + "world_religions": 0.275 + } + }, + "prompt_5": { + "accuracy": 0.2527197375237437, + "category_acc": { + "agronomy": 0.2485207100591716, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.25609756097560976, + "arts": 0.275, + "astronomy": 0.2, + "business_ethics": 0.24401913875598086, + "chinese_civil_service_exam": 0.23125, + "chinese_driving_rule": 0.2824427480916031, + "chinese_food_culture": 0.25735294117647056, + "chinese_foreign_policy": 0.3177570093457944, + "chinese_history": 0.23839009287925697, + "chinese_literature": 0.24509803921568626, + "chinese_teacher_qualification": 0.3128491620111732, + "clinical_knowledge": 0.26582278481012656, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.16822429906542055, + "college_engineering_hydrology": 0.24528301886792453, + "college_law": 0.24074074074074073, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.24528301886792453, + "college_medicine": 0.23443223443223443, + "computer_science": 0.23039215686274508, + "computer_security": 0.23391812865497075, + "conceptual_physics": 0.2653061224489796, + "construction_project_management": 0.2302158273381295, + "economics": 0.27044025157232704, + "education": 0.25766871165644173, + "electrical_engineering": 0.2441860465116279, + "elementary_chinese": 0.23412698412698413, + "elementary_commonsense": 0.2878787878787879, + "elementary_information_and_technology": 0.24789915966386555, + "elementary_mathematics": 0.23478260869565218, + "ethnology": 0.25925925925925924, + "food_science": 0.23776223776223776, + "genetics": 0.2727272727272727, + "global_facts": 0.28187919463087246, + "high_school_biology": 0.20118343195266272, + "high_school_chemistry": 0.23484848484848486, + "high_school_geography": 0.2711864406779661, + "high_school_mathematics": 0.3353658536585366, + "high_school_physics": 0.2, + "high_school_politics": 0.3146853146853147, + "human_sexuality": 0.2619047619047619, + "international_law": 0.2756756756756757, + "journalism": 0.22674418604651161, + "jurisprudence": 0.24087591240875914, + "legal_and_moral_basis": 0.2803738317757009, + "logical": 0.2601626016260163, + "machine_learning": 0.21311475409836064, + "management": 0.26666666666666666, + "marketing": 0.2611111111111111, + "marxist_theory": 0.24867724867724866, + "modern_chinese": 0.2672413793103448, + "nutrition": 0.2482758620689655, + "philosophy": 0.23809523809523808, + "professional_accounting": 0.28, + "professional_law": 0.24170616113744076, + "professional_medicine": 0.2473404255319149, + "professional_psychology": 0.23275862068965517, + "public_relations": 0.26436781609195403, + "security_study": 0.26666666666666666, + "sociology": 0.2831858407079646, + "sports_science": 0.2545454545454545, + "traditional_chinese_medicine": 0.22702702702702704, + "virology": 0.2603550295857988, + "world_history": 0.2422360248447205, + "world_religions": 0.275 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.21212121212121213 + }, + "prompt_2": { + "accuracy": 0.21212121212121213 + }, + "prompt_3": { + "accuracy": 0.18181818181818182 + }, + "prompt_4": { + "accuracy": 0.21212121212121213 + }, + "prompt_5": { + "accuracy": 0.18181818181818182 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.15681818181818183 + }, + "prompt_2": { + "accuracy": 0.16136363636363638 + }, + "prompt_3": { + "accuracy": 0.1340909090909091 + }, + "prompt_4": { + "accuracy": 0.22954545454545455 + }, + "prompt_5": { + "accuracy": 0.29545454545454547 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.34779661016949154 + }, + "prompt_2": { + "accuracy": 0.3383050847457627 + }, + "prompt_3": { + "accuracy": 0.336271186440678 + }, + "prompt_4": { + "accuracy": 0.3474576271186441 + }, + "prompt_5": { + "accuracy": 0.3474576271186441 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.27711293941660436 + }, + "prompt_2": { + "accuracy": 0.2793567688855647 + }, + "prompt_3": { + "accuracy": 0.2793567688855647 + }, + "prompt_4": { + "accuracy": 0.27898279730740466 + }, + "prompt_5": { + "accuracy": 0.28047868362004486 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7344439000489956 + }, + "prompt_2": { + "accuracy": 0.732484076433121 + }, + "prompt_3": { + "accuracy": 0.7359137677609016 + }, + "prompt_4": { + "accuracy": 0.7280744732974033 + }, + "prompt_5": { + "accuracy": 0.7329740323370897 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.48832106340612486, + "rouge2": 0.2400408351672168, + "rougeL": 0.40635657587157564, + "avg_rouge": 0.3782394914816391 + }, + "prompt_2": { + "rouge1": 0.4849224990478017, + "rouge2": 0.23757010275058904, + "rougeL": 0.40315689510116576, + "avg_rouge": 0.3752164989665188 + }, + "prompt_3": { + "rouge1": 0.4902056844089529, + "rouge2": 0.24155610405807867, + "rougeL": 0.4070808762645925, + "avg_rouge": 0.37961422157720803 + }, + "prompt_4": { + "rouge1": 0.4866897308028868, + "rouge2": 0.23966762699809102, + "rougeL": 0.40587141218038814, + "avg_rouge": 0.37740958999378865 + }, + "prompt_5": { + "rouge1": 0.4882522910334047, + "rouge2": 0.23959241540876103, + "rougeL": 0.4048037464967501, + "avg_rouge": 0.3775494843129719 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2658485427484594, + "rouge2": 0.08391605918962078, + "rougeL": 0.20446013110978728, + "avg_rouge": 0.18474157768262253 + }, + "prompt_2": { + "rouge1": 0.2738018605158781, + "rouge2": 0.08822868684389382, + "rougeL": 0.20943381470074104, + "avg_rouge": 0.19048812068683765 + }, + "prompt_3": { + "rouge1": 0.2338865827836114, + "rouge2": 0.06824880901517959, + "rougeL": 0.17492013917724475, + "avg_rouge": 0.15901851032534525 + }, + "prompt_4": { + "rouge1": 0.23687932047583385, + "rouge2": 0.06998575802654529, + "rougeL": 0.1775108162856462, + "avg_rouge": 0.16145863159600846 + }, + "prompt_5": { + "rouge1": 0.2709147347446209, + "rouge2": 0.0877107200161855, + "rougeL": 0.20641216170191892, + "avg_rouge": 0.18834587215424178 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.9094036697247706 + }, + "prompt_2": { + "accuracy": 0.908256880733945 + }, + "prompt_3": { + "accuracy": 0.9048165137614679 + }, + "prompt_4": { + "accuracy": 0.908256880733945 + }, + "prompt_5": { + "accuracy": 0.9139908256880734 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5071907957813998 + }, + "prompt_2": { + "accuracy": 0.6864813039309684 + }, + "prompt_3": { + "accuracy": 0.6644295302013423 + }, + "prompt_4": { + "accuracy": 0.6049856184084372 + }, + "prompt_5": { + "accuracy": 0.5225311601150527 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.809 + }, + "prompt_2": { + "accuracy": 0.8175 + }, + "prompt_3": { + "accuracy": 0.806 + }, + "prompt_4": { + "accuracy": 0.8175 + }, + "prompt_5": { + "accuracy": 0.8125 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.565 + }, + "prompt_2": { + "accuracy": 0.5595 + }, + "prompt_3": { + "accuracy": 0.5735 + }, + "prompt_4": { + "accuracy": 0.559 + }, + "prompt_5": { + "accuracy": 0.543 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.83 + }, + "prompt_2": { + "accuracy": 0.759 + }, + "prompt_3": { + "accuracy": 0.81 + }, + "prompt_4": { + "accuracy": 0.7845 + }, + "prompt_5": { + "accuracy": 0.784 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4647887323943662 + }, + "prompt_2": { + "accuracy": 0.43661971830985913 + }, + "prompt_3": { + "accuracy": 0.4507042253521127 + }, + "prompt_4": { + "accuracy": 0.4647887323943662 + }, + "prompt_5": { + "accuracy": 0.4647887323943662 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7942238267148014 + }, + "prompt_2": { + "accuracy": 0.7833935018050542 + }, + "prompt_3": { + "accuracy": 0.7653429602888087 + }, + "prompt_4": { + "accuracy": 0.7653429602888087 + }, + "prompt_5": { + "accuracy": 0.7653429602888087 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49264705882352944 + }, + "prompt_2": { + "accuracy": 0.5294117647058824 + }, + "prompt_3": { + "accuracy": 0.43137254901960786 + }, + "prompt_4": { + "accuracy": 0.553921568627451 + }, + "prompt_5": { + "accuracy": 0.5955882352941176 + } } }, "five_shot": { @@ -1356,53 +12981,1733 @@ "model_link": "https://huggingface.co/google/flan-t5-large", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3161904761904762, + "language_acc": { + "Malay": 0.24666666666666667, + "English": 0.44666666666666666, + "Vietnamese": 0.2866666666666667, + "Spanish": 0.34, + "Indonesian": 0.3466666666666667, + "Filipino": 0.26666666666666666, + "Chinese": 0.28 + }, + "consistency_score_2": 0.546031746031746, + "consistency_score_3": 0.3807619047619048, + "consistency_score_4": 0.29523809523809524, + "consistency_score_5": 0.24253968253968256, + "consistency_score_6": 0.2066666666666667, + "consistency_score_7": 0.18, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.47333333333333333, + "Malay,Vietnamese": 0.62, + "Malay,Spanish": 0.4533333333333333, + "Malay,Indonesian": 0.6133333333333333, + "Malay,Filipino": 0.6266666666666667, + "Malay,Chinese": 0.6066666666666667, + "English,Vietnamese": 0.46, + "English,Spanish": 0.5533333333333333, + "English,Indonesian": 0.47333333333333333, + "English,Filipino": 0.5, + "English,Chinese": 0.43333333333333335, + "Vietnamese,Spanish": 0.5466666666666666, + "Vietnamese,Indonesian": 0.58, + "Vietnamese,Filipino": 0.5333333333333333, + "Vietnamese,Chinese": 0.7, + "Spanish,Indonesian": 0.5066666666666667, + "Spanish,Filipino": 0.54, + "Spanish,Chinese": 0.5066666666666667, + "Indonesian,Filipino": 0.5933333333333334, + "Indonesian,Chinese": 0.5866666666666667, + "Filipino,Chinese": 0.56 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.34, + "Malay,English,Spanish": 0.30666666666666664, + "Malay,English,Indonesian": 0.35333333333333333, + "Malay,English,Filipino": 0.36666666666666664, + "Malay,English,Chinese": 0.32666666666666666, + "Malay,Vietnamese,Spanish": 0.36, + "Malay,Vietnamese,Indonesian": 0.44, + "Malay,Vietnamese,Filipino": 0.4533333333333333, + "Malay,Vietnamese,Chinese": 0.5066666666666667, + "Malay,Spanish,Indonesian": 0.34, + "Malay,Spanish,Filipino": 0.4, + "Malay,Spanish,Chinese": 0.36, + "Malay,Indonesian,Filipino": 0.47333333333333333, + "Malay,Indonesian,Chinese": 0.4533333333333333, + "Malay,Filipino,Chinese": 0.46, + "English,Vietnamese,Spanish": 0.34, + "English,Vietnamese,Indonesian": 0.32, + "English,Vietnamese,Filipino": 0.30666666666666664, + "English,Vietnamese,Chinese": 0.3466666666666667, + "English,Spanish,Indonesian": 0.3333333333333333, + "English,Spanish,Filipino": 0.38, + "English,Spanish,Chinese": 0.32666666666666666, + "English,Indonesian,Filipino": 0.3466666666666667, + "English,Indonesian,Chinese": 0.3333333333333333, + "English,Filipino,Chinese": 0.32, + "Vietnamese,Spanish,Indonesian": 0.37333333333333335, + "Vietnamese,Spanish,Filipino": 0.36666666666666664, + "Vietnamese,Spanish,Chinese": 0.42, + "Vietnamese,Indonesian,Filipino": 0.4, + "Vietnamese,Indonesian,Chinese": 0.47333333333333333, + "Vietnamese,Filipino,Chinese": 0.44, + "Spanish,Indonesian,Filipino": 0.37333333333333335, + "Spanish,Indonesian,Chinese": 0.38666666666666666, + "Spanish,Filipino,Chinese": 0.36666666666666664, + "Indonesian,Filipino,Chinese": 0.43333333333333335 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.24, + "Malay,English,Vietnamese,Indonesian": 0.2733333333333333, + "Malay,English,Vietnamese,Filipino": 0.28, + "Malay,English,Vietnamese,Chinese": 0.2733333333333333, + "Malay,English,Spanish,Indonesian": 0.24, + "Malay,English,Spanish,Filipino": 0.29333333333333333, + "Malay,English,Spanish,Chinese": 0.25333333333333335, + "Malay,English,Indonesian,Filipino": 0.3, + "Malay,English,Indonesian,Chinese": 0.2733333333333333, + "Malay,English,Filipino,Chinese": 0.29333333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.29333333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.32666666666666666, + "Malay,Vietnamese,Spanish,Chinese": 0.32, + "Malay,Vietnamese,Indonesian,Filipino": 0.36666666666666664, + "Malay,Vietnamese,Indonesian,Chinese": 0.38, + "Malay,Vietnamese,Filipino,Chinese": 0.38666666666666666, + "Malay,Spanish,Indonesian,Filipino": 0.30666666666666664, + "Malay,Spanish,Indonesian,Chinese": 0.30666666666666664, + "Malay,Spanish,Filipino,Chinese": 0.32666666666666666, + "Malay,Indonesian,Filipino,Chinese": 0.38666666666666666, + "English,Vietnamese,Spanish,Indonesian": 0.24, + "English,Vietnamese,Spanish,Filipino": 0.24666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.26666666666666666, + "English,Vietnamese,Indonesian,Filipino": 0.25333333333333335, + "English,Vietnamese,Indonesian,Chinese": 0.28, + "English,Vietnamese,Filipino,Chinese": 0.25333333333333335, + "English,Spanish,Indonesian,Filipino": 0.28, + "English,Spanish,Indonesian,Chinese": 0.2733333333333333, + "English,Spanish,Filipino,Chinese": 0.26, + "English,Indonesian,Filipino,Chinese": 0.2733333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.2866666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.34, + "Vietnamese,Spanish,Filipino,Chinese": 0.30666666666666664, + "Vietnamese,Indonesian,Filipino,Chinese": 0.35333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.3 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.2, + "Malay,English,Vietnamese,Spanish,Filipino": 0.22666666666666666, + "Malay,English,Vietnamese,Spanish,Chinese": 0.21333333333333335, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.23333333333333334, + "Malay,English,Vietnamese,Filipino,Chinese": 0.24, + "Malay,English,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.22, + "Malay,English,Spanish,Filipino,Chinese": 0.24, + "Malay,English,Indonesian,Filipino,Chinese": 0.26, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.2733333333333333, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.2866666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.32666666666666666, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.28, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.2, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.22666666666666666, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.22666666666666666, + "English,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.26 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.2, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.22, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.18666666666666668 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.18 + } + }, + "AC3_2": 0.4004768917411958, + "AC3_3": 0.3454849750281892, + "AC3_4": 0.3053552884829173, + "AC3_5": 0.27451082246169134, + "AC3_6": 0.24995749843428255, + "AC3_7": 0.22940499035683776 + }, + "prompt_2": { + "overall_acc": 0.31523809523809526, + "language_acc": { + "Malay": 0.24, + "English": 0.4666666666666667, + "Vietnamese": 0.28, + "Spanish": 0.3466666666666667, + "Indonesian": 0.30666666666666664, + "Filipino": 0.30666666666666664, + "Chinese": 0.26 + }, + "consistency_score_2": 0.5361904761904762, + "consistency_score_3": 0.36590476190476195, + "consistency_score_4": 0.27923809523809523, + "consistency_score_5": 0.2266666666666667, + "consistency_score_6": 0.1895238095238095, + "consistency_score_7": 0.16, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.4533333333333333, + "Malay,Vietnamese": 0.5933333333333334, + "Malay,Spanish": 0.44666666666666666, + "Malay,Indonesian": 0.6466666666666666, + "Malay,Filipino": 0.6133333333333333, + "Malay,Chinese": 0.6266666666666667, + "English,Vietnamese": 0.47333333333333333, + "English,Spanish": 0.5933333333333334, + "English,Indonesian": 0.5, + "English,Filipino": 0.5, + "English,Chinese": 0.46, + "Vietnamese,Spanish": 0.5066666666666667, + "Vietnamese,Indonesian": 0.54, + "Vietnamese,Filipino": 0.47333333333333333, + "Vietnamese,Chinese": 0.6266666666666667, + "Spanish,Indonesian": 0.47333333333333333, + "Spanish,Filipino": 0.5266666666666666, + "Spanish,Chinese": 0.5, + "Indonesian,Filipino": 0.6066666666666667, + "Indonesian,Chinese": 0.5666666666666667, + "Filipino,Chinese": 0.5333333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.32666666666666666, + "Malay,English,Spanish": 0.30666666666666664, + "Malay,English,Indonesian": 0.36, + "Malay,English,Filipino": 0.35333333333333333, + "Malay,English,Chinese": 0.3333333333333333, + "Malay,Vietnamese,Spanish": 0.3333333333333333, + "Malay,Vietnamese,Indonesian": 0.44, + "Malay,Vietnamese,Filipino": 0.4, + "Malay,Vietnamese,Chinese": 0.4666666666666667, + "Malay,Spanish,Indonesian": 0.3333333333333333, + "Malay,Spanish,Filipino": 0.37333333333333335, + "Malay,Spanish,Chinese": 0.35333333333333333, + "Malay,Indonesian,Filipino": 0.49333333333333335, + "Malay,Indonesian,Chinese": 0.4666666666666667, + "Malay,Filipino,Chinese": 0.44666666666666666, + "English,Vietnamese,Spanish": 0.32666666666666666, + "English,Vietnamese,Indonesian": 0.32, + "English,Vietnamese,Filipino": 0.30666666666666664, + "English,Vietnamese,Chinese": 0.3333333333333333, + "English,Spanish,Indonesian": 0.32666666666666666, + "English,Spanish,Filipino": 0.37333333333333335, + "English,Spanish,Chinese": 0.34, + "English,Indonesian,Filipino": 0.38, + "English,Indonesian,Chinese": 0.3333333333333333, + "English,Filipino,Chinese": 0.32666666666666666, + "Vietnamese,Spanish,Indonesian": 0.31333333333333335, + "Vietnamese,Spanish,Filipino": 0.32666666666666666, + "Vietnamese,Spanish,Chinese": 0.36666666666666664, + "Vietnamese,Indonesian,Filipino": 0.36666666666666664, + "Vietnamese,Indonesian,Chinese": 0.4, + "Vietnamese,Filipino,Chinese": 0.38, + "Spanish,Indonesian,Filipino": 0.38, + "Spanish,Indonesian,Chinese": 0.34, + "Spanish,Filipino,Chinese": 0.35333333333333333, + "Indonesian,Filipino,Chinese": 0.4266666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.23333333333333334, + "Malay,English,Vietnamese,Indonesian": 0.2733333333333333, + "Malay,English,Vietnamese,Filipino": 0.2733333333333333, + "Malay,English,Vietnamese,Chinese": 0.26666666666666666, + "Malay,English,Spanish,Indonesian": 0.24, + "Malay,English,Spanish,Filipino": 0.2733333333333333, + "Malay,English,Spanish,Chinese": 0.24666666666666667, + "Malay,English,Indonesian,Filipino": 0.30666666666666664, + "Malay,English,Indonesian,Chinese": 0.29333333333333333, + "Malay,English,Filipino,Chinese": 0.2866666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Filipino": 0.2733333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.28, + "Malay,Vietnamese,Indonesian,Filipino": 0.32666666666666666, + "Malay,Vietnamese,Indonesian,Chinese": 0.36, + "Malay,Vietnamese,Filipino,Chinese": 0.32666666666666666, + "Malay,Spanish,Indonesian,Filipino": 0.3, + "Malay,Spanish,Indonesian,Chinese": 0.29333333333333333, + "Malay,Spanish,Filipino,Chinese": 0.30666666666666664, + "Malay,Indonesian,Filipino,Chinese": 0.38666666666666666, + "English,Vietnamese,Spanish,Indonesian": 0.21333333333333335, + "English,Vietnamese,Spanish,Filipino": 0.24, + "English,Vietnamese,Spanish,Chinese": 0.25333333333333335, + "English,Vietnamese,Indonesian,Filipino": 0.26, + "English,Vietnamese,Indonesian,Chinese": 0.26, + "English,Vietnamese,Filipino,Chinese": 0.26, + "English,Spanish,Indonesian,Filipino": 0.2866666666666667, + "English,Spanish,Indonesian,Chinese": 0.24666666666666667, + "English,Spanish,Filipino,Chinese": 0.26, + "English,Indonesian,Filipino,Chinese": 0.29333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.26, + "Vietnamese,Spanish,Indonesian,Chinese": 0.26666666666666666, + "Vietnamese,Spanish,Filipino,Chinese": 0.26666666666666666, + "Vietnamese,Indonesian,Filipino,Chinese": 0.31333333333333335, + "Spanish,Indonesian,Filipino,Chinese": 0.29333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.18, + "Malay,English,Vietnamese,Spanish,Filipino": 0.20666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.2, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.24, + "Malay,English,Vietnamese,Filipino,Chinese": 0.23333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Malay,English,Spanish,Indonesian,Chinese": 0.22, + "Malay,English,Spanish,Filipino,Chinese": 0.22666666666666666, + "Malay,English,Indonesian,Filipino,Chinese": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.22, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.2866666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.26666666666666666, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.19333333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.19333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.23333333333333334, + "English,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.17333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.18, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.18 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16 + } + }, + "AC3_2": 0.39704484921298167, + "AC3_3": 0.33868701390574696, + "AC3_4": 0.29614805994316484, + "AC3_5": 0.26371411828759816, + "AC3_6": 0.23672596581114277, + "AC3_7": 0.21226452901345136 + }, + "prompt_3": { + "overall_acc": 0.30666666666666664, + "language_acc": { + "Malay": 0.24666666666666667, + "English": 0.44, + "Vietnamese": 0.25333333333333335, + "Spanish": 0.34, + "Indonesian": 0.30666666666666664, + "Filipino": 0.29333333333333333, + "Chinese": 0.26666666666666666 + }, + "consistency_score_2": 0.5695238095238095, + "consistency_score_3": 0.4041904761904762, + "consistency_score_4": 0.31733333333333325, + "consistency_score_5": 0.2657142857142857, + "consistency_score_6": 0.23142857142857146, + "consistency_score_7": 0.20666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5066666666666667, + "Malay,Vietnamese": 0.6133333333333333, + "Malay,Spanish": 0.5, + "Malay,Indonesian": 0.7066666666666667, + "Malay,Filipino": 0.6733333333333333, + "Malay,Chinese": 0.6466666666666666, + "English,Vietnamese": 0.47333333333333333, + "English,Spanish": 0.58, + "English,Indonesian": 0.48, + "English,Filipino": 0.49333333333333335, + "English,Chinese": 0.48, + "Vietnamese,Spanish": 0.56, + "Vietnamese,Indonesian": 0.5933333333333334, + "Vietnamese,Filipino": 0.56, + "Vietnamese,Chinese": 0.6733333333333333, + "Spanish,Indonesian": 0.5066666666666667, + "Spanish,Filipino": 0.5533333333333333, + "Spanish,Chinese": 0.54, + "Indonesian,Filipino": 0.62, + "Indonesian,Chinese": 0.6066666666666667, + "Filipino,Chinese": 0.5933333333333334 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.36, + "Malay,English,Spanish": 0.35333333333333333, + "Malay,English,Indonesian": 0.3933333333333333, + "Malay,English,Filipino": 0.38, + "Malay,English,Chinese": 0.36666666666666664, + "Malay,Vietnamese,Spanish": 0.38666666666666666, + "Malay,Vietnamese,Indonesian": 0.4866666666666667, + "Malay,Vietnamese,Filipino": 0.47333333333333333, + "Malay,Vietnamese,Chinese": 0.5066666666666667, + "Malay,Spanish,Indonesian": 0.38666666666666666, + "Malay,Spanish,Filipino": 0.41333333333333333, + "Malay,Spanish,Chinese": 0.4, + "Malay,Indonesian,Filipino": 0.54, + "Malay,Indonesian,Chinese": 0.5066666666666667, + "Malay,Filipino,Chinese": 0.49333333333333335, + "English,Vietnamese,Spanish": 0.3466666666666667, + "English,Vietnamese,Indonesian": 0.32666666666666666, + "English,Vietnamese,Filipino": 0.32666666666666666, + "English,Vietnamese,Chinese": 0.36, + "English,Spanish,Indonesian": 0.3466666666666667, + "English,Spanish,Filipino": 0.37333333333333335, + "English,Spanish,Chinese": 0.36, + "English,Indonesian,Filipino": 0.36666666666666664, + "English,Indonesian,Chinese": 0.3466666666666667, + "English,Filipino,Chinese": 0.34, + "Vietnamese,Spanish,Indonesian": 0.36666666666666664, + "Vietnamese,Spanish,Filipino": 0.3933333333333333, + "Vietnamese,Spanish,Chinese": 0.43333333333333335, + "Vietnamese,Indonesian,Filipino": 0.43333333333333335, + "Vietnamese,Indonesian,Chinese": 0.47333333333333333, + "Vietnamese,Filipino,Chinese": 0.4666666666666667, + "Spanish,Indonesian,Filipino": 0.38666666666666666, + "Spanish,Indonesian,Chinese": 0.38666666666666666, + "Spanish,Filipino,Chinese": 0.4, + "Indonesian,Filipino,Chinese": 0.4666666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.26666666666666666, + "Malay,English,Vietnamese,Indonesian": 0.3, + "Malay,English,Vietnamese,Filipino": 0.30666666666666664, + "Malay,English,Vietnamese,Chinese": 0.30666666666666664, + "Malay,English,Spanish,Indonesian": 0.29333333333333333, + "Malay,English,Spanish,Filipino": 0.29333333333333333, + "Malay,English,Spanish,Chinese": 0.28, + "Malay,English,Indonesian,Filipino": 0.32666666666666666, + "Malay,English,Indonesian,Chinese": 0.32, + "Malay,English,Filipino,Chinese": 0.30666666666666664, + "Malay,Vietnamese,Spanish,Indonesian": 0.30666666666666664, + "Malay,Vietnamese,Spanish,Filipino": 0.34, + "Malay,Vietnamese,Spanish,Chinese": 0.3466666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.4, + "Malay,Vietnamese,Indonesian,Chinese": 0.41333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.4066666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.3333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.3333333333333333, + "Malay,Spanish,Filipino,Chinese": 0.3466666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.4266666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.24, + "English,Vietnamese,Spanish,Filipino": 0.26, + "English,Vietnamese,Spanish,Chinese": 0.28, + "English,Vietnamese,Indonesian,Filipino": 0.28, + "English,Vietnamese,Indonesian,Chinese": 0.28, + "English,Vietnamese,Filipino,Chinese": 0.2733333333333333, + "English,Spanish,Indonesian,Filipino": 0.28, + "English,Spanish,Indonesian,Chinese": 0.28, + "English,Spanish,Filipino,Chinese": 0.2733333333333333, + "English,Indonesian,Filipino,Chinese": 0.3, + "Vietnamese,Spanish,Indonesian,Filipino": 0.31333333333333335, + "Vietnamese,Spanish,Indonesian,Chinese": 0.34, + "Vietnamese,Spanish,Filipino,Chinese": 0.34, + "Vietnamese,Indonesian,Filipino,Chinese": 0.38666666666666666, + "Spanish,Indonesian,Filipino,Chinese": 0.32666666666666666 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.22666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino": 0.24, + "Malay,English,Vietnamese,Spanish,Chinese": 0.24, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.2733333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.26666666666666666, + "Malay,English,Spanish,Indonesian,Filipino": 0.25333333333333335, + "Malay,English,Spanish,Indonesian,Chinese": 0.26, + "Malay,English,Spanish,Filipino,Chinese": 0.24666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.2866666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.28, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.3, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.30666666666666664, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.36, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.3, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.22, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.22666666666666666, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.25333333333333335, + "English,Spanish,Indonesian,Filipino,Chinese": 0.24, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.3 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.20666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.22666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.22, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.24666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.2733333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.21333333333333335 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667 + } + }, + "AC3_2": 0.3986666666211667, + "AC3_3": 0.34873883524926186, + "AC3_4": 0.31190883185884644, + "AC3_5": 0.2847254575209714, + "AC3_6": 0.2637876105704466, + "AC3_7": 0.24692640687830436 + }, + "prompt_4": { + "overall_acc": 0.31333333333333335, + "language_acc": { + "Malay": 0.24, + "English": 0.44666666666666666, + "Vietnamese": 0.28, + "Spanish": 0.3333333333333333, + "Indonesian": 0.32666666666666666, + "Filipino": 0.29333333333333333, + "Chinese": 0.2733333333333333 + }, + "consistency_score_2": 0.5653968253968253, + "consistency_score_3": 0.40685714285714286, + "consistency_score_4": 0.32514285714285707, + "consistency_score_5": 0.2761904761904762, + "consistency_score_6": 0.24380952380952386, + "consistency_score_7": 0.22, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.4866666666666667, + "Malay,Vietnamese": 0.5666666666666667, + "Malay,Spanish": 0.5066666666666667, + "Malay,Indonesian": 0.66, + "Malay,Filipino": 0.68, + "Malay,Chinese": 0.64, + "English,Vietnamese": 0.46, + "English,Spanish": 0.58, + "English,Indonesian": 0.5133333333333333, + "English,Filipino": 0.5466666666666666, + "English,Chinese": 0.4866666666666667, + "Vietnamese,Spanish": 0.56, + "Vietnamese,Indonesian": 0.5333333333333333, + "Vietnamese,Filipino": 0.5266666666666666, + "Vietnamese,Chinese": 0.6466666666666666, + "Spanish,Indonesian": 0.5666666666666667, + "Spanish,Filipino": 0.58, + "Spanish,Chinese": 0.5133333333333333, + "Indonesian,Filipino": 0.62, + "Indonesian,Chinese": 0.6, + "Filipino,Chinese": 0.6 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.32666666666666666, + "Malay,English,Spanish": 0.36, + "Malay,English,Indonesian": 0.4066666666666667, + "Malay,English,Filipino": 0.41333333333333333, + "Malay,English,Chinese": 0.37333333333333335, + "Malay,Vietnamese,Spanish": 0.36666666666666664, + "Malay,Vietnamese,Indonesian": 0.44, + "Malay,Vietnamese,Filipino": 0.44, + "Malay,Vietnamese,Chinese": 0.48, + "Malay,Spanish,Indonesian": 0.4066666666666667, + "Malay,Spanish,Filipino": 0.44, + "Malay,Spanish,Chinese": 0.38666666666666666, + "Malay,Indonesian,Filipino": 0.52, + "Malay,Indonesian,Chinese": 0.49333333333333335, + "Malay,Filipino,Chinese": 0.5, + "English,Vietnamese,Spanish": 0.36666666666666664, + "English,Vietnamese,Indonesian": 0.31333333333333335, + "English,Vietnamese,Filipino": 0.3333333333333333, + "English,Vietnamese,Chinese": 0.3466666666666667, + "English,Spanish,Indonesian": 0.38, + "English,Spanish,Filipino": 0.43333333333333335, + "English,Spanish,Chinese": 0.38666666666666666, + "English,Indonesian,Filipino": 0.4066666666666667, + "English,Indonesian,Chinese": 0.38666666666666666, + "English,Filipino,Chinese": 0.38666666666666666, + "Vietnamese,Spanish,Indonesian": 0.36666666666666664, + "Vietnamese,Spanish,Filipino": 0.4066666666666667, + "Vietnamese,Spanish,Chinese": 0.4, + "Vietnamese,Indonesian,Filipino": 0.4, + "Vietnamese,Indonesian,Chinese": 0.4266666666666667, + "Vietnamese,Filipino,Chinese": 0.44, + "Spanish,Indonesian,Filipino": 0.43333333333333335, + "Spanish,Indonesian,Chinese": 0.4, + "Spanish,Filipino,Chinese": 0.41333333333333333, + "Indonesian,Filipino,Chinese": 0.46 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.26666666666666666, + "Malay,English,Vietnamese,Indonesian": 0.29333333333333333, + "Malay,English,Vietnamese,Filipino": 0.29333333333333333, + "Malay,English,Vietnamese,Chinese": 0.2866666666666667, + "Malay,English,Spanish,Indonesian": 0.30666666666666664, + "Malay,English,Spanish,Filipino": 0.3333333333333333, + "Malay,English,Spanish,Chinese": 0.29333333333333333, + "Malay,English,Indonesian,Filipino": 0.35333333333333333, + "Malay,English,Indonesian,Chinese": 0.34, + "Malay,English,Filipino,Chinese": 0.3333333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.30666666666666664, + "Malay,Vietnamese,Spanish,Filipino": 0.3466666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.32, + "Malay,Vietnamese,Indonesian,Filipino": 0.36666666666666664, + "Malay,Vietnamese,Indonesian,Chinese": 0.37333333333333335, + "Malay,Vietnamese,Filipino,Chinese": 0.38666666666666666, + "Malay,Spanish,Indonesian,Filipino": 0.36666666666666664, + "Malay,Spanish,Indonesian,Chinese": 0.3466666666666667, + "Malay,Spanish,Filipino,Chinese": 0.35333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.42, + "English,Vietnamese,Spanish,Indonesian": 0.25333333333333335, + "English,Vietnamese,Spanish,Filipino": 0.3, + "English,Vietnamese,Spanish,Chinese": 0.29333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.28, + "English,Vietnamese,Indonesian,Chinese": 0.28, + "English,Vietnamese,Filipino,Chinese": 0.29333333333333333, + "English,Spanish,Indonesian,Filipino": 0.34, + "English,Spanish,Indonesian,Chinese": 0.31333333333333335, + "English,Spanish,Filipino,Chinese": 0.32, + "English,Indonesian,Filipino,Chinese": 0.34, + "Vietnamese,Spanish,Indonesian,Filipino": 0.32666666666666666, + "Vietnamese,Spanish,Indonesian,Chinese": 0.30666666666666664, + "Vietnamese,Spanish,Filipino,Chinese": 0.34, + "Vietnamese,Indonesian,Filipino,Chinese": 0.35333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.35333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.23333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino": 0.26, + "Malay,English,Vietnamese,Spanish,Chinese": 0.24, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.26666666666666666, + "Malay,English,Vietnamese,Filipino,Chinese": 0.26666666666666666, + "Malay,English,Spanish,Indonesian,Filipino": 0.29333333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.2733333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.2733333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.30666666666666664, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.29333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.28, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.30666666666666664, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.3333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.32, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.24666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.26, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.26666666666666666, + "English,Spanish,Indonesian,Filipino,Chinese": 0.2866666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.29333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.22, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.23333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.25333333333333335, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.26, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.2733333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22 + } + }, + "AC3_2": 0.4032129093953473, + "AC3_3": 0.3540227452563179, + "AC3_4": 0.31912887823164, + "AC3_5": 0.29359181470517964, + "AC3_6": 0.2742336181843969, + "AC3_7": 0.2584999999515312 + }, + "prompt_5": { + "overall_acc": 0.3161904761904762, + "language_acc": { + "Malay": 0.26, + "English": 0.5, + "Vietnamese": 0.25333333333333335, + "Spanish": 0.3466666666666667, + "Indonesian": 0.28, + "Filipino": 0.30666666666666664, + "Chinese": 0.26666666666666666 + }, + "consistency_score_2": 0.526984126984127, + "consistency_score_3": 0.3487619047619048, + "consistency_score_4": 0.259047619047619, + "consistency_score_5": 0.206984126984127, + "consistency_score_6": 0.1742857142857143, + "consistency_score_7": 0.15333333333333332, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.4866666666666667, + "Malay,Vietnamese": 0.6066666666666667, + "Malay,Spanish": 0.42, + "Malay,Indonesian": 0.6733333333333333, + "Malay,Filipino": 0.6066666666666667, + "Malay,Chinese": 0.6333333333333333, + "English,Vietnamese": 0.4533333333333333, + "English,Spanish": 0.56, + "English,Indonesian": 0.48, + "English,Filipino": 0.47333333333333333, + "English,Chinese": 0.46, + "Vietnamese,Spanish": 0.48, + "Vietnamese,Indonesian": 0.5066666666666667, + "Vietnamese,Filipino": 0.5, + "Vietnamese,Chinese": 0.6266666666666667, + "Spanish,Indonesian": 0.48, + "Spanish,Filipino": 0.48, + "Spanish,Chinese": 0.4666666666666667, + "Indonesian,Filipino": 0.5866666666666667, + "Indonesian,Chinese": 0.5466666666666666, + "Filipino,Chinese": 0.54 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.34, + "Malay,English,Spanish": 0.3, + "Malay,English,Indonesian": 0.36666666666666664, + "Malay,English,Filipino": 0.34, + "Malay,English,Chinese": 0.3466666666666667, + "Malay,Vietnamese,Spanish": 0.31333333333333335, + "Malay,Vietnamese,Indonesian": 0.4266666666666667, + "Malay,Vietnamese,Filipino": 0.4, + "Malay,Vietnamese,Chinese": 0.48, + "Malay,Spanish,Indonesian": 0.31333333333333335, + "Malay,Spanish,Filipino": 0.3333333333333333, + "Malay,Spanish,Chinese": 0.3333333333333333, + "Malay,Indonesian,Filipino": 0.4666666666666667, + "Malay,Indonesian,Chinese": 0.46, + "Malay,Filipino,Chinese": 0.43333333333333335, + "English,Vietnamese,Spanish": 0.30666666666666664, + "English,Vietnamese,Indonesian": 0.29333333333333333, + "English,Vietnamese,Filipino": 0.3, + "English,Vietnamese,Chinese": 0.32666666666666666, + "English,Spanish,Indonesian": 0.29333333333333333, + "English,Spanish,Filipino": 0.3333333333333333, + "English,Spanish,Chinese": 0.32666666666666666, + "English,Indonesian,Filipino": 0.3333333333333333, + "English,Indonesian,Chinese": 0.31333333333333335, + "English,Filipino,Chinese": 0.30666666666666664, + "Vietnamese,Spanish,Indonesian": 0.2866666666666667, + "Vietnamese,Spanish,Filipino": 0.30666666666666664, + "Vietnamese,Spanish,Chinese": 0.3466666666666667, + "Vietnamese,Indonesian,Filipino": 0.3466666666666667, + "Vietnamese,Indonesian,Chinese": 0.38, + "Vietnamese,Filipino,Chinese": 0.3933333333333333, + "Spanish,Indonesian,Filipino": 0.34, + "Spanish,Indonesian,Chinese": 0.30666666666666664, + "Spanish,Filipino,Chinese": 0.32, + "Indonesian,Filipino,Chinese": 0.3933333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.22, + "Malay,English,Vietnamese,Indonesian": 0.26, + "Malay,English,Vietnamese,Filipino": 0.26, + "Malay,English,Vietnamese,Chinese": 0.28, + "Malay,English,Spanish,Indonesian": 0.21333333333333335, + "Malay,English,Spanish,Filipino": 0.24666666666666667, + "Malay,English,Spanish,Chinese": 0.24, + "Malay,English,Indonesian,Filipino": 0.26, + "Malay,English,Indonesian,Chinese": 0.2733333333333333, + "Malay,English,Filipino,Chinese": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Indonesian": 0.23333333333333334, + "Malay,Vietnamese,Spanish,Filipino": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Chinese": 0.28, + "Malay,Vietnamese,Indonesian,Filipino": 0.31333333333333335, + "Malay,Vietnamese,Indonesian,Chinese": 0.34, + "Malay,Vietnamese,Filipino,Chinese": 0.3333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.26666666666666666, + "Malay,Spanish,Indonesian,Chinese": 0.26, + "Malay,Spanish,Filipino,Chinese": 0.2733333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.3466666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.19333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.23333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.24, + "English,Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.24, + "English,Vietnamese,Filipino,Chinese": 0.25333333333333335, + "English,Spanish,Indonesian,Filipino": 0.24, + "English,Spanish,Indonesian,Chinese": 0.22, + "English,Spanish,Filipino,Chinese": 0.24, + "English,Indonesian,Filipino,Chinese": 0.24666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.24, + "Vietnamese,Spanish,Indonesian,Chinese": 0.24666666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.26666666666666666, + "Vietnamese,Indonesian,Filipino,Chinese": 0.29333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.26 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.16, + "Malay,English,Vietnamese,Spanish,Filipino": 0.19333333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.2, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.20666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.22, + "Malay,English,Vietnamese,Filipino,Chinese": 0.22666666666666666, + "Malay,English,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Malay,English,Spanish,Filipino,Chinese": 0.20666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.22, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.26666666666666666, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.18, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.18, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.20666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.16, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.18, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.2, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332 + } + }, + "AC3_2": 0.3952380951912202, + "AC3_3": 0.33167846566467846, + "AC3_4": 0.2847808261882733, + "AC3_5": 0.25018955150095346, + "AC3_6": 0.22471012478081498, + "AC3_7": 0.20651791746784776 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3003246753246754, + "language_acc": { + "English": 0.5113636363636364, + "Vietnamese": 0.2556818181818182, + "Chinese": 0.24431818181818182, + "Indonesian": 0.26136363636363635, + "Filipino": 0.2556818181818182, + "Spanish": 0.32954545454545453, + "Malay": 0.24431818181818182 + }, + "consistency_score_2": 0.45102813852813856, + "consistency_score_3": 0.25097402597402596, + "consistency_score_4": 0.14983766233766233, + "consistency_score_5": 0.09117965367965367, + "consistency_score_6": 0.0551948051948052, + "consistency_score_7": 0.03409090909090909, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3409090909090909, + "English,Chinese": 0.2840909090909091, + "English,Indonesian": 0.3352272727272727, + "English,Filipino": 0.3125, + "English,Spanish": 0.4659090909090909, + "English,Malay": 0.32386363636363635, + "Vietnamese,Chinese": 0.5738636363636364, + "Vietnamese,Indonesian": 0.5056818181818182, + "Vietnamese,Filipino": 0.5227272727272727, + "Vietnamese,Spanish": 0.4147727272727273, + "Vietnamese,Malay": 0.5113636363636364, + "Chinese,Indonesian": 0.48295454545454547, + "Chinese,Filipino": 0.5056818181818182, + "Chinese,Spanish": 0.3465909090909091, + "Chinese,Malay": 0.44886363636363635, + "Indonesian,Filipino": 0.5965909090909091, + "Indonesian,Spanish": 0.4659090909090909, + "Indonesian,Malay": 0.6193181818181818, + "Filipino,Spanish": 0.42613636363636365, + "Filipino,Malay": 0.5568181818181818, + "Spanish,Malay": 0.4318181818181818 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.17045454545454544, + "English,Vietnamese,Indonesian": 0.18181818181818182, + "English,Vietnamese,Filipino": 0.1590909090909091, + "English,Vietnamese,Spanish": 0.20454545454545456, + "English,Vietnamese,Malay": 0.19318181818181818, + "English,Chinese,Indonesian": 0.1590909090909091, + "English,Chinese,Filipino": 0.14204545454545456, + "English,Chinese,Spanish": 0.14772727272727273, + "English,Chinese,Malay": 0.14772727272727273, + "English,Indonesian,Filipino": 0.19318181818181818, + "English,Indonesian,Spanish": 0.22727272727272727, + "English,Indonesian,Malay": 0.23863636363636365, + "English,Filipino,Spanish": 0.19318181818181818, + "English,Filipino,Malay": 0.17045454545454544, + "English,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian": 0.3465909090909091, + "Vietnamese,Chinese,Filipino": 0.3522727272727273, + "Vietnamese,Chinese,Spanish": 0.25, + "Vietnamese,Chinese,Malay": 0.3181818181818182, + "Vietnamese,Indonesian,Filipino": 0.36363636363636365, + "Vietnamese,Indonesian,Spanish": 0.25, + "Vietnamese,Indonesian,Malay": 0.36363636363636365, + "Vietnamese,Filipino,Spanish": 0.23863636363636365, + "Vietnamese,Filipino,Malay": 0.32954545454545453, + "Vietnamese,Spanish,Malay": 0.22727272727272727, + "Chinese,Indonesian,Filipino": 0.3522727272727273, + "Chinese,Indonesian,Spanish": 0.2215909090909091, + "Chinese,Indonesian,Malay": 0.3409090909090909, + "Chinese,Filipino,Spanish": 0.2215909090909091, + "Chinese,Filipino,Malay": 0.3181818181818182, + "Chinese,Spanish,Malay": 0.19886363636363635, + "Indonesian,Filipino,Spanish": 0.3068181818181818, + "Indonesian,Filipino,Malay": 0.44886363636363635, + "Indonesian,Spanish,Malay": 0.3125, + "Filipino,Spanish,Malay": 0.2897727272727273 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino": 0.09659090909090909, + "English,Vietnamese,Chinese,Spanish": 0.10795454545454546, + "English,Vietnamese,Chinese,Malay": 0.10795454545454546, + "English,Vietnamese,Indonesian,Filipino": 0.10795454545454546, + "English,Vietnamese,Indonesian,Spanish": 0.10795454545454546, + "English,Vietnamese,Indonesian,Malay": 0.13636363636363635, + "English,Vietnamese,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Filipino,Malay": 0.09659090909090909, + "English,Vietnamese,Spanish,Malay": 0.11931818181818182, + "English,Chinese,Indonesian,Filipino": 0.10795454545454546, + "English,Chinese,Indonesian,Spanish": 0.09090909090909091, + "English,Chinese,Indonesian,Malay": 0.125, + "English,Chinese,Filipino,Spanish": 0.07954545454545454, + "English,Chinese,Filipino,Malay": 0.09090909090909091, + "English,Chinese,Spanish,Malay": 0.07954545454545454, + "English,Indonesian,Filipino,Spanish": 0.13068181818181818, + "English,Indonesian,Filipino,Malay": 0.1534090909090909, + "English,Indonesian,Spanish,Malay": 0.1590909090909091, + "English,Filipino,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino": 0.26704545454545453, + "Vietnamese,Chinese,Indonesian,Spanish": 0.17613636363636365, + "Vietnamese,Chinese,Indonesian,Malay": 0.25, + "Vietnamese,Chinese,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Filipino,Malay": 0.23863636363636365, + "Vietnamese,Chinese,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Indonesian,Filipino,Spanish": 0.18181818181818182, + "Vietnamese,Indonesian,Filipino,Malay": 0.2727272727272727, + "Vietnamese,Indonesian,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Filipino,Spanish,Malay": 0.1590909090909091, + "Chinese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "Chinese,Indonesian,Filipino,Malay": 0.26136363636363635, + "Chinese,Indonesian,Spanish,Malay": 0.1590909090909091, + "Chinese,Filipino,Spanish,Malay": 0.1534090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.24431818181818182 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.056818181818181816, + "English,Vietnamese,Chinese,Filipino,Malay": 0.0625, + "English,Vietnamese,Chinese,Spanish,Malay": 0.0625, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.0625, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Filipino,Spanish,Malay": 0.0625, + "English,Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Chinese,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + } + }, + "AC3_2": 0.36056264580453856, + "AC3_3": 0.27344048761350315, + "AC3_4": 0.19992764173321143, + "AC3_5": 0.1398886186020237, + "AC3_6": 0.09325149733485677, + "AC3_7": 0.06123124446536174 + }, + "prompt_2": { + "overall_acc": 0.30032467532467527, + "language_acc": { + "English": 0.5113636363636364, + "Vietnamese": 0.2897727272727273, + "Chinese": 0.24431818181818182, + "Indonesian": 0.24431818181818182, + "Filipino": 0.26704545454545453, + "Spanish": 0.3181818181818182, + "Malay": 0.22727272727272727 + }, + "consistency_score_2": 0.4402056277056277, + "consistency_score_3": 0.24512987012987014, + "consistency_score_4": 0.15211038961038958, + "consistency_score_5": 0.09929653679653679, + "consistency_score_6": 0.06655844155844157, + "consistency_score_7": 0.045454545454545456, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.35795454545454547, + "English,Chinese": 0.26136363636363635, + "English,Indonesian": 0.3181818181818182, + "English,Filipino": 0.32386363636363635, + "English,Spanish": 0.48863636363636365, + "English,Malay": 0.2784090909090909, + "Vietnamese,Chinese": 0.5568181818181818, + "Vietnamese,Indonesian": 0.48863636363636365, + "Vietnamese,Filipino": 0.4772727272727273, + "Vietnamese,Spanish": 0.4431818181818182, + "Vietnamese,Malay": 0.5170454545454546, + "Chinese,Indonesian": 0.48295454545454547, + "Chinese,Filipino": 0.4431818181818182, + "Chinese,Spanish": 0.3693181818181818, + "Chinese,Malay": 0.4375, + "Indonesian,Filipino": 0.5227272727272727, + "Indonesian,Spanish": 0.4375, + "Indonesian,Malay": 0.6193181818181818, + "Filipino,Spanish": 0.4090909090909091, + "Filipino,Malay": 0.5568181818181818, + "Spanish,Malay": 0.45454545454545453 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.17045454545454544, + "English,Vietnamese,Indonesian": 0.19318181818181818, + "English,Vietnamese,Filipino": 0.1875, + "English,Vietnamese,Spanish": 0.24431818181818182, + "English,Vietnamese,Malay": 0.19886363636363635, + "English,Chinese,Indonesian": 0.1534090909090909, + "English,Chinese,Filipino": 0.13636363636363635, + "English,Chinese,Spanish": 0.14772727272727273, + "English,Chinese,Malay": 0.13068181818181818, + "English,Indonesian,Filipino": 0.17613636363636365, + "English,Indonesian,Spanish": 0.2215909090909091, + "English,Indonesian,Malay": 0.19886363636363635, + "English,Filipino,Spanish": 0.19318181818181818, + "English,Filipino,Malay": 0.16477272727272727, + "English,Spanish,Malay": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian": 0.3181818181818182, + "Vietnamese,Chinese,Filipino": 0.29545454545454547, + "Vietnamese,Chinese,Spanish": 0.26136363636363635, + "Vietnamese,Chinese,Malay": 0.3125, + "Vietnamese,Indonesian,Filipino": 0.3068181818181818, + "Vietnamese,Indonesian,Spanish": 0.2784090909090909, + "Vietnamese,Indonesian,Malay": 0.36363636363636365, + "Vietnamese,Filipino,Spanish": 0.25, + "Vietnamese,Filipino,Malay": 0.32954545454545453, + "Vietnamese,Spanish,Malay": 0.2840909090909091, + "Chinese,Indonesian,Filipino": 0.2897727272727273, + "Chinese,Indonesian,Spanish": 0.2215909090909091, + "Chinese,Indonesian,Malay": 0.32386363636363635, + "Chinese,Filipino,Spanish": 0.2159090909090909, + "Chinese,Filipino,Malay": 0.2840909090909091, + "Chinese,Spanish,Malay": 0.22727272727272727, + "Indonesian,Filipino,Spanish": 0.2784090909090909, + "Indonesian,Filipino,Malay": 0.4090909090909091, + "Indonesian,Spanish,Malay": 0.3352272727272727, + "Filipino,Spanish,Malay": 0.2784090909090909 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.11363636363636363, + "English,Vietnamese,Chinese,Filipino": 0.09659090909090909, + "English,Vietnamese,Chinese,Spanish": 0.11931818181818182, + "English,Vietnamese,Chinese,Malay": 0.10795454545454546, + "English,Vietnamese,Indonesian,Filipino": 0.13068181818181818, + "English,Vietnamese,Indonesian,Spanish": 0.1534090909090909, + "English,Vietnamese,Indonesian,Malay": 0.1534090909090909, + "English,Vietnamese,Filipino,Spanish": 0.13068181818181818, + "English,Vietnamese,Filipino,Malay": 0.125, + "English,Vietnamese,Spanish,Malay": 0.14204545454545456, + "English,Chinese,Indonesian,Filipino": 0.10227272727272728, + "English,Chinese,Indonesian,Spanish": 0.10795454545454546, + "English,Chinese,Indonesian,Malay": 0.10795454545454546, + "English,Chinese,Filipino,Spanish": 0.09090909090909091, + "English,Chinese,Filipino,Malay": 0.08522727272727272, + "English,Chinese,Spanish,Malay": 0.09090909090909091, + "English,Indonesian,Filipino,Spanish": 0.13636363636363635, + "English,Indonesian,Filipino,Malay": 0.13068181818181818, + "English,Indonesian,Spanish,Malay": 0.1590909090909091, + "English,Filipino,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Spanish": 0.17613636363636365, + "Vietnamese,Chinese,Indonesian,Malay": 0.23295454545454544, + "Vietnamese,Chinese,Filipino,Spanish": 0.1534090909090909, + "Vietnamese,Chinese,Filipino,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Indonesian,Filipino,Spanish": 0.1875, + "Vietnamese,Indonesian,Filipino,Malay": 0.24431818181818182, + "Vietnamese,Indonesian,Spanish,Malay": 0.23295454545454544, + "Vietnamese,Filipino,Spanish,Malay": 0.18181818181818182, + "Chinese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "Chinese,Indonesian,Filipino,Malay": 0.20454545454545456, + "Chinese,Indonesian,Spanish,Malay": 0.1875, + "Chinese,Filipino,Spanish,Malay": 0.14772727272727273, + "Indonesian,Filipino,Spanish,Malay": 0.24431818181818182 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.09090909090909091, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Spanish,Malay": 0.07954545454545454, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.125, + "English,Vietnamese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino,Malay": 0.06818181818181818, + "English,Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.1590909090909091, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.125 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + } + }, + "AC3_2": 0.35705388870095955, + "AC3_3": 0.2699346783673332, + "AC3_4": 0.20194059613708146, + "AC3_5": 0.14924733353632352, + "AC3_6": 0.10896736004385395, + "AC3_7": 0.07895860006252567 + }, + "prompt_3": { + "overall_acc": 0.2881493506493506, + "language_acc": { + "English": 0.44886363636363635, + "Vietnamese": 0.25, + "Chinese": 0.25, + "Indonesian": 0.25, + "Filipino": 0.26136363636363635, + "Spanish": 0.32386363636363635, + "Malay": 0.23295454545454544 + }, + "consistency_score_2": 0.45238095238095244, + "consistency_score_3": 0.25925324675324674, + "consistency_score_4": 0.16428571428571428, + "consistency_score_5": 0.10876623376623375, + "consistency_score_6": 0.07386363636363635, + "consistency_score_7": 0.05113636363636364, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3409090909090909, + "English,Chinese": 0.30113636363636365, + "English,Indonesian": 0.3465909090909091, + "English,Filipino": 0.29545454545454547, + "English,Spanish": 0.48863636363636365, + "English,Malay": 0.2840909090909091, + "Vietnamese,Chinese": 0.5738636363636364, + "Vietnamese,Indonesian": 0.48295454545454547, + "Vietnamese,Filipino": 0.4715909090909091, + "Vietnamese,Spanish": 0.4147727272727273, + "Vietnamese,Malay": 0.5284090909090909, + "Chinese,Indonesian": 0.4772727272727273, + "Chinese,Filipino": 0.4772727272727273, + "Chinese,Spanish": 0.35795454545454547, + "Chinese,Malay": 0.4659090909090909, + "Indonesian,Filipino": 0.5681818181818182, + "Indonesian,Spanish": 0.44886363636363635, + "Indonesian,Malay": 0.6647727272727273, + "Filipino,Spanish": 0.45454545454545453, + "Filipino,Malay": 0.5738636363636364, + "Spanish,Malay": 0.48295454545454547 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.19886363636363635, + "English,Vietnamese,Indonesian": 0.20454545454545456, + "English,Vietnamese,Filipino": 0.17045454545454544, + "English,Vietnamese,Spanish": 0.22727272727272727, + "English,Vietnamese,Malay": 0.17613636363636365, + "English,Chinese,Indonesian": 0.18181818181818182, + "English,Chinese,Filipino": 0.1534090909090909, + "English,Chinese,Spanish": 0.17045454545454544, + "English,Chinese,Malay": 0.14772727272727273, + "English,Indonesian,Filipino": 0.19318181818181818, + "English,Indonesian,Spanish": 0.22727272727272727, + "English,Indonesian,Malay": 0.22727272727272727, + "English,Filipino,Spanish": 0.2159090909090909, + "English,Filipino,Malay": 0.17613636363636365, + "English,Spanish,Malay": 0.2215909090909091, + "Vietnamese,Chinese,Indonesian": 0.32386363636363635, + "Vietnamese,Chinese,Filipino": 0.3125, + "Vietnamese,Chinese,Spanish": 0.26136363636363635, + "Vietnamese,Chinese,Malay": 0.3465909090909091, + "Vietnamese,Indonesian,Filipino": 0.3181818181818182, + "Vietnamese,Indonesian,Spanish": 0.2556818181818182, + "Vietnamese,Indonesian,Malay": 0.3806818181818182, + "Vietnamese,Filipino,Spanish": 0.25, + "Vietnamese,Filipino,Malay": 0.32954545454545453, + "Vietnamese,Spanish,Malay": 0.2784090909090909, + "Chinese,Indonesian,Filipino": 0.32386363636363635, + "Chinese,Indonesian,Spanish": 0.21022727272727273, + "Chinese,Indonesian,Malay": 0.3522727272727273, + "Chinese,Filipino,Spanish": 0.23863636363636365, + "Chinese,Filipino,Malay": 0.3181818181818182, + "Chinese,Spanish,Malay": 0.23863636363636365, + "Indonesian,Filipino,Spanish": 0.3068181818181818, + "Indonesian,Filipino,Malay": 0.45454545454545453, + "Indonesian,Spanish,Malay": 0.35795454545454547, + "Filipino,Spanish,Malay": 0.32386363636363635 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.14204545454545456, + "English,Vietnamese,Chinese,Filipino": 0.10795454545454546, + "English,Vietnamese,Chinese,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Malay": 0.11931818181818182, + "English,Vietnamese,Indonesian,Filipino": 0.13636363636363635, + "English,Vietnamese,Indonesian,Spanish": 0.14204545454545456, + "English,Vietnamese,Indonesian,Malay": 0.1534090909090909, + "English,Vietnamese,Filipino,Spanish": 0.125, + "English,Vietnamese,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Spanish,Malay": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino": 0.11931818181818182, + "English,Chinese,Indonesian,Spanish": 0.10227272727272728, + "English,Chinese,Indonesian,Malay": 0.13068181818181818, + "English,Chinese,Filipino,Spanish": 0.10227272727272728, + "English,Chinese,Filipino,Malay": 0.09659090909090909, + "English,Chinese,Spanish,Malay": 0.10227272727272728, + "English,Indonesian,Filipino,Spanish": 0.13636363636363635, + "English,Indonesian,Filipino,Malay": 0.1534090909090909, + "English,Indonesian,Spanish,Malay": 0.17613636363636365, + "English,Filipino,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Filipino": 0.2215909090909091, + "Vietnamese,Chinese,Indonesian,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Filipino,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Spanish,Malay": 0.1875, + "Vietnamese,Indonesian,Filipino,Spanish": 0.1875, + "Vietnamese,Indonesian,Filipino,Malay": 0.26704545454545453, + "Vietnamese,Indonesian,Spanish,Malay": 0.2215909090909091, + "Vietnamese,Filipino,Spanish,Malay": 0.19318181818181818, + "Chinese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "Chinese,Indonesian,Filipino,Malay": 0.26136363636363635, + "Chinese,Indonesian,Spanish,Malay": 0.1875, + "Chinese,Filipino,Spanish,Malay": 0.18181818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.2840909090909091 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.09090909090909091, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.09090909090909091, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.10795454545454546, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.07386363636363637, + "English,Vietnamese,Chinese,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.11931818181818182, + "English,Vietnamese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "English,Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Indonesian,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + } + }, + "AC3_2": 0.35205386497672986, + "AC3_3": 0.2729386197526381, + "AC3_4": 0.20926239167039112, + "AC3_5": 0.15792234346537817, + "AC3_6": 0.1175856094253257, + "AC3_7": 0.08685841667731393 + }, + "prompt_4": { + "overall_acc": 0.29788961038961037, + "language_acc": { + "English": 0.4943181818181818, + "Vietnamese": 0.26704545454545453, + "Chinese": 0.26136363636363635, + "Indonesian": 0.22727272727272727, + "Filipino": 0.26136363636363635, + "Spanish": 0.3352272727272727, + "Malay": 0.23863636363636365 + }, + "consistency_score_2": 0.438582251082251, + "consistency_score_3": 0.24074675324675324, + "consistency_score_4": 0.1451298701298701, + "consistency_score_5": 0.09090909090909091, + "consistency_score_6": 0.05844155844155844, + "consistency_score_7": 0.03977272727272727, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.375, + "English,Chinese": 0.30113636363636365, + "English,Indonesian": 0.3068181818181818, + "English,Filipino": 0.3352272727272727, + "English,Spanish": 0.4659090909090909, + "English,Malay": 0.3181818181818182, + "Vietnamese,Chinese": 0.5227272727272727, + "Vietnamese,Indonesian": 0.4943181818181818, + "Vietnamese,Filipino": 0.4943181818181818, + "Vietnamese,Spanish": 0.39204545454545453, + "Vietnamese,Malay": 0.4715909090909091, + "Chinese,Indonesian": 0.45454545454545453, + "Chinese,Filipino": 0.48863636363636365, + "Chinese,Spanish": 0.3181818181818182, + "Chinese,Malay": 0.42045454545454547, + "Indonesian,Filipino": 0.5852272727272727, + "Indonesian,Spanish": 0.4034090909090909, + "Indonesian,Malay": 0.6306818181818182, + "Filipino,Spanish": 0.42613636363636365, + "Filipino,Malay": 0.5681818181818182, + "Spanish,Malay": 0.4375 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.1875, + "English,Vietnamese,Indonesian": 0.19886363636363635, + "English,Vietnamese,Filipino": 0.19886363636363635, + "English,Vietnamese,Spanish": 0.21022727272727273, + "English,Vietnamese,Malay": 0.20454545454545456, + "English,Chinese,Indonesian": 0.14772727272727273, + "English,Chinese,Filipino": 0.16477272727272727, + "English,Chinese,Spanish": 0.13636363636363635, + "English,Chinese,Malay": 0.14772727272727273, + "English,Indonesian,Filipino": 0.19886363636363635, + "English,Indonesian,Spanish": 0.18181818181818182, + "English,Indonesian,Malay": 0.21022727272727273, + "English,Filipino,Spanish": 0.19886363636363635, + "English,Filipino,Malay": 0.18181818181818182, + "English,Spanish,Malay": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian": 0.3125, + "Vietnamese,Chinese,Filipino": 0.3068181818181818, + "Vietnamese,Chinese,Spanish": 0.20454545454545456, + "Vietnamese,Chinese,Malay": 0.2784090909090909, + "Vietnamese,Indonesian,Filipino": 0.3465909090909091, + "Vietnamese,Indonesian,Spanish": 0.23295454545454544, + "Vietnamese,Indonesian,Malay": 0.35795454545454547, + "Vietnamese,Filipino,Spanish": 0.23295454545454544, + "Vietnamese,Filipino,Malay": 0.32386363636363635, + "Vietnamese,Spanish,Malay": 0.23295454545454544, + "Chinese,Indonesian,Filipino": 0.32386363636363635, + "Chinese,Indonesian,Spanish": 0.18181818181818182, + "Chinese,Indonesian,Malay": 0.30113636363636365, + "Chinese,Filipino,Spanish": 0.21022727272727273, + "Chinese,Filipino,Malay": 0.30113636363636365, + "Chinese,Spanish,Malay": 0.19318181818181818, + "Indonesian,Filipino,Spanish": 0.2897727272727273, + "Indonesian,Filipino,Malay": 0.44886363636363635, + "Indonesian,Spanish,Malay": 0.2897727272727273, + "Filipino,Spanish,Malay": 0.2897727272727273 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino": 0.11363636363636363, + "English,Vietnamese,Chinese,Spanish": 0.11363636363636363, + "English,Vietnamese,Chinese,Malay": 0.11931818181818182, + "English,Vietnamese,Indonesian,Filipino": 0.13636363636363635, + "English,Vietnamese,Indonesian,Spanish": 0.10227272727272728, + "English,Vietnamese,Indonesian,Malay": 0.14772727272727273, + "English,Vietnamese,Filipino,Spanish": 0.11931818181818182, + "English,Vietnamese,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Spanish,Malay": 0.11931818181818182, + "English,Chinese,Indonesian,Filipino": 0.11363636363636363, + "English,Chinese,Indonesian,Spanish": 0.07386363636363637, + "English,Chinese,Indonesian,Malay": 0.10795454545454546, + "English,Chinese,Filipino,Spanish": 0.08522727272727272, + "English,Chinese,Filipino,Malay": 0.09659090909090909, + "English,Chinese,Spanish,Malay": 0.07954545454545454, + "English,Indonesian,Filipino,Spanish": 0.11931818181818182, + "English,Indonesian,Filipino,Malay": 0.14204545454545456, + "English,Indonesian,Spanish,Malay": 0.14204545454545456, + "English,Filipino,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino": 0.2215909090909091, + "Vietnamese,Chinese,Indonesian,Spanish": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Malay": 0.2215909090909091, + "Vietnamese,Chinese,Filipino,Spanish": 0.13636363636363635, + "Vietnamese,Chinese,Filipino,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Indonesian,Filipino,Spanish": 0.18181818181818182, + "Vietnamese,Indonesian,Filipino,Malay": 0.2727272727272727, + "Vietnamese,Indonesian,Spanish,Malay": 0.1875, + "Vietnamese,Filipino,Spanish,Malay": 0.17613636363636365, + "Chinese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "Chinese,Indonesian,Filipino,Malay": 0.24431818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.13636363636363635, + "Chinese,Filipino,Spanish,Malay": 0.14772727272727273, + "Indonesian,Filipino,Spanish,Malay": 0.23295454545454544 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.09659090909090909, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.09090909090909091, + "English,Vietnamese,Filipino,Spanish,Malay": 0.07954545454545454, + "English,Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Chinese,Indonesian,Spanish,Malay": 0.0625, + "English,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.17613636363636365, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0625, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + } + }, + "AC3_2": 0.3547972508273269, + "AC3_3": 0.2662870959166822, + "AC3_4": 0.1951728190752645, + "AC3_5": 0.13930537100231796, + "AC3_6": 0.0977132206926324, + "AC3_7": 0.07017591781138496 + }, + "prompt_5": { + "overall_acc": 0.3043831168831169, + "language_acc": { + "English": 0.5284090909090909, + "Vietnamese": 0.2897727272727273, + "Chinese": 0.2556818181818182, + "Indonesian": 0.2215909090909091, + "Filipino": 0.2556818181818182, + "Spanish": 0.3352272727272727, + "Malay": 0.24431818181818182 + }, + "consistency_score_2": 0.43019480519480524, + "consistency_score_3": 0.23392857142857143, + "consistency_score_4": 0.14237012987012984, + "consistency_score_5": 0.0909090909090909, + "consistency_score_6": 0.05925324675324676, + "consistency_score_7": 0.03977272727272727, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3806818181818182, + "English,Chinese": 0.2897727272727273, + "English,Indonesian": 0.3352272727272727, + "English,Filipino": 0.3352272727272727, + "English,Spanish": 0.4772727272727273, + "English,Malay": 0.2784090909090909, + "Vietnamese,Chinese": 0.5738636363636364, + "Vietnamese,Indonesian": 0.4715909090909091, + "Vietnamese,Filipino": 0.45454545454545453, + "Vietnamese,Spanish": 0.4147727272727273, + "Vietnamese,Malay": 0.4715909090909091, + "Chinese,Indonesian": 0.4431818181818182, + "Chinese,Filipino": 0.4431818181818182, + "Chinese,Spanish": 0.3068181818181818, + "Chinese,Malay": 0.42045454545454547, + "Indonesian,Filipino": 0.5, + "Indonesian,Spanish": 0.4090909090909091, + "Indonesian,Malay": 0.6590909090909091, + "Filipino,Spanish": 0.38636363636363635, + "Filipino,Malay": 0.5511363636363636, + "Spanish,Malay": 0.4318181818181818 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.19886363636363635, + "English,Vietnamese,Indonesian": 0.2159090909090909, + "English,Vietnamese,Filipino": 0.19318181818181818, + "English,Vietnamese,Spanish": 0.23295454545454544, + "English,Vietnamese,Malay": 0.18181818181818182, + "English,Chinese,Indonesian": 0.16477272727272727, + "English,Chinese,Filipino": 0.1534090909090909, + "English,Chinese,Spanish": 0.14204545454545456, + "English,Chinese,Malay": 0.14204545454545456, + "English,Indonesian,Filipino": 0.19318181818181818, + "English,Indonesian,Spanish": 0.21022727272727273, + "English,Indonesian,Malay": 0.22727272727272727, + "English,Filipino,Spanish": 0.19318181818181818, + "English,Filipino,Malay": 0.17045454545454544, + "English,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian": 0.3125, + "Vietnamese,Chinese,Filipino": 0.3125, + "Vietnamese,Chinese,Spanish": 0.23295454545454544, + "Vietnamese,Chinese,Malay": 0.30113636363636365, + "Vietnamese,Indonesian,Filipino": 0.29545454545454547, + "Vietnamese,Indonesian,Spanish": 0.23863636363636365, + "Vietnamese,Indonesian,Malay": 0.35795454545454547, + "Vietnamese,Filipino,Spanish": 0.21022727272727273, + "Vietnamese,Filipino,Malay": 0.2897727272727273, + "Vietnamese,Spanish,Malay": 0.23863636363636365, + "Chinese,Indonesian,Filipino": 0.26704545454545453, + "Chinese,Indonesian,Spanish": 0.1590909090909091, + "Chinese,Indonesian,Malay": 0.30113636363636365, + "Chinese,Filipino,Spanish": 0.18181818181818182, + "Chinese,Filipino,Malay": 0.2840909090909091, + "Chinese,Spanish,Malay": 0.17045454545454544, + "Indonesian,Filipino,Spanish": 0.22727272727272727, + "Indonesian,Filipino,Malay": 0.4090909090909091, + "Indonesian,Spanish,Malay": 0.3125, + "Filipino,Spanish,Malay": 0.26136363636363635 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.14204545454545456, + "English,Vietnamese,Chinese,Filipino": 0.11931818181818182, + "English,Vietnamese,Chinese,Spanish": 0.11931818181818182, + "English,Vietnamese,Chinese,Malay": 0.11931818181818182, + "English,Vietnamese,Indonesian,Filipino": 0.14772727272727273, + "English,Vietnamese,Indonesian,Spanish": 0.14772727272727273, + "English,Vietnamese,Indonesian,Malay": 0.17045454545454544, + "English,Vietnamese,Filipino,Spanish": 0.11931818181818182, + "English,Vietnamese,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Spanish,Malay": 0.13068181818181818, + "English,Chinese,Indonesian,Filipino": 0.11363636363636363, + "English,Chinese,Indonesian,Spanish": 0.08522727272727272, + "English,Chinese,Indonesian,Malay": 0.125, + "English,Chinese,Filipino,Spanish": 0.09090909090909091, + "English,Chinese,Filipino,Malay": 0.09659090909090909, + "English,Chinese,Spanish,Malay": 0.07954545454545454, + "English,Indonesian,Filipino,Spanish": 0.125, + "English,Indonesian,Filipino,Malay": 0.14772727272727273, + "English,Indonesian,Spanish,Malay": 0.16477272727272727, + "English,Filipino,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Spanish": 0.125, + "Vietnamese,Chinese,Indonesian,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Filipino,Spanish": 0.14204545454545456, + "Vietnamese,Chinese,Filipino,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "Vietnamese,Indonesian,Filipino,Malay": 0.23295454545454544, + "Vietnamese,Indonesian,Spanish,Malay": 0.1875, + "Vietnamese,Filipino,Spanish,Malay": 0.14204545454545456, + "Chinese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "Chinese,Indonesian,Filipino,Malay": 0.21022727272727273, + "Chinese,Indonesian,Spanish,Malay": 0.11931818181818182, + "Chinese,Filipino,Spanish,Malay": 0.11931818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.19886363636363635 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.10795454545454546, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.07386363636363637, + "English,Vietnamese,Chinese,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.11931818181818182, + "English,Vietnamese,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "English,Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Spanish,Malay": 0.0625, + "English,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.03977272727272727, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + } + }, + "AC3_2": 0.3565150318808633, + "AC3_3": 0.26454527825518187, + "AC3_4": 0.19399999523745745, + "AC3_5": 0.14000373339747377, + "AC3_6": 0.09919628359980821, + "AC3_7": 0.07035270152329717 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4368932038834951 + }, + "prompt_2": { + "accuracy": 0.4368932038834951 + }, + "prompt_3": { + "accuracy": 0.44660194174757284 + }, + "prompt_4": { + "accuracy": 0.44660194174757284 + }, + "prompt_5": { + "accuracy": 0.42718446601941745 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.20952380952380953 + }, + "prompt_2": { + "accuracy": 0.24761904761904763 + }, + "prompt_3": { + "accuracy": 0.2571428571428571 + }, + "prompt_4": { + "accuracy": 0.19047619047619047 + }, + "prompt_5": { + "accuracy": 0.22857142857142856 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.38317757009345793 + }, + "prompt_2": { + "accuracy": 0.3644859813084112 + }, + "prompt_3": { + "accuracy": 0.38317757009345793 + }, + "prompt_4": { + "accuracy": 0.40186915887850466 + }, + "prompt_5": { + "accuracy": 0.40186915887850466 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24, + "category_acc": { + "brand": 0.1, + "demographics": 0.0, + "biology": 0.3, + "history": 0.13333333333333333, + "literature": 0.5, + "politics": 0.4, + "culture": 0.1, + "film": 0.1, + "law": 0.3, + "geography": 0.4 + } + }, + "prompt_2": { + "accuracy": 0.23, + "category_acc": { + "brand": 0.1, + "demographics": 0.2, + "biology": 0.3, + "history": 0.13333333333333333, + "literature": 0.4, + "politics": 0.3, + "culture": 0.1, + "film": 0.1, + "law": 0.3, + "geography": 0.4 + } + }, + "prompt_3": { + "accuracy": 0.23, + "category_acc": { + "brand": 0.1, + "demographics": 0.2, + "biology": 0.3, + "history": 0.2, + "literature": 0.3, + "politics": 0.2, + "culture": 0.1, + "film": 0.1, + "law": 0.3, + "geography": 0.5 + } + }, + "prompt_4": { + "accuracy": 0.26, + "category_acc": { + "brand": 0.1, + "demographics": 0.4, + "biology": 0.3, + "history": 0.2, + "literature": 0.4, + "politics": 0.4, + "culture": 0.1, + "film": 0.1, + "law": 0.3, + "geography": 0.4 + } + }, + "prompt_5": { + "accuracy": 0.25, + "category_acc": { + "brand": 0.1, + "demographics": 0.2, + "biology": 0.3, + "history": 0.2, + "literature": 0.3, + "politics": 0.2, + "culture": 0.1, + "film": 0.1, + "law": 0.3, + "geography": 0.7 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.17867450505230117 + }, + "prompt_2": { + "bleu_score": 0.17980302230654807 + }, + "prompt_3": { + "bleu_score": 0.18215472310958636 + }, + "prompt_4": { + "bleu_score": 0.1853035053490514 + }, + "prompt_5": { + "bleu_score": 0.16536410084591763 + } }, "indommlu": { "prompt_1": -1, @@ -1412,179 +14717,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.12762453974486163 + }, + "prompt_2": { + "bleu_score": 0.1273039214162624 + }, + "prompt_3": { + "bleu_score": 0.12606074802563352 + }, + "prompt_4": { + "bleu_score": 0.1285673427686895 + }, + "prompt_5": { + "bleu_score": 0.11796400146034605 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.0365805947784829 + }, + "prompt_2": { + "bleu_score": 0.03622566175090841 + }, + "prompt_3": { + "bleu_score": 0.03708876349318651 + }, + "prompt_4": { + "bleu_score": 0.03651562852904686 + }, + "prompt_5": { + "bleu_score": 0.035512765274673605 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.01440465137266746 + }, + "prompt_2": { + "bleu_score": 0.01598935670581948 + }, + "prompt_3": { + "bleu_score": 0.015077840137430588 + }, + "prompt_4": { + "bleu_score": 0.014699606215751218 + }, + "prompt_5": { + "bleu_score": 0.011966513408253962 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.11816374114431785 + }, + "prompt_2": { + "bleu_score": 0.1181825521107562 + }, + "prompt_3": { + "bleu_score": 0.11710861754985476 + }, + "prompt_4": { + "bleu_score": 0.1182541325284562 + }, + "prompt_5": { + "bleu_score": 0.11265109625624253 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.45507584597432904 + }, + "prompt_2": { + "accuracy": 0.43990665110851807 + }, + "prompt_3": { + "accuracy": 0.4364060676779463 + }, + "prompt_4": { + "accuracy": 0.4364060676779463 + }, + "prompt_5": { + "accuracy": 0.43990665110851807 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4140150160886664, + "category_acc": { + "high_school_european_history": 0.5426829268292683, + "business_ethics": 0.5555555555555556, + "clinical_knowledge": 0.44696969696969696, + "medical_genetics": 0.43434343434343436, + "high_school_us_history": 0.5369458128078818, + "high_school_physics": 0.31333333333333335, + "high_school_world_history": 0.576271186440678, + "virology": 0.34545454545454546, + "high_school_microeconomics": 0.459915611814346, + "econometrics": 0.26548672566371684, + "college_computer_science": 0.30303030303030304, + "high_school_biology": 0.42394822006472493, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.3416370106761566, + "philosophy": 0.4161290322580645, + "professional_medicine": 0.4132841328413284, + "nutrition": 0.46557377049180326, + "global_facts": 0.30303030303030304, + "machine_learning": 0.24324324324324326, + "security_studies": 0.5573770491803278, + "public_relations": 0.47706422018348627, + "professional_psychology": 0.37152209492635024, + "prehistory": 0.4055727554179567, + "anatomy": 0.3880597014925373, + "human_sexuality": 0.4, + "college_medicine": 0.4186046511627907, + "high_school_government_and_politics": 0.5416666666666666, + "college_chemistry": 0.36363636363636365, + "logical_fallacies": 0.5246913580246914, + "high_school_geography": 0.5482233502538071, + "elementary_mathematics": 0.27851458885941643, + "human_aging": 0.45495495495495497, + "college_mathematics": 0.26262626262626265, + "high_school_psychology": 0.5680147058823529, + "formal_logic": 0.36, + "high_school_statistics": 0.3488372093023256, + "international_law": 0.5666666666666667, + "high_school_mathematics": 0.2825278810408922, + "high_school_computer_science": 0.36363636363636365, + "conceptual_physics": 0.358974358974359, + "miscellaneous": 0.5127877237851662, + "high_school_chemistry": 0.2871287128712871, + "marketing": 0.7167381974248928, + "professional_law": 0.33724722765818654, + "management": 0.6372549019607843, + "college_physics": 0.31683168316831684, + "jurisprudence": 0.48598130841121495, + "world_religions": 0.38235294117647056, + "sociology": 0.565, + "us_foreign_policy": 0.5757575757575758, + "high_school_macroeconomics": 0.43958868894601544, + "computer_security": 0.47474747474747475, + "moral_scenarios": 0.23937360178970918, + "moral_disputes": 0.4927536231884058, + "electrical_engineering": 0.4027777777777778, + "astronomy": 0.3841059602649007, + "college_biology": 0.3706293706293706 + } + }, + "prompt_2": { + "accuracy": 0.41344297461565965, + "category_acc": { + "high_school_european_history": 0.573170731707317, + "business_ethics": 0.5050505050505051, + "clinical_knowledge": 0.4431818181818182, + "medical_genetics": 0.4444444444444444, + "high_school_us_history": 0.5467980295566502, + "high_school_physics": 0.31333333333333335, + "high_school_world_history": 0.5889830508474576, + "virology": 0.36363636363636365, + "high_school_microeconomics": 0.43037974683544306, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.31313131313131315, + "high_school_biology": 0.43042071197411, + "abstract_algebra": 0.29292929292929293, + "professional_accounting": 0.37722419928825623, + "philosophy": 0.4161290322580645, + "professional_medicine": 0.39114391143911437, + "nutrition": 0.4557377049180328, + "global_facts": 0.29292929292929293, + "machine_learning": 0.3153153153153153, + "security_studies": 0.5532786885245902, + "public_relations": 0.46788990825688076, + "professional_psychology": 0.37479541734860883, + "prehistory": 0.39009287925696595, + "anatomy": 0.417910447761194, + "human_sexuality": 0.4230769230769231, + "college_medicine": 0.43023255813953487, + "high_school_government_and_politics": 0.5260416666666666, + "college_chemistry": 0.3333333333333333, + "logical_fallacies": 0.5308641975308642, + "high_school_geography": 0.5076142131979695, + "elementary_mathematics": 0.2838196286472148, + "human_aging": 0.4009009009009009, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.5477941176470589, + "formal_logic": 0.304, + "high_school_statistics": 0.33488372093023255, + "international_law": 0.525, + "high_school_mathematics": 0.2899628252788104, + "high_school_computer_science": 0.42424242424242425, + "conceptual_physics": 0.32905982905982906, + "miscellaneous": 0.5038363171355499, + "high_school_chemistry": 0.27722772277227725, + "marketing": 0.703862660944206, + "professional_law": 0.33268101761252444, + "management": 0.6078431372549019, + "college_physics": 0.33663366336633666, + "jurisprudence": 0.4672897196261682, + "world_religions": 0.38235294117647056, + "sociology": 0.575, + "us_foreign_policy": 0.6161616161616161, + "high_school_macroeconomics": 0.4241645244215938, + "computer_security": 0.5050505050505051, + "moral_scenarios": 0.27740492170022374, + "moral_disputes": 0.5043478260869565, + "electrical_engineering": 0.4027777777777778, + "astronomy": 0.40397350993377484, + "college_biology": 0.34265734265734266 + } + }, + "prompt_3": { + "accuracy": 0.4150160886664283, + "category_acc": { + "high_school_european_history": 0.5792682926829268, + "business_ethics": 0.5656565656565656, + "clinical_knowledge": 0.4583333333333333, + "medical_genetics": 0.40404040404040403, + "high_school_us_history": 0.5467980295566502, + "high_school_physics": 0.29333333333333333, + "high_school_world_history": 0.5932203389830508, + "virology": 0.3696969696969697, + "high_school_microeconomics": 0.45147679324894513, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.31313131313131315, + "high_school_biology": 0.42394822006472493, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.37722419928825623, + "philosophy": 0.432258064516129, + "professional_medicine": 0.4022140221402214, + "nutrition": 0.46885245901639344, + "global_facts": 0.2828282828282828, + "machine_learning": 0.32432432432432434, + "security_studies": 0.5614754098360656, + "public_relations": 0.47706422018348627, + "professional_psychology": 0.3698854337152209, + "prehistory": 0.39628482972136225, + "anatomy": 0.3880597014925373, + "human_sexuality": 0.4153846153846154, + "college_medicine": 0.42441860465116277, + "high_school_government_and_politics": 0.53125, + "college_chemistry": 0.3333333333333333, + "logical_fallacies": 0.5246913580246914, + "high_school_geography": 0.5126903553299492, + "elementary_mathematics": 0.2838196286472148, + "human_aging": 0.4189189189189189, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.5514705882352942, + "formal_logic": 0.312, + "high_school_statistics": 0.32558139534883723, + "international_law": 0.5333333333333333, + "high_school_mathematics": 0.2936802973977695, + "high_school_computer_science": 0.3939393939393939, + "conceptual_physics": 0.33760683760683763, + "miscellaneous": 0.5, + "high_school_chemistry": 0.31683168316831684, + "marketing": 0.7081545064377682, + "professional_law": 0.33920417482061316, + "management": 0.6470588235294118, + "college_physics": 0.3465346534653465, + "jurisprudence": 0.4485981308411215, + "world_religions": 0.40588235294117647, + "sociology": 0.585, + "us_foreign_policy": 0.6161616161616161, + "high_school_macroeconomics": 0.442159383033419, + "computer_security": 0.5050505050505051, + "moral_scenarios": 0.24384787472035793, + "moral_disputes": 0.5072463768115942, + "electrical_engineering": 0.4027777777777778, + "astronomy": 0.39072847682119205, + "college_biology": 0.3146853146853147 + } + }, + "prompt_4": { + "accuracy": 0.414229531641044, + "category_acc": { + "high_school_european_history": 0.5487804878048781, + "business_ethics": 0.5050505050505051, + "clinical_knowledge": 0.45454545454545453, + "medical_genetics": 0.40404040404040403, + "high_school_us_history": 0.5566502463054187, + "high_school_physics": 0.3, + "high_school_world_history": 0.5847457627118644, + "virology": 0.3696969696969697, + "high_school_microeconomics": 0.45147679324894513, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.3333333333333333, + "high_school_biology": 0.44660194174757284, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.36298932384341637, + "philosophy": 0.41935483870967744, + "professional_medicine": 0.4059040590405904, + "nutrition": 0.46557377049180326, + "global_facts": 0.2828282828282828, + "machine_learning": 0.27927927927927926, + "security_studies": 0.5614754098360656, + "public_relations": 0.44036697247706424, + "professional_psychology": 0.3617021276595745, + "prehistory": 0.3993808049535604, + "anatomy": 0.40298507462686567, + "human_sexuality": 0.4230769230769231, + "college_medicine": 0.4011627906976744, + "high_school_government_and_politics": 0.5416666666666666, + "college_chemistry": 0.3333333333333333, + "logical_fallacies": 0.5308641975308642, + "high_school_geography": 0.5329949238578681, + "elementary_mathematics": 0.2891246684350133, + "human_aging": 0.4144144144144144, + "college_mathematics": 0.25252525252525254, + "high_school_psychology": 0.5569852941176471, + "formal_logic": 0.304, + "high_school_statistics": 0.3488372093023256, + "international_law": 0.5333333333333333, + "high_school_mathematics": 0.2899628252788104, + "high_school_computer_science": 0.37373737373737376, + "conceptual_physics": 0.33760683760683763, + "miscellaneous": 0.5127877237851662, + "high_school_chemistry": 0.2871287128712871, + "marketing": 0.6995708154506438, + "professional_law": 0.33463796477495106, + "management": 0.6176470588235294, + "college_physics": 0.3465346534653465, + "jurisprudence": 0.4672897196261682, + "world_religions": 0.38235294117647056, + "sociology": 0.59, + "us_foreign_policy": 0.6262626262626263, + "high_school_macroeconomics": 0.42930591259640105, + "computer_security": 0.48484848484848486, + "moral_scenarios": 0.25838926174496646, + "moral_disputes": 0.5101449275362319, + "electrical_engineering": 0.4097222222222222, + "astronomy": 0.4105960264900662, + "college_biology": 0.34265734265734266 + } + }, + "prompt_5": { + "accuracy": 0.42002145155523773, + "category_acc": { + "high_school_european_history": 0.573170731707317, + "business_ethics": 0.5252525252525253, + "clinical_knowledge": 0.45075757575757575, + "medical_genetics": 0.42424242424242425, + "high_school_us_history": 0.5467980295566502, + "high_school_physics": 0.31333333333333335, + "high_school_world_history": 0.597457627118644, + "virology": 0.38181818181818183, + "high_school_microeconomics": 0.4345991561181435, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.29292929292929293, + "high_school_biology": 0.44660194174757284, + "abstract_algebra": 0.30303030303030304, + "professional_accounting": 0.3736654804270463, + "philosophy": 0.43870967741935485, + "professional_medicine": 0.3800738007380074, + "nutrition": 0.4721311475409836, + "global_facts": 0.29292929292929293, + "machine_learning": 0.3153153153153153, + "security_studies": 0.5614754098360656, + "public_relations": 0.46788990825688076, + "professional_psychology": 0.3862520458265139, + "prehistory": 0.4148606811145511, + "anatomy": 0.417910447761194, + "human_sexuality": 0.4153846153846154, + "college_medicine": 0.436046511627907, + "high_school_government_and_politics": 0.53125, + "college_chemistry": 0.32323232323232326, + "logical_fallacies": 0.5555555555555556, + "high_school_geography": 0.5532994923857868, + "elementary_mathematics": 0.2864721485411141, + "human_aging": 0.42342342342342343, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.5588235294117647, + "formal_logic": 0.312, + "high_school_statistics": 0.32558139534883723, + "international_law": 0.5666666666666667, + "high_school_mathematics": 0.275092936802974, + "high_school_computer_science": 0.3939393939393939, + "conceptual_physics": 0.33760683760683763, + "miscellaneous": 0.5089514066496164, + "high_school_chemistry": 0.30198019801980197, + "marketing": 0.7296137339055794, + "professional_law": 0.33659491193737767, + "management": 0.6078431372549019, + "college_physics": 0.3564356435643564, + "jurisprudence": 0.5046728971962616, + "world_religions": 0.3764705882352941, + "sociology": 0.595, + "us_foreign_policy": 0.6161616161616161, + "high_school_macroeconomics": 0.42930591259640105, + "computer_security": 0.5151515151515151, + "moral_scenarios": 0.27628635346756153, + "moral_disputes": 0.5014492753623189, + "electrical_engineering": 0.4097222222222222, + "astronomy": 0.39072847682119205, + "college_biology": 0.34965034965034963 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24442793462109955 + }, + "prompt_2": { + "accuracy": 0.2473997028231798 + }, + "prompt_3": { + "accuracy": 0.2652303120356612 + }, + "prompt_4": { + "accuracy": 0.25928677563150077 + }, + "prompt_5": { + "accuracy": 0.24888558692421991 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2503113325031133, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.08333333333333333, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.21428571428571427, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.4166666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.1724137931034483, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.4166666666666667, + "high_school_chemistry": 0.25, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.15384615384615385, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.26666666666666666, + "business_administration": 0.18421052631578946, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.30612244897959184, + "high_school_politics": 0.25, + "high_school_geography": 0.125, + "middle_school_politics": 0.23076923076923078, + "middle_school_geography": 0.17647058823529413, + "modern_chinese_history": 0.17857142857142858, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.2962962962962963, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.25, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.375, + "high_school_history": 0.12, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.19230769230769232, + "sports_science": 0.16666666666666666, + "plant_protection": 0.18518518518518517, + "basic_medicine": 0.125, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.29411764705882354, + "accountant": 0.18518518518518517, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.25925925925925924, + "physician": 0.2962962962962963 + } + }, + "prompt_2": { + "accuracy": 0.2546699875466999, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.20833333333333334, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.2857142857142857, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.4166666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.19047619047619047, + "metrology_engineer": 0.2413793103448276, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.15384615384615385, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.2833333333333333, + "business_administration": 0.15789473684210525, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.32653061224489793, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.17647058823529413, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.25925925925925924, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.25, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.23529411764705882, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.12, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.25, + "sports_science": 0.16666666666666666, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.25, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.16666666666666666, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.2777777777777778, + "physician": 0.2777777777777778 + } + }, + "prompt_3": { + "accuracy": 0.25529265255292655, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.16666666666666666, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.23809523809523808, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.4166666666666667, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.42857142857142855, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.375, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.15384615384615385, + "middle_school_physics": 0.16666666666666666, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.25, + "college_economics": 0.2833333333333333, + "business_administration": 0.15789473684210525, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.2857142857142857, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.16666666666666666, + "middle_school_politics": 0.23076923076923078, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.07142857142857142, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.3333333333333333, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.25, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.23529411764705882, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.12, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.25, + "sports_science": 0.16666666666666666, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.16666666666666666, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.29411764705882354, + "accountant": 0.18518518518518517, + "fire_engineer": 0.3888888888888889, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.25925925925925924, + "physician": 0.2962962962962963 + } + }, + "prompt_4": { + "accuracy": 0.24719800747198006, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.20833333333333334, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.23809523809523808, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.13793103448275862, + "advanced_mathematics": 0.4583333333333333, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.1724137931034483, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.25, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.08333333333333333, + "middle_school_biology": 0.19230769230769232, + "middle_school_physics": 0.16666666666666666, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.31666666666666665, + "business_administration": 0.15789473684210525, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.2857142857142857, + "high_school_politics": 0.25, + "high_school_geography": 0.125, + "middle_school_politics": 0.19230769230769232, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.17857142857142858, + "ideological_and_moral_cultivation": 0.25, + "logic": 0.25925925925925924, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.25, + "art_studies": 0.4473684210526316, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.2, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.19230769230769232, + "sports_science": 0.125, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.125, + "clinical_medicine": 0.18518518518518517, + "urban_and_rural_planner": 0.29411764705882354, + "accountant": 0.18518518518518517, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.2777777777777778, + "physician": 0.2777777777777778 + } + }, + "prompt_5": { + "accuracy": 0.24906600249066002, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.16666666666666666, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.21428571428571427, + "college_physics": 0.08333333333333333, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.4166666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.1724137931034483, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.15384615384615385, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.26666666666666666, + "business_administration": 0.15789473684210525, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.3469387755102041, + "high_school_politics": 0.20833333333333334, + "high_school_geography": 0.16666666666666666, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.14285714285714285, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.25925925925925924, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.25, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.20588235294117646, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.12, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.17307692307692307, + "sports_science": 0.16666666666666666, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.16666666666666666, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.29411764705882354, + "accountant": 0.16666666666666666, + "fire_engineer": 0.16666666666666666, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.25925925925925924, + "physician": 0.3333333333333333 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.26881720430107525 + }, + "prompt_2": { + "accuracy": 0.27956989247311825 + }, + "prompt_3": { + "accuracy": 0.26523297491039427 + }, + "prompt_4": { + "accuracy": 0.27956989247311825 + }, + "prompt_5": { + "accuracy": 0.2867383512544803 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2532377827663616, + "category_acc": { + "agronomy": 0.2781065088757396, + "anatomy": 0.2702702702702703, + "ancient_chinese": 0.23780487804878048, + "arts": 0.25625, + "astronomy": 0.22424242424242424, + "business_ethics": 0.27751196172248804, + "chinese_civil_service_exam": 0.24375, + "chinese_driving_rule": 0.2900763358778626, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.2523364485981308, + "chinese_history": 0.21362229102167182, + "chinese_literature": 0.24019607843137256, + "chinese_teacher_qualification": 0.2681564245810056, + "clinical_knowledge": 0.21940928270042195, + "college_actuarial_science": 0.22641509433962265, + "college_education": 0.3177570093457944, + "college_engineering_hydrology": 0.27358490566037735, + "college_law": 0.19444444444444445, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.22641509433962265, + "college_medicine": 0.2600732600732601, + "computer_science": 0.2549019607843137, + "computer_security": 0.19298245614035087, + "conceptual_physics": 0.24489795918367346, + "construction_project_management": 0.2949640287769784, + "economics": 0.23270440251572327, + "education": 0.26380368098159507, + "electrical_engineering": 0.27325581395348836, + "elementary_chinese": 0.28174603174603174, + "elementary_commonsense": 0.25757575757575757, + "elementary_information_and_technology": 0.25630252100840334, + "elementary_mathematics": 0.3173913043478261, + "ethnology": 0.2740740740740741, + "food_science": 0.2937062937062937, + "genetics": 0.23295454545454544, + "global_facts": 0.22818791946308725, + "high_school_biology": 0.25443786982248523, + "high_school_chemistry": 0.3181818181818182, + "high_school_geography": 0.2542372881355932, + "high_school_mathematics": 0.23780487804878048, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.1958041958041958, + "human_sexuality": 0.23809523809523808, + "international_law": 0.2648648648648649, + "journalism": 0.28488372093023256, + "jurisprudence": 0.25790754257907544, + "legal_and_moral_basis": 0.2570093457943925, + "logical": 0.2682926829268293, + "machine_learning": 0.2459016393442623, + "management": 0.24285714285714285, + "marketing": 0.2388888888888889, + "marxist_theory": 0.2222222222222222, + "modern_chinese": 0.2672413793103448, + "nutrition": 0.2482758620689655, + "philosophy": 0.29523809523809524, + "professional_accounting": 0.25142857142857145, + "professional_law": 0.27488151658767773, + "professional_medicine": 0.2473404255319149, + "professional_psychology": 0.23706896551724138, + "public_relations": 0.25862068965517243, + "security_study": 0.24444444444444444, + "sociology": 0.21238938053097345, + "sports_science": 0.2787878787878788, + "traditional_chinese_medicine": 0.2756756756756757, + "virology": 0.2485207100591716, + "world_history": 0.2236024844720497, + "world_religions": 0.26875 + } + }, + "prompt_2": { + "accuracy": 0.24926610257295803, + "category_acc": { + "agronomy": 0.23076923076923078, + "anatomy": 0.2702702702702703, + "ancient_chinese": 0.2804878048780488, + "arts": 0.2625, + "astronomy": 0.23636363636363636, + "business_ethics": 0.24880382775119617, + "chinese_civil_service_exam": 0.25625, + "chinese_driving_rule": 0.2595419847328244, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.2616822429906542, + "chinese_history": 0.25077399380804954, + "chinese_literature": 0.2696078431372549, + "chinese_teacher_qualification": 0.22346368715083798, + "clinical_knowledge": 0.2109704641350211, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.2616822429906542, + "college_engineering_hydrology": 0.25471698113207547, + "college_law": 0.28703703703703703, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.2169811320754717, + "college_medicine": 0.2271062271062271, + "computer_science": 0.27450980392156865, + "computer_security": 0.21052631578947367, + "conceptual_physics": 0.24489795918367346, + "construction_project_management": 0.26618705035971224, + "economics": 0.23270440251572327, + "education": 0.24539877300613497, + "electrical_engineering": 0.2441860465116279, + "elementary_chinese": 0.2857142857142857, + "elementary_commonsense": 0.24242424242424243, + "elementary_information_and_technology": 0.25210084033613445, + "elementary_mathematics": 0.26956521739130435, + "ethnology": 0.28888888888888886, + "food_science": 0.25874125874125875, + "genetics": 0.25, + "global_facts": 0.24161073825503357, + "high_school_biology": 0.25443786982248523, + "high_school_chemistry": 0.25757575757575757, + "high_school_geography": 0.2796610169491525, + "high_school_mathematics": 0.23780487804878048, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.2517482517482518, + "human_sexuality": 0.23809523809523808, + "international_law": 0.2648648648648649, + "journalism": 0.26744186046511625, + "jurisprudence": 0.24574209245742093, + "legal_and_moral_basis": 0.22897196261682243, + "logical": 0.2845528455284553, + "machine_learning": 0.22131147540983606, + "management": 0.22857142857142856, + "marketing": 0.22777777777777777, + "marxist_theory": 0.2328042328042328, + "modern_chinese": 0.23275862068965517, + "nutrition": 0.25517241379310346, + "philosophy": 0.29523809523809524, + "professional_accounting": 0.2342857142857143, + "professional_law": 0.25118483412322273, + "professional_medicine": 0.2526595744680851, + "professional_psychology": 0.25, + "public_relations": 0.21839080459770116, + "security_study": 0.24444444444444444, + "sociology": 0.252212389380531, + "sports_science": 0.24848484848484848, + "traditional_chinese_medicine": 0.2810810810810811, + "virology": 0.22485207100591717, + "world_history": 0.22981366459627328, + "world_religions": 0.25 + } + }, + "prompt_3": { + "accuracy": 0.2540148506302884, + "category_acc": { + "agronomy": 0.23668639053254437, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.1951219512195122, + "arts": 0.25625, + "astronomy": 0.28484848484848485, + "business_ethics": 0.2727272727272727, + "chinese_civil_service_exam": 0.2375, + "chinese_driving_rule": 0.24427480916030533, + "chinese_food_culture": 0.23529411764705882, + "chinese_foreign_policy": 0.19626168224299065, + "chinese_history": 0.2476780185758514, + "chinese_literature": 0.25980392156862747, + "chinese_teacher_qualification": 0.2011173184357542, + "clinical_knowledge": 0.24472573839662448, + "college_actuarial_science": 0.22641509433962265, + "college_education": 0.2616822429906542, + "college_engineering_hydrology": 0.25471698113207547, + "college_law": 0.2222222222222222, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.2358490566037736, + "college_medicine": 0.2673992673992674, + "computer_science": 0.2647058823529412, + "computer_security": 0.2573099415204678, + "conceptual_physics": 0.24489795918367346, + "construction_project_management": 0.28776978417266186, + "economics": 0.25157232704402516, + "education": 0.25153374233128833, + "electrical_engineering": 0.2558139534883721, + "elementary_chinese": 0.2857142857142857, + "elementary_commonsense": 0.26262626262626265, + "elementary_information_and_technology": 0.3235294117647059, + "elementary_mathematics": 0.26521739130434785, + "ethnology": 0.2740740740740741, + "food_science": 0.26573426573426573, + "genetics": 0.2840909090909091, + "global_facts": 0.22818791946308725, + "high_school_biology": 0.27218934911242604, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.2542372881355932, + "high_school_mathematics": 0.2621951219512195, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.3076923076923077, + "human_sexuality": 0.2698412698412698, + "international_law": 0.24324324324324326, + "journalism": 0.26744186046511625, + "jurisprudence": 0.27007299270072993, + "legal_and_moral_basis": 0.2570093457943925, + "logical": 0.2764227642276423, + "machine_learning": 0.23770491803278687, + "management": 0.22380952380952382, + "marketing": 0.19444444444444445, + "marxist_theory": 0.25396825396825395, + "modern_chinese": 0.2413793103448276, + "nutrition": 0.2689655172413793, + "philosophy": 0.29523809523809524, + "professional_accounting": 0.24, + "professional_law": 0.23222748815165878, + "professional_medicine": 0.2473404255319149, + "professional_psychology": 0.25, + "public_relations": 0.23563218390804597, + "security_study": 0.23703703703703705, + "sociology": 0.23893805309734514, + "sports_science": 0.23636363636363636, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.23668639053254437, + "world_history": 0.2670807453416149, + "world_religions": 0.26875 + } + }, + "prompt_4": { + "accuracy": 0.25375582800897944, + "category_acc": { + "agronomy": 0.23076923076923078, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.24390243902439024, + "arts": 0.25625, + "astronomy": 0.22424242424242424, + "business_ethics": 0.24880382775119617, + "chinese_civil_service_exam": 0.275, + "chinese_driving_rule": 0.24427480916030533, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.1588785046728972, + "chinese_history": 0.25696594427244585, + "chinese_literature": 0.2696078431372549, + "chinese_teacher_qualification": 0.2737430167597765, + "clinical_knowledge": 0.28270042194092826, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.2803738317757009, + "college_engineering_hydrology": 0.3018867924528302, + "college_law": 0.2037037037037037, + "college_mathematics": 0.20952380952380953, + "college_medical_statistics": 0.25471698113207547, + "college_medicine": 0.2564102564102564, + "computer_science": 0.24019607843137256, + "computer_security": 0.23976608187134502, + "conceptual_physics": 0.25170068027210885, + "construction_project_management": 0.2446043165467626, + "economics": 0.22641509433962265, + "education": 0.25153374233128833, + "electrical_engineering": 0.2616279069767442, + "elementary_chinese": 0.2619047619047619, + "elementary_commonsense": 0.23737373737373738, + "elementary_information_and_technology": 0.27310924369747897, + "elementary_mathematics": 0.26956521739130435, + "ethnology": 0.2814814814814815, + "food_science": 0.25874125874125875, + "genetics": 0.2556818181818182, + "global_facts": 0.26174496644295303, + "high_school_biology": 0.22485207100591717, + "high_school_chemistry": 0.25, + "high_school_geography": 0.2457627118644068, + "high_school_mathematics": 0.24390243902439024, + "high_school_physics": 0.24545454545454545, + "high_school_politics": 0.25874125874125875, + "human_sexuality": 0.2619047619047619, + "international_law": 0.2864864864864865, + "journalism": 0.26744186046511625, + "jurisprudence": 0.25304136253041365, + "legal_and_moral_basis": 0.2757009345794392, + "logical": 0.2601626016260163, + "machine_learning": 0.22950819672131148, + "management": 0.23809523809523808, + "marketing": 0.25, + "marxist_theory": 0.2328042328042328, + "modern_chinese": 0.2672413793103448, + "nutrition": 0.2620689655172414, + "philosophy": 0.2761904761904762, + "professional_accounting": 0.2571428571428571, + "professional_law": 0.27014218009478674, + "professional_medicine": 0.24468085106382978, + "professional_psychology": 0.25862068965517243, + "public_relations": 0.25287356321839083, + "security_study": 0.22962962962962963, + "sociology": 0.25663716814159293, + "sports_science": 0.2787878787878788, + "traditional_chinese_medicine": 0.2594594594594595, + "virology": 0.2485207100591716, + "world_history": 0.2670807453416149, + "world_religions": 0.2625 + } + }, + "prompt_5": { + "accuracy": 0.2557416681056812, + "category_acc": { + "agronomy": 0.26627218934911245, + "anatomy": 0.2702702702702703, + "ancient_chinese": 0.25, + "arts": 0.2375, + "astronomy": 0.23636363636363636, + "business_ethics": 0.2679425837320574, + "chinese_civil_service_exam": 0.25, + "chinese_driving_rule": 0.25190839694656486, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.22429906542056074, + "chinese_history": 0.2476780185758514, + "chinese_literature": 0.25980392156862747, + "chinese_teacher_qualification": 0.24581005586592178, + "clinical_knowledge": 0.21940928270042195, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.2803738317757009, + "college_engineering_hydrology": 0.2641509433962264, + "college_law": 0.25, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.20754716981132076, + "college_medicine": 0.26373626373626374, + "computer_science": 0.2696078431372549, + "computer_security": 0.21052631578947367, + "conceptual_physics": 0.22448979591836735, + "construction_project_management": 0.26618705035971224, + "economics": 0.25157232704402516, + "education": 0.27607361963190186, + "electrical_engineering": 0.2558139534883721, + "elementary_chinese": 0.30158730158730157, + "elementary_commonsense": 0.25757575757575757, + "elementary_information_and_technology": 0.23949579831932774, + "elementary_mathematics": 0.25217391304347825, + "ethnology": 0.28888888888888886, + "food_science": 0.2727272727272727, + "genetics": 0.25, + "global_facts": 0.2348993288590604, + "high_school_biology": 0.2485207100591716, + "high_school_chemistry": 0.26515151515151514, + "high_school_geography": 0.2627118644067797, + "high_school_mathematics": 0.25, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.2867132867132867, + "human_sexuality": 0.2698412698412698, + "international_law": 0.2972972972972973, + "journalism": 0.26744186046511625, + "jurisprudence": 0.26034063260340634, + "legal_and_moral_basis": 0.24299065420560748, + "logical": 0.2764227642276423, + "machine_learning": 0.23770491803278687, + "management": 0.24285714285714285, + "marketing": 0.22777777777777777, + "marxist_theory": 0.21164021164021163, + "modern_chinese": 0.21551724137931033, + "nutrition": 0.32413793103448274, + "philosophy": 0.29523809523809524, + "professional_accounting": 0.25142857142857145, + "professional_law": 0.2890995260663507, + "professional_medicine": 0.24468085106382978, + "professional_psychology": 0.27155172413793105, + "public_relations": 0.23563218390804597, + "security_study": 0.2222222222222222, + "sociology": 0.23008849557522124, + "sports_science": 0.28484848484848485, + "traditional_chinese_medicine": 0.2864864864864865, + "virology": 0.2485207100591716, + "world_history": 0.2546583850931677, + "world_religions": 0.2375 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3939393939393939 + }, + "prompt_2": { + "accuracy": 0.3333333333333333 + }, + "prompt_3": { + "accuracy": 0.36363636363636365 + }, + "prompt_4": { + "accuracy": 0.3333333333333333 + }, + "prompt_5": { + "accuracy": 0.30303030303030304 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.27045454545454545 + }, + "prompt_2": { + "accuracy": 0.30227272727272725 + }, + "prompt_3": { + "accuracy": 0.23863636363636365 + }, + "prompt_4": { + "accuracy": 0.325 + }, + "prompt_5": { + "accuracy": 0.2863636363636364 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3447457627118644 + }, + "prompt_2": { + "accuracy": 0.35084745762711866 + }, + "prompt_3": { + "accuracy": 0.3288135593220339 + }, + "prompt_4": { + "accuracy": 0.3505084745762712 + }, + "prompt_5": { + "accuracy": 0.3325423728813559 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.27412116679132387 + }, + "prompt_2": { + "accuracy": 0.27673896783844426 + }, + "prompt_3": { + "accuracy": 0.27673896783844426 + }, + "prompt_4": { + "accuracy": 0.27225130890052357 + }, + "prompt_5": { + "accuracy": 0.2782348541510845 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8745712885840274 + }, + "prompt_2": { + "accuracy": 0.8682018618324351 + }, + "prompt_3": { + "accuracy": 0.8740813326800588 + }, + "prompt_4": { + "accuracy": 0.8716315531602156 + }, + "prompt_5": { + "accuracy": 0.8721215090641842 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.5185502413989868, + "rouge2": 0.26902695086776107, + "rougeL": 0.4326615813336434, + "avg_rouge": 0.4067462578667971 + }, + "prompt_2": { + "rouge1": 0.5195081437230504, + "rouge2": 0.26978919306949084, + "rougeL": 0.4321525419405497, + "avg_rouge": 0.407149959577697 + }, + "prompt_3": { + "rouge1": 0.5164627525058035, + "rouge2": 0.2634573732082798, + "rougeL": 0.428210120875558, + "avg_rouge": 0.4027100821965471 + }, + "prompt_4": { + "rouge1": 0.5187636030296008, + "rouge2": 0.2682403483474693, + "rougeL": 0.431688121834721, + "avg_rouge": 0.40623069107059706 + }, + "prompt_5": { + "rouge1": 0.5174920588119234, + "rouge2": 0.2681096266000787, + "rougeL": 0.43091196555067857, + "avg_rouge": 0.40550455032089355 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.24473106678549608, + "rouge2": 0.07497275887631535, + "rougeL": 0.18455304751304932, + "avg_rouge": 0.16808562439162025 + }, + "prompt_2": { + "rouge1": 0.24400060316989267, + "rouge2": 0.07411740576777581, + "rougeL": 0.1859320919925664, + "avg_rouge": 0.16801670031007832 + }, + "prompt_3": { + "rouge1": 0.23258485291513853, + "rouge2": 0.06633834569497556, + "rougeL": 0.17354782143627387, + "avg_rouge": 0.15749034001546267 + }, + "prompt_4": { + "rouge1": 0.2330099006258128, + "rouge2": 0.06574260390432406, + "rougeL": 0.17380664311339997, + "avg_rouge": 0.15751971588117894 + }, + "prompt_5": { + "rouge1": 0.24191879995962773, + "rouge2": 0.06950787790931097, + "rougeL": 0.18438164940683938, + "avg_rouge": 0.16526944242525934 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.9529816513761468 + }, + "prompt_2": { + "accuracy": 0.9506880733944955 + }, + "prompt_3": { + "accuracy": 0.9495412844036697 + }, + "prompt_4": { + "accuracy": 0.9529816513761468 + }, + "prompt_5": { + "accuracy": 0.948394495412844 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.716203259827421 + }, + "prompt_2": { + "accuracy": 0.7277085330776606 + }, + "prompt_3": { + "accuracy": 0.7229146692233941 + }, + "prompt_4": { + "accuracy": 0.713326941514861 + }, + "prompt_5": { + "accuracy": 0.7248322147651006 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.824 + }, + "prompt_2": { + "accuracy": 0.853 + }, + "prompt_3": { + "accuracy": 0.8215 + }, + "prompt_4": { + "accuracy": 0.8275 + }, + "prompt_5": { + "accuracy": 0.8465 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.703 + }, + "prompt_2": { + "accuracy": 0.694 + }, + "prompt_3": { + "accuracy": 0.708 + }, + "prompt_4": { + "accuracy": 0.705 + }, + "prompt_5": { + "accuracy": 0.7005 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.82 + }, + "prompt_2": { + "accuracy": 0.8055 + }, + "prompt_3": { + "accuracy": 0.761 + }, + "prompt_4": { + "accuracy": 0.865 + }, + "prompt_5": { + "accuracy": 0.895 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5915492957746479 + }, + "prompt_2": { + "accuracy": 0.6056338028169014 + }, + "prompt_3": { + "accuracy": 0.5774647887323944 + }, + "prompt_4": { + "accuracy": 0.5633802816901409 + }, + "prompt_5": { + "accuracy": 0.5633802816901409 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8483754512635379 + }, + "prompt_2": { + "accuracy": 0.8267148014440433 + }, + "prompt_3": { + "accuracy": 0.8411552346570397 + }, + "prompt_4": { + "accuracy": 0.8122743682310469 + }, + "prompt_5": { + "accuracy": 0.8158844765342961 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7696078431372549 + }, + "prompt_2": { + "accuracy": 0.8137254901960784 + }, + "prompt_3": { + "accuracy": 0.7647058823529411 + }, + "prompt_4": { + "accuracy": 0.8137254901960784 + }, + "prompt_5": { + "accuracy": 0.803921568627451 + } } }, "five_shot": { @@ -1694,53 +16189,1733 @@ "model_link": "https://huggingface.co/google/flan-t5-xl", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.36095238095238097, + "language_acc": { + "Malay": 0.34, + "English": 0.5666666666666667, + "Vietnamese": 0.24666666666666667, + "Spanish": 0.44666666666666666, + "Indonesian": 0.37333333333333335, + "Filipino": 0.2866666666666667, + "Chinese": 0.26666666666666666 + }, + "consistency_score_2": 0.5158730158730158, + "consistency_score_3": 0.3371428571428572, + "consistency_score_4": 0.24438095238095237, + "consistency_score_5": 0.18666666666666665, + "consistency_score_6": 0.14761904761904762, + "consistency_score_7": 0.12, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.48, + "Malay,Vietnamese": 0.5266666666666666, + "Malay,Spanish": 0.5, + "Malay,Indonesian": 0.6666666666666666, + "Malay,Filipino": 0.5866666666666667, + "Malay,Chinese": 0.5533333333333333, + "English,Vietnamese": 0.36666666666666664, + "English,Spanish": 0.6, + "English,Indonesian": 0.4866666666666667, + "English,Filipino": 0.5266666666666666, + "English,Chinese": 0.34, + "Vietnamese,Spanish": 0.44, + "Vietnamese,Indonesian": 0.5266666666666666, + "Vietnamese,Filipino": 0.5066666666666667, + "Vietnamese,Chinese": 0.66, + "Spanish,Indonesian": 0.5466666666666666, + "Spanish,Filipino": 0.49333333333333335, + "Spanish,Chinese": 0.4266666666666667, + "Indonesian,Filipino": 0.5266666666666666, + "Indonesian,Chinese": 0.5666666666666667, + "Filipino,Chinese": 0.5066666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.26666666666666666, + "Malay,English,Spanish": 0.36, + "Malay,English,Indonesian": 0.36666666666666664, + "Malay,English,Filipino": 0.36, + "Malay,English,Chinese": 0.26, + "Malay,Vietnamese,Spanish": 0.31333333333333335, + "Malay,Vietnamese,Indonesian": 0.4, + "Malay,Vietnamese,Filipino": 0.37333333333333335, + "Malay,Vietnamese,Chinese": 0.42, + "Malay,Spanish,Indonesian": 0.41333333333333333, + "Malay,Spanish,Filipino": 0.36, + "Malay,Spanish,Chinese": 0.29333333333333333, + "Malay,Indonesian,Filipino": 0.4533333333333333, + "Malay,Indonesian,Chinese": 0.44, + "Malay,Filipino,Chinese": 0.38666666666666666, + "English,Vietnamese,Spanish": 0.28, + "English,Vietnamese,Indonesian": 0.2733333333333333, + "English,Vietnamese,Filipino": 0.26666666666666666, + "English,Vietnamese,Chinese": 0.25333333333333335, + "English,Spanish,Indonesian": 0.36666666666666664, + "English,Spanish,Filipino": 0.38, + "English,Spanish,Chinese": 0.26, + "English,Indonesian,Filipino": 0.3333333333333333, + "English,Indonesian,Chinese": 0.2866666666666667, + "English,Filipino,Chinese": 0.26666666666666666, + "Vietnamese,Spanish,Indonesian": 0.3333333333333333, + "Vietnamese,Spanish,Filipino": 0.2866666666666667, + "Vietnamese,Spanish,Chinese": 0.32, + "Vietnamese,Indonesian,Filipino": 0.3333333333333333, + "Vietnamese,Indonesian,Chinese": 0.4266666666666667, + "Vietnamese,Filipino,Chinese": 0.38, + "Spanish,Indonesian,Filipino": 0.34, + "Spanish,Indonesian,Chinese": 0.32666666666666666, + "Spanish,Filipino,Chinese": 0.26666666666666666, + "Indonesian,Filipino,Chinese": 0.35333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.22, + "Malay,English,Vietnamese,Indonesian": 0.22, + "Malay,English,Vietnamese,Filipino": 0.22, + "Malay,English,Vietnamese,Chinese": 0.19333333333333333, + "Malay,English,Spanish,Indonesian": 0.3, + "Malay,English,Spanish,Filipino": 0.28, + "Malay,English,Spanish,Chinese": 0.20666666666666667, + "Malay,English,Indonesian,Filipino": 0.2866666666666667, + "Malay,English,Indonesian,Chinese": 0.22666666666666666, + "Malay,English,Filipino,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Indonesian": 0.2733333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.24, + "Malay,Vietnamese,Spanish,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.3, + "Malay,Vietnamese,Indonesian,Chinese": 0.3333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.29333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.3, + "Malay,Spanish,Indonesian,Chinese": 0.2733333333333333, + "Malay,Spanish,Filipino,Chinese": 0.22666666666666666, + "Malay,Indonesian,Filipino,Chinese": 0.31333333333333335, + "English,Vietnamese,Spanish,Indonesian": 0.22666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.21333333333333335, + "English,Vietnamese,Spanish,Chinese": 0.2, + "English,Vietnamese,Indonesian,Filipino": 0.21333333333333335, + "English,Vietnamese,Indonesian,Chinese": 0.22666666666666666, + "English,Vietnamese,Filipino,Chinese": 0.19333333333333333, + "English,Spanish,Indonesian,Filipino": 0.26666666666666666, + "English,Spanish,Indonesian,Chinese": 0.23333333333333334, + "English,Spanish,Filipino,Chinese": 0.2, + "English,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino": 0.22, + "Vietnamese,Spanish,Indonesian,Chinese": 0.26666666666666666, + "Vietnamese,Spanish,Filipino,Chinese": 0.2, + "Vietnamese,Indonesian,Filipino,Chinese": 0.2733333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.19333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.18, + "Malay,English,Vietnamese,Spanish,Chinese": 0.16, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "Malay,English,Vietnamese,Filipino,Chinese": 0.16, + "Malay,English,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.19333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.17333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.22, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.24, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.17333333333333334, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.18666666666666668, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.16666666666666666, + "English,Spanish,Indonesian,Filipino,Chinese": 0.18, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.14, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.16, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12 + } + }, + "AC3_2": 0.4247267335124726, + "AC3_3": 0.3486415902833266, + "AC3_4": 0.29144235663020845, + "AC3_5": 0.24607536227390503, + "AC3_6": 0.20954164433190306, + "AC3_7": 0.18011881184373768 + }, + "prompt_2": { + "overall_acc": 0.36, + "language_acc": { + "Malay": 0.36, + "English": 0.5066666666666667, + "Vietnamese": 0.25333333333333335, + "Spanish": 0.4533333333333333, + "Indonesian": 0.38, + "Filipino": 0.28, + "Chinese": 0.2866666666666667 + }, + "consistency_score_2": 0.5314285714285715, + "consistency_score_3": 0.3499047619047619, + "consistency_score_4": 0.2512380952380952, + "consistency_score_5": 0.1888888888888889, + "consistency_score_6": 0.1457142857142857, + "consistency_score_7": 0.11333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5, + "Malay,Vietnamese": 0.5266666666666666, + "Malay,Spanish": 0.52, + "Malay,Indonesian": 0.6666666666666666, + "Malay,Filipino": 0.6266666666666667, + "Malay,Chinese": 0.5866666666666667, + "English,Vietnamese": 0.36666666666666664, + "English,Spanish": 0.6266666666666667, + "English,Indonesian": 0.52, + "English,Filipino": 0.48, + "English,Chinese": 0.37333333333333335, + "Vietnamese,Spanish": 0.4533333333333333, + "Vietnamese,Indonesian": 0.5466666666666666, + "Vietnamese,Filipino": 0.5333333333333333, + "Vietnamese,Chinese": 0.6466666666666666, + "Spanish,Indonesian": 0.5466666666666666, + "Spanish,Filipino": 0.5, + "Spanish,Chinese": 0.43333333333333335, + "Indonesian,Filipino": 0.5733333333333334, + "Indonesian,Chinese": 0.58, + "Filipino,Chinese": 0.5533333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.26666666666666666, + "Malay,English,Spanish": 0.38666666666666666, + "Malay,English,Indonesian": 0.38666666666666666, + "Malay,English,Filipino": 0.35333333333333333, + "Malay,English,Chinese": 0.29333333333333333, + "Malay,Vietnamese,Spanish": 0.31333333333333335, + "Malay,Vietnamese,Indonesian": 0.41333333333333333, + "Malay,Vietnamese,Filipino": 0.38666666666666666, + "Malay,Vietnamese,Chinese": 0.42, + "Malay,Spanish,Indonesian": 0.41333333333333333, + "Malay,Spanish,Filipino": 0.38, + "Malay,Spanish,Chinese": 0.31333333333333335, + "Malay,Indonesian,Filipino": 0.48, + "Malay,Indonesian,Chinese": 0.46, + "Malay,Filipino,Chinese": 0.4266666666666667, + "English,Vietnamese,Spanish": 0.2866666666666667, + "English,Vietnamese,Indonesian": 0.29333333333333333, + "English,Vietnamese,Filipino": 0.25333333333333335, + "English,Vietnamese,Chinese": 0.26, + "English,Spanish,Indonesian": 0.3933333333333333, + "English,Spanish,Filipino": 0.36, + "English,Spanish,Chinese": 0.2733333333333333, + "English,Indonesian,Filipino": 0.3466666666666667, + "English,Indonesian,Chinese": 0.31333333333333335, + "English,Filipino,Chinese": 0.2733333333333333, + "Vietnamese,Spanish,Indonesian": 0.3333333333333333, + "Vietnamese,Spanish,Filipino": 0.29333333333333333, + "Vietnamese,Spanish,Chinese": 0.32, + "Vietnamese,Indonesian,Filipino": 0.37333333333333335, + "Vietnamese,Indonesian,Chinese": 0.43333333333333335, + "Vietnamese,Filipino,Chinese": 0.4066666666666667, + "Spanish,Indonesian,Filipino": 0.3466666666666667, + "Spanish,Indonesian,Chinese": 0.31333333333333335, + "Spanish,Filipino,Chinese": 0.2866666666666667, + "Indonesian,Filipino,Chinese": 0.3933333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.22, + "Malay,English,Vietnamese,Indonesian": 0.24, + "Malay,English,Vietnamese,Filipino": 0.20666666666666667, + "Malay,English,Vietnamese,Chinese": 0.20666666666666667, + "Malay,English,Spanish,Indonesian": 0.31333333333333335, + "Malay,English,Spanish,Filipino": 0.28, + "Malay,English,Spanish,Chinese": 0.22666666666666666, + "Malay,English,Indonesian,Filipino": 0.29333333333333333, + "Malay,English,Indonesian,Chinese": 0.25333333333333335, + "Malay,English,Filipino,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Indonesian": 0.2733333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.24, + "Malay,Vietnamese,Spanish,Chinese": 0.24, + "Malay,Vietnamese,Indonesian,Filipino": 0.32, + "Malay,Vietnamese,Indonesian,Chinese": 0.34, + "Malay,Vietnamese,Filipino,Chinese": 0.31333333333333335, + "Malay,Spanish,Indonesian,Filipino": 0.30666666666666664, + "Malay,Spanish,Indonesian,Chinese": 0.26666666666666666, + "Malay,Spanish,Filipino,Chinese": 0.23333333333333334, + "Malay,Indonesian,Filipino,Chinese": 0.35333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.23333333333333334, + "English,Vietnamese,Spanish,Filipino": 0.2, + "English,Vietnamese,Spanish,Chinese": 0.2, + "English,Vietnamese,Indonesian,Filipino": 0.22666666666666666, + "English,Vietnamese,Indonesian,Chinese": 0.24, + "English,Vietnamese,Filipino,Chinese": 0.19333333333333333, + "English,Spanish,Indonesian,Filipino": 0.26666666666666666, + "English,Spanish,Indonesian,Chinese": 0.22666666666666666, + "English,Spanish,Filipino,Chinese": 0.2, + "English,Indonesian,Filipino,Chinese": 0.24, + "Vietnamese,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian,Chinese": 0.25333333333333335, + "Vietnamese,Spanish,Filipino,Chinese": 0.21333333333333335, + "Vietnamese,Indonesian,Filipino,Chinese": 0.29333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.2, + "Malay,English,Vietnamese,Spanish,Filipino": 0.16666666666666666, + "Malay,English,Vietnamese,Spanish,Chinese": 0.16666666666666666, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.19333333333333333, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.19333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.16, + "Malay,English,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.19333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.16666666666666666, + "Malay,English,Indonesian,Filipino,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.18, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.26, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.17333333333333334, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.18, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.14, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.18, + "English,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.12, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + } + }, + "AC3_2": 0.4292307691826183, + "AC3_3": 0.3548806009695969, + "AC3_4": 0.2959426612167747, + "AC3_5": 0.24777327930708587, + "AC3_6": 0.2074576270776214, + "AC3_7": 0.17239436616076173 + }, + "prompt_3": { + "overall_acc": 0.359047619047619, + "language_acc": { + "Malay": 0.3333333333333333, + "English": 0.5266666666666666, + "Vietnamese": 0.25333333333333335, + "Spanish": 0.44, + "Indonesian": 0.3933333333333333, + "Filipino": 0.3, + "Chinese": 0.26666666666666666 + }, + "consistency_score_2": 0.54, + "consistency_score_3": 0.3615238095238096, + "consistency_score_4": 0.2643809523809524, + "consistency_score_5": 0.20380952380952383, + "consistency_score_6": 0.16285714285714284, + "consistency_score_7": 0.13333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.49333333333333335, + "Malay,Vietnamese": 0.5533333333333333, + "Malay,Spanish": 0.5066666666666667, + "Malay,Indonesian": 0.6933333333333334, + "Malay,Filipino": 0.6666666666666666, + "Malay,Chinese": 0.58, + "English,Vietnamese": 0.38666666666666666, + "English,Spanish": 0.5933333333333334, + "English,Indonesian": 0.5333333333333333, + "English,Filipino": 0.5066666666666667, + "English,Chinese": 0.38, + "Vietnamese,Spanish": 0.44666666666666666, + "Vietnamese,Indonesian": 0.5666666666666667, + "Vietnamese,Filipino": 0.5333333333333333, + "Vietnamese,Chinese": 0.66, + "Spanish,Indonesian": 0.5466666666666666, + "Spanish,Filipino": 0.52, + "Spanish,Chinese": 0.43333333333333335, + "Indonesian,Filipino": 0.6, + "Indonesian,Chinese": 0.6, + "Filipino,Chinese": 0.54 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.28, + "Malay,English,Spanish": 0.36, + "Malay,English,Indonesian": 0.4066666666666667, + "Malay,English,Filipino": 0.38, + "Malay,English,Chinese": 0.2866666666666667, + "Malay,Vietnamese,Spanish": 0.31333333333333335, + "Malay,Vietnamese,Indonesian": 0.44666666666666666, + "Malay,Vietnamese,Filipino": 0.42, + "Malay,Vietnamese,Chinese": 0.44, + "Malay,Spanish,Indonesian": 0.41333333333333333, + "Malay,Spanish,Filipino": 0.3933333333333333, + "Malay,Spanish,Chinese": 0.3, + "Malay,Indonesian,Filipino": 0.5266666666666666, + "Malay,Indonesian,Chinese": 0.47333333333333333, + "Malay,Filipino,Chinese": 0.44, + "English,Vietnamese,Spanish": 0.2733333333333333, + "English,Vietnamese,Indonesian": 0.30666666666666664, + "English,Vietnamese,Filipino": 0.28, + "English,Vietnamese,Chinese": 0.2866666666666667, + "English,Spanish,Indonesian": 0.38666666666666666, + "English,Spanish,Filipino": 0.37333333333333335, + "English,Spanish,Chinese": 0.2733333333333333, + "English,Indonesian,Filipino": 0.37333333333333335, + "English,Indonesian,Chinese": 0.3333333333333333, + "English,Filipino,Chinese": 0.2866666666666667, + "Vietnamese,Spanish,Indonesian": 0.3333333333333333, + "Vietnamese,Spanish,Filipino": 0.29333333333333333, + "Vietnamese,Spanish,Chinese": 0.32666666666666666, + "Vietnamese,Indonesian,Filipino": 0.38666666666666666, + "Vietnamese,Indonesian,Chinese": 0.46, + "Vietnamese,Filipino,Chinese": 0.4, + "Spanish,Indonesian,Filipino": 0.37333333333333335, + "Spanish,Indonesian,Chinese": 0.32666666666666666, + "Spanish,Filipino,Chinese": 0.2866666666666667, + "Indonesian,Filipino,Chinese": 0.41333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.21333333333333335, + "Malay,English,Vietnamese,Indonesian": 0.26, + "Malay,English,Vietnamese,Filipino": 0.23333333333333334, + "Malay,English,Vietnamese,Chinese": 0.22, + "Malay,English,Spanish,Indonesian": 0.31333333333333335, + "Malay,English,Spanish,Filipino": 0.2866666666666667, + "Malay,English,Spanish,Chinese": 0.21333333333333335, + "Malay,English,Indonesian,Filipino": 0.32666666666666666, + "Malay,English,Indonesian,Chinese": 0.26666666666666666, + "Malay,English,Filipino,Chinese": 0.24, + "Malay,Vietnamese,Spanish,Indonesian": 0.28, + "Malay,Vietnamese,Spanish,Filipino": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.3466666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.36666666666666664, + "Malay,Vietnamese,Filipino,Chinese": 0.3333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.3333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.2733333333333333, + "Malay,Spanish,Filipino,Chinese": 0.24, + "Malay,Indonesian,Filipino,Chinese": 0.36666666666666664, + "English,Vietnamese,Spanish,Indonesian": 0.23333333333333334, + "English,Vietnamese,Spanish,Filipino": 0.20666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.21333333333333335, + "English,Vietnamese,Indonesian,Filipino": 0.24666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.26, + "English,Vietnamese,Filipino,Chinese": 0.21333333333333335, + "English,Spanish,Indonesian,Filipino": 0.2866666666666667, + "English,Spanish,Indonesian,Chinese": 0.24666666666666667, + "English,Spanish,Filipino,Chinese": 0.20666666666666667, + "English,Indonesian,Filipino,Chinese": 0.26666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino": 0.24, + "Vietnamese,Spanish,Indonesian,Chinese": 0.26, + "Vietnamese,Spanish,Filipino,Chinese": 0.21333333333333335, + "Vietnamese,Indonesian,Filipino,Chinese": 0.30666666666666664, + "Spanish,Indonesian,Filipino,Chinese": 0.25333333333333335 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.20666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.18, + "Malay,English,Vietnamese,Spanish,Chinese": 0.16666666666666666, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.22, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.21333333333333335, + "Malay,English,Vietnamese,Filipino,Chinese": 0.18, + "Malay,English,Spanish,Indonesian,Filipino": 0.25333333333333335, + "Malay,English,Spanish,Indonesian,Chinese": 0.20666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.17333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.22, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.2733333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.18666666666666668, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.19333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.15333333333333332, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.2, + "English,Spanish,Indonesian,Filipino,Chinese": 0.2, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.18666666666666668 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.17333333333333334, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.16666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.13333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + } + }, + "AC3_2": 0.43131355927405934, + "AC3_3": 0.36028145961290353, + "AC3_4": 0.30452679203064886, + "AC3_5": 0.2600209491118084, + "AC3_6": 0.22407716366926567, + "AC3_7": 0.19445519016037818 + }, + "prompt_4": { + "overall_acc": 0.35999999999999993, + "language_acc": { + "Malay": 0.35333333333333333, + "English": 0.5266666666666666, + "Vietnamese": 0.24, + "Spanish": 0.4533333333333333, + "Indonesian": 0.38666666666666666, + "Filipino": 0.3, + "Chinese": 0.26 + }, + "consistency_score_2": 0.5339682539682539, + "consistency_score_3": 0.3533333333333333, + "consistency_score_4": 0.25657142857142856, + "consistency_score_5": 0.19587301587301587, + "consistency_score_6": 0.15333333333333335, + "consistency_score_7": 0.12, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5133333333333333, + "Malay,Vietnamese": 0.5466666666666666, + "Malay,Spanish": 0.49333333333333335, + "Malay,Indonesian": 0.6933333333333334, + "Malay,Filipino": 0.66, + "Malay,Chinese": 0.5733333333333334, + "English,Vietnamese": 0.36, + "English,Spanish": 0.6, + "English,Indonesian": 0.5133333333333333, + "English,Filipino": 0.4866666666666667, + "English,Chinese": 0.37333333333333335, + "Vietnamese,Spanish": 0.42, + "Vietnamese,Indonesian": 0.5733333333333334, + "Vietnamese,Filipino": 0.5333333333333333, + "Vietnamese,Chinese": 0.68, + "Spanish,Indonesian": 0.5333333333333333, + "Spanish,Filipino": 0.49333333333333335, + "Spanish,Chinese": 0.44, + "Indonesian,Filipino": 0.5866666666666667, + "Indonesian,Chinese": 0.5866666666666667, + "Filipino,Chinese": 0.5533333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.2733333333333333, + "Malay,English,Spanish": 0.37333333333333335, + "Malay,English,Indonesian": 0.4066666666666667, + "Malay,English,Filipino": 0.37333333333333335, + "Malay,English,Chinese": 0.2866666666666667, + "Malay,Vietnamese,Spanish": 0.3, + "Malay,Vietnamese,Indonesian": 0.43333333333333335, + "Malay,Vietnamese,Filipino": 0.42, + "Malay,Vietnamese,Chinese": 0.44, + "Malay,Spanish,Indonesian": 0.4066666666666667, + "Malay,Spanish,Filipino": 0.37333333333333335, + "Malay,Spanish,Chinese": 0.29333333333333333, + "Malay,Indonesian,Filipino": 0.5066666666666667, + "Malay,Indonesian,Chinese": 0.4666666666666667, + "Malay,Filipino,Chinese": 0.44, + "English,Vietnamese,Spanish": 0.26, + "English,Vietnamese,Indonesian": 0.29333333333333333, + "English,Vietnamese,Filipino": 0.26, + "English,Vietnamese,Chinese": 0.26666666666666666, + "English,Spanish,Indonesian": 0.37333333333333335, + "English,Spanish,Filipino": 0.36, + "English,Spanish,Chinese": 0.2733333333333333, + "English,Indonesian,Filipino": 0.35333333333333333, + "English,Indonesian,Chinese": 0.31333333333333335, + "English,Filipino,Chinese": 0.28, + "Vietnamese,Spanish,Indonesian": 0.32, + "Vietnamese,Spanish,Filipino": 0.2866666666666667, + "Vietnamese,Spanish,Chinese": 0.31333333333333335, + "Vietnamese,Indonesian,Filipino": 0.38666666666666666, + "Vietnamese,Indonesian,Chinese": 0.4666666666666667, + "Vietnamese,Filipino,Chinese": 0.41333333333333333, + "Spanish,Indonesian,Filipino": 0.3466666666666667, + "Spanish,Indonesian,Chinese": 0.32, + "Spanish,Filipino,Chinese": 0.28, + "Indonesian,Filipino,Chinese": 0.4066666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.20666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.25333333333333335, + "Malay,English,Vietnamese,Filipino": 0.22666666666666666, + "Malay,English,Vietnamese,Chinese": 0.21333333333333335, + "Malay,English,Spanish,Indonesian": 0.31333333333333335, + "Malay,English,Spanish,Filipino": 0.28, + "Malay,English,Spanish,Chinese": 0.21333333333333335, + "Malay,English,Indonesian,Filipino": 0.31333333333333335, + "Malay,English,Indonesian,Chinese": 0.26, + "Malay,English,Filipino,Chinese": 0.24, + "Malay,Vietnamese,Spanish,Indonesian": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Filipino": 0.24666666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Indonesian,Filipino": 0.3466666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.36, + "Malay,Vietnamese,Filipino,Chinese": 0.34, + "Malay,Spanish,Indonesian,Filipino": 0.30666666666666664, + "Malay,Spanish,Indonesian,Chinese": 0.2733333333333333, + "Malay,Spanish,Filipino,Chinese": 0.22666666666666666, + "Malay,Indonesian,Filipino,Chinese": 0.36666666666666664, + "English,Vietnamese,Spanish,Indonesian": 0.22, + "English,Vietnamese,Spanish,Filipino": 0.2, + "English,Vietnamese,Spanish,Chinese": 0.2, + "English,Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.24666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.20666666666666667, + "English,Spanish,Indonesian,Filipino": 0.2733333333333333, + "English,Spanish,Indonesian,Chinese": 0.23333333333333334, + "English,Spanish,Filipino,Chinese": 0.2, + "English,Indonesian,Filipino,Chinese": 0.24666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian,Chinese": 0.25333333333333335, + "Vietnamese,Spanish,Filipino,Chinese": 0.20666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.32, + "Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.19333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.17333333333333334, + "Malay,English,Vietnamese,Spanish,Chinese": 0.16, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.21333333333333335, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.18, + "Malay,English,Spanish,Indonesian,Filipino": 0.24, + "Malay,English,Spanish,Indonesian,Chinese": 0.2, + "Malay,English,Spanish,Filipino,Chinese": 0.16666666666666666, + "Malay,English,Indonesian,Filipino,Chinese": 0.22, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.18, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.2866666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.21333333333333335, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.17333333333333334, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.18, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.19333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.18, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.18 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.16, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12 + } + }, + "AC3_2": 0.4300568181337116, + "AC3_3": 0.3566355139686959, + "AC3_4": 0.29961075064649495, + "AC3_5": 0.25370645340952735, + "AC3_6": 0.21506493502303928, + "AC3_7": 0.17999999996249996 + }, + "prompt_5": { + "overall_acc": 0.36476190476190473, + "language_acc": { + "Malay": 0.35333333333333333, + "English": 0.54, + "Vietnamese": 0.26, + "Spanish": 0.4533333333333333, + "Indonesian": 0.37333333333333335, + "Filipino": 0.30666666666666664, + "Chinese": 0.26666666666666666 + }, + "consistency_score_2": 0.4863492063492064, + "consistency_score_3": 0.29676190476190467, + "consistency_score_4": 0.19504761904761903, + "consistency_score_5": 0.13238095238095238, + "consistency_score_6": 0.09238095238095237, + "consistency_score_7": 0.06666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.47333333333333333, + "Malay,Vietnamese": 0.5133333333333333, + "Malay,Spanish": 0.49333333333333335, + "Malay,Indonesian": 0.6733333333333333, + "Malay,Filipino": 0.62, + "Malay,Chinese": 0.4266666666666667, + "English,Vietnamese": 0.3466666666666667, + "English,Spanish": 0.6, + "English,Indonesian": 0.4866666666666667, + "English,Filipino": 0.5, + "English,Chinese": 0.32, + "Vietnamese,Spanish": 0.43333333333333335, + "Vietnamese,Indonesian": 0.5266666666666666, + "Vietnamese,Filipino": 0.5266666666666666, + "Vietnamese,Chinese": 0.47333333333333333, + "Spanish,Indonesian": 0.5133333333333333, + "Spanish,Filipino": 0.5066666666666667, + "Spanish,Chinese": 0.3333333333333333, + "Indonesian,Filipino": 0.56, + "Indonesian,Chinese": 0.49333333333333335, + "Filipino,Chinese": 0.3933333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.24, + "Malay,English,Spanish": 0.36, + "Malay,English,Indonesian": 0.36, + "Malay,English,Filipino": 0.3466666666666667, + "Malay,English,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Spanish": 0.3, + "Malay,Vietnamese,Indonesian": 0.38666666666666666, + "Malay,Vietnamese,Filipino": 0.38, + "Malay,Vietnamese,Chinese": 0.26, + "Malay,Spanish,Indonesian": 0.38666666666666666, + "Malay,Spanish,Filipino": 0.37333333333333335, + "Malay,Spanish,Chinese": 0.20666666666666667, + "Malay,Indonesian,Filipino": 0.47333333333333333, + "Malay,Indonesian,Chinese": 0.34, + "Malay,Filipino,Chinese": 0.28, + "English,Vietnamese,Spanish": 0.26, + "English,Vietnamese,Indonesian": 0.25333333333333335, + "English,Vietnamese,Filipino": 0.25333333333333335, + "English,Vietnamese,Chinese": 0.18, + "English,Spanish,Indonesian": 0.3466666666666667, + "English,Spanish,Filipino": 0.36666666666666664, + "English,Spanish,Chinese": 0.22, + "English,Indonesian,Filipino": 0.3333333333333333, + "English,Indonesian,Chinese": 0.26, + "English,Filipino,Chinese": 0.21333333333333335, + "Vietnamese,Spanish,Indonesian": 0.30666666666666664, + "Vietnamese,Spanish,Filipino": 0.29333333333333333, + "Vietnamese,Spanish,Chinese": 0.21333333333333335, + "Vietnamese,Indonesian,Filipino": 0.36, + "Vietnamese,Indonesian,Chinese": 0.31333333333333335, + "Vietnamese,Filipino,Chinese": 0.26666666666666666, + "Spanish,Indonesian,Filipino": 0.32666666666666666, + "Spanish,Indonesian,Chinese": 0.23333333333333334, + "Spanish,Filipino,Chinese": 0.18666666666666668, + "Indonesian,Filipino,Chinese": 0.29333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.20666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.19333333333333333, + "Malay,English,Vietnamese,Filipino": 0.19333333333333333, + "Malay,English,Vietnamese,Chinese": 0.12, + "Malay,English,Spanish,Indonesian": 0.2866666666666667, + "Malay,English,Spanish,Filipino": 0.2733333333333333, + "Malay,English,Spanish,Chinese": 0.15333333333333332, + "Malay,English,Indonesian,Filipino": 0.28, + "Malay,English,Indonesian,Chinese": 0.18, + "Malay,English,Filipino,Chinese": 0.16, + "Malay,Vietnamese,Spanish,Indonesian": 0.24666666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.24, + "Malay,Vietnamese,Spanish,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.30666666666666664, + "Malay,Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.18666666666666668, + "Malay,Spanish,Indonesian,Filipino": 0.3, + "Malay,Spanish,Indonesian,Chinese": 0.16666666666666666, + "Malay,Spanish,Filipino,Chinese": 0.14, + "Malay,Indonesian,Filipino,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish,Indonesian": 0.2, + "English,Vietnamese,Spanish,Filipino": 0.2, + "English,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "English,Vietnamese,Indonesian,Filipino": 0.20666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.15333333333333332, + "English,Vietnamese,Filipino,Chinese": 0.14, + "English,Spanish,Indonesian,Filipino": 0.24, + "English,Spanish,Indonesian,Chinese": 0.17333333333333334, + "English,Spanish,Filipino,Chinese": 0.14666666666666667, + "English,Indonesian,Filipino,Chinese": 0.18, + "Vietnamese,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, + "Vietnamese,Indonesian,Filipino,Chinese": 0.2, + "Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.17333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino": 0.16666666666666666, + "Malay,English,Vietnamese,Spanish,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.1, + "Malay,English,Vietnamese,Filipino,Chinese": 0.1, + "Malay,English,Spanish,Indonesian,Filipino": 0.22, + "Malay,English,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,English,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.12, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.12, + "English,Spanish,Indonesian,Filipino,Chinese": 0.12, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.08, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667 + } + }, + "AC3_2": 0.41687074825034015, + "AC3_3": 0.3272669435837258, + "AC3_4": 0.25417910201392885, + "AC3_5": 0.19426017146247548, + "AC3_6": 0.14742460314235403, + "AC3_7": 0.11272994846540843 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3490259740259741, + "language_acc": { + "English": 0.5227272727272727, + "Vietnamese": 0.26136363636363635, + "Chinese": 0.24431818181818182, + "Indonesian": 0.35795454545454547, + "Filipino": 0.30113636363636365, + "Spanish": 0.42045454545454547, + "Malay": 0.3352272727272727 + }, + "consistency_score_2": 0.41801948051948046, + "consistency_score_3": 0.2202922077922078, + "consistency_score_4": 0.13506493506493505, + "consistency_score_5": 0.09145021645021642, + "consistency_score_6": 0.06655844155844155, + "consistency_score_7": 0.05113636363636364, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.30113636363636365, + "English,Chinese": 0.2897727272727273, + "English,Indonesian": 0.38636363636363635, + "English,Filipino": 0.3352272727272727, + "English,Spanish": 0.6022727272727273, + "English,Malay": 0.3409090909090909, + "Vietnamese,Chinese": 0.5170454545454546, + "Vietnamese,Indonesian": 0.42613636363636365, + "Vietnamese,Filipino": 0.4431818181818182, + "Vietnamese,Spanish": 0.35795454545454547, + "Vietnamese,Malay": 0.4715909090909091, + "Chinese,Indonesian": 0.39204545454545453, + "Chinese,Filipino": 0.39204545454545453, + "Chinese,Spanish": 0.29545454545454547, + "Chinese,Malay": 0.4943181818181818, + "Indonesian,Filipino": 0.4659090909090909, + "Indonesian,Spanish": 0.4602272727272727, + "Indonesian,Malay": 0.5568181818181818, + "Filipino,Spanish": 0.4090909090909091, + "Filipino,Malay": 0.4318181818181818, + "Spanish,Malay": 0.4090909090909091 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.17045454545454544, + "English,Vietnamese,Indonesian": 0.17613636363636365, + "English,Vietnamese,Filipino": 0.1590909090909091, + "English,Vietnamese,Spanish": 0.2159090909090909, + "English,Vietnamese,Malay": 0.16477272727272727, + "English,Chinese,Indonesian": 0.17613636363636365, + "English,Chinese,Filipino": 0.14772727272727273, + "English,Chinese,Spanish": 0.17613636363636365, + "English,Chinese,Malay": 0.16477272727272727, + "English,Indonesian,Filipino": 0.20454545454545456, + "English,Indonesian,Spanish": 0.2840909090909091, + "English,Indonesian,Malay": 0.23295454545454544, + "English,Filipino,Spanish": 0.23863636363636365, + "English,Filipino,Malay": 0.17613636363636365, + "English,Spanish,Malay": 0.24431818181818182, + "Vietnamese,Chinese,Indonesian": 0.25, + "Vietnamese,Chinese,Filipino": 0.25, + "Vietnamese,Chinese,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Malay": 0.3125, + "Vietnamese,Indonesian,Filipino": 0.23863636363636365, + "Vietnamese,Indonesian,Spanish": 0.21022727272727273, + "Vietnamese,Indonesian,Malay": 0.29545454545454547, + "Vietnamese,Filipino,Spanish": 0.19886363636363635, + "Vietnamese,Filipino,Malay": 0.25, + "Vietnamese,Spanish,Malay": 0.2159090909090909, + "Chinese,Indonesian,Filipino": 0.23863636363636365, + "Chinese,Indonesian,Spanish": 0.1875, + "Chinese,Indonesian,Malay": 0.30113636363636365, + "Chinese,Filipino,Spanish": 0.17045454545454544, + "Chinese,Filipino,Malay": 0.23863636363636365, + "Chinese,Spanish,Malay": 0.19318181818181818, + "Indonesian,Filipino,Spanish": 0.26136363636363635, + "Indonesian,Filipino,Malay": 0.2840909090909091, + "Indonesian,Spanish,Malay": 0.26704545454545453, + "Filipino,Spanish,Malay": 0.2215909090909091 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.11363636363636363, + "English,Vietnamese,Chinese,Filipino": 0.09659090909090909, + "English,Vietnamese,Chinese,Spanish": 0.11363636363636363, + "English,Vietnamese,Chinese,Malay": 0.10227272727272728, + "English,Vietnamese,Indonesian,Filipino": 0.10227272727272728, + "English,Vietnamese,Indonesian,Spanish": 0.13636363636363635, + "English,Vietnamese,Indonesian,Malay": 0.11363636363636363, + "English,Vietnamese,Filipino,Spanish": 0.14204545454545456, + "English,Vietnamese,Filipino,Malay": 0.09659090909090909, + "English,Vietnamese,Spanish,Malay": 0.13068181818181818, + "English,Chinese,Indonesian,Filipino": 0.11931818181818182, + "English,Chinese,Indonesian,Spanish": 0.125, + "English,Chinese,Indonesian,Malay": 0.13636363636363635, + "English,Chinese,Filipino,Spanish": 0.11363636363636363, + "English,Chinese,Filipino,Malay": 0.10227272727272728, + "English,Chinese,Spanish,Malay": 0.11931818181818182, + "English,Indonesian,Filipino,Spanish": 0.1534090909090909, + "English,Indonesian,Filipino,Malay": 0.13068181818181818, + "English,Indonesian,Spanish,Malay": 0.16477272727272727, + "English,Filipino,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Filipino": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian,Spanish": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Filipino,Spanish": 0.11931818181818182, + "Vietnamese,Chinese,Filipino,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "Vietnamese,Indonesian,Filipino,Malay": 0.1590909090909091, + "Vietnamese,Indonesian,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Filipino,Spanish,Malay": 0.13636363636363635, + "Chinese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "Chinese,Indonesian,Filipino,Malay": 0.17613636363636365, + "Chinese,Indonesian,Spanish,Malay": 0.14772727272727273, + "Chinese,Filipino,Spanish,Malay": 0.125, + "Indonesian,Filipino,Spanish,Malay": 0.17613636363636365 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Filipino,Malay": 0.0625, + "English,Vietnamese,Chinese,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.0625, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Filipino,Spanish,Malay": 0.09659090909090909, + "English,Chinese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "English,Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Spanish,Malay": 0.10227272727272728, + "English,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0625, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0625, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + } + }, + "AC3_2": 0.38041984465596346, + "AC3_3": 0.27010450337613606, + "AC3_4": 0.1947616434135639, + "AC3_5": 0.14492724716707203, + "AC3_6": 0.11179738227829356, + "AC3_7": 0.08920339293363665 + }, + "prompt_2": { + "overall_acc": 0.3530844155844156, + "language_acc": { + "English": 0.5340909090909091, + "Vietnamese": 0.26704545454545453, + "Chinese": 0.24431818181818182, + "Indonesian": 0.32386363636363635, + "Filipino": 0.3181818181818182, + "Spanish": 0.4375, + "Malay": 0.3465909090909091 + }, + "consistency_score_2": 0.4215367965367966, + "consistency_score_3": 0.22029220779220776, + "consistency_score_4": 0.13555194805194803, + "consistency_score_5": 0.0936147186147186, + "consistency_score_6": 0.0706168831168831, + "consistency_score_7": 0.056818181818181816, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.29545454545454547, + "English,Chinese": 0.29545454545454547, + "English,Indonesian": 0.38636363636363635, + "English,Filipino": 0.3465909090909091, + "English,Spanish": 0.6306818181818182, + "English,Malay": 0.3522727272727273, + "Vietnamese,Chinese": 0.5, + "Vietnamese,Indonesian": 0.35795454545454547, + "Vietnamese,Filipino": 0.42613636363636365, + "Vietnamese,Spanish": 0.3522727272727273, + "Vietnamese,Malay": 0.4602272727272727, + "Chinese,Indonesian": 0.3806818181818182, + "Chinese,Filipino": 0.42045454545454547, + "Chinese,Spanish": 0.29545454545454547, + "Chinese,Malay": 0.4772727272727273, + "Indonesian,Filipino": 0.48863636363636365, + "Indonesian,Spanish": 0.44886363636363635, + "Indonesian,Malay": 0.5965909090909091, + "Filipino,Spanish": 0.4602272727272727, + "Filipino,Malay": 0.48295454545454547, + "Spanish,Malay": 0.3977272727272727 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.1534090909090909, + "English,Vietnamese,Indonesian": 0.13636363636363635, + "English,Vietnamese,Filipino": 0.14204545454545456, + "English,Vietnamese,Spanish": 0.2159090909090909, + "English,Vietnamese,Malay": 0.14772727272727273, + "English,Chinese,Indonesian": 0.18181818181818182, + "English,Chinese,Filipino": 0.1590909090909091, + "English,Chinese,Spanish": 0.19886363636363635, + "English,Chinese,Malay": 0.17045454545454544, + "English,Indonesian,Filipino": 0.22727272727272727, + "English,Indonesian,Spanish": 0.2897727272727273, + "English,Indonesian,Malay": 0.2556818181818182, + "English,Filipino,Spanish": 0.2840909090909091, + "English,Filipino,Malay": 0.1875, + "English,Spanish,Malay": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian": 0.2159090909090909, + "Vietnamese,Chinese,Filipino": 0.23863636363636365, + "Vietnamese,Chinese,Spanish": 0.17045454545454544, + "Vietnamese,Chinese,Malay": 0.2897727272727273, + "Vietnamese,Indonesian,Filipino": 0.21022727272727273, + "Vietnamese,Indonesian,Spanish": 0.16477272727272727, + "Vietnamese,Indonesian,Malay": 0.26704545454545453, + "Vietnamese,Filipino,Spanish": 0.21022727272727273, + "Vietnamese,Filipino,Malay": 0.24431818181818182, + "Vietnamese,Spanish,Malay": 0.1875, + "Chinese,Indonesian,Filipino": 0.23863636363636365, + "Chinese,Indonesian,Spanish": 0.18181818181818182, + "Chinese,Indonesian,Malay": 0.2897727272727273, + "Chinese,Filipino,Spanish": 0.19886363636363635, + "Chinese,Filipino,Malay": 0.26704545454545453, + "Chinese,Spanish,Malay": 0.1875, + "Indonesian,Filipino,Spanish": 0.2727272727272727, + "Indonesian,Filipino,Malay": 0.32954545454545453, + "Indonesian,Spanish,Malay": 0.2897727272727273, + "Filipino,Spanish,Malay": 0.25 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino": 0.09090909090909091, + "English,Vietnamese,Chinese,Spanish": 0.11363636363636363, + "English,Vietnamese,Chinese,Malay": 0.09659090909090909, + "English,Vietnamese,Indonesian,Filipino": 0.09090909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.11931818181818182, + "English,Vietnamese,Indonesian,Malay": 0.09659090909090909, + "English,Vietnamese,Filipino,Spanish": 0.13636363636363635, + "English,Vietnamese,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino": 0.125, + "English,Chinese,Indonesian,Spanish": 0.13636363636363635, + "English,Chinese,Indonesian,Malay": 0.14204545454545456, + "English,Chinese,Filipino,Spanish": 0.13636363636363635, + "English,Chinese,Filipino,Malay": 0.125, + "English,Chinese,Spanish,Malay": 0.13636363636363635, + "English,Indonesian,Filipino,Spanish": 0.18181818181818182, + "English,Indonesian,Filipino,Malay": 0.1534090909090909, + "English,Indonesian,Spanish,Malay": 0.19318181818181818, + "English,Filipino,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Spanish": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Malay": 0.1875, + "Vietnamese,Chinese,Filipino,Spanish": 0.11931818181818182, + "Vietnamese,Chinese,Filipino,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "Vietnamese,Indonesian,Filipino,Malay": 0.1590909090909091, + "Vietnamese,Indonesian,Spanish,Malay": 0.125, + "Vietnamese,Filipino,Spanish,Malay": 0.13636363636363635, + "Chinese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "Chinese,Indonesian,Filipino,Malay": 0.1875, + "Chinese,Indonesian,Spanish,Malay": 0.14772727272727273, + "Chinese,Filipino,Spanish,Malay": 0.1590909090909091, + "Indonesian,Filipino,Spanish,Malay": 0.19318181818181818 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.06818181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Spanish,Malay": 0.07954545454545454, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.07954545454545454, + "English,Vietnamese,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Filipino,Spanish,Malay": 0.11363636363636363, + "English,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.125 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0625, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816 + } + }, + "AC3_2": 0.38428607712853896, + "AC3_3": 0.2713111845212277, + "AC3_4": 0.1958973335233559, + "AC3_5": 0.14799177197438992, + "AC3_6": 0.1176948051670274, + "AC3_7": 0.09788478845496786 + }, + "prompt_3": { + "overall_acc": 0.35633116883116883, + "language_acc": { + "English": 0.5284090909090909, + "Vietnamese": 0.26704545454545453, + "Chinese": 0.25, + "Indonesian": 0.3409090909090909, + "Filipino": 0.3181818181818182, + "Spanish": 0.4375, + "Malay": 0.3522727272727273 + }, + "consistency_score_2": 0.4310064935064935, + "consistency_score_3": 0.2318181818181818, + "consistency_score_4": 0.14886363636363642, + "consistency_score_5": 0.10930735930735931, + "consistency_score_6": 0.08766233766233765, + "consistency_score_7": 0.07386363636363637, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3068181818181818, + "English,Chinese": 0.32386363636363635, + "English,Indonesian": 0.38636363636363635, + "English,Filipino": 0.3352272727272727, + "English,Spanish": 0.6363636363636364, + "English,Malay": 0.3693181818181818, + "Vietnamese,Chinese": 0.5056818181818182, + "Vietnamese,Indonesian": 0.3977272727272727, + "Vietnamese,Filipino": 0.48295454545454547, + "Vietnamese,Spanish": 0.36363636363636365, + "Vietnamese,Malay": 0.4602272727272727, + "Chinese,Indonesian": 0.38636363636363635, + "Chinese,Filipino": 0.4034090909090909, + "Chinese,Spanish": 0.3125, + "Chinese,Malay": 0.48863636363636365, + "Indonesian,Filipino": 0.5, + "Indonesian,Spanish": 0.4431818181818182, + "Indonesian,Malay": 0.5965909090909091, + "Filipino,Spanish": 0.4431818181818182, + "Filipino,Malay": 0.4715909090909091, + "Spanish,Malay": 0.4375 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.16477272727272727, + "English,Vietnamese,Indonesian": 0.16477272727272727, + "English,Vietnamese,Filipino": 0.1590909090909091, + "English,Vietnamese,Spanish": 0.2159090909090909, + "English,Vietnamese,Malay": 0.1590909090909091, + "English,Chinese,Indonesian": 0.1875, + "English,Chinese,Filipino": 0.1590909090909091, + "English,Chinese,Spanish": 0.2159090909090909, + "English,Chinese,Malay": 0.1875, + "English,Indonesian,Filipino": 0.2215909090909091, + "English,Indonesian,Spanish": 0.2897727272727273, + "English,Indonesian,Malay": 0.2556818181818182, + "English,Filipino,Spanish": 0.26136363636363635, + "English,Filipino,Malay": 0.19318181818181818, + "English,Spanish,Malay": 0.2727272727272727, + "Vietnamese,Chinese,Indonesian": 0.2215909090909091, + "Vietnamese,Chinese,Filipino": 0.26704545454545453, + "Vietnamese,Chinese,Spanish": 0.1875, + "Vietnamese,Chinese,Malay": 0.3068181818181818, + "Vietnamese,Indonesian,Filipino": 0.2556818181818182, + "Vietnamese,Indonesian,Spanish": 0.19886363636363635, + "Vietnamese,Indonesian,Malay": 0.2727272727272727, + "Vietnamese,Filipino,Spanish": 0.23295454545454544, + "Vietnamese,Filipino,Malay": 0.2727272727272727, + "Vietnamese,Spanish,Malay": 0.2159090909090909, + "Chinese,Indonesian,Filipino": 0.23863636363636365, + "Chinese,Indonesian,Spanish": 0.19886363636363635, + "Chinese,Indonesian,Malay": 0.3068181818181818, + "Chinese,Filipino,Spanish": 0.19318181818181818, + "Chinese,Filipino,Malay": 0.26704545454545453, + "Chinese,Spanish,Malay": 0.20454545454545456, + "Indonesian,Filipino,Spanish": 0.2784090909090909, + "Indonesian,Filipino,Malay": 0.32954545454545453, + "Indonesian,Spanish,Malay": 0.29545454545454547, + "Filipino,Spanish,Malay": 0.26136363636363635 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.11363636363636363, + "English,Vietnamese,Chinese,Filipino": 0.10795454545454546, + "English,Vietnamese,Chinese,Spanish": 0.125, + "English,Vietnamese,Chinese,Malay": 0.11363636363636363, + "English,Vietnamese,Indonesian,Filipino": 0.11363636363636363, + "English,Vietnamese,Indonesian,Spanish": 0.14204545454545456, + "English,Vietnamese,Indonesian,Malay": 0.11363636363636363, + "English,Vietnamese,Filipino,Spanish": 0.14204545454545456, + "English,Vietnamese,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Spanish,Malay": 0.125, + "English,Chinese,Indonesian,Filipino": 0.125, + "English,Chinese,Indonesian,Spanish": 0.14204545454545456, + "English,Chinese,Indonesian,Malay": 0.1590909090909091, + "English,Chinese,Filipino,Spanish": 0.13636363636363635, + "English,Chinese,Filipino,Malay": 0.13068181818181818, + "English,Chinese,Spanish,Malay": 0.14204545454545456, + "English,Indonesian,Filipino,Spanish": 0.17613636363636365, + "English,Indonesian,Filipino,Malay": 0.1534090909090909, + "English,Indonesian,Spanish,Malay": 0.19318181818181818, + "English,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian,Spanish": 0.125, + "Vietnamese,Chinese,Indonesian,Malay": 0.19318181818181818, + "Vietnamese,Chinese,Filipino,Spanish": 0.13636363636363635, + "Vietnamese,Chinese,Filipino,Malay": 0.19318181818181818, + "Vietnamese,Chinese,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Indonesian,Filipino,Spanish": 0.14772727272727273, + "Vietnamese,Indonesian,Filipino,Malay": 0.18181818181818182, + "Vietnamese,Indonesian,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.16477272727272727, + "Chinese,Indonesian,Filipino,Spanish": 0.1590909090909091, + "Chinese,Indonesian,Filipino,Malay": 0.19886363636363635, + "Chinese,Indonesian,Spanish,Malay": 0.17045454545454544, + "Chinese,Filipino,Spanish,Malay": 0.1590909090909091, + "Indonesian,Filipino,Spanish,Malay": 0.20454545454545456 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Chinese,Indonesian,Spanish,Malay": 0.125, + "English,Chinese,Filipino,Spanish,Malay": 0.11931818181818182, + "English,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.125, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637 + } + }, + "AC3_2": 0.39012752706251297, + "AC3_3": 0.2808947882994167, + "AC3_4": 0.20999722477731542, + "AC3_5": 0.1672955167656897, + "AC3_6": 0.1407084688303422, + "AC3_7": 0.12236277870225969 + }, + "prompt_4": { + "overall_acc": 0.35227272727272724, + "language_acc": { + "English": 0.5340909090909091, + "Vietnamese": 0.2727272727272727, + "Chinese": 0.24431818181818182, + "Indonesian": 0.3352272727272727, + "Filipino": 0.3068181818181818, + "Spanish": 0.42613636363636365, + "Malay": 0.3465909090909091 + }, + "consistency_score_2": 0.4323593073593073, + "consistency_score_3": 0.23522727272727276, + "consistency_score_4": 0.15227272727272728, + "consistency_score_5": 0.11255411255411256, + "consistency_score_6": 0.09172077922077922, + "consistency_score_7": 0.07954545454545454, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3068181818181818, + "English,Chinese": 0.30113636363636365, + "English,Indonesian": 0.38636363636363635, + "English,Filipino": 0.3409090909090909, + "English,Spanish": 0.6363636363636364, + "English,Malay": 0.375, + "Vietnamese,Chinese": 0.5397727272727273, + "Vietnamese,Indonesian": 0.3977272727272727, + "Vietnamese,Filipino": 0.4715909090909091, + "Vietnamese,Spanish": 0.3693181818181818, + "Vietnamese,Malay": 0.4772727272727273, + "Chinese,Indonesian": 0.3806818181818182, + "Chinese,Filipino": 0.42613636363636365, + "Chinese,Spanish": 0.30113636363636365, + "Chinese,Malay": 0.5, + "Indonesian,Filipino": 0.4943181818181818, + "Indonesian,Spanish": 0.44886363636363635, + "Indonesian,Malay": 0.5909090909090909, + "Filipino,Spanish": 0.45454545454545453, + "Filipino,Malay": 0.4715909090909091, + "Spanish,Malay": 0.4090909090909091 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.17045454545454544, + "English,Vietnamese,Indonesian": 0.16477272727272727, + "English,Vietnamese,Filipino": 0.1590909090909091, + "English,Vietnamese,Spanish": 0.22727272727272727, + "English,Vietnamese,Malay": 0.17613636363636365, + "English,Chinese,Indonesian": 0.18181818181818182, + "English,Chinese,Filipino": 0.16477272727272727, + "English,Chinese,Spanish": 0.2159090909090909, + "English,Chinese,Malay": 0.19318181818181818, + "English,Indonesian,Filipino": 0.22727272727272727, + "English,Indonesian,Spanish": 0.2897727272727273, + "English,Indonesian,Malay": 0.25, + "English,Filipino,Spanish": 0.2727272727272727, + "English,Filipino,Malay": 0.19318181818181818, + "English,Spanish,Malay": 0.2727272727272727, + "Vietnamese,Chinese,Indonesian": 0.24431818181818182, + "Vietnamese,Chinese,Filipino": 0.2840909090909091, + "Vietnamese,Chinese,Spanish": 0.19886363636363635, + "Vietnamese,Chinese,Malay": 0.3352272727272727, + "Vietnamese,Indonesian,Filipino": 0.2556818181818182, + "Vietnamese,Indonesian,Spanish": 0.19318181818181818, + "Vietnamese,Indonesian,Malay": 0.2784090909090909, + "Vietnamese,Filipino,Spanish": 0.22727272727272727, + "Vietnamese,Filipino,Malay": 0.2784090909090909, + "Vietnamese,Spanish,Malay": 0.2215909090909091, + "Chinese,Indonesian,Filipino": 0.25, + "Chinese,Indonesian,Spanish": 0.1875, + "Chinese,Indonesian,Malay": 0.30113636363636365, + "Chinese,Filipino,Spanish": 0.19886363636363635, + "Chinese,Filipino,Malay": 0.2840909090909091, + "Chinese,Spanish,Malay": 0.19318181818181818, + "Indonesian,Filipino,Spanish": 0.2784090909090909, + "Indonesian,Filipino,Malay": 0.32954545454545453, + "Indonesian,Spanish,Malay": 0.2840909090909091, + "Filipino,Spanish,Malay": 0.25 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino": 0.11363636363636363, + "English,Vietnamese,Chinese,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Malay": 0.125, + "English,Vietnamese,Indonesian,Filipino": 0.11363636363636363, + "English,Vietnamese,Indonesian,Spanish": 0.14204545454545456, + "English,Vietnamese,Indonesian,Malay": 0.11931818181818182, + "English,Vietnamese,Filipino,Spanish": 0.14204545454545456, + "English,Vietnamese,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Spanish,Malay": 0.14204545454545456, + "English,Chinese,Indonesian,Filipino": 0.13068181818181818, + "English,Chinese,Indonesian,Spanish": 0.14772727272727273, + "English,Chinese,Indonesian,Malay": 0.1534090909090909, + "English,Chinese,Filipino,Spanish": 0.14772727272727273, + "English,Chinese,Filipino,Malay": 0.13068181818181818, + "English,Chinese,Spanish,Malay": 0.14772727272727273, + "English,Indonesian,Filipino,Spanish": 0.18181818181818182, + "English,Indonesian,Filipino,Malay": 0.1534090909090909, + "English,Indonesian,Spanish,Malay": 0.19318181818181818, + "English,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Spanish": 0.125, + "Vietnamese,Chinese,Indonesian,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Filipino,Spanish": 0.14204545454545456, + "Vietnamese,Chinese,Filipino,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "Vietnamese,Indonesian,Filipino,Malay": 0.1875, + "Vietnamese,Indonesian,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.16477272727272727, + "Chinese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "Chinese,Indonesian,Filipino,Malay": 0.20454545454545456, + "Chinese,Indonesian,Spanish,Malay": 0.1534090909090909, + "Chinese,Filipino,Spanish,Malay": 0.1534090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.19318181818181818 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.09090909090909091, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.10795454545454546, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.10795454545454546, + "English,Vietnamese,Chinese,Filipino,Malay": 0.09659090909090909, + "English,Vietnamese,Chinese,Spanish,Malay": 0.10795454545454546, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.10227272727272728, + "English,Vietnamese,Filipino,Spanish,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "English,Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Chinese,Indonesian,Spanish,Malay": 0.125, + "English,Chinese,Filipino,Spanish,Malay": 0.11931818181818182, + "English,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.125, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454 + } + }, + "AC3_2": 0.3882288400759127, + "AC3_3": 0.2820907332032594, + "AC3_4": 0.21263308759094413, + "AC3_5": 0.1706000634620811, + "AC3_6": 0.14554595310000934, + "AC3_7": 0.1297846889651599 + }, + "prompt_5": { + "overall_acc": 0.3547077922077922, + "language_acc": { + "English": 0.5284090909090909, + "Vietnamese": 0.26704545454545453, + "Chinese": 0.26704545454545453, + "Indonesian": 0.32954545454545453, + "Filipino": 0.3125, + "Spanish": 0.4318181818181818, + "Malay": 0.3465909090909091 + }, + "consistency_score_2": 0.42343073593073605, + "consistency_score_3": 0.22629870129870128, + "consistency_score_4": 0.14318181818181816, + "consistency_score_5": 0.10173160173160174, + "consistency_score_6": 0.07792207792207792, + "consistency_score_7": 0.0625, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3125, + "English,Chinese": 0.3352272727272727, + "English,Indonesian": 0.39204545454545453, + "English,Filipino": 0.32386363636363635, + "English,Spanish": 0.6363636363636364, + "English,Malay": 0.36363636363636365, + "Vietnamese,Chinese": 0.45454545454545453, + "Vietnamese,Indonesian": 0.42045454545454547, + "Vietnamese,Filipino": 0.42613636363636365, + "Vietnamese,Spanish": 0.3409090909090909, + "Vietnamese,Malay": 0.48295454545454547, + "Chinese,Indonesian": 0.3693181818181818, + "Chinese,Filipino": 0.38636363636363635, + "Chinese,Spanish": 0.32954545454545453, + "Chinese,Malay": 0.48295454545454547, + "Indonesian,Filipino": 0.4659090909090909, + "Indonesian,Spanish": 0.4431818181818182, + "Indonesian,Malay": 0.5738636363636364, + "Filipino,Spanish": 0.44886363636363635, + "Filipino,Malay": 0.5, + "Spanish,Malay": 0.4034090909090909 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.1590909090909091, + "English,Vietnamese,Indonesian": 0.17045454545454544, + "English,Vietnamese,Filipino": 0.14204545454545456, + "English,Vietnamese,Spanish": 0.23295454545454544, + "English,Vietnamese,Malay": 0.17045454545454544, + "English,Chinese,Indonesian": 0.1875, + "English,Chinese,Filipino": 0.16477272727272727, + "English,Chinese,Spanish": 0.23295454545454544, + "English,Chinese,Malay": 0.19318181818181818, + "English,Indonesian,Filipino": 0.21022727272727273, + "English,Indonesian,Spanish": 0.3068181818181818, + "English,Indonesian,Malay": 0.2556818181818182, + "English,Filipino,Spanish": 0.26136363636363635, + "English,Filipino,Malay": 0.20454545454545456, + "English,Spanish,Malay": 0.2840909090909091, + "Vietnamese,Chinese,Indonesian": 0.21022727272727273, + "Vietnamese,Chinese,Filipino": 0.21022727272727273, + "Vietnamese,Chinese,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Malay": 0.2840909090909091, + "Vietnamese,Indonesian,Filipino": 0.23863636363636365, + "Vietnamese,Indonesian,Spanish": 0.18181818181818182, + "Vietnamese,Indonesian,Malay": 0.2840909090909091, + "Vietnamese,Filipino,Spanish": 0.19318181818181818, + "Vietnamese,Filipino,Malay": 0.26704545454545453, + "Vietnamese,Spanish,Malay": 0.20454545454545456, + "Chinese,Indonesian,Filipino": 0.2215909090909091, + "Chinese,Indonesian,Spanish": 0.19318181818181818, + "Chinese,Indonesian,Malay": 0.2897727272727273, + "Chinese,Filipino,Spanish": 0.20454545454545456, + "Chinese,Filipino,Malay": 0.2727272727272727, + "Chinese,Spanish,Malay": 0.20454545454545456, + "Indonesian,Filipino,Spanish": 0.26136363636363635, + "Indonesian,Filipino,Malay": 0.3181818181818182, + "Indonesian,Spanish,Malay": 0.2840909090909091, + "Filipino,Spanish,Malay": 0.2556818181818182 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.11363636363636363, + "English,Vietnamese,Chinese,Filipino": 0.09090909090909091, + "English,Vietnamese,Chinese,Spanish": 0.11931818181818182, + "English,Vietnamese,Chinese,Malay": 0.11363636363636363, + "English,Vietnamese,Indonesian,Filipino": 0.10227272727272728, + "English,Vietnamese,Indonesian,Spanish": 0.14772727272727273, + "English,Vietnamese,Indonesian,Malay": 0.125, + "English,Vietnamese,Filipino,Spanish": 0.125, + "English,Vietnamese,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Spanish,Malay": 0.14204545454545456, + "English,Chinese,Indonesian,Filipino": 0.11931818181818182, + "English,Chinese,Indonesian,Spanish": 0.1534090909090909, + "English,Chinese,Indonesian,Malay": 0.1534090909090909, + "English,Chinese,Filipino,Spanish": 0.1534090909090909, + "English,Chinese,Filipino,Malay": 0.13068181818181818, + "English,Chinese,Spanish,Malay": 0.1534090909090909, + "English,Indonesian,Filipino,Spanish": 0.17045454545454544, + "English,Indonesian,Filipino,Malay": 0.1534090909090909, + "English,Indonesian,Spanish,Malay": 0.21022727272727273, + "English,Filipino,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Chinese,Indonesian,Filipino": 0.125, + "Vietnamese,Chinese,Indonesian,Spanish": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Malay": 0.1875, + "Vietnamese,Chinese,Filipino,Spanish": 0.11363636363636363, + "Vietnamese,Chinese,Filipino,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "Vietnamese,Indonesian,Filipino,Malay": 0.16477272727272727, + "Vietnamese,Indonesian,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Filipino,Spanish,Malay": 0.14204545454545456, + "Chinese,Indonesian,Filipino,Spanish": 0.14772727272727273, + "Chinese,Indonesian,Filipino,Malay": 0.18181818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.1590909090909091, + "Chinese,Filipino,Spanish,Malay": 0.16477272727272727, + "Indonesian,Filipino,Spanish,Malay": 0.19886363636363635 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.10795454545454546, + "English,Vietnamese,Filipino,Spanish,Malay": 0.09659090909090909, + "English,Chinese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Spanish,Malay": 0.13068181818181818, + "English,Chinese,Filipino,Spanish,Malay": 0.11931818181818182, + "English,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625 + } + }, + "AC3_2": 0.386034558229385, + "AC3_3": 0.2763133066513315, + "AC3_4": 0.20401191565988722, + "AC3_5": 0.15811515096729128, + "AC3_6": 0.1277746643304958, + "AC3_7": 0.10627431904067511 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6116504854368932 + }, + "prompt_2": { + "accuracy": 0.6310679611650486 + }, + "prompt_3": { + "accuracy": 0.5533980582524272 + }, + "prompt_4": { + "accuracy": 0.6019417475728155 + }, + "prompt_5": { + "accuracy": 0.5533980582524272 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2761904761904762 + }, + "prompt_2": { + "accuracy": 0.24761904761904763 + }, + "prompt_3": { + "accuracy": 0.2571428571428571 + }, + "prompt_4": { + "accuracy": 0.2761904761904762 + }, + "prompt_5": { + "accuracy": 0.24761904761904763 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5607476635514018 + }, + "prompt_2": { + "accuracy": 0.5233644859813084 + }, + "prompt_3": { + "accuracy": 0.5794392523364486 + }, + "prompt_4": { + "accuracy": 0.5700934579439252 + }, + "prompt_5": { + "accuracy": 0.5794392523364486 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.5, + "history": 0.26666666666666666, + "literature": 0.4, + "politics": 0.2, + "culture": 0.3, + "film": 0.1, + "law": 0.3, + "geography": 0.5 + } + }, + "prompt_2": { + "accuracy": 0.28, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.4, + "history": 0.26666666666666666, + "literature": 0.4, + "politics": 0.2, + "culture": 0.3, + "film": 0.1, + "law": 0.2, + "geography": 0.5 + } + }, + "prompt_3": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.4, + "history": 0.3333333333333333, + "literature": 0.4, + "politics": 0.2, + "culture": 0.4, + "film": 0.2, + "law": 0.3, + "geography": 0.4 + } + }, + "prompt_4": { + "accuracy": 0.28, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.4, + "history": 0.2, + "literature": 0.5, + "politics": 0.2, + "culture": 0.3, + "film": 0.1, + "law": 0.2, + "geography": 0.4 + } + }, + "prompt_5": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.5, + "history": 0.26666666666666666, + "literature": 0.4, + "politics": 0.2, + "culture": 0.4, + "film": 0.3, + "law": 0.3, + "geography": 0.4 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.17566885121708076 + }, + "prompt_2": { + "bleu_score": 0.1713078295205753 + }, + "prompt_3": { + "bleu_score": 0.17850414392492361 + }, + "prompt_4": { + "bleu_score": 0.1844321203566888 + }, + "prompt_5": { + "bleu_score": 0.16808729137211453 + } }, "indommlu": { "prompt_1": -1, @@ -1750,179 +17925,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.23488503401541674 + }, + "prompt_2": { + "bleu_score": 0.2307822821379948 + }, + "prompt_3": { + "bleu_score": 0.23189671087897454 + }, + "prompt_4": { + "bleu_score": 0.23583063429894296 + }, + "prompt_5": { + "bleu_score": 0.2272843824231829 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.05722546824411785 + }, + "prompt_2": { + "bleu_score": 0.05598499142182433 + }, + "prompt_3": { + "bleu_score": 0.05631276544898687 + }, + "prompt_4": { + "bleu_score": 0.05696429887107825 + }, + "prompt_5": { + "bleu_score": 0.054508028053641244 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.015539667218594544 + }, + "prompt_2": { + "bleu_score": 0.016210867074934316 + }, + "prompt_3": { + "bleu_score": 0.015667134358540672 + }, + "prompt_4": { + "bleu_score": 0.01834555049157511 + }, + "prompt_5": { + "bleu_score": 0.014676302750537105 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.21970473136690866 + }, + "prompt_2": { + "bleu_score": 0.2169844807042232 + }, + "prompt_3": { + "bleu_score": 0.21724171906816947 + }, + "prompt_4": { + "bleu_score": 0.2215415562877783 + }, + "prompt_5": { + "bleu_score": 0.20766335488432952 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5029171528588098 + }, + "prompt_2": { + "accuracy": 0.5052508751458576 + }, + "prompt_3": { + "accuracy": 0.5122520420070011 + }, + "prompt_4": { + "accuracy": 0.5099183197199533 + }, + "prompt_5": { + "accuracy": 0.5134189031505251 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.48916696460493386, + "category_acc": { + "high_school_european_history": 0.7012195121951219, + "business_ethics": 0.6464646464646465, + "clinical_knowledge": 0.5606060606060606, + "medical_genetics": 0.46464646464646464, + "high_school_us_history": 0.6847290640394089, + "high_school_physics": 0.2733333333333333, + "high_school_world_history": 0.6694915254237288, + "virology": 0.44242424242424244, + "high_school_microeconomics": 0.510548523206751, + "econometrics": 0.3185840707964602, + "college_computer_science": 0.3939393939393939, + "high_school_biology": 0.5825242718446602, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.3736654804270463, + "philosophy": 0.5225806451612903, + "professional_medicine": 0.4575645756457565, + "nutrition": 0.4786885245901639, + "global_facts": 0.31313131313131315, + "machine_learning": 0.35135135135135137, + "security_studies": 0.5737704918032787, + "public_relations": 0.5871559633027523, + "professional_psychology": 0.4795417348608838, + "prehistory": 0.47678018575851394, + "anatomy": 0.4552238805970149, + "human_sexuality": 0.5769230769230769, + "college_medicine": 0.47093023255813954, + "high_school_government_and_politics": 0.6822916666666666, + "college_chemistry": 0.40404040404040403, + "logical_fallacies": 0.5555555555555556, + "high_school_geography": 0.6700507614213198, + "elementary_mathematics": 0.34748010610079577, + "human_aging": 0.536036036036036, + "college_mathematics": 0.25252525252525254, + "high_school_psychology": 0.6911764705882353, + "formal_logic": 0.352, + "high_school_statistics": 0.40930232558139534, + "international_law": 0.65, + "high_school_mathematics": 0.2862453531598513, + "high_school_computer_science": 0.494949494949495, + "conceptual_physics": 0.4444444444444444, + "miscellaneous": 0.659846547314578, + "high_school_chemistry": 0.30198019801980197, + "marketing": 0.7811158798283262, + "professional_law": 0.3835616438356164, + "management": 0.6862745098039216, + "college_physics": 0.3465346534653465, + "jurisprudence": 0.5794392523364486, + "world_religions": 0.5764705882352941, + "sociology": 0.73, + "us_foreign_policy": 0.7272727272727273, + "high_school_macroeconomics": 0.5244215938303342, + "computer_security": 0.5858585858585859, + "moral_scenarios": 0.23154362416107382, + "moral_disputes": 0.5797101449275363, + "electrical_engineering": 0.4097222222222222, + "astronomy": 0.5099337748344371, + "college_biology": 0.45454545454545453 + } + }, + "prompt_2": { + "accuracy": 0.48859492313192704, + "category_acc": { + "high_school_european_history": 0.7073170731707317, + "business_ethics": 0.6262626262626263, + "clinical_knowledge": 0.5681818181818182, + "medical_genetics": 0.494949494949495, + "high_school_us_history": 0.6847290640394089, + "high_school_physics": 0.3, + "high_school_world_history": 0.6864406779661016, + "virology": 0.4303030303030303, + "high_school_microeconomics": 0.510548523206751, + "econometrics": 0.336283185840708, + "college_computer_science": 0.37373737373737376, + "high_school_biology": 0.5728155339805825, + "abstract_algebra": 0.25252525252525254, + "professional_accounting": 0.37722419928825623, + "philosophy": 0.5225806451612903, + "professional_medicine": 0.44649446494464945, + "nutrition": 0.4721311475409836, + "global_facts": 0.3333333333333333, + "machine_learning": 0.36036036036036034, + "security_studies": 0.5860655737704918, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.46317512274959083, + "prehistory": 0.4891640866873065, + "anatomy": 0.44029850746268656, + "human_sexuality": 0.5615384615384615, + "college_medicine": 0.48255813953488375, + "high_school_government_and_politics": 0.6770833333333334, + "college_chemistry": 0.43434343434343436, + "logical_fallacies": 0.5802469135802469, + "high_school_geography": 0.6751269035532995, + "elementary_mathematics": 0.3183023872679045, + "human_aging": 0.5135135135135135, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.6930147058823529, + "formal_logic": 0.384, + "high_school_statistics": 0.4186046511627907, + "international_law": 0.65, + "high_school_mathematics": 0.3048327137546468, + "high_school_computer_science": 0.46464646464646464, + "conceptual_physics": 0.42735042735042733, + "miscellaneous": 0.649616368286445, + "high_school_chemistry": 0.3069306930693069, + "marketing": 0.7467811158798283, + "professional_law": 0.3842139595564253, + "management": 0.6764705882352942, + "college_physics": 0.31683168316831684, + "jurisprudence": 0.5887850467289719, + "world_religions": 0.5941176470588235, + "sociology": 0.74, + "us_foreign_policy": 0.7171717171717171, + "high_school_macroeconomics": 0.5244215938303342, + "computer_security": 0.5959595959595959, + "moral_scenarios": 0.2203579418344519, + "moral_disputes": 0.5855072463768116, + "electrical_engineering": 0.4166666666666667, + "astronomy": 0.543046357615894, + "college_biology": 0.4965034965034965 + } + }, + "prompt_3": { + "accuracy": 0.4883089023954237, + "category_acc": { + "high_school_european_history": 0.7073170731707317, + "business_ethics": 0.6161616161616161, + "clinical_knowledge": 0.5454545454545454, + "medical_genetics": 0.494949494949495, + "high_school_us_history": 0.6847290640394089, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.6694915254237288, + "virology": 0.44242424242424244, + "high_school_microeconomics": 0.5147679324894515, + "econometrics": 0.336283185840708, + "college_computer_science": 0.41414141414141414, + "high_school_biology": 0.56957928802589, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.3879003558718861, + "philosophy": 0.5193548387096775, + "professional_medicine": 0.45018450184501846, + "nutrition": 0.4819672131147541, + "global_facts": 0.3434343434343434, + "machine_learning": 0.3333333333333333, + "security_studies": 0.5901639344262295, + "public_relations": 0.5871559633027523, + "professional_psychology": 0.46808510638297873, + "prehistory": 0.47368421052631576, + "anatomy": 0.44776119402985076, + "human_sexuality": 0.5769230769230769, + "college_medicine": 0.47674418604651164, + "high_school_government_and_politics": 0.6822916666666666, + "college_chemistry": 0.42424242424242425, + "logical_fallacies": 0.5864197530864198, + "high_school_geography": 0.6700507614213198, + "elementary_mathematics": 0.33421750663129973, + "human_aging": 0.5225225225225225, + "college_mathematics": 0.24242424242424243, + "high_school_psychology": 0.6911764705882353, + "formal_logic": 0.376, + "high_school_statistics": 0.413953488372093, + "international_law": 0.65, + "high_school_mathematics": 0.30111524163568776, + "high_school_computer_science": 0.45454545454545453, + "conceptual_physics": 0.43162393162393164, + "miscellaneous": 0.6547314578005116, + "high_school_chemistry": 0.30198019801980197, + "marketing": 0.759656652360515, + "professional_law": 0.3796477495107632, + "management": 0.6764705882352942, + "college_physics": 0.32673267326732675, + "jurisprudence": 0.5887850467289719, + "world_religions": 0.5764705882352941, + "sociology": 0.735, + "us_foreign_policy": 0.7070707070707071, + "high_school_macroeconomics": 0.532133676092545, + "computer_security": 0.6161616161616161, + "moral_scenarios": 0.2225950782997763, + "moral_disputes": 0.5855072463768116, + "electrical_engineering": 0.4166666666666667, + "astronomy": 0.5364238410596026, + "college_biology": 0.46853146853146854 + } + }, + "prompt_4": { + "accuracy": 0.4864497676081516, + "category_acc": { + "high_school_european_history": 0.7073170731707317, + "business_ethics": 0.6363636363636364, + "clinical_knowledge": 0.553030303030303, + "medical_genetics": 0.48484848484848486, + "high_school_us_history": 0.6748768472906403, + "high_school_physics": 0.3, + "high_school_world_history": 0.6610169491525424, + "virology": 0.4484848484848485, + "high_school_microeconomics": 0.5189873417721519, + "econometrics": 0.3274336283185841, + "college_computer_science": 0.45454545454545453, + "high_school_biology": 0.5631067961165048, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.3807829181494662, + "philosophy": 0.5225806451612903, + "professional_medicine": 0.44280442804428044, + "nutrition": 0.47540983606557374, + "global_facts": 0.3434343434343434, + "machine_learning": 0.36036036036036034, + "security_studies": 0.5778688524590164, + "public_relations": 0.5779816513761468, + "professional_psychology": 0.46153846153846156, + "prehistory": 0.47368421052631576, + "anatomy": 0.43283582089552236, + "human_sexuality": 0.5461538461538461, + "college_medicine": 0.48255813953488375, + "high_school_government_and_politics": 0.6770833333333334, + "college_chemistry": 0.42424242424242425, + "logical_fallacies": 0.5555555555555556, + "high_school_geography": 0.6802030456852792, + "elementary_mathematics": 0.3183023872679045, + "human_aging": 0.509009009009009, + "college_mathematics": 0.26262626262626265, + "high_school_psychology": 0.6875, + "formal_logic": 0.368, + "high_school_statistics": 0.4186046511627907, + "international_law": 0.6583333333333333, + "high_school_mathematics": 0.2899628252788104, + "high_school_computer_science": 0.46464646464646464, + "conceptual_physics": 0.43162393162393164, + "miscellaneous": 0.6483375959079284, + "high_school_chemistry": 0.297029702970297, + "marketing": 0.7424892703862661, + "professional_law": 0.3822570123939987, + "management": 0.6862745098039216, + "college_physics": 0.3564356435643564, + "jurisprudence": 0.5794392523364486, + "world_religions": 0.5764705882352941, + "sociology": 0.745, + "us_foreign_policy": 0.6767676767676768, + "high_school_macroeconomics": 0.5218508997429306, + "computer_security": 0.6161616161616161, + "moral_scenarios": 0.23042505592841164, + "moral_disputes": 0.5826086956521739, + "electrical_engineering": 0.4236111111111111, + "astronomy": 0.5298013245033113, + "college_biology": 0.4755244755244755 + } + }, + "prompt_5": { + "accuracy": 0.486735788344655, + "category_acc": { + "high_school_european_history": 0.7134146341463414, + "business_ethics": 0.6262626262626263, + "clinical_knowledge": 0.5643939393939394, + "medical_genetics": 0.5050505050505051, + "high_school_us_history": 0.6650246305418719, + "high_school_physics": 0.3, + "high_school_world_history": 0.6694915254237288, + "virology": 0.45454545454545453, + "high_school_microeconomics": 0.5189873417721519, + "econometrics": 0.30973451327433627, + "college_computer_science": 0.40404040404040403, + "high_school_biology": 0.5598705501618123, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.3665480427046263, + "philosophy": 0.5, + "professional_medicine": 0.44649446494464945, + "nutrition": 0.4786885245901639, + "global_facts": 0.31313131313131315, + "machine_learning": 0.34234234234234234, + "security_studies": 0.5860655737704918, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.46153846153846156, + "prehistory": 0.47987616099071206, + "anatomy": 0.44029850746268656, + "human_sexuality": 0.5538461538461539, + "college_medicine": 0.4883720930232558, + "high_school_government_and_politics": 0.6770833333333334, + "college_chemistry": 0.41414141414141414, + "logical_fallacies": 0.5555555555555556, + "high_school_geography": 0.6700507614213198, + "elementary_mathematics": 0.32625994694960214, + "human_aging": 0.5180180180180181, + "college_mathematics": 0.26262626262626265, + "high_school_psychology": 0.6893382352941176, + "formal_logic": 0.352, + "high_school_statistics": 0.42790697674418604, + "international_law": 0.6666666666666666, + "high_school_mathematics": 0.31226765799256506, + "high_school_computer_science": 0.47474747474747475, + "conceptual_physics": 0.43162393162393164, + "miscellaneous": 0.6508951406649617, + "high_school_chemistry": 0.3217821782178218, + "marketing": 0.759656652360515, + "professional_law": 0.37834311806914545, + "management": 0.696078431372549, + "college_physics": 0.32673267326732675, + "jurisprudence": 0.5887850467289719, + "world_religions": 0.5764705882352941, + "sociology": 0.74, + "us_foreign_policy": 0.6767676767676768, + "high_school_macroeconomics": 0.5347043701799485, + "computer_security": 0.6161616161616161, + "moral_scenarios": 0.22706935123042504, + "moral_disputes": 0.5768115942028985, + "electrical_engineering": 0.4166666666666667, + "astronomy": 0.5165562913907285, + "college_biology": 0.46853146853146854 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24665676077265974 + }, + "prompt_2": { + "accuracy": 0.2451708766716196 + }, + "prompt_3": { + "accuracy": 0.2555720653789004 + }, + "prompt_4": { + "accuracy": 0.24219910846953938 + }, + "prompt_5": { + "accuracy": 0.2451708766716196 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.273972602739726, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.5, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.2857142857142857, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.25, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.24, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.26666666666666666, + "business_administration": 0.15789473684210525, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.24489795918367346, + "high_school_politics": 0.125, + "high_school_geography": 0.25, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.3333333333333333, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.08, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.25, + "sports_science": 0.2916666666666667, + "plant_protection": 0.18518518518518517, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.3148148148148148, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.3148148148148148, + "physician": 0.2777777777777778 + } + }, + "prompt_2": { + "accuracy": 0.2646326276463263, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.23809523809523808, + "college_physics": 0.125, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.21428571428571427, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.25, + "high_school_chemistry": 0.4166666666666667, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.24, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.25, + "business_administration": 0.15789473684210525, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.22448979591836735, + "high_school_politics": 0.08333333333333333, + "high_school_geography": 0.25, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.3333333333333333, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.08, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.2692307692307692, + "sports_science": 0.2916666666666667, + "plant_protection": 0.18518518518518517, + "basic_medicine": 0.375, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.29411764705882354, + "accountant": 0.2962962962962963, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.3148148148148148, + "physician": 0.2962962962962963 + } + }, + "prompt_3": { + "accuracy": 0.2646326276463263, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.2857142857142857, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.19047619047619047, + "metrology_engineer": 0.2413793103448276, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.25, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.24, + "veterinary_medicine": 0.25, + "college_economics": 0.23333333333333334, + "business_administration": 0.15789473684210525, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.24489795918367346, + "high_school_politics": 0.08333333333333333, + "high_school_geography": 0.25, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.3333333333333333, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.08, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.25, + "sports_science": 0.2916666666666667, + "plant_protection": 0.18518518518518517, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.2962962962962963, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.3148148148148148, + "physician": 0.2777777777777778 + } + }, + "prompt_4": { + "accuracy": 0.27085927770859275, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.2857142857142857, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.16666666666666666, + "metrology_engineer": 0.1724137931034483, + "high_school_mathematics": 0.391304347826087, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.21666666666666667, + "business_administration": 0.15789473684210525, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.24489795918367346, + "high_school_politics": 0.041666666666666664, + "high_school_geography": 0.25, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.3333333333333333, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.12, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.2692307692307692, + "sports_science": 0.3333333333333333, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.29411764705882354, + "accountant": 0.2962962962962963, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.3148148148148148, + "physician": 0.2962962962962963 + } + }, + "prompt_5": { + "accuracy": 0.26774595267745954, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.5, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.2619047619047619, + "college_physics": 0.041666666666666664, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.43478260869565216, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.2413793103448276, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.14285714285714285, + "college_economics": 0.21666666666666667, + "business_administration": 0.15789473684210525, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.24489795918367346, + "high_school_politics": 0.08333333333333333, + "high_school_geography": 0.25, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.3333333333333333, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.25, + "high_school_history": 0.08, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.25, + "sports_science": 0.2916666666666667, + "plant_protection": 0.18518518518518517, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.29411764705882354, + "accountant": 0.2962962962962963, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.3148148148148148, + "physician": 0.25925925925925924 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.25089605734767023 + }, + "prompt_2": { + "accuracy": 0.23655913978494625 + }, + "prompt_3": { + "accuracy": 0.2724014336917563 + }, + "prompt_4": { + "accuracy": 0.25448028673835127 + }, + "prompt_5": { + "accuracy": 0.27956989247311825 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2514246244171991, + "category_acc": { + "agronomy": 0.27218934911242604, + "anatomy": 0.3108108108108108, + "ancient_chinese": 0.25, + "arts": 0.26875, + "astronomy": 0.3151515151515151, + "business_ethics": 0.2679425837320574, + "chinese_civil_service_exam": 0.3, + "chinese_driving_rule": 0.21374045801526717, + "chinese_food_culture": 0.3235294117647059, + "chinese_foreign_policy": 0.22429906542056074, + "chinese_history": 0.2260061919504644, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.20670391061452514, + "clinical_knowledge": 0.2320675105485232, + "college_actuarial_science": 0.3018867924528302, + "college_education": 0.2336448598130841, + "college_engineering_hydrology": 0.330188679245283, + "college_law": 0.25, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.29245283018867924, + "college_medicine": 0.23809523809523808, + "computer_science": 0.22549019607843138, + "computer_security": 0.23976608187134502, + "conceptual_physics": 0.23809523809523808, + "construction_project_management": 0.2805755395683453, + "economics": 0.22012578616352202, + "education": 0.31901840490797545, + "electrical_engineering": 0.23837209302325582, + "elementary_chinese": 0.23412698412698413, + "elementary_commonsense": 0.22727272727272727, + "elementary_information_and_technology": 0.2815126050420168, + "elementary_mathematics": 0.2782608695652174, + "ethnology": 0.3037037037037037, + "food_science": 0.23076923076923078, + "genetics": 0.2556818181818182, + "global_facts": 0.2080536912751678, + "high_school_biology": 0.28994082840236685, + "high_school_chemistry": 0.18181818181818182, + "high_school_geography": 0.2796610169491525, + "high_school_mathematics": 0.18902439024390244, + "high_school_physics": 0.24545454545454545, + "high_school_politics": 0.25874125874125875, + "human_sexuality": 0.25396825396825395, + "international_law": 0.32432432432432434, + "journalism": 0.18604651162790697, + "jurisprudence": 0.2871046228710462, + "legal_and_moral_basis": 0.22429906542056074, + "logical": 0.24390243902439024, + "machine_learning": 0.20491803278688525, + "management": 0.24285714285714285, + "marketing": 0.2777777777777778, + "marxist_theory": 0.23809523809523808, + "modern_chinese": 0.19827586206896552, + "nutrition": 0.2206896551724138, + "philosophy": 0.2571428571428571, + "professional_accounting": 0.24571428571428572, + "professional_law": 0.21800947867298578, + "professional_medicine": 0.24468085106382978, + "professional_psychology": 0.2801724137931034, + "public_relations": 0.27011494252873564, + "security_study": 0.2222222222222222, + "sociology": 0.24336283185840707, + "sports_science": 0.2909090909090909, + "traditional_chinese_medicine": 0.22702702702702704, + "virology": 0.1834319526627219, + "world_history": 0.2608695652173913, + "world_religions": 0.25625 + } + }, + "prompt_2": { + "accuracy": 0.25297876014505266, + "category_acc": { + "agronomy": 0.2958579881656805, + "anatomy": 0.2972972972972973, + "ancient_chinese": 0.23170731707317074, + "arts": 0.24375, + "astronomy": 0.21212121212121213, + "business_ethics": 0.31100478468899523, + "chinese_civil_service_exam": 0.275, + "chinese_driving_rule": 0.22137404580152673, + "chinese_food_culture": 0.34558823529411764, + "chinese_foreign_policy": 0.2336448598130841, + "chinese_history": 0.23529411764705882, + "chinese_literature": 0.23529411764705882, + "chinese_teacher_qualification": 0.22346368715083798, + "clinical_knowledge": 0.2616033755274262, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.205607476635514, + "college_engineering_hydrology": 0.27358490566037735, + "college_law": 0.23148148148148148, + "college_mathematics": 0.3142857142857143, + "college_medical_statistics": 0.2830188679245283, + "college_medicine": 0.27106227106227104, + "computer_science": 0.2696078431372549, + "computer_security": 0.26900584795321636, + "conceptual_physics": 0.22448979591836735, + "construction_project_management": 0.22302158273381295, + "economics": 0.2138364779874214, + "education": 0.2085889570552147, + "electrical_engineering": 0.25, + "elementary_chinese": 0.25793650793650796, + "elementary_commonsense": 0.2777777777777778, + "elementary_information_and_technology": 0.24369747899159663, + "elementary_mathematics": 0.2956521739130435, + "ethnology": 0.25925925925925924, + "food_science": 0.2097902097902098, + "genetics": 0.2727272727272727, + "global_facts": 0.2684563758389262, + "high_school_biology": 0.21893491124260356, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.288135593220339, + "high_school_mathematics": 0.21951219512195122, + "high_school_physics": 0.15454545454545454, + "high_school_politics": 0.26573426573426573, + "human_sexuality": 0.25396825396825395, + "international_law": 0.2918918918918919, + "journalism": 0.25, + "jurisprudence": 0.2773722627737226, + "legal_and_moral_basis": 0.2383177570093458, + "logical": 0.17886178861788618, + "machine_learning": 0.28688524590163933, + "management": 0.23809523809523808, + "marketing": 0.22777777777777777, + "marxist_theory": 0.21693121693121692, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.25517241379310346, + "philosophy": 0.2857142857142857, + "professional_accounting": 0.26285714285714284, + "professional_law": 0.2559241706161137, + "professional_medicine": 0.2473404255319149, + "professional_psychology": 0.25862068965517243, + "public_relations": 0.2413793103448276, + "security_study": 0.21481481481481482, + "sociology": 0.252212389380531, + "sports_science": 0.2545454545454545, + "traditional_chinese_medicine": 0.2, + "virology": 0.30177514792899407, + "world_history": 0.2857142857142857, + "world_religions": 0.2625 + } + }, + "prompt_3": { + "accuracy": 0.25470557762044554, + "category_acc": { + "agronomy": 0.3076923076923077, + "anatomy": 0.2702702702702703, + "ancient_chinese": 0.22560975609756098, + "arts": 0.21875, + "astronomy": 0.26666666666666666, + "business_ethics": 0.2583732057416268, + "chinese_civil_service_exam": 0.28125, + "chinese_driving_rule": 0.26717557251908397, + "chinese_food_culture": 0.3382352941176471, + "chinese_foreign_policy": 0.2616822429906542, + "chinese_history": 0.22910216718266255, + "chinese_literature": 0.24509803921568626, + "chinese_teacher_qualification": 0.2011173184357542, + "clinical_knowledge": 0.24472573839662448, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.2336448598130841, + "college_engineering_hydrology": 0.32075471698113206, + "college_law": 0.23148148148148148, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.3113207547169811, + "college_medicine": 0.28205128205128205, + "computer_science": 0.2647058823529412, + "computer_security": 0.2807017543859649, + "conceptual_physics": 0.2108843537414966, + "construction_project_management": 0.23741007194244604, + "economics": 0.20754716981132076, + "education": 0.3067484662576687, + "electrical_engineering": 0.2441860465116279, + "elementary_chinese": 0.25396825396825395, + "elementary_commonsense": 0.2474747474747475, + "elementary_information_and_technology": 0.23529411764705882, + "elementary_mathematics": 0.25217391304347825, + "ethnology": 0.2962962962962963, + "food_science": 0.26573426573426573, + "genetics": 0.2897727272727273, + "global_facts": 0.22818791946308725, + "high_school_biology": 0.17751479289940827, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.2966101694915254, + "high_school_mathematics": 0.16463414634146342, + "high_school_physics": 0.21818181818181817, + "high_school_politics": 0.23776223776223776, + "human_sexuality": 0.2857142857142857, + "international_law": 0.23243243243243245, + "journalism": 0.23837209302325582, + "jurisprudence": 0.26277372262773724, + "legal_and_moral_basis": 0.24766355140186916, + "logical": 0.2764227642276423, + "machine_learning": 0.32786885245901637, + "management": 0.2571428571428571, + "marketing": 0.2722222222222222, + "marxist_theory": 0.2222222222222222, + "modern_chinese": 0.22413793103448276, + "nutrition": 0.296551724137931, + "philosophy": 0.2761904761904762, + "professional_accounting": 0.26285714285714284, + "professional_law": 0.2559241706161137, + "professional_medicine": 0.24202127659574468, + "professional_psychology": 0.25, + "public_relations": 0.29310344827586204, + "security_study": 0.22962962962962963, + "sociology": 0.2345132743362832, + "sports_science": 0.23636363636363636, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.23668639053254437, + "world_history": 0.2484472049689441, + "world_religions": 0.33125 + } + }, + "prompt_4": { + "accuracy": 0.2536694871352098, + "category_acc": { + "agronomy": 0.2485207100591716, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.2865853658536585, + "arts": 0.2125, + "astronomy": 0.2606060606060606, + "business_ethics": 0.2822966507177033, + "chinese_civil_service_exam": 0.24375, + "chinese_driving_rule": 0.26717557251908397, + "chinese_food_culture": 0.33088235294117646, + "chinese_foreign_policy": 0.2897196261682243, + "chinese_history": 0.23219814241486067, + "chinese_literature": 0.2696078431372549, + "chinese_teacher_qualification": 0.2011173184357542, + "clinical_knowledge": 0.270042194092827, + "college_actuarial_science": 0.22641509433962265, + "college_education": 0.2803738317757009, + "college_engineering_hydrology": 0.3113207547169811, + "college_law": 0.3055555555555556, + "college_mathematics": 0.2761904761904762, + "college_medical_statistics": 0.22641509433962265, + "college_medicine": 0.2454212454212454, + "computer_science": 0.24019607843137256, + "computer_security": 0.2573099415204678, + "conceptual_physics": 0.25170068027210885, + "construction_project_management": 0.22302158273381295, + "economics": 0.27044025157232704, + "education": 0.2822085889570552, + "electrical_engineering": 0.2558139534883721, + "elementary_chinese": 0.28174603174603174, + "elementary_commonsense": 0.24242424242424243, + "elementary_information_and_technology": 0.18067226890756302, + "elementary_mathematics": 0.2782608695652174, + "ethnology": 0.2962962962962963, + "food_science": 0.26573426573426573, + "genetics": 0.30113636363636365, + "global_facts": 0.2080536912751678, + "high_school_biology": 0.21301775147928995, + "high_school_chemistry": 0.25757575757575757, + "high_school_geography": 0.2542372881355932, + "high_school_mathematics": 0.24390243902439024, + "high_school_physics": 0.2909090909090909, + "high_school_politics": 0.26573426573426573, + "human_sexuality": 0.2619047619047619, + "international_law": 0.24864864864864866, + "journalism": 0.21511627906976744, + "jurisprudence": 0.25060827250608275, + "legal_and_moral_basis": 0.2336448598130841, + "logical": 0.2032520325203252, + "machine_learning": 0.20491803278688525, + "management": 0.2619047619047619, + "marketing": 0.29444444444444445, + "marxist_theory": 0.24867724867724866, + "modern_chinese": 0.29310344827586204, + "nutrition": 0.2827586206896552, + "philosophy": 0.20952380952380953, + "professional_accounting": 0.22285714285714286, + "professional_law": 0.2559241706161137, + "professional_medicine": 0.2712765957446808, + "professional_psychology": 0.2629310344827586, + "public_relations": 0.27586206896551724, + "security_study": 0.2, + "sociology": 0.23008849557522124, + "sports_science": 0.23030303030303031, + "traditional_chinese_medicine": 0.22702702702702704, + "virology": 0.24260355029585798, + "world_history": 0.2670807453416149, + "world_religions": 0.275 + } + }, + "prompt_5": { + "accuracy": 0.24952512519426698, + "category_acc": { + "agronomy": 0.27218934911242604, + "anatomy": 0.25, + "ancient_chinese": 0.24390243902439024, + "arts": 0.20625, + "astronomy": 0.2787878787878788, + "business_ethics": 0.291866028708134, + "chinese_civil_service_exam": 0.25625, + "chinese_driving_rule": 0.29770992366412213, + "chinese_food_culture": 0.2867647058823529, + "chinese_foreign_policy": 0.2523364485981308, + "chinese_history": 0.24148606811145512, + "chinese_literature": 0.24019607843137256, + "chinese_teacher_qualification": 0.22346368715083798, + "clinical_knowledge": 0.2489451476793249, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.27102803738317754, + "college_engineering_hydrology": 0.330188679245283, + "college_law": 0.24074074074074073, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.2358490566037736, + "college_medicine": 0.21611721611721613, + "computer_science": 0.22549019607843138, + "computer_security": 0.2631578947368421, + "conceptual_physics": 0.25170068027210885, + "construction_project_management": 0.2949640287769784, + "economics": 0.2138364779874214, + "education": 0.294478527607362, + "electrical_engineering": 0.27906976744186046, + "elementary_chinese": 0.23015873015873015, + "elementary_commonsense": 0.2222222222222222, + "elementary_information_and_technology": 0.24369747899159663, + "elementary_mathematics": 0.25217391304347825, + "ethnology": 0.2740740740740741, + "food_science": 0.22377622377622378, + "genetics": 0.26704545454545453, + "global_facts": 0.1476510067114094, + "high_school_biology": 0.26627218934911245, + "high_school_chemistry": 0.20454545454545456, + "high_school_geography": 0.2288135593220339, + "high_school_mathematics": 0.17073170731707318, + "high_school_physics": 0.23636363636363636, + "high_school_politics": 0.26573426573426573, + "human_sexuality": 0.2222222222222222, + "international_law": 0.24324324324324326, + "journalism": 0.20930232558139536, + "jurisprudence": 0.26277372262773724, + "legal_and_moral_basis": 0.2383177570093458, + "logical": 0.2845528455284553, + "machine_learning": 0.3360655737704918, + "management": 0.22857142857142856, + "marketing": 0.2722222222222222, + "marxist_theory": 0.25396825396825395, + "modern_chinese": 0.2413793103448276, + "nutrition": 0.30344827586206896, + "philosophy": 0.21904761904761905, + "professional_accounting": 0.24, + "professional_law": 0.22748815165876776, + "professional_medicine": 0.2526595744680851, + "professional_psychology": 0.21982758620689655, + "public_relations": 0.29310344827586204, + "security_study": 0.28888888888888886, + "sociology": 0.24336283185840707, + "sports_science": 0.2606060606060606, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.24260355029585798, + "world_history": 0.2360248447204969, + "world_religions": 0.28125 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.42424242424242425 + }, + "prompt_2": { + "accuracy": 0.42424242424242425 + }, + "prompt_3": { + "accuracy": 0.45454545454545453 + }, + "prompt_4": { + "accuracy": 0.3939393939393939 + }, + "prompt_5": { + "accuracy": 0.3939393939393939 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.375 + }, + "prompt_2": { + "accuracy": 0.425 + }, + "prompt_3": { + "accuracy": 0.4090909090909091 + }, + "prompt_4": { + "accuracy": 0.4431818181818182 + }, + "prompt_5": { + "accuracy": 0.4590909090909091 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3257627118644068 + }, + "prompt_2": { + "accuracy": 0.32101694915254236 + }, + "prompt_3": { + "accuracy": 0.3142372881355932 + }, + "prompt_4": { + "accuracy": 0.3264406779661017 + }, + "prompt_5": { + "accuracy": 0.32610169491525426 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2703814510097233 + }, + "prompt_2": { + "accuracy": 0.2819745699326851 + }, + "prompt_3": { + "accuracy": 0.28534031413612565 + }, + "prompt_4": { + "accuracy": 0.2830964846671653 + }, + "prompt_5": { + "accuracy": 0.28272251308900526 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.9216070553650172 + }, + "prompt_2": { + "accuracy": 0.9250367466927977 + }, + "prompt_3": { + "accuracy": 0.9220970112689858 + }, + "prompt_4": { + "accuracy": 0.9240568348848603 + }, + "prompt_5": { + "accuracy": 0.9240568348848603 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.5283141765151181, + "rouge2": 0.2841682908341947, + "rougeL": 0.4439385397634555, + "avg_rouge": 0.4188070023709228 + }, + "prompt_2": { + "rouge1": 0.5311394348123868, + "rouge2": 0.28916594396953255, + "rougeL": 0.44524999752398886, + "avg_rouge": 0.4218517921019694 + }, + "prompt_3": { + "rouge1": 0.5306022687065938, + "rouge2": 0.288898011991371, + "rougeL": 0.4448962586942802, + "avg_rouge": 0.4214655131307483 + }, + "prompt_4": { + "rouge1": 0.5287854277947174, + "rouge2": 0.28671129433746034, + "rougeL": 0.44258691846581183, + "avg_rouge": 0.4193612135326632 + }, + "prompt_5": { + "rouge1": 0.5284886644407688, + "rouge2": 0.28674540748733024, + "rougeL": 0.44364468459290207, + "avg_rouge": 0.41962625217366706 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.23516593911979225, + "rouge2": 0.06556769074364975, + "rougeL": 0.175650063040926, + "avg_rouge": 0.158794564301456 + }, + "prompt_2": { + "rouge1": 0.23235043239609107, + "rouge2": 0.06516831879832731, + "rougeL": 0.17253374137518865, + "avg_rouge": 0.156684164189869 + }, + "prompt_3": { + "rouge1": 0.23191487346167575, + "rouge2": 0.06496385456257882, + "rougeL": 0.17168698393524168, + "avg_rouge": 0.1561885706531654 + }, + "prompt_4": { + "rouge1": 0.23474499252840375, + "rouge2": 0.06531406645201585, + "rougeL": 0.1749962961007415, + "avg_rouge": 0.15835178502705372 + }, + "prompt_5": { + "rouge1": 0.23480280387607813, + "rouge2": 0.06335623152293764, + "rougeL": 0.18096259443924614, + "avg_rouge": 0.1597072099460873 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.9598623853211009 + }, + "prompt_2": { + "accuracy": 0.9598623853211009 + }, + "prompt_3": { + "accuracy": 0.9541284403669725 + }, + "prompt_4": { + "accuracy": 0.9587155963302753 + }, + "prompt_5": { + "accuracy": 0.9541284403669725 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8063279002876318 + }, + "prompt_2": { + "accuracy": 0.8072866730584851 + }, + "prompt_3": { + "accuracy": 0.8015340364333653 + }, + "prompt_4": { + "accuracy": 0.8015340364333653 + }, + "prompt_5": { + "accuracy": 0.8034515819750719 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.844 + }, + "prompt_2": { + "accuracy": 0.8665 + }, + "prompt_3": { + "accuracy": 0.857 + }, + "prompt_4": { + "accuracy": 0.863 + }, + "prompt_5": { + "accuracy": 0.8635 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.891 + }, + "prompt_2": { + "accuracy": 0.8895 + }, + "prompt_3": { + "accuracy": 0.894 + }, + "prompt_4": { + "accuracy": 0.8735 + }, + "prompt_5": { + "accuracy": 0.879 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.9395 + }, + "prompt_2": { + "accuracy": 0.9415 + }, + "prompt_3": { + "accuracy": 0.94 + }, + "prompt_4": { + "accuracy": 0.946 + }, + "prompt_5": { + "accuracy": 0.9435 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7746478873239436 + }, + "prompt_2": { + "accuracy": 0.7887323943661971 + }, + "prompt_3": { + "accuracy": 0.7746478873239436 + }, + "prompt_4": { + "accuracy": 0.7605633802816901 + }, + "prompt_5": { + "accuracy": 0.7605633802816901 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.9169675090252708 + }, + "prompt_2": { + "accuracy": 0.9133574007220217 + }, + "prompt_3": { + "accuracy": 0.9133574007220217 + }, + "prompt_4": { + "accuracy": 0.8916967509025271 + }, + "prompt_5": { + "accuracy": 0.9025270758122743 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8112745098039216 + }, + "prompt_2": { + "accuracy": 0.8284313725490197 + }, + "prompt_3": { + "accuracy": 0.8112745098039216 + }, + "prompt_4": { + "accuracy": 0.8333333333333334 + }, + "prompt_5": { + "accuracy": 0.8259803921568627 + } } }, "five_shot": { @@ -2370,53 +19735,1733 @@ "model_link": "https://huggingface.co/lmsys/vicuna-7b-v1.3", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.40190476190476193, + "language_acc": { + "Malay": 0.3333333333333333, + "English": 0.4866666666666667, + "Vietnamese": 0.35333333333333333, + "Spanish": 0.4666666666666667, + "Indonesian": 0.36666666666666664, + "Filipino": 0.3466666666666667, + "Chinese": 0.46 + }, + "consistency_score_2": 0.5650793650793651, + "consistency_score_3": 0.4040000000000002, + "consistency_score_4": 0.32057142857142856, + "consistency_score_5": 0.2698412698412699, + "consistency_score_6": 0.2361904761904762, + "consistency_score_7": 0.21333333333333335, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.58, + "Malay,Vietnamese": 0.5866666666666667, + "Malay,Spanish": 0.5133333333333333, + "Malay,Indonesian": 0.6933333333333334, + "Malay,Filipino": 0.54, + "Malay,Chinese": 0.58, + "English,Vietnamese": 0.48, + "English,Spanish": 0.6866666666666666, + "English,Indonesian": 0.58, + "English,Filipino": 0.5533333333333333, + "English,Chinese": 0.6, + "Vietnamese,Spanish": 0.4666666666666667, + "Vietnamese,Indonesian": 0.56, + "Vietnamese,Filipino": 0.5333333333333333, + "Vietnamese,Chinese": 0.54, + "Spanish,Indonesian": 0.5466666666666666, + "Spanish,Filipino": 0.5, + "Spanish,Chinese": 0.5933333333333334, + "Indonesian,Filipino": 0.56, + "Indonesian,Chinese": 0.5866666666666667, + "Filipino,Chinese": 0.5866666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.3933333333333333, + "Malay,English,Spanish": 0.43333333333333335, + "Malay,English,Indonesian": 0.48, + "Malay,English,Filipino": 0.38, + "Malay,English,Chinese": 0.4266666666666667, + "Malay,Vietnamese,Spanish": 0.36, + "Malay,Vietnamese,Indonesian": 0.4866666666666667, + "Malay,Vietnamese,Filipino": 0.38666666666666666, + "Malay,Vietnamese,Chinese": 0.42, + "Malay,Spanish,Indonesian": 0.42, + "Malay,Spanish,Filipino": 0.32666666666666666, + "Malay,Spanish,Chinese": 0.3933333333333333, + "Malay,Indonesian,Filipino": 0.44, + "Malay,Indonesian,Chinese": 0.48, + "Malay,Filipino,Chinese": 0.4, + "English,Vietnamese,Spanish": 0.36666666666666664, + "English,Vietnamese,Indonesian": 0.38, + "English,Vietnamese,Filipino": 0.34, + "English,Vietnamese,Chinese": 0.38, + "English,Spanish,Indonesian": 0.47333333333333333, + "English,Spanish,Filipino": 0.42, + "English,Spanish,Chinese": 0.48, + "English,Indonesian,Filipino": 0.4, + "English,Indonesian,Chinese": 0.43333333333333335, + "English,Filipino,Chinese": 0.41333333333333333, + "Vietnamese,Spanish,Indonesian": 0.35333333333333333, + "Vietnamese,Spanish,Filipino": 0.32666666666666666, + "Vietnamese,Spanish,Chinese": 0.36666666666666664, + "Vietnamese,Indonesian,Filipino": 0.38, + "Vietnamese,Indonesian,Chinese": 0.41333333333333333, + "Vietnamese,Filipino,Chinese": 0.38, + "Spanish,Indonesian,Filipino": 0.36666666666666664, + "Spanish,Indonesian,Chinese": 0.4, + "Spanish,Filipino,Chinese": 0.41333333333333333, + "Indonesian,Filipino,Chinese": 0.4266666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.30666666666666664, + "Malay,English,Vietnamese,Indonesian": 0.3466666666666667, + "Malay,English,Vietnamese,Filipino": 0.29333333333333333, + "Malay,English,Vietnamese,Chinese": 0.30666666666666664, + "Malay,English,Spanish,Indonesian": 0.38666666666666666, + "Malay,English,Spanish,Filipino": 0.3, + "Malay,English,Spanish,Chinese": 0.3466666666666667, + "Malay,English,Indonesian,Filipino": 0.3466666666666667, + "Malay,English,Indonesian,Chinese": 0.38666666666666666, + "Malay,English,Filipino,Chinese": 0.31333333333333335, + "Malay,Vietnamese,Spanish,Indonesian": 0.32, + "Malay,Vietnamese,Spanish,Filipino": 0.2733333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.29333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.3333333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.36, + "Malay,Vietnamese,Filipino,Chinese": 0.31333333333333335, + "Malay,Spanish,Indonesian,Filipino": 0.30666666666666664, + "Malay,Spanish,Indonesian,Chinese": 0.3466666666666667, + "Malay,Spanish,Filipino,Chinese": 0.2866666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.35333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.32, + "English,Vietnamese,Spanish,Filipino": 0.2866666666666667, + "English,Vietnamese,Spanish,Chinese": 0.30666666666666664, + "English,Vietnamese,Indonesian,Filipino": 0.2866666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.31333333333333335, + "English,Vietnamese,Filipino,Chinese": 0.3, + "English,Spanish,Indonesian,Filipino": 0.34, + "English,Spanish,Indonesian,Chinese": 0.37333333333333335, + "English,Spanish,Filipino,Chinese": 0.35333333333333333, + "English,Indonesian,Filipino,Chinese": 0.32666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino": 0.28, + "Vietnamese,Spanish,Indonesian,Chinese": 0.29333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.29333333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.31333333333333335, + "Spanish,Indonesian,Filipino,Chinese": 0.31333333333333335 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.29333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.24666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.25333333333333335, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.28, + "Malay,English,Vietnamese,Filipino,Chinese": 0.25333333333333335, + "Malay,English,Spanish,Indonesian,Filipino": 0.29333333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.3333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.26666666666666666, + "Malay,English,Indonesian,Filipino,Chinese": 0.29333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.26, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.24, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.28, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.26666666666666666, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.26, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.2733333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.26, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.25333333333333335, + "English,Spanish,Indonesian,Filipino,Chinese": 0.29333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.24, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.24666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.22, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.23333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.26, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.21333333333333335 + } + }, + "AC3_2": 0.4697245411139163, + "AC3_3": 0.40294965724142096, + "AC3_4": 0.3566600127561777, + "AC3_5": 0.3228913493083103, + "AC3_6": 0.2975294953336139, + "AC3_7": 0.2787203301920553 + }, + "prompt_2": { + "overall_acc": 0.379047619047619, + "language_acc": { + "Malay": 0.34, + "English": 0.4866666666666667, + "Vietnamese": 0.34, + "Spanish": 0.4533333333333333, + "Indonesian": 0.30666666666666664, + "Filipino": 0.31333333333333335, + "Chinese": 0.41333333333333333 + }, + "consistency_score_2": 0.5742857142857142, + "consistency_score_3": 0.40723809523809523, + "consistency_score_4": 0.31638095238095243, + "consistency_score_5": 0.25936507936507935, + "consistency_score_6": 0.22095238095238096, + "consistency_score_7": 0.19333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.52, + "Malay,Vietnamese": 0.5933333333333334, + "Malay,Spanish": 0.6266666666666667, + "Malay,Indonesian": 0.7, + "Malay,Filipino": 0.5733333333333334, + "Malay,Chinese": 0.5933333333333334, + "English,Vietnamese": 0.48, + "English,Spanish": 0.6533333333333333, + "English,Indonesian": 0.5333333333333333, + "English,Filipino": 0.5, + "English,Chinese": 0.56, + "Vietnamese,Spanish": 0.4533333333333333, + "Vietnamese,Indonesian": 0.5666666666666667, + "Vietnamese,Filipino": 0.5866666666666667, + "Vietnamese,Chinese": 0.56, + "Spanish,Indonesian": 0.5866666666666667, + "Spanish,Filipino": 0.5666666666666667, + "Spanish,Chinese": 0.6, + "Indonesian,Filipino": 0.5866666666666667, + "Indonesian,Chinese": 0.6066666666666667, + "Filipino,Chinese": 0.6133333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.36, + "Malay,English,Spanish": 0.44, + "Malay,English,Indonesian": 0.41333333333333333, + "Malay,English,Filipino": 0.34, + "Malay,English,Chinese": 0.38666666666666666, + "Malay,Vietnamese,Spanish": 0.4, + "Malay,Vietnamese,Indonesian": 0.4666666666666667, + "Malay,Vietnamese,Filipino": 0.4066666666666667, + "Malay,Vietnamese,Chinese": 0.4266666666666667, + "Malay,Spanish,Indonesian": 0.49333333333333335, + "Malay,Spanish,Filipino": 0.42, + "Malay,Spanish,Chinese": 0.4533333333333333, + "Malay,Indonesian,Filipino": 0.4666666666666667, + "Malay,Indonesian,Chinese": 0.4866666666666667, + "Malay,Filipino,Chinese": 0.4266666666666667, + "English,Vietnamese,Spanish": 0.34, + "English,Vietnamese,Indonesian": 0.36, + "English,Vietnamese,Filipino": 0.32666666666666666, + "English,Vietnamese,Chinese": 0.36666666666666664, + "English,Spanish,Indonesian": 0.43333333333333335, + "English,Spanish,Filipino": 0.4066666666666667, + "English,Spanish,Chinese": 0.44666666666666666, + "English,Indonesian,Filipino": 0.35333333333333333, + "English,Indonesian,Chinese": 0.38666666666666666, + "English,Filipino,Chinese": 0.38666666666666666, + "Vietnamese,Spanish,Indonesian": 0.35333333333333333, + "Vietnamese,Spanish,Filipino": 0.36, + "Vietnamese,Spanish,Chinese": 0.36666666666666664, + "Vietnamese,Indonesian,Filipino": 0.41333333333333333, + "Vietnamese,Indonesian,Chinese": 0.41333333333333333, + "Vietnamese,Filipino,Chinese": 0.4266666666666667, + "Spanish,Indonesian,Filipino": 0.4066666666666667, + "Spanish,Indonesian,Chinese": 0.44, + "Spanish,Filipino,Chinese": 0.43333333333333335, + "Indonesian,Filipino,Chinese": 0.44666666666666666 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.29333333333333333, + "Malay,English,Vietnamese,Indonesian": 0.3, + "Malay,English,Vietnamese,Filipino": 0.25333333333333335, + "Malay,English,Vietnamese,Chinese": 0.29333333333333333, + "Malay,English,Spanish,Indonesian": 0.36, + "Malay,English,Spanish,Filipino": 0.30666666666666664, + "Malay,English,Spanish,Chinese": 0.34, + "Malay,English,Indonesian,Filipino": 0.2866666666666667, + "Malay,English,Indonesian,Chinese": 0.32666666666666666, + "Malay,English,Filipino,Chinese": 0.3, + "Malay,Vietnamese,Spanish,Indonesian": 0.32666666666666666, + "Malay,Vietnamese,Spanish,Filipino": 0.31333333333333335, + "Malay,Vietnamese,Spanish,Chinese": 0.32, + "Malay,Vietnamese,Indonesian,Filipino": 0.34, + "Malay,Vietnamese,Indonesian,Chinese": 0.3466666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.32, + "Malay,Spanish,Indonesian,Filipino": 0.3466666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.3933333333333333, + "Malay,Spanish,Filipino,Chinese": 0.35333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.36, + "English,Vietnamese,Spanish,Indonesian": 0.28, + "English,Vietnamese,Spanish,Filipino": 0.26666666666666666, + "English,Vietnamese,Spanish,Chinese": 0.3, + "English,Vietnamese,Indonesian,Filipino": 0.26, + "English,Vietnamese,Indonesian,Chinese": 0.2866666666666667, + "English,Vietnamese,Filipino,Chinese": 0.3, + "English,Spanish,Indonesian,Filipino": 0.32, + "English,Spanish,Indonesian,Chinese": 0.3466666666666667, + "English,Spanish,Filipino,Chinese": 0.3333333333333333, + "English,Indonesian,Filipino,Chinese": 0.31333333333333335, + "Vietnamese,Spanish,Indonesian,Filipino": 0.29333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.30666666666666664, + "Vietnamese,Spanish,Filipino,Chinese": 0.30666666666666664, + "Vietnamese,Indonesian,Filipino,Chinese": 0.3333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.3466666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.25333333333333335, + "Malay,English,Vietnamese,Spanish,Filipino": 0.22666666666666666, + "Malay,English,Vietnamese,Spanish,Chinese": 0.25333333333333335, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.22, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.24666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.23333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.26666666666666666, + "Malay,English,Spanish,Indonesian,Chinese": 0.3, + "Malay,English,Spanish,Filipino,Chinese": 0.2733333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.26, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.28, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.26666666666666666, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.26666666666666666, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.30666666666666664, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.23333333333333334, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.25333333333333335, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.25333333333333335, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.24, + "English,Spanish,Indonesian,Filipino,Chinese": 0.2866666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.26 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.20666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.22666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.21333333333333335, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.2, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333 + } + }, + "AC3_2": 0.456674753769708, + "AC3_3": 0.392637504564313, + "AC3_4": 0.3448907669940079, + "AC3_5": 0.3079879708735919, + "AC3_6": 0.2791715796964797, + "AC3_7": 0.2560621186463341 + }, + "prompt_3": { + "overall_acc": 0.3695238095238095, + "language_acc": { + "Malay": 0.30666666666666664, + "English": 0.4533333333333333, + "Vietnamese": 0.3333333333333333, + "Spanish": 0.4266666666666667, + "Indonesian": 0.32666666666666666, + "Filipino": 0.32, + "Chinese": 0.42 + }, + "consistency_score_2": 0.566984126984127, + "consistency_score_3": 0.39352380952380955, + "consistency_score_4": 0.29790476190476195, + "consistency_score_5": 0.2387301587301587, + "consistency_score_6": 0.2, + "consistency_score_7": 0.17333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5066666666666667, + "Malay,Vietnamese": 0.6133333333333333, + "Malay,Spanish": 0.56, + "Malay,Indonesian": 0.6933333333333334, + "Malay,Filipino": 0.5666666666666667, + "Malay,Chinese": 0.54, + "English,Vietnamese": 0.43333333333333335, + "English,Spanish": 0.7133333333333334, + "English,Indonesian": 0.5466666666666666, + "English,Filipino": 0.56, + "English,Chinese": 0.5733333333333334, + "Vietnamese,Spanish": 0.43333333333333335, + "Vietnamese,Indonesian": 0.5333333333333333, + "Vietnamese,Filipino": 0.5666666666666667, + "Vietnamese,Chinese": 0.5266666666666666, + "Spanish,Indonesian": 0.6, + "Spanish,Filipino": 0.54, + "Spanish,Chinese": 0.5533333333333333, + "Indonesian,Filipino": 0.6266666666666667, + "Indonesian,Chinese": 0.6133333333333333, + "Filipino,Chinese": 0.6066666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.34, + "Malay,English,Spanish": 0.43333333333333335, + "Malay,English,Indonesian": 0.4066666666666667, + "Malay,English,Filipino": 0.35333333333333333, + "Malay,English,Chinese": 0.3466666666666667, + "Malay,Vietnamese,Spanish": 0.36, + "Malay,Vietnamese,Indonesian": 0.4666666666666667, + "Malay,Vietnamese,Filipino": 0.4266666666666667, + "Malay,Vietnamese,Chinese": 0.38666666666666666, + "Malay,Spanish,Indonesian": 0.4533333333333333, + "Malay,Spanish,Filipino": 0.36, + "Malay,Spanish,Chinese": 0.37333333333333335, + "Malay,Indonesian,Filipino": 0.47333333333333333, + "Malay,Indonesian,Chinese": 0.46, + "Malay,Filipino,Chinese": 0.3933333333333333, + "English,Vietnamese,Spanish": 0.3466666666666667, + "English,Vietnamese,Indonesian": 0.32, + "English,Vietnamese,Filipino": 0.3333333333333333, + "English,Vietnamese,Chinese": 0.31333333333333335, + "English,Spanish,Indonesian": 0.4666666666666667, + "English,Spanish,Filipino": 0.44666666666666666, + "English,Spanish,Chinese": 0.4533333333333333, + "English,Indonesian,Filipino": 0.41333333333333333, + "English,Indonesian,Chinese": 0.4066666666666667, + "English,Filipino,Chinese": 0.41333333333333333, + "Vietnamese,Spanish,Indonesian": 0.32666666666666666, + "Vietnamese,Spanish,Filipino": 0.32666666666666666, + "Vietnamese,Spanish,Chinese": 0.30666666666666664, + "Vietnamese,Indonesian,Filipino": 0.41333333333333333, + "Vietnamese,Indonesian,Chinese": 0.37333333333333335, + "Vietnamese,Filipino,Chinese": 0.3933333333333333, + "Spanish,Indonesian,Filipino": 0.41333333333333333, + "Spanish,Indonesian,Chinese": 0.4266666666666667, + "Spanish,Filipino,Chinese": 0.4, + "Indonesian,Filipino,Chinese": 0.44666666666666666 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.29333333333333333, + "Malay,English,Vietnamese,Indonesian": 0.2866666666666667, + "Malay,English,Vietnamese,Filipino": 0.26666666666666666, + "Malay,English,Vietnamese,Chinese": 0.24, + "Malay,English,Spanish,Indonesian": 0.36, + "Malay,English,Spanish,Filipino": 0.31333333333333335, + "Malay,English,Spanish,Chinese": 0.31333333333333335, + "Malay,English,Indonesian,Filipino": 0.31333333333333335, + "Malay,English,Indonesian,Chinese": 0.32, + "Malay,English,Filipino,Chinese": 0.2733333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.29333333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.35333333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.31333333333333335, + "Malay,Vietnamese,Filipino,Chinese": 0.3, + "Malay,Spanish,Indonesian,Filipino": 0.32666666666666666, + "Malay,Spanish,Indonesian,Chinese": 0.3466666666666667, + "Malay,Spanish,Filipino,Chinese": 0.28, + "Malay,Indonesian,Filipino,Chinese": 0.3466666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.26666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.2733333333333333, + "English,Vietnamese,Spanish,Chinese": 0.26, + "English,Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "English,Vietnamese,Indonesian,Chinese": 0.24666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.25333333333333335, + "English,Spanish,Indonesian,Filipino": 0.36, + "English,Spanish,Indonesian,Chinese": 0.35333333333333333, + "English,Spanish,Filipino,Chinese": 0.35333333333333333, + "English,Indonesian,Filipino,Chinese": 0.3333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.2733333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.24666666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.25333333333333335, + "Vietnamese,Indonesian,Filipino,Chinese": 0.30666666666666664, + "Spanish,Indonesian,Filipino,Chinese": 0.32666666666666666 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.24666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.23333333333333334, + "Malay,English,Vietnamese,Spanish,Chinese": 0.21333333333333335, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.24, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.21333333333333335, + "Malay,English,Vietnamese,Filipino,Chinese": 0.2, + "Malay,English,Spanish,Indonesian,Filipino": 0.2866666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.29333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.25333333333333335, + "Malay,English,Indonesian,Filipino,Chinese": 0.26, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.24, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.22, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.2, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.25333333333333335, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.26666666666666666, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.23333333333333334, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.21333333333333335, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.22, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.21333333333333335, + "English,Spanish,Indonesian,Filipino,Chinese": 0.3, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.21333333333333335 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.19333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.18, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.18666666666666668, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334 + } + }, + "AC3_2": 0.4474369652468152, + "AC3_3": 0.3811463756246328, + "AC3_4": 0.32987171119215, + "AC3_5": 0.2900646187016906, + "AC3_6": 0.25953177252968085, + "AC3_7": 0.23597660814366514 + }, + "prompt_4": { + "overall_acc": 0.3923809523809524, + "language_acc": { + "Malay": 0.32, + "English": 0.5066666666666667, + "Vietnamese": 0.36, + "Spanish": 0.49333333333333335, + "Indonesian": 0.32, + "Filipino": 0.32, + "Chinese": 0.4266666666666667 + }, + "consistency_score_2": 0.6250793650793651, + "consistency_score_3": 0.473904761904762, + "consistency_score_4": 0.38799999999999996, + "consistency_score_5": 0.3314285714285714, + "consistency_score_6": 0.29047619047619044, + "consistency_score_7": 0.26, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5866666666666667, + "Malay,Vietnamese": 0.6933333333333334, + "Malay,Spanish": 0.5866666666666667, + "Malay,Indonesian": 0.7133333333333334, + "Malay,Filipino": 0.6333333333333333, + "Malay,Chinese": 0.6533333333333333, + "English,Vietnamese": 0.5266666666666666, + "English,Spanish": 0.78, + "English,Indonesian": 0.5933333333333334, + "English,Filipino": 0.5733333333333334, + "English,Chinese": 0.6066666666666667, + "Vietnamese,Spanish": 0.5466666666666666, + "Vietnamese,Indonesian": 0.62, + "Vietnamese,Filipino": 0.6266666666666667, + "Vietnamese,Chinese": 0.6333333333333333, + "Spanish,Indonesian": 0.6133333333333333, + "Spanish,Filipino": 0.58, + "Spanish,Chinese": 0.6866666666666666, + "Indonesian,Filipino": 0.6133333333333333, + "Indonesian,Chinese": 0.6266666666666667, + "Filipino,Chinese": 0.6333333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.4533333333333333, + "Malay,English,Spanish": 0.5133333333333333, + "Malay,English,Indonesian": 0.4866666666666667, + "Malay,English,Filipino": 0.4266666666666667, + "Malay,English,Chinese": 0.46, + "Malay,Vietnamese,Spanish": 0.4666666666666667, + "Malay,Vietnamese,Indonesian": 0.5466666666666666, + "Malay,Vietnamese,Filipino": 0.5, + "Malay,Vietnamese,Chinese": 0.52, + "Malay,Spanish,Indonesian": 0.49333333333333335, + "Malay,Spanish,Filipino": 0.44, + "Malay,Spanish,Chinese": 0.5, + "Malay,Indonesian,Filipino": 0.5066666666666667, + "Malay,Indonesian,Chinese": 0.5266666666666666, + "Malay,Filipino,Chinese": 0.47333333333333333, + "English,Vietnamese,Spanish": 0.46, + "English,Vietnamese,Indonesian": 0.41333333333333333, + "English,Vietnamese,Filipino": 0.4066666666666667, + "English,Vietnamese,Chinese": 0.4266666666666667, + "English,Spanish,Indonesian": 0.5266666666666666, + "English,Spanish,Filipino": 0.49333333333333335, + "English,Spanish,Chinese": 0.5733333333333334, + "English,Indonesian,Filipino": 0.42, + "English,Indonesian,Chinese": 0.4533333333333333, + "English,Filipino,Chinese": 0.44666666666666666, + "Vietnamese,Spanish,Indonesian": 0.43333333333333335, + "Vietnamese,Spanish,Filipino": 0.43333333333333335, + "Vietnamese,Spanish,Chinese": 0.47333333333333333, + "Vietnamese,Indonesian,Filipino": 0.4666666666666667, + "Vietnamese,Indonesian,Chinese": 0.48, + "Vietnamese,Filipino,Chinese": 0.47333333333333333, + "Spanish,Indonesian,Filipino": 0.43333333333333335, + "Spanish,Indonesian,Chinese": 0.5, + "Spanish,Filipino,Chinese": 0.5, + "Indonesian,Filipino,Chinese": 0.46 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.4066666666666667, + "Malay,English,Vietnamese,Indonesian": 0.38, + "Malay,English,Vietnamese,Filipino": 0.36, + "Malay,English,Vietnamese,Chinese": 0.36, + "Malay,English,Spanish,Indonesian": 0.43333333333333335, + "Malay,English,Spanish,Filipino": 0.38666666666666666, + "Malay,English,Spanish,Chinese": 0.44666666666666666, + "Malay,English,Indonesian,Filipino": 0.36, + "Malay,English,Indonesian,Chinese": 0.3933333333333333, + "Malay,English,Filipino,Chinese": 0.36, + "Malay,Vietnamese,Spanish,Indonesian": 0.3933333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.3933333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.4066666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.42, + "Malay,Vietnamese,Indonesian,Chinese": 0.43333333333333335, + "Malay,Vietnamese,Filipino,Chinese": 0.3933333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.37333333333333335, + "Malay,Spanish,Indonesian,Chinese": 0.4266666666666667, + "Malay,Spanish,Filipino,Chinese": 0.4, + "Malay,Indonesian,Filipino,Chinese": 0.3933333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.36666666666666664, + "English,Vietnamese,Spanish,Filipino": 0.37333333333333335, + "English,Vietnamese,Spanish,Chinese": 0.4066666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.3333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.34, + "English,Vietnamese,Filipino,Chinese": 0.35333333333333333, + "English,Spanish,Indonesian,Filipino": 0.37333333333333335, + "English,Spanish,Indonesian,Chinese": 0.43333333333333335, + "English,Spanish,Filipino,Chinese": 0.42, + "English,Indonesian,Filipino,Chinese": 0.35333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.36, + "Vietnamese,Spanish,Indonesian,Chinese": 0.38666666666666666, + "Vietnamese,Spanish,Filipino,Chinese": 0.3933333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.37333333333333335, + "Spanish,Indonesian,Filipino,Chinese": 0.3933333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.34, + "Malay,English,Vietnamese,Spanish,Filipino": 0.34, + "Malay,English,Vietnamese,Spanish,Chinese": 0.35333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.30666666666666664, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.30666666666666664, + "Malay,English,Vietnamese,Filipino,Chinese": 0.30666666666666664, + "Malay,English,Spanish,Indonesian,Filipino": 0.32666666666666666, + "Malay,English,Spanish,Indonesian,Chinese": 0.38, + "Malay,English,Spanish,Filipino,Chinese": 0.35333333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.30666666666666664, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.3333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.3466666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.35333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.34, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.34, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.30666666666666664, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.32666666666666666, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.34, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.2866666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.34, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.32666666666666666 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.2866666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.3, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.30666666666666664, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.26, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.3, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.3, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.28 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.26 + } + }, + "AC3_2": 0.48212049620103675, + "AC3_3": 0.4293068642968851, + "AC3_4": 0.39017817910548114, + "AC3_5": 0.35933834581501617, + "AC3_6": 0.33382479904787915, + "AC3_7": 0.31275912403965006 + }, + "prompt_5": { + "overall_acc": 0.3771428571428571, + "language_acc": { + "Malay": 0.32666666666666666, + "English": 0.48, + "Vietnamese": 0.3466666666666667, + "Spanish": 0.46, + "Indonesian": 0.3, + "Filipino": 0.3333333333333333, + "Chinese": 0.3933333333333333 + }, + "consistency_score_2": 0.621904761904762, + "consistency_score_3": 0.4636190476190476, + "consistency_score_4": 0.3716190476190476, + "consistency_score_5": 0.3120634920634921, + "consistency_score_6": 0.2704761904761905, + "consistency_score_7": 0.24, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5666666666666667, + "Malay,Vietnamese": 0.6666666666666666, + "Malay,Spanish": 0.6133333333333333, + "Malay,Indonesian": 0.7466666666666667, + "Malay,Filipino": 0.6066666666666667, + "Malay,Chinese": 0.5866666666666667, + "English,Vietnamese": 0.54, + "English,Spanish": 0.74, + "English,Indonesian": 0.5466666666666666, + "English,Filipino": 0.5666666666666667, + "English,Chinese": 0.5733333333333334, + "Vietnamese,Spanish": 0.56, + "Vietnamese,Indonesian": 0.6266666666666667, + "Vietnamese,Filipino": 0.6466666666666666, + "Vietnamese,Chinese": 0.62, + "Spanish,Indonesian": 0.6666666666666666, + "Spanish,Filipino": 0.6133333333333333, + "Spanish,Chinese": 0.6733333333333333, + "Indonesian,Filipino": 0.6333333333333333, + "Indonesian,Chinese": 0.6266666666666667, + "Filipino,Chinese": 0.64 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.43333333333333335, + "Malay,English,Spanish": 0.4866666666666667, + "Malay,English,Indonesian": 0.46, + "Malay,English,Filipino": 0.41333333333333333, + "Malay,English,Chinese": 0.4066666666666667, + "Malay,Vietnamese,Spanish": 0.4666666666666667, + "Malay,Vietnamese,Indonesian": 0.5466666666666666, + "Malay,Vietnamese,Filipino": 0.4866666666666667, + "Malay,Vietnamese,Chinese": 0.4666666666666667, + "Malay,Spanish,Indonesian": 0.5466666666666666, + "Malay,Spanish,Filipino": 0.4533333333333333, + "Malay,Spanish,Chinese": 0.47333333333333333, + "Malay,Indonesian,Filipino": 0.5266666666666666, + "Malay,Indonesian,Chinese": 0.5066666666666667, + "Malay,Filipino,Chinese": 0.44, + "English,Vietnamese,Spanish": 0.46, + "English,Vietnamese,Indonesian": 0.38666666666666666, + "English,Vietnamese,Filipino": 0.4266666666666667, + "English,Vietnamese,Chinese": 0.4066666666666667, + "English,Spanish,Indonesian": 0.5066666666666667, + "English,Spanish,Filipino": 0.49333333333333335, + "English,Spanish,Chinese": 0.5266666666666666, + "English,Indonesian,Filipino": 0.4066666666666667, + "English,Indonesian,Chinese": 0.3933333333333333, + "English,Filipino,Chinese": 0.4266666666666667, + "Vietnamese,Spanish,Indonesian": 0.44666666666666666, + "Vietnamese,Spanish,Filipino": 0.44666666666666666, + "Vietnamese,Spanish,Chinese": 0.44666666666666666, + "Vietnamese,Indonesian,Filipino": 0.4866666666666667, + "Vietnamese,Indonesian,Chinese": 0.46, + "Vietnamese,Filipino,Chinese": 0.4666666666666667, + "Spanish,Indonesian,Filipino": 0.47333333333333333, + "Spanish,Indonesian,Chinese": 0.5, + "Spanish,Filipino,Chinese": 0.49333333333333335, + "Indonesian,Filipino,Chinese": 0.46 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.38, + "Malay,English,Vietnamese,Indonesian": 0.36, + "Malay,English,Vietnamese,Filipino": 0.3466666666666667, + "Malay,English,Vietnamese,Chinese": 0.32666666666666666, + "Malay,English,Spanish,Indonesian": 0.4266666666666667, + "Malay,English,Spanish,Filipino": 0.37333333333333335, + "Malay,English,Spanish,Chinese": 0.38, + "Malay,English,Indonesian,Filipino": 0.36666666666666664, + "Malay,English,Indonesian,Chinese": 0.3466666666666667, + "Malay,English,Filipino,Chinese": 0.32, + "Malay,Vietnamese,Spanish,Indonesian": 0.41333333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.38, + "Malay,Vietnamese,Spanish,Chinese": 0.37333333333333335, + "Malay,Vietnamese,Indonesian,Filipino": 0.43333333333333335, + "Malay,Vietnamese,Indonesian,Chinese": 0.4, + "Malay,Vietnamese,Filipino,Chinese": 0.36666666666666664, + "Malay,Spanish,Indonesian,Filipino": 0.42, + "Malay,Spanish,Indonesian,Chinese": 0.43333333333333335, + "Malay,Spanish,Filipino,Chinese": 0.38666666666666666, + "Malay,Indonesian,Filipino,Chinese": 0.38666666666666666, + "English,Vietnamese,Spanish,Indonesian": 0.36, + "English,Vietnamese,Spanish,Filipino": 0.38, + "English,Vietnamese,Spanish,Chinese": 0.37333333333333335, + "English,Vietnamese,Indonesian,Filipino": 0.32666666666666666, + "English,Vietnamese,Indonesian,Chinese": 0.29333333333333333, + "English,Vietnamese,Filipino,Chinese": 0.3333333333333333, + "English,Spanish,Indonesian,Filipino": 0.37333333333333335, + "English,Spanish,Indonesian,Chinese": 0.38, + "English,Spanish,Filipino,Chinese": 0.4066666666666667, + "English,Indonesian,Filipino,Chinese": 0.32, + "Vietnamese,Spanish,Indonesian,Filipino": 0.36666666666666664, + "Vietnamese,Spanish,Indonesian,Chinese": 0.36, + "Vietnamese,Spanish,Filipino,Chinese": 0.36666666666666664, + "Vietnamese,Indonesian,Filipino,Chinese": 0.35333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.3933333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.3333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.32, + "Malay,English,Vietnamese,Spanish,Chinese": 0.30666666666666664, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.30666666666666664, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.2733333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.2733333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.34, + "Malay,English,Spanish,Indonesian,Chinese": 0.34, + "Malay,English,Spanish,Filipino,Chinese": 0.32, + "Malay,English,Indonesian,Filipino,Chinese": 0.2866666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.3466666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.3333333333333333, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.32, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.32, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.35333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.30666666666666664, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.2866666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.32, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.25333333333333335, + "English,Spanish,Indonesian,Filipino,Chinese": 0.31333333333333335, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.3 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.2866666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.26666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.2733333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.24, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.2866666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.2866666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.25333333333333335 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24 + } + }, + "AC3_2": 0.46954105946546193, + "AC3_3": 0.4159337173432404, + "AC3_4": 0.37436057704779674, + "AC3_5": 0.34153056519358593, + "AC3_6": 0.31502521003539, + "AC3_7": 0.2933333332858024 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3157467532467532, + "language_acc": { + "English": 0.3977272727272727, + "Vietnamese": 0.2556818181818182, + "Chinese": 0.3522727272727273, + "Indonesian": 0.26704545454545453, + "Filipino": 0.23863636363636365, + "Spanish": 0.38636363636363635, + "Malay": 0.3125 + }, + "consistency_score_2": 0.46239177489177485, + "consistency_score_3": 0.2689935064935065, + "consistency_score_4": 0.17954545454545454, + "consistency_score_5": 0.13284632034632032, + "consistency_score_6": 0.10795454545454544, + "consistency_score_7": 0.09659090909090909, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3125, + "English,Chinese": 0.5, + "English,Indonesian": 0.45454545454545453, + "English,Filipino": 0.3522727272727273, + "English,Spanish": 0.5113636363636364, + "English,Malay": 0.4147727272727273, + "Vietnamese,Chinese": 0.375, + "Vietnamese,Indonesian": 0.4431818181818182, + "Vietnamese,Filipino": 0.5113636363636364, + "Vietnamese,Spanish": 0.42613636363636365, + "Vietnamese,Malay": 0.4602272727272727, + "Chinese,Indonesian": 0.4659090909090909, + "Chinese,Filipino": 0.36363636363636365, + "Chinese,Spanish": 0.4602272727272727, + "Chinese,Malay": 0.4659090909090909, + "Indonesian,Filipino": 0.5511363636363636, + "Indonesian,Spanish": 0.5, + "Indonesian,Malay": 0.6420454545454546, + "Filipino,Spanish": 0.4375, + "Filipino,Malay": 0.5625, + "Spanish,Malay": 0.5 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.19886363636363635, + "English,Vietnamese,Indonesian": 0.19886363636363635, + "English,Vietnamese,Filipino": 0.19886363636363635, + "English,Vietnamese,Spanish": 0.20454545454545456, + "English,Vietnamese,Malay": 0.1875, + "English,Chinese,Indonesian": 0.2556818181818182, + "English,Chinese,Filipino": 0.21022727272727273, + "English,Chinese,Spanish": 0.29545454545454547, + "English,Chinese,Malay": 0.25, + "English,Indonesian,Filipino": 0.2556818181818182, + "English,Indonesian,Spanish": 0.2840909090909091, + "English,Indonesian,Malay": 0.3068181818181818, + "English,Filipino,Spanish": 0.25, + "English,Filipino,Malay": 0.2556818181818182, + "English,Spanish,Malay": 0.26704545454545453, + "Vietnamese,Chinese,Indonesian": 0.23863636363636365, + "Vietnamese,Chinese,Filipino": 0.23295454545454544, + "Vietnamese,Chinese,Spanish": 0.23295454545454544, + "Vietnamese,Chinese,Malay": 0.23295454545454544, + "Vietnamese,Indonesian,Filipino": 0.32386363636363635, + "Vietnamese,Indonesian,Spanish": 0.26704545454545453, + "Vietnamese,Indonesian,Malay": 0.32386363636363635, + "Vietnamese,Filipino,Spanish": 0.2556818181818182, + "Vietnamese,Filipino,Malay": 0.32386363636363635, + "Vietnamese,Spanish,Malay": 0.2727272727272727, + "Chinese,Indonesian,Filipino": 0.2727272727272727, + "Chinese,Indonesian,Spanish": 0.26136363636363635, + "Chinese,Indonesian,Malay": 0.3465909090909091, + "Chinese,Filipino,Spanish": 0.2215909090909091, + "Chinese,Filipino,Malay": 0.2784090909090909, + "Chinese,Spanish,Malay": 0.2784090909090909, + "Indonesian,Filipino,Spanish": 0.3181818181818182, + "Indonesian,Filipino,Malay": 0.42613636363636365, + "Indonesian,Spanish,Malay": 0.3693181818181818, + "Filipino,Spanish,Malay": 0.3181818181818182 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.13636363636363635, + "English,Vietnamese,Chinese,Filipino": 0.14204545454545456, + "English,Vietnamese,Chinese,Spanish": 0.14204545454545456, + "English,Vietnamese,Chinese,Malay": 0.13068181818181818, + "English,Vietnamese,Indonesian,Filipino": 0.1534090909090909, + "English,Vietnamese,Indonesian,Spanish": 0.14772727272727273, + "English,Vietnamese,Indonesian,Malay": 0.1534090909090909, + "English,Vietnamese,Filipino,Spanish": 0.14772727272727273, + "English,Vietnamese,Filipino,Malay": 0.14772727272727273, + "English,Vietnamese,Spanish,Malay": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino": 0.1534090909090909, + "English,Chinese,Indonesian,Spanish": 0.17613636363636365, + "English,Chinese,Indonesian,Malay": 0.20454545454545456, + "English,Chinese,Filipino,Spanish": 0.16477272727272727, + "English,Chinese,Filipino,Malay": 0.17613636363636365, + "English,Chinese,Spanish,Malay": 0.1875, + "English,Indonesian,Filipino,Spanish": 0.19318181818181818, + "English,Indonesian,Filipino,Malay": 0.2159090909090909, + "English,Indonesian,Spanish,Malay": 0.2159090909090909, + "English,Filipino,Spanish,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian,Filipino": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Malay": 0.19886363636363635, + "Vietnamese,Chinese,Filipino,Spanish": 0.1534090909090909, + "Vietnamese,Chinese,Filipino,Malay": 0.18181818181818182, + "Vietnamese,Chinese,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Indonesian,Filipino,Spanish": 0.20454545454545456, + "Vietnamese,Indonesian,Filipino,Malay": 0.26136363636363635, + "Vietnamese,Indonesian,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Filipino,Spanish,Malay": 0.1875, + "Chinese,Indonesian,Filipino,Spanish": 0.17045454545454544, + "Chinese,Indonesian,Filipino,Malay": 0.24431818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.21022727272727273, + "Chinese,Filipino,Spanish,Malay": 0.18181818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.26704545454545453 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.10795454545454546, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.10795454545454546, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Spanish,Malay": 0.10795454545454546, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.125, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.13068181818181818, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.11931818181818182, + "English,Vietnamese,Filipino,Spanish,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Filipino,Spanish": 0.125, + "English,Chinese,Indonesian,Filipino,Malay": 0.1534090909090909, + "English,Chinese,Indonesian,Spanish,Malay": 0.1534090909090909, + "English,Chinese,Filipino,Spanish,Malay": 0.14204545454545456, + "English,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.125, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.1590909090909091 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.10227272727272728, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909 + } + }, + "AC3_2": 0.37525118305240257, + "AC3_3": 0.2905010383344785, + "AC3_4": 0.22891898329754312, + "AC3_5": 0.18701044126609928, + "AC3_6": 0.16089777077358955, + "AC3_7": 0.147928596957682 + }, + "prompt_2": { + "overall_acc": 0.3133116883116883, + "language_acc": { + "English": 0.4034090909090909, + "Vietnamese": 0.26704545454545453, + "Chinese": 0.32954545454545453, + "Indonesian": 0.2727272727272727, + "Filipino": 0.23863636363636365, + "Spanish": 0.3693181818181818, + "Malay": 0.3125 + }, + "consistency_score_2": 0.5045995670995671, + "consistency_score_3": 0.3224025974025975, + "consistency_score_4": 0.23295454545454547, + "consistency_score_5": 0.1820887445887446, + "consistency_score_6": 0.15097402597402595, + "consistency_score_7": 0.13068181818181818, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.35795454545454547, + "English,Chinese": 0.48863636363636365, + "English,Indonesian": 0.4715909090909091, + "English,Filipino": 0.3806818181818182, + "English,Spanish": 0.5681818181818182, + "English,Malay": 0.4375, + "Vietnamese,Chinese": 0.4034090909090909, + "Vietnamese,Indonesian": 0.5170454545454546, + "Vietnamese,Filipino": 0.5909090909090909, + "Vietnamese,Spanish": 0.5056818181818182, + "Vietnamese,Malay": 0.5738636363636364, + "Chinese,Indonesian": 0.4715909090909091, + "Chinese,Filipino": 0.3977272727272727, + "Chinese,Spanish": 0.5056818181818182, + "Chinese,Malay": 0.4943181818181818, + "Indonesian,Filipino": 0.5340909090909091, + "Indonesian,Spanish": 0.5511363636363636, + "Indonesian,Malay": 0.6761363636363636, + "Filipino,Spanish": 0.5056818181818182, + "Filipino,Malay": 0.5909090909090909, + "Spanish,Malay": 0.5738636363636364 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2215909090909091, + "English,Vietnamese,Indonesian": 0.26704545454545453, + "English,Vietnamese,Filipino": 0.25, + "English,Vietnamese,Spanish": 0.2784090909090909, + "English,Vietnamese,Malay": 0.26704545454545453, + "English,Chinese,Indonesian": 0.2727272727272727, + "English,Chinese,Filipino": 0.23295454545454544, + "English,Chinese,Spanish": 0.3409090909090909, + "English,Chinese,Malay": 0.2727272727272727, + "English,Indonesian,Filipino": 0.2784090909090909, + "English,Indonesian,Spanish": 0.3522727272727273, + "English,Indonesian,Malay": 0.3409090909090909, + "English,Filipino,Spanish": 0.2784090909090909, + "English,Filipino,Malay": 0.2727272727272727, + "English,Spanish,Malay": 0.3465909090909091, + "Vietnamese,Chinese,Indonesian": 0.2556818181818182, + "Vietnamese,Chinese,Filipino": 0.2840909090909091, + "Vietnamese,Chinese,Spanish": 0.2784090909090909, + "Vietnamese,Chinese,Malay": 0.29545454545454547, + "Vietnamese,Indonesian,Filipino": 0.3806818181818182, + "Vietnamese,Indonesian,Spanish": 0.3522727272727273, + "Vietnamese,Indonesian,Malay": 0.42045454545454547, + "Vietnamese,Filipino,Spanish": 0.36363636363636365, + "Vietnamese,Filipino,Malay": 0.42045454545454547, + "Vietnamese,Spanish,Malay": 0.3806818181818182, + "Chinese,Indonesian,Filipino": 0.2897727272727273, + "Chinese,Indonesian,Spanish": 0.3181818181818182, + "Chinese,Indonesian,Malay": 0.36363636363636365, + "Chinese,Filipino,Spanish": 0.2784090909090909, + "Chinese,Filipino,Malay": 0.3181818181818182, + "Chinese,Spanish,Malay": 0.32954545454545453, + "Indonesian,Filipino,Spanish": 0.3693181818181818, + "Indonesian,Filipino,Malay": 0.4659090909090909, + "Indonesian,Spanish,Malay": 0.4431818181818182, + "Filipino,Spanish,Malay": 0.4034090909090909 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.16477272727272727, + "English,Vietnamese,Chinese,Filipino": 0.17613636363636365, + "English,Vietnamese,Chinese,Spanish": 0.1875, + "English,Vietnamese,Chinese,Malay": 0.18181818181818182, + "English,Vietnamese,Indonesian,Filipino": 0.2159090909090909, + "English,Vietnamese,Indonesian,Spanish": 0.23295454545454544, + "English,Vietnamese,Indonesian,Malay": 0.22727272727272727, + "English,Vietnamese,Filipino,Spanish": 0.20454545454545456, + "English,Vietnamese,Filipino,Malay": 0.21022727272727273, + "English,Vietnamese,Spanish,Malay": 0.2215909090909091, + "English,Chinese,Indonesian,Filipino": 0.19318181818181818, + "English,Chinese,Indonesian,Spanish": 0.2215909090909091, + "English,Chinese,Indonesian,Malay": 0.2215909090909091, + "English,Chinese,Filipino,Spanish": 0.19318181818181818, + "English,Chinese,Filipino,Malay": 0.21022727272727273, + "English,Chinese,Spanish,Malay": 0.2215909090909091, + "English,Indonesian,Filipino,Spanish": 0.22727272727272727, + "English,Indonesian,Filipino,Malay": 0.25, + "English,Indonesian,Spanish,Malay": 0.2840909090909091, + "English,Filipino,Spanish,Malay": 0.23295454545454544, + "Vietnamese,Chinese,Indonesian,Filipino": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Spanish": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Filipino,Spanish": 0.22727272727272727, + "Vietnamese,Chinese,Filipino,Malay": 0.23863636363636365, + "Vietnamese,Chinese,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Indonesian,Filipino,Spanish": 0.2840909090909091, + "Vietnamese,Indonesian,Filipino,Malay": 0.3352272727272727, + "Vietnamese,Indonesian,Spanish,Malay": 0.30113636363636365, + "Vietnamese,Filipino,Spanish,Malay": 0.3068181818181818, + "Chinese,Indonesian,Filipino,Spanish": 0.22727272727272727, + "Chinese,Indonesian,Filipino,Malay": 0.2556818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.2556818181818182, + "Chinese,Filipino,Spanish,Malay": 0.2556818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.3352272727272727 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.1534090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.14772727272727273, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.1590909090909091, + "English,Vietnamese,Chinese,Filipino,Malay": 0.16477272727272727, + "English,Vietnamese,Chinese,Spanish,Malay": 0.1534090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.18181818181818182, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.19318181818181818, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.19886363636363635, + "English,Vietnamese,Filipino,Spanish,Malay": 0.18181818181818182, + "English,Chinese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "English,Chinese,Indonesian,Filipino,Malay": 0.1875, + "English,Chinese,Indonesian,Spanish,Malay": 0.18181818181818182, + "English,Chinese,Filipino,Spanish,Malay": 0.18181818181818182, + "English,Indonesian,Filipino,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.25, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.21022727272727273 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.14772727272727273, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13636363636363635, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.14772727272727273, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818 + } + }, + "AC3_2": 0.3865870318424462, + "AC3_3": 0.31779214141426615, + "AC3_4": 0.26722274748586516, + "AC3_5": 0.2303208806020786, + "AC3_6": 0.20376214690007788, + "AC3_7": 0.18443576529002037 + }, + "prompt_3": { + "overall_acc": 0.31737012987012986, + "language_acc": { + "English": 0.4034090909090909, + "Vietnamese": 0.30113636363636365, + "Chinese": 0.3352272727272727, + "Indonesian": 0.26136363636363635, + "Filipino": 0.2556818181818182, + "Spanish": 0.3522727272727273, + "Malay": 0.3125 + }, + "consistency_score_2": 0.4956709956709957, + "consistency_score_3": 0.31331168831168826, + "consistency_score_4": 0.2258116883116883, + "consistency_score_5": 0.17667748917748916, + "consistency_score_6": 0.1461038961038961, + "consistency_score_7": 0.125, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4090909090909091, + "English,Chinese": 0.5340909090909091, + "English,Indonesian": 0.5170454545454546, + "English,Filipino": 0.38636363636363635, + "English,Spanish": 0.5738636363636364, + "English,Malay": 0.4318181818181818, + "Vietnamese,Chinese": 0.3977272727272727, + "Vietnamese,Indonesian": 0.48295454545454547, + "Vietnamese,Filipino": 0.5454545454545454, + "Vietnamese,Spanish": 0.4715909090909091, + "Vietnamese,Malay": 0.5511363636363636, + "Chinese,Indonesian": 0.4715909090909091, + "Chinese,Filipino": 0.375, + "Chinese,Spanish": 0.4715909090909091, + "Chinese,Malay": 0.48295454545454547, + "Indonesian,Filipino": 0.5568181818181818, + "Indonesian,Spanish": 0.5568181818181818, + "Indonesian,Malay": 0.6363636363636364, + "Filipino,Spanish": 0.4772727272727273, + "Filipino,Malay": 0.5795454545454546, + "Spanish,Malay": 0.5 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.24431818181818182, + "English,Vietnamese,Indonesian": 0.2897727272727273, + "English,Vietnamese,Filipino": 0.2556818181818182, + "English,Vietnamese,Spanish": 0.2840909090909091, + "English,Vietnamese,Malay": 0.2784090909090909, + "English,Chinese,Indonesian": 0.3181818181818182, + "English,Chinese,Filipino": 0.23863636363636365, + "English,Chinese,Spanish": 0.3409090909090909, + "English,Chinese,Malay": 0.2897727272727273, + "English,Indonesian,Filipino": 0.29545454545454547, + "English,Indonesian,Spanish": 0.38636363636363635, + "English,Indonesian,Malay": 0.3409090909090909, + "English,Filipino,Spanish": 0.2840909090909091, + "English,Filipino,Malay": 0.2784090909090909, + "English,Spanish,Malay": 0.32386363636363635, + "Vietnamese,Chinese,Indonesian": 0.25, + "Vietnamese,Chinese,Filipino": 0.25, + "Vietnamese,Chinese,Spanish": 0.26704545454545453, + "Vietnamese,Chinese,Malay": 0.29545454545454547, + "Vietnamese,Indonesian,Filipino": 0.3522727272727273, + "Vietnamese,Indonesian,Spanish": 0.32954545454545453, + "Vietnamese,Indonesian,Malay": 0.3806818181818182, + "Vietnamese,Filipino,Spanish": 0.3181818181818182, + "Vietnamese,Filipino,Malay": 0.3977272727272727, + "Vietnamese,Spanish,Malay": 0.32954545454545453, + "Chinese,Indonesian,Filipino": 0.2784090909090909, + "Chinese,Indonesian,Spanish": 0.30113636363636365, + "Chinese,Indonesian,Malay": 0.3465909090909091, + "Chinese,Filipino,Spanish": 0.2556818181818182, + "Chinese,Filipino,Malay": 0.2897727272727273, + "Chinese,Spanish,Malay": 0.30113636363636365, + "Indonesian,Filipino,Spanish": 0.3693181818181818, + "Indonesian,Filipino,Malay": 0.45454545454545453, + "Indonesian,Spanish,Malay": 0.3977272727272727, + "Filipino,Spanish,Malay": 0.3522727272727273 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.19318181818181818, + "English,Vietnamese,Chinese,Filipino": 0.17045454545454544, + "English,Vietnamese,Chinese,Spanish": 0.18181818181818182, + "English,Vietnamese,Chinese,Malay": 0.19886363636363635, + "English,Vietnamese,Indonesian,Filipino": 0.2215909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.23295454545454544, + "English,Vietnamese,Indonesian,Malay": 0.23295454545454544, + "English,Vietnamese,Filipino,Spanish": 0.19886363636363635, + "English,Vietnamese,Filipino,Malay": 0.2215909090909091, + "English,Vietnamese,Spanish,Malay": 0.21022727272727273, + "English,Chinese,Indonesian,Filipino": 0.19886363636363635, + "English,Chinese,Indonesian,Spanish": 0.23863636363636365, + "English,Chinese,Indonesian,Malay": 0.23863636363636365, + "English,Chinese,Filipino,Spanish": 0.1875, + "English,Chinese,Filipino,Malay": 0.20454545454545456, + "English,Chinese,Spanish,Malay": 0.2215909090909091, + "English,Indonesian,Filipino,Spanish": 0.25, + "English,Indonesian,Filipino,Malay": 0.2556818181818182, + "English,Indonesian,Spanish,Malay": 0.2727272727272727, + "English,Filipino,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Malay": 0.2215909090909091, + "Vietnamese,Chinese,Filipino,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Filipino,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Indonesian,Filipino,Spanish": 0.2556818181818182, + "Vietnamese,Indonesian,Filipino,Malay": 0.30113636363636365, + "Vietnamese,Indonesian,Spanish,Malay": 0.26704545454545453, + "Vietnamese,Filipino,Spanish,Malay": 0.26704545454545453, + "Chinese,Indonesian,Filipino,Spanish": 0.20454545454545456, + "Chinese,Indonesian,Filipino,Malay": 0.24431818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.23295454545454544, + "Chinese,Filipino,Spanish,Malay": 0.2159090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.3125 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.1534090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.1534090909090909, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.17045454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.14204545454545456, + "English,Vietnamese,Chinese,Filipino,Malay": 0.16477272727272727, + "English,Vietnamese,Chinese,Spanish,Malay": 0.1534090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.1875, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.19886363636363635, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.1875, + "English,Vietnamese,Filipino,Spanish,Malay": 0.18181818181818182, + "English,Chinese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "English,Chinese,Indonesian,Filipino,Malay": 0.1875, + "English,Chinese,Indonesian,Spanish,Malay": 0.18181818181818182, + "English,Chinese,Filipino,Spanish,Malay": 0.16477272727272727, + "English,Indonesian,Filipino,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.22727272727272727, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.1875 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13068181818181818, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.14772727272727273, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13636363636363635, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.13636363636363635, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.14772727272727273 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.125 + } + }, + "AC3_2": 0.38696976895212376, + "AC3_3": 0.3153278509921388, + "AC3_4": 0.26387438766770904, + "AC3_5": 0.22699090335514444, + "AC3_6": 0.20009325171382705, + "AC3_7": 0.17935779812459288 + }, + "prompt_4": { + "overall_acc": 0.30844155844155846, + "language_acc": { + "English": 0.4147727272727273, + "Vietnamese": 0.2897727272727273, + "Chinese": 0.3352272727272727, + "Indonesian": 0.24431818181818182, + "Filipino": 0.23295454545454544, + "Spanish": 0.3465909090909091, + "Malay": 0.29545454545454547 + }, + "consistency_score_2": 0.4699675324675325, + "consistency_score_3": 0.2800324675324675, + "consistency_score_4": 0.1935064935064935, + "consistency_score_5": 0.14935064935064934, + "consistency_score_6": 0.1258116883116883, + "consistency_score_7": 0.11363636363636363, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3465909090909091, + "English,Chinese": 0.5568181818181818, + "English,Indonesian": 0.4943181818181818, + "English,Filipino": 0.36363636363636365, + "English,Spanish": 0.5738636363636364, + "English,Malay": 0.4147727272727273, + "Vietnamese,Chinese": 0.375, + "Vietnamese,Indonesian": 0.4602272727272727, + "Vietnamese,Filipino": 0.5056818181818182, + "Vietnamese,Spanish": 0.3977272727272727, + "Vietnamese,Malay": 0.4659090909090909, + "Chinese,Indonesian": 0.4943181818181818, + "Chinese,Filipino": 0.3465909090909091, + "Chinese,Spanish": 0.5227272727272727, + "Chinese,Malay": 0.4715909090909091, + "Indonesian,Filipino": 0.4943181818181818, + "Indonesian,Spanish": 0.5227272727272727, + "Indonesian,Malay": 0.6534090909090909, + "Filipino,Spanish": 0.4034090909090909, + "Filipino,Malay": 0.48863636363636365, + "Spanish,Malay": 0.5170454545454546 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2215909090909091, + "English,Vietnamese,Indonesian": 0.25, + "English,Vietnamese,Filipino": 0.21022727272727273, + "English,Vietnamese,Spanish": 0.2215909090909091, + "English,Vietnamese,Malay": 0.2215909090909091, + "English,Chinese,Indonesian": 0.32386363636363635, + "English,Chinese,Filipino": 0.23295454545454544, + "English,Chinese,Spanish": 0.3806818181818182, + "English,Chinese,Malay": 0.2840909090909091, + "English,Indonesian,Filipino": 0.2727272727272727, + "English,Indonesian,Spanish": 0.3522727272727273, + "English,Indonesian,Malay": 0.32954545454545453, + "English,Filipino,Spanish": 0.24431818181818182, + "English,Filipino,Malay": 0.23295454545454544, + "English,Spanish,Malay": 0.3068181818181818, + "Vietnamese,Chinese,Indonesian": 0.24431818181818182, + "Vietnamese,Chinese,Filipino": 0.2159090909090909, + "Vietnamese,Chinese,Spanish": 0.23295454545454544, + "Vietnamese,Chinese,Malay": 0.2556818181818182, + "Vietnamese,Indonesian,Filipino": 0.30113636363636365, + "Vietnamese,Indonesian,Spanish": 0.2727272727272727, + "Vietnamese,Indonesian,Malay": 0.3465909090909091, + "Vietnamese,Filipino,Spanish": 0.23295454545454544, + "Vietnamese,Filipino,Malay": 0.29545454545454547, + "Vietnamese,Spanish,Malay": 0.2727272727272727, + "Chinese,Indonesian,Filipino": 0.25, + "Chinese,Indonesian,Spanish": 0.3181818181818182, + "Chinese,Indonesian,Malay": 0.3465909090909091, + "Chinese,Filipino,Spanish": 0.22727272727272727, + "Chinese,Filipino,Malay": 0.24431818181818182, + "Chinese,Spanish,Malay": 0.3181818181818182, + "Indonesian,Filipino,Spanish": 0.3068181818181818, + "Indonesian,Filipino,Malay": 0.3693181818181818, + "Indonesian,Spanish,Malay": 0.375, + "Filipino,Spanish,Malay": 0.2897727272727273 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.17613636363636365, + "English,Vietnamese,Chinese,Filipino": 0.1590909090909091, + "English,Vietnamese,Chinese,Spanish": 0.1590909090909091, + "English,Vietnamese,Chinese,Malay": 0.17613636363636365, + "English,Vietnamese,Indonesian,Filipino": 0.18181818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.18181818181818182, + "English,Vietnamese,Indonesian,Malay": 0.19886363636363635, + "English,Vietnamese,Filipino,Spanish": 0.14772727272727273, + "English,Vietnamese,Filipino,Malay": 0.16477272727272727, + "English,Vietnamese,Spanish,Malay": 0.17045454545454544, + "English,Chinese,Indonesian,Filipino": 0.1875, + "English,Chinese,Indonesian,Spanish": 0.25, + "English,Chinese,Indonesian,Malay": 0.24431818181818182, + "English,Chinese,Filipino,Spanish": 0.17613636363636365, + "English,Chinese,Filipino,Malay": 0.17045454545454544, + "English,Chinese,Spanish,Malay": 0.23295454545454544, + "English,Indonesian,Filipino,Spanish": 0.20454545454545456, + "English,Indonesian,Filipino,Malay": 0.2159090909090909, + "English,Indonesian,Spanish,Malay": 0.25, + "English,Filipino,Spanish,Malay": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Filipino": 0.17613636363636365, + "Vietnamese,Chinese,Indonesian,Spanish": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Filipino,Spanish": 0.14772727272727273, + "Vietnamese,Chinese,Filipino,Malay": 0.17613636363636365, + "Vietnamese,Chinese,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Indonesian,Filipino,Spanish": 0.19886363636363635, + "Vietnamese,Indonesian,Filipino,Malay": 0.25, + "Vietnamese,Indonesian,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.17613636363636365, + "Chinese,Indonesian,Filipino,Spanish": 0.1875, + "Chinese,Indonesian,Filipino,Malay": 0.19886363636363635, + "Chinese,Indonesian,Spanish,Malay": 0.24431818181818182, + "Chinese,Filipino,Spanish,Malay": 0.17045454545454544, + "Indonesian,Filipino,Spanish,Malay": 0.24431818181818182 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.14204545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.16477272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino,Malay": 0.14204545454545456, + "English,Vietnamese,Chinese,Spanish,Malay": 0.13636363636363635, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.16477272727272727, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.1534090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.125, + "English,Chinese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "English,Chinese,Indonesian,Filipino,Malay": 0.16477272727272727, + "English,Chinese,Indonesian,Spanish,Malay": 0.19886363636363635, + "English,Chinese,Filipino,Spanish,Malay": 0.14204545454545456, + "English,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.14772727272727273 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.14204545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13068181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.11363636363636363, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.125, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363 + } + }, + "AC3_2": 0.37244559398273136, + "AC3_3": 0.29355127626001354, + "AC3_4": 0.23781522483123582, + "AC3_5": 0.2012526480171971, + "AC3_6": 0.17872314597170919, + "AC3_7": 0.16608391604456696 + }, + "prompt_5": { + "overall_acc": 0.30844155844155846, + "language_acc": { + "English": 0.4147727272727273, + "Vietnamese": 0.29545454545454547, + "Chinese": 0.3409090909090909, + "Indonesian": 0.26704545454545453, + "Filipino": 0.2556818181818182, + "Spanish": 0.32954545454545453, + "Malay": 0.2556818181818182 + }, + "consistency_score_2": 0.4894480519480519, + "consistency_score_3": 0.30129870129870134, + "consistency_score_4": 0.21022727272727276, + "consistency_score_5": 0.1590909090909091, + "consistency_score_6": 0.12824675324675325, + "consistency_score_7": 0.10795454545454546, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.375, + "English,Chinese": 0.5340909090909091, + "English,Indonesian": 0.4943181818181818, + "English,Filipino": 0.39204545454545453, + "English,Spanish": 0.5681818181818182, + "English,Malay": 0.44886363636363635, + "Vietnamese,Chinese": 0.3693181818181818, + "Vietnamese,Indonesian": 0.44886363636363635, + "Vietnamese,Filipino": 0.5227272727272727, + "Vietnamese,Spanish": 0.42613636363636365, + "Vietnamese,Malay": 0.5170454545454546, + "Chinese,Indonesian": 0.5170454545454546, + "Chinese,Filipino": 0.3806818181818182, + "Chinese,Spanish": 0.5113636363636364, + "Chinese,Malay": 0.5511363636363636, + "Indonesian,Filipino": 0.5284090909090909, + "Indonesian,Spanish": 0.48863636363636365, + "Indonesian,Malay": 0.7272727272727273, + "Filipino,Spanish": 0.4375, + "Filipino,Malay": 0.5340909090909091, + "Spanish,Malay": 0.5056818181818182 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.23295454545454544, + "English,Vietnamese,Indonesian": 0.2556818181818182, + "English,Vietnamese,Filipino": 0.22727272727272727, + "English,Vietnamese,Spanish": 0.2556818181818182, + "English,Vietnamese,Malay": 0.26136363636363635, + "English,Chinese,Indonesian": 0.32954545454545453, + "English,Chinese,Filipino": 0.25, + "English,Chinese,Spanish": 0.3806818181818182, + "English,Chinese,Malay": 0.3181818181818182, + "English,Indonesian,Filipino": 0.2784090909090909, + "English,Indonesian,Spanish": 0.3409090909090909, + "English,Indonesian,Malay": 0.36363636363636365, + "English,Filipino,Spanish": 0.2784090909090909, + "English,Filipino,Malay": 0.26704545454545453, + "English,Spanish,Malay": 0.32386363636363635, + "Vietnamese,Chinese,Indonesian": 0.23863636363636365, + "Vietnamese,Chinese,Filipino": 0.22727272727272727, + "Vietnamese,Chinese,Spanish": 0.2215909090909091, + "Vietnamese,Chinese,Malay": 0.2840909090909091, + "Vietnamese,Indonesian,Filipino": 0.30113636363636365, + "Vietnamese,Indonesian,Spanish": 0.2727272727272727, + "Vietnamese,Indonesian,Malay": 0.3806818181818182, + "Vietnamese,Filipino,Spanish": 0.26704545454545453, + "Vietnamese,Filipino,Malay": 0.3352272727272727, + "Vietnamese,Spanish,Malay": 0.2897727272727273, + "Chinese,Indonesian,Filipino": 0.2727272727272727, + "Chinese,Indonesian,Spanish": 0.3125, + "Chinese,Indonesian,Malay": 0.42045454545454547, + "Chinese,Filipino,Spanish": 0.24431818181818182, + "Chinese,Filipino,Malay": 0.3068181818181818, + "Chinese,Spanish,Malay": 0.3352272727272727, + "Indonesian,Filipino,Spanish": 0.3181818181818182, + "Indonesian,Filipino,Malay": 0.4431818181818182, + "Indonesian,Spanish,Malay": 0.39204545454545453, + "Filipino,Spanish,Malay": 0.3181818181818182 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.17613636363636365, + "English,Vietnamese,Chinese,Filipino": 0.17045454545454544, + "English,Vietnamese,Chinese,Spanish": 0.17045454545454544, + "English,Vietnamese,Chinese,Malay": 0.18181818181818182, + "English,Vietnamese,Indonesian,Filipino": 0.17613636363636365, + "English,Vietnamese,Indonesian,Spanish": 0.19318181818181818, + "English,Vietnamese,Indonesian,Malay": 0.2215909090909091, + "English,Vietnamese,Filipino,Spanish": 0.18181818181818182, + "English,Vietnamese,Filipino,Malay": 0.18181818181818182, + "English,Vietnamese,Spanish,Malay": 0.19318181818181818, + "English,Chinese,Indonesian,Filipino": 0.20454545454545456, + "English,Chinese,Indonesian,Spanish": 0.24431818181818182, + "English,Chinese,Indonesian,Malay": 0.26704545454545453, + "English,Chinese,Filipino,Spanish": 0.19318181818181818, + "English,Chinese,Filipino,Malay": 0.20454545454545456, + "English,Chinese,Spanish,Malay": 0.23863636363636365, + "English,Indonesian,Filipino,Spanish": 0.2215909090909091, + "English,Indonesian,Filipino,Malay": 0.23863636363636365, + "English,Indonesian,Spanish,Malay": 0.26704545454545453, + "English,Filipino,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian,Filipino": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Malay": 0.2215909090909091, + "Vietnamese,Chinese,Filipino,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Filipino,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Spanish,Malay": 0.18181818181818182, + "Vietnamese,Indonesian,Filipino,Spanish": 0.21022727272727273, + "Vietnamese,Indonesian,Filipino,Malay": 0.26136363636363635, + "Vietnamese,Indonesian,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Filipino,Spanish,Malay": 0.2159090909090909, + "Chinese,Indonesian,Filipino,Spanish": 0.19886363636363635, + "Chinese,Indonesian,Filipino,Malay": 0.2556818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.26136363636363635, + "Chinese,Filipino,Spanish,Malay": 0.21022727272727273, + "Indonesian,Filipino,Spanish,Malay": 0.2840909090909091 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.13636363636363635, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.1590909090909091, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.13068181818181818, + "English,Vietnamese,Chinese,Filipino,Malay": 0.14772727272727273, + "English,Vietnamese,Chinese,Spanish,Malay": 0.13636363636363635, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.1590909090909091, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.16477272727272727, + "English,Vietnamese,Filipino,Spanish,Malay": 0.1534090909090909, + "English,Chinese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "English,Chinese,Indonesian,Filipino,Malay": 0.1875, + "English,Chinese,Indonesian,Spanish,Malay": 0.19886363636363635, + "English,Chinese,Filipino,Spanish,Malay": 0.16477272727272727, + "English,Indonesian,Filipino,Spanish,Malay": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.18181818181818182, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.1875 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.13068181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.11931818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.125 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546 + } + }, + "AC3_2": 0.3784135497801617, + "AC3_3": 0.3048282919987462, + "AC3_4": 0.25003556688876827, + "AC3_5": 0.20991161611671838, + "AC3_6": 0.18116641718441037, + "AC3_7": 0.15993265989425115 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4368932038834951 + }, + "prompt_2": { + "accuracy": 0.46601941747572817 + }, + "prompt_3": { + "accuracy": 0.42718446601941745 + }, + "prompt_4": { + "accuracy": 0.4368932038834951 + }, + "prompt_5": { + "accuracy": 0.4174757281553398 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4 + }, + "prompt_2": { + "accuracy": 0.3904761904761905 + }, + "prompt_3": { + "accuracy": 0.34285714285714286 + }, + "prompt_4": { + "accuracy": 0.3523809523809524 + }, + "prompt_5": { + "accuracy": 0.34285714285714286 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.514018691588785 + }, + "prompt_2": { + "accuracy": 0.4672897196261682 + }, + "prompt_3": { + "accuracy": 0.4205607476635514 + }, + "prompt_4": { + "accuracy": 0.42990654205607476 + }, + "prompt_5": { + "accuracy": 0.4766355140186916 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.34, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.5, + "history": 0.26666666666666666, + "literature": 0.4, + "politics": 0.3, + "culture": 0.3, + "film": 0.2, + "law": 0.4, + "geography": 0.6 + } + }, + "prompt_2": { + "accuracy": 0.34, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.5, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.4, + "culture": 0.6, + "film": 0.1, + "law": 0.2, + "geography": 0.6 + } + }, + "prompt_3": { + "accuracy": 0.34, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.5, + "history": 0.3333333333333333, + "literature": 0.3, + "politics": 0.4, + "culture": 0.3, + "film": 0.2, + "law": 0.4, + "geography": 0.4 + } + }, + "prompt_4": { + "accuracy": 0.34, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.4, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.5, + "culture": 0.6, + "film": 0.2, + "law": 0.3, + "geography": 0.4 + } + }, + "prompt_5": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.4, + "history": 0.3333333333333333, + "literature": 0.3, + "politics": 0.2, + "culture": 0.3, + "film": 0.2, + "law": 0.4, + "geography": 0.5 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.17818389937895332 + }, + "prompt_2": { + "bleu_score": 0.18329857238416086 + }, + "prompt_3": { + "bleu_score": 0.1842641022361462 + }, + "prompt_4": { + "bleu_score": 0.18113750925189162 + }, + "prompt_5": { + "bleu_score": 0.18475979486158378 + } }, "indommlu": { "prompt_1": -1, @@ -2426,179 +21471,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.23045698173580897 + }, + "prompt_2": { + "bleu_score": 0.2297827539636557 + }, + "prompt_3": { + "bleu_score": 0.2343627600060239 + }, + "prompt_4": { + "bleu_score": 0.23065119914722687 + }, + "prompt_5": { + "bleu_score": 0.21061251333272102 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.11781458950741591 + }, + "prompt_2": { + "bleu_score": 0.11821106058420704 + }, + "prompt_3": { + "bleu_score": 0.11906290505763724 + }, + "prompt_4": { + "bleu_score": 0.11653752924885515 + }, + "prompt_5": { + "bleu_score": 0.10230828107667242 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.1552250355965207 + }, + "prompt_2": { + "bleu_score": 0.15414923884534001 + }, + "prompt_3": { + "bleu_score": 0.15945970110094956 + }, + "prompt_4": { + "bleu_score": 0.15876385697274442 + }, + "prompt_5": { + "bleu_score": 0.1499566089035635 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.20916667147768384 + }, + "prompt_2": { + "bleu_score": 0.21153335235150353 + }, + "prompt_3": { + "bleu_score": 0.21496921295545635 + }, + "prompt_4": { + "bleu_score": 0.21438330713807416 + }, + "prompt_5": { + "bleu_score": 0.19751377414907603 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4725787631271879 + }, + "prompt_2": { + "accuracy": 0.4235705950991832 + }, + "prompt_3": { + "accuracy": 0.45040840140023336 + }, + "prompt_4": { + "accuracy": 0.46557759626604434 + }, + "prompt_5": { + "accuracy": 0.4282380396732789 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4529853414372542, + "category_acc": { + "high_school_european_history": 0.5792682926829268, + "business_ethics": 0.48484848484848486, + "clinical_knowledge": 0.4962121212121212, + "medical_genetics": 0.48484848484848486, + "high_school_us_history": 0.6305418719211823, + "high_school_physics": 0.26666666666666666, + "high_school_world_history": 0.5720338983050848, + "virology": 0.4121212121212121, + "high_school_microeconomics": 0.43037974683544306, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.4444444444444444, + "high_school_biology": 0.5533980582524272, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.33451957295373663, + "philosophy": 0.46774193548387094, + "professional_medicine": 0.45018450184501846, + "nutrition": 0.5475409836065573, + "global_facts": 0.24242424242424243, + "machine_learning": 0.3063063063063063, + "security_studies": 0.5204918032786885, + "public_relations": 0.45871559633027525, + "professional_psychology": 0.41080196399345337, + "prehistory": 0.5201238390092879, + "anatomy": 0.44776119402985076, + "human_sexuality": 0.5538461538461539, + "college_medicine": 0.4011627906976744, + "high_school_government_and_politics": 0.6302083333333334, + "college_chemistry": 0.40404040404040403, + "logical_fallacies": 0.5555555555555556, + "high_school_geography": 0.583756345177665, + "elementary_mathematics": 0.3050397877984085, + "human_aging": 0.47297297297297297, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.6452205882352942, + "formal_logic": 0.336, + "high_school_statistics": 0.413953488372093, + "international_law": 0.6166666666666667, + "high_school_mathematics": 0.25650557620817843, + "high_school_computer_science": 0.43434343434343436, + "conceptual_physics": 0.3547008547008547, + "miscellaneous": 0.6112531969309463, + "high_school_chemistry": 0.32673267326732675, + "marketing": 0.6609442060085837, + "professional_law": 0.36921069797782124, + "management": 0.5490196078431373, + "college_physics": 0.297029702970297, + "jurisprudence": 0.514018691588785, + "world_religions": 0.6764705882352942, + "sociology": 0.67, + "us_foreign_policy": 0.696969696969697, + "high_school_macroeconomics": 0.4473007712082262, + "computer_security": 0.5252525252525253, + "moral_scenarios": 0.24608501118568232, + "moral_disputes": 0.4405797101449275, + "electrical_engineering": 0.4097222222222222, + "astronomy": 0.48344370860927155, + "college_biology": 0.44755244755244755 + } + }, + "prompt_2": { + "accuracy": 0.4184483375044691, + "category_acc": { + "high_school_european_history": 0.5609756097560976, + "business_ethics": 0.4444444444444444, + "clinical_knowledge": 0.4431818181818182, + "medical_genetics": 0.46464646464646464, + "high_school_us_history": 0.5615763546798029, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.5466101694915254, + "virology": 0.36363636363636365, + "high_school_microeconomics": 0.4092827004219409, + "econometrics": 0.19469026548672566, + "college_computer_science": 0.3838383838383838, + "high_school_biology": 0.5339805825242718, + "abstract_algebra": 0.25252525252525254, + "professional_accounting": 0.31316725978647686, + "philosophy": 0.45806451612903226, + "professional_medicine": 0.44649446494464945, + "nutrition": 0.5081967213114754, + "global_facts": 0.20202020202020202, + "machine_learning": 0.3063063063063063, + "security_studies": 0.48770491803278687, + "public_relations": 0.3394495412844037, + "professional_psychology": 0.3911620294599018, + "prehistory": 0.48606811145510836, + "anatomy": 0.4552238805970149, + "human_sexuality": 0.46923076923076923, + "college_medicine": 0.4127906976744186, + "high_school_government_and_politics": 0.5260416666666666, + "college_chemistry": 0.35353535353535354, + "logical_fallacies": 0.5246913580246914, + "high_school_geography": 0.49238578680203043, + "elementary_mathematics": 0.2891246684350133, + "human_aging": 0.42792792792792794, + "college_mathematics": 0.37373737373737376, + "high_school_psychology": 0.5790441176470589, + "formal_logic": 0.384, + "high_school_statistics": 0.40930232558139534, + "international_law": 0.575, + "high_school_mathematics": 0.2379182156133829, + "high_school_computer_science": 0.3838383838383838, + "conceptual_physics": 0.32905982905982906, + "miscellaneous": 0.4961636828644501, + "high_school_chemistry": 0.2871287128712871, + "marketing": 0.6008583690987125, + "professional_law": 0.36007827788649704, + "management": 0.45098039215686275, + "college_physics": 0.32673267326732675, + "jurisprudence": 0.5420560747663551, + "world_religions": 0.5529411764705883, + "sociology": 0.57, + "us_foreign_policy": 0.5252525252525253, + "high_school_macroeconomics": 0.42159383033419023, + "computer_security": 0.5151515151515151, + "moral_scenarios": 0.24384787472035793, + "moral_disputes": 0.42028985507246375, + "electrical_engineering": 0.4236111111111111, + "astronomy": 0.4304635761589404, + "college_biology": 0.40559440559440557 + } + }, + "prompt_3": { + "accuracy": 0.438398283875581, + "category_acc": { + "high_school_european_history": 0.5853658536585366, + "business_ethics": 0.47474747474747475, + "clinical_knowledge": 0.4583333333333333, + "medical_genetics": 0.5252525252525253, + "high_school_us_history": 0.5812807881773399, + "high_school_physics": 0.28, + "high_school_world_history": 0.5805084745762712, + "virology": 0.4121212121212121, + "high_school_microeconomics": 0.4219409282700422, + "econometrics": 0.19469026548672566, + "college_computer_science": 0.37373737373737376, + "high_school_biology": 0.5242718446601942, + "abstract_algebra": 0.30303030303030304, + "professional_accounting": 0.30604982206405695, + "philosophy": 0.5064516129032258, + "professional_medicine": 0.45387453874538747, + "nutrition": 0.5147540983606558, + "global_facts": 0.25252525252525254, + "machine_learning": 0.32432432432432434, + "security_studies": 0.48360655737704916, + "public_relations": 0.44036697247706424, + "professional_psychology": 0.41243862520458263, + "prehistory": 0.48606811145510836, + "anatomy": 0.4701492537313433, + "human_sexuality": 0.5384615384615384, + "college_medicine": 0.4127906976744186, + "high_school_government_and_politics": 0.5572916666666666, + "college_chemistry": 0.37373737373737376, + "logical_fallacies": 0.5802469135802469, + "high_school_geography": 0.5532994923857868, + "elementary_mathematics": 0.29442970822281167, + "human_aging": 0.44594594594594594, + "college_mathematics": 0.3434343434343434, + "high_school_psychology": 0.6029411764705882, + "formal_logic": 0.32, + "high_school_statistics": 0.4046511627906977, + "international_law": 0.5916666666666667, + "high_school_mathematics": 0.22304832713754646, + "high_school_computer_science": 0.41414141414141414, + "conceptual_physics": 0.3076923076923077, + "miscellaneous": 0.571611253196931, + "high_school_chemistry": 0.30198019801980197, + "marketing": 0.6351931330472103, + "professional_law": 0.3724722765818656, + "management": 0.4803921568627451, + "college_physics": 0.3069306930693069, + "jurisprudence": 0.5514018691588785, + "world_religions": 0.6529411764705882, + "sociology": 0.625, + "us_foreign_policy": 0.6464646464646465, + "high_school_macroeconomics": 0.43444730077120824, + "computer_security": 0.5151515151515151, + "moral_scenarios": 0.24608501118568232, + "moral_disputes": 0.4608695652173913, + "electrical_engineering": 0.4097222222222222, + "astronomy": 0.4105960264900662, + "college_biology": 0.4195804195804196 + } + }, + "prompt_4": { + "accuracy": 0.4460493385770468, + "category_acc": { + "high_school_european_history": 0.5792682926829268, + "business_ethics": 0.45454545454545453, + "clinical_knowledge": 0.48484848484848486, + "medical_genetics": 0.5050505050505051, + "high_school_us_history": 0.5615763546798029, + "high_school_physics": 0.29333333333333333, + "high_school_world_history": 0.5720338983050848, + "virology": 0.3939393939393939, + "high_school_microeconomics": 0.4345991561181435, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.3939393939393939, + "high_school_biology": 0.5663430420711975, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.3238434163701068, + "philosophy": 0.4645161290322581, + "professional_medicine": 0.4833948339483395, + "nutrition": 0.5344262295081967, + "global_facts": 0.20202020202020202, + "machine_learning": 0.2882882882882883, + "security_studies": 0.5368852459016393, + "public_relations": 0.43119266055045874, + "professional_psychology": 0.41407528641571195, + "prehistory": 0.49226006191950467, + "anatomy": 0.4925373134328358, + "human_sexuality": 0.5153846153846153, + "college_medicine": 0.45930232558139533, + "high_school_government_and_politics": 0.65625, + "college_chemistry": 0.40404040404040403, + "logical_fallacies": 0.5555555555555556, + "high_school_geography": 0.5532994923857868, + "elementary_mathematics": 0.29442970822281167, + "human_aging": 0.3963963963963964, + "college_mathematics": 0.36363636363636365, + "high_school_psychology": 0.6084558823529411, + "formal_logic": 0.336, + "high_school_statistics": 0.4604651162790698, + "international_law": 0.6, + "high_school_mathematics": 0.2527881040892193, + "high_school_computer_science": 0.40404040404040403, + "conceptual_physics": 0.3333333333333333, + "miscellaneous": 0.578005115089514, + "high_school_chemistry": 0.32673267326732675, + "marketing": 0.6695278969957081, + "professional_law": 0.37377690802348335, + "management": 0.5882352941176471, + "college_physics": 0.297029702970297, + "jurisprudence": 0.48598130841121495, + "world_religions": 0.6470588235294118, + "sociology": 0.65, + "us_foreign_policy": 0.6464646464646465, + "high_school_macroeconomics": 0.45758354755784064, + "computer_security": 0.48484848484848486, + "moral_scenarios": 0.24720357941834453, + "moral_disputes": 0.41739130434782606, + "electrical_engineering": 0.4166666666666667, + "astronomy": 0.47019867549668876, + "college_biology": 0.40559440559440557 + } + }, + "prompt_5": { + "accuracy": 0.42352520557740436, + "category_acc": { + "high_school_european_history": 0.5609756097560976, + "business_ethics": 0.494949494949495, + "clinical_knowledge": 0.4393939393939394, + "medical_genetics": 0.46464646464646464, + "high_school_us_history": 0.5073891625615764, + "high_school_physics": 0.26, + "high_school_world_history": 0.559322033898305, + "virology": 0.3575757575757576, + "high_school_microeconomics": 0.3881856540084388, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.3333333333333333, + "high_school_biology": 0.5339805825242718, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.30604982206405695, + "philosophy": 0.45806451612903226, + "professional_medicine": 0.4907749077490775, + "nutrition": 0.5049180327868853, + "global_facts": 0.2222222222222222, + "machine_learning": 0.27927927927927926, + "security_studies": 0.5245901639344263, + "public_relations": 0.41284403669724773, + "professional_psychology": 0.3895253682487725, + "prehistory": 0.47987616099071206, + "anatomy": 0.4552238805970149, + "human_sexuality": 0.46923076923076923, + "college_medicine": 0.4011627906976744, + "high_school_government_and_politics": 0.5885416666666666, + "college_chemistry": 0.45454545454545453, + "logical_fallacies": 0.4506172839506173, + "high_school_geography": 0.5431472081218274, + "elementary_mathematics": 0.26525198938992045, + "human_aging": 0.3738738738738739, + "college_mathematics": 0.31313131313131315, + "high_school_psychology": 0.5919117647058824, + "formal_logic": 0.352, + "high_school_statistics": 0.4418604651162791, + "international_law": 0.5833333333333334, + "high_school_mathematics": 0.2527881040892193, + "high_school_computer_science": 0.43434343434343436, + "conceptual_physics": 0.3247863247863248, + "miscellaneous": 0.551150895140665, + "high_school_chemistry": 0.3217821782178218, + "marketing": 0.6051502145922747, + "professional_law": 0.3639921722113503, + "management": 0.5588235294117647, + "college_physics": 0.27722772277227725, + "jurisprudence": 0.4766355140186916, + "world_religions": 0.6352941176470588, + "sociology": 0.585, + "us_foreign_policy": 0.5555555555555556, + "high_school_macroeconomics": 0.4087403598971722, + "computer_security": 0.41414141414141414, + "moral_scenarios": 0.24608501118568232, + "moral_disputes": 0.3681159420289855, + "electrical_engineering": 0.4097222222222222, + "astronomy": 0.4768211920529801, + "college_biology": 0.3986013986013986 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.300148588410104 + }, + "prompt_2": { + "accuracy": 0.31946508172362553 + }, + "prompt_3": { + "accuracy": 0.32392273402674593 + }, + "prompt_4": { + "accuracy": 0.3016344725111441 + }, + "prompt_5": { + "accuracy": 0.3112927191679049 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.31320049813200496, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.16666666666666666, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.3333333333333333, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.38095238095238093, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.3103448275862069, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.4583333333333333, + "high_school_chemistry": 0.25, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.23333333333333334, + "business_administration": 0.34210526315789475, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.2413793103448276, + "education_science": 0.4411764705882353, + "teacher_qualification": 0.2857142857142857, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.25, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.4444444444444444, + "law": 0.41379310344827586, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.23684210526315788, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.25, + "high_school_history": 0.28, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.40384615384615385, + "sports_science": 0.2916666666666667, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.29411764705882354, + "accountant": 0.2777777777777778, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.2222222222222222, + "physician": 0.3148148148148148 + } + }, + "prompt_2": { + "accuracy": 0.31693648816936487, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.041666666666666664, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.2619047619047619, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.42857142857142855, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.2692307692307692, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.48, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.2833333333333333, + "business_administration": 0.34210526315789475, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.35294117647058826, + "teacher_qualification": 0.3673469387755102, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.46153846153846156, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.39285714285714285, + "ideological_and_moral_cultivation": 0.3333333333333333, + "logic": 0.4074074074074074, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.2631578947368421, + "professional_tour_guide": 0.4117647058823529, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.25, + "high_school_history": 0.4, + "middle_school_history": 0.48148148148148145, + "civil_servant": 0.36538461538461536, + "sports_science": 0.2916666666666667, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.375, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.29411764705882354, + "accountant": 0.2962962962962963, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.3148148148148148, + "physician": 0.2962962962962963 + } + }, + "prompt_3": { + "accuracy": 0.30323785803237857, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.125, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.30952380952380953, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.25, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.38461538461538464, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.35714285714285715, + "college_economics": 0.21666666666666667, + "business_administration": 0.4473684210526316, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.3235294117647059, + "teacher_qualification": 0.32653061224489793, + "high_school_politics": 0.5833333333333334, + "high_school_geography": 0.25, + "middle_school_politics": 0.5, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.3333333333333333, + "logic": 0.4074074074074074, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.21428571428571427, + "art_studies": 0.21052631578947367, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.25, + "high_school_history": 0.32, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.34615384615384615, + "sports_science": 0.25, + "plant_protection": 0.18518518518518517, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.19607843137254902, + "accountant": 0.2777777777777778, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.2962962962962963, + "physician": 0.35185185185185186 + } + }, + "prompt_4": { + "accuracy": 0.3069738480697385, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.125, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.3333333333333333, + "college_physics": 0.25, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.42857142857142855, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.25, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.4583333333333333, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.25, + "business_administration": 0.42105263157894735, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.2413793103448276, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.3469387755102041, + "high_school_politics": 0.5416666666666666, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.39285714285714285, + "ideological_and_moral_cultivation": 0.25, + "logic": 0.3333333333333333, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.2894736842105263, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.32, + "middle_school_history": 0.4074074074074074, + "civil_servant": 0.3076923076923077, + "sports_science": 0.2916666666666667, + "plant_protection": 0.14814814814814814, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.23529411764705882, + "accountant": 0.3148148148148148, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.2777777777777778, + "physician": 0.2962962962962963 + } + }, + "prompt_5": { + "accuracy": 0.32627646326276466, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.25, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.23809523809523808, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.3448275862068966, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.375, + "high_school_chemistry": 0.4166666666666667, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.5833333333333334, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.31666666666666665, + "business_administration": 0.3684210526315789, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.20689655172413793, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.3673469387755102, + "high_school_politics": 0.5, + "high_school_geography": 0.25, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.4074074074074074, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.2894736842105263, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.25, + "high_school_history": 0.28, + "middle_school_history": 0.4444444444444444, + "civil_servant": 0.4230769230769231, + "sports_science": 0.2916666666666667, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.23529411764705882, + "accountant": 0.37037037037037035, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.3333333333333333, + "physician": 0.3148148148148148 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.27598566308243727 + }, + "prompt_2": { + "accuracy": 0.25089605734767023 + }, + "prompt_3": { + "accuracy": 0.2867383512544803 + }, + "prompt_4": { + "accuracy": 0.2616487455197133 + }, + "prompt_5": { + "accuracy": 0.27598566308243727 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3098773959592471, + "category_acc": { + "agronomy": 0.24260355029585798, + "anatomy": 0.23648648648648649, + "ancient_chinese": 0.2804878048780488, + "arts": 0.30625, + "astronomy": 0.26666666666666666, + "business_ethics": 0.3444976076555024, + "chinese_civil_service_exam": 0.34375, + "chinese_driving_rule": 0.3053435114503817, + "chinese_food_culture": 0.23529411764705882, + "chinese_foreign_policy": 0.27102803738317754, + "chinese_history": 0.33126934984520123, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.39106145251396646, + "clinical_knowledge": 0.25738396624472576, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.3644859813084112, + "college_engineering_hydrology": 0.39622641509433965, + "college_law": 0.25925925925925924, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.32075471698113206, + "college_medicine": 0.31868131868131866, + "computer_science": 0.3284313725490196, + "computer_security": 0.3216374269005848, + "conceptual_physics": 0.35374149659863946, + "construction_project_management": 0.302158273381295, + "economics": 0.31446540880503143, + "education": 0.31901840490797545, + "electrical_engineering": 0.36046511627906974, + "elementary_chinese": 0.21825396825396826, + "elementary_commonsense": 0.25252525252525254, + "elementary_information_and_technology": 0.35294117647058826, + "elementary_mathematics": 0.2391304347826087, + "ethnology": 0.4, + "food_science": 0.35664335664335667, + "genetics": 0.22727272727272727, + "global_facts": 0.2483221476510067, + "high_school_biology": 0.2485207100591716, + "high_school_chemistry": 0.21212121212121213, + "high_school_geography": 0.1864406779661017, + "high_school_mathematics": 0.22560975609756098, + "high_school_physics": 0.2909090909090909, + "high_school_politics": 0.32167832167832167, + "human_sexuality": 0.30158730158730157, + "international_law": 0.3081081081081081, + "journalism": 0.32558139534883723, + "jurisprudence": 0.340632603406326, + "legal_and_moral_basis": 0.5093457943925234, + "logical": 0.25203252032520324, + "machine_learning": 0.27049180327868855, + "management": 0.3333333333333333, + "marketing": 0.3888888888888889, + "marxist_theory": 0.42328042328042326, + "modern_chinese": 0.25862068965517243, + "nutrition": 0.31724137931034485, + "philosophy": 0.3619047619047619, + "professional_accounting": 0.2857142857142857, + "professional_law": 0.3033175355450237, + "professional_medicine": 0.2712765957446808, + "professional_psychology": 0.3448275862068966, + "public_relations": 0.3448275862068966, + "security_study": 0.32592592592592595, + "sociology": 0.36283185840707965, + "sports_science": 0.3515151515151515, + "traditional_chinese_medicine": 0.2810810810810811, + "virology": 0.28994082840236685, + "world_history": 0.34782608695652173, + "world_religions": 0.34375 + } + }, + "prompt_2": { + "accuracy": 0.3142807805214989, + "category_acc": { + "agronomy": 0.2958579881656805, + "anatomy": 0.33783783783783783, + "ancient_chinese": 0.29878048780487804, + "arts": 0.29375, + "astronomy": 0.2545454545454545, + "business_ethics": 0.33014354066985646, + "chinese_civil_service_exam": 0.33125, + "chinese_driving_rule": 0.3435114503816794, + "chinese_food_culture": 0.2867647058823529, + "chinese_foreign_policy": 0.24299065420560748, + "chinese_history": 0.34674922600619196, + "chinese_literature": 0.30392156862745096, + "chinese_teacher_qualification": 0.39106145251396646, + "clinical_knowledge": 0.270042194092827, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.38317757009345793, + "college_engineering_hydrology": 0.3490566037735849, + "college_law": 0.24074074074074073, + "college_mathematics": 0.29523809523809524, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.326007326007326, + "computer_science": 0.36764705882352944, + "computer_security": 0.3216374269005848, + "conceptual_physics": 0.2789115646258503, + "construction_project_management": 0.3381294964028777, + "economics": 0.34591194968553457, + "education": 0.31901840490797545, + "electrical_engineering": 0.3546511627906977, + "elementary_chinese": 0.20634920634920634, + "elementary_commonsense": 0.23737373737373738, + "elementary_information_and_technology": 0.3487394957983193, + "elementary_mathematics": 0.23043478260869565, + "ethnology": 0.34814814814814815, + "food_science": 0.40559440559440557, + "genetics": 0.25, + "global_facts": 0.33557046979865773, + "high_school_biology": 0.28994082840236685, + "high_school_chemistry": 0.2803030303030303, + "high_school_geography": 0.2288135593220339, + "high_school_mathematics": 0.1951219512195122, + "high_school_physics": 0.2909090909090909, + "high_school_politics": 0.32867132867132864, + "human_sexuality": 0.2857142857142857, + "international_law": 0.32432432432432434, + "journalism": 0.3313953488372093, + "jurisprudence": 0.31873479318734793, + "legal_and_moral_basis": 0.5420560747663551, + "logical": 0.2764227642276423, + "machine_learning": 0.2786885245901639, + "management": 0.3476190476190476, + "marketing": 0.40555555555555556, + "marxist_theory": 0.43386243386243384, + "modern_chinese": 0.25, + "nutrition": 0.2689655172413793, + "philosophy": 0.3619047619047619, + "professional_accounting": 0.3485714285714286, + "professional_law": 0.2890995260663507, + "professional_medicine": 0.2579787234042553, + "professional_psychology": 0.34051724137931033, + "public_relations": 0.3333333333333333, + "security_study": 0.2962962962962963, + "sociology": 0.3008849557522124, + "sports_science": 0.3515151515151515, + "traditional_chinese_medicine": 0.2918918918918919, + "virology": 0.28994082840236685, + "world_history": 0.33540372670807456, + "world_religions": 0.34375 + } + }, + "prompt_3": { + "accuracy": 0.30875496460024177, + "category_acc": { + "agronomy": 0.2485207100591716, + "anatomy": 0.25, + "ancient_chinese": 0.2682926829268293, + "arts": 0.2875, + "astronomy": 0.23030303030303031, + "business_ethics": 0.3492822966507177, + "chinese_civil_service_exam": 0.35, + "chinese_driving_rule": 0.29770992366412213, + "chinese_food_culture": 0.3014705882352941, + "chinese_foreign_policy": 0.27102803738317754, + "chinese_history": 0.3157894736842105, + "chinese_literature": 0.35294117647058826, + "chinese_teacher_qualification": 0.3575418994413408, + "clinical_knowledge": 0.25316455696202533, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.37383177570093457, + "college_engineering_hydrology": 0.39622641509433965, + "college_law": 0.2962962962962963, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.32075471698113206, + "college_medicine": 0.31135531135531136, + "computer_science": 0.3431372549019608, + "computer_security": 0.3567251461988304, + "conceptual_physics": 0.32653061224489793, + "construction_project_management": 0.2949640287769784, + "economics": 0.32075471698113206, + "education": 0.3496932515337423, + "electrical_engineering": 0.38953488372093026, + "elementary_chinese": 0.25396825396825395, + "elementary_commonsense": 0.2222222222222222, + "elementary_information_and_technology": 0.29831932773109243, + "elementary_mathematics": 0.2217391304347826, + "ethnology": 0.34074074074074073, + "food_science": 0.3986013986013986, + "genetics": 0.2215909090909091, + "global_facts": 0.28859060402684567, + "high_school_biology": 0.25443786982248523, + "high_school_chemistry": 0.2803030303030303, + "high_school_geography": 0.23728813559322035, + "high_school_mathematics": 0.22560975609756098, + "high_school_physics": 0.35454545454545455, + "high_school_politics": 0.32167832167832167, + "human_sexuality": 0.31746031746031744, + "international_law": 0.35135135135135137, + "journalism": 0.3488372093023256, + "jurisprudence": 0.32116788321167883, + "legal_and_moral_basis": 0.4766355140186916, + "logical": 0.25203252032520324, + "machine_learning": 0.27049180327868855, + "management": 0.3476190476190476, + "marketing": 0.39444444444444443, + "marxist_theory": 0.3915343915343915, + "modern_chinese": 0.23275862068965517, + "nutrition": 0.2620689655172414, + "philosophy": 0.3142857142857143, + "professional_accounting": 0.2857142857142857, + "professional_law": 0.2985781990521327, + "professional_medicine": 0.2898936170212766, + "professional_psychology": 0.3103448275862069, + "public_relations": 0.3045977011494253, + "security_study": 0.25925925925925924, + "sociology": 0.30973451327433627, + "sports_science": 0.3696969696969697, + "traditional_chinese_medicine": 0.33513513513513515, + "virology": 0.25443786982248523, + "world_history": 0.35403726708074534, + "world_religions": 0.3125 + } + }, + "prompt_4": { + "accuracy": 0.30150233120359177, + "category_acc": { + "agronomy": 0.25443786982248523, + "anatomy": 0.2972972972972973, + "ancient_chinese": 0.27439024390243905, + "arts": 0.3375, + "astronomy": 0.2727272727272727, + "business_ethics": 0.3014354066985646, + "chinese_civil_service_exam": 0.3125, + "chinese_driving_rule": 0.2748091603053435, + "chinese_food_culture": 0.27941176470588236, + "chinese_foreign_policy": 0.2897196261682243, + "chinese_history": 0.3281733746130031, + "chinese_literature": 0.3333333333333333, + "chinese_teacher_qualification": 0.3463687150837989, + "clinical_knowledge": 0.22784810126582278, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.32710280373831774, + "college_engineering_hydrology": 0.3584905660377358, + "college_law": 0.25925925925925924, + "college_mathematics": 0.2761904761904762, + "college_medical_statistics": 0.29245283018867924, + "college_medicine": 0.2893772893772894, + "computer_science": 0.3284313725490196, + "computer_security": 0.3567251461988304, + "conceptual_physics": 0.36054421768707484, + "construction_project_management": 0.302158273381295, + "economics": 0.3081761006289308, + "education": 0.3312883435582822, + "electrical_engineering": 0.38953488372093026, + "elementary_chinese": 0.21428571428571427, + "elementary_commonsense": 0.22727272727272727, + "elementary_information_and_technology": 0.3697478991596639, + "elementary_mathematics": 0.2217391304347826, + "ethnology": 0.32592592592592595, + "food_science": 0.2937062937062937, + "genetics": 0.25, + "global_facts": 0.22818791946308725, + "high_school_biology": 0.2958579881656805, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.22033898305084745, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.33636363636363636, + "high_school_politics": 0.34265734265734266, + "human_sexuality": 0.30158730158730157, + "international_law": 0.2972972972972973, + "journalism": 0.313953488372093, + "jurisprudence": 0.29683698296836986, + "legal_and_moral_basis": 0.49065420560747663, + "logical": 0.25203252032520324, + "machine_learning": 0.26229508196721313, + "management": 0.3380952380952381, + "marketing": 0.3888888888888889, + "marxist_theory": 0.37566137566137564, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.23448275862068965, + "philosophy": 0.3047619047619048, + "professional_accounting": 0.2914285714285714, + "professional_law": 0.2890995260663507, + "professional_medicine": 0.2526595744680851, + "professional_psychology": 0.3146551724137931, + "public_relations": 0.3333333333333333, + "security_study": 0.35555555555555557, + "sociology": 0.34513274336283184, + "sports_science": 0.296969696969697, + "traditional_chinese_medicine": 0.2594594594594595, + "virology": 0.21893491124260356, + "world_history": 0.3416149068322981, + "world_religions": 0.33125 + } + }, + "prompt_5": { + "accuracy": 0.30676912450354, + "category_acc": { + "agronomy": 0.27218934911242604, + "anatomy": 0.2905405405405405, + "ancient_chinese": 0.27439024390243905, + "arts": 0.33125, + "astronomy": 0.296969696969697, + "business_ethics": 0.3157894736842105, + "chinese_civil_service_exam": 0.34375, + "chinese_driving_rule": 0.3816793893129771, + "chinese_food_culture": 0.3014705882352941, + "chinese_foreign_policy": 0.2803738317757009, + "chinese_history": 0.33436532507739936, + "chinese_literature": 0.28431372549019607, + "chinese_teacher_qualification": 0.3575418994413408, + "clinical_knowledge": 0.2320675105485232, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.3177570093457944, + "college_engineering_hydrology": 0.33962264150943394, + "college_law": 0.2222222222222222, + "college_mathematics": 0.29523809523809524, + "college_medical_statistics": 0.3018867924528302, + "college_medicine": 0.304029304029304, + "computer_science": 0.3431372549019608, + "computer_security": 0.36257309941520466, + "conceptual_physics": 0.2857142857142857, + "construction_project_management": 0.30935251798561153, + "economics": 0.31446540880503143, + "education": 0.32515337423312884, + "electrical_engineering": 0.3313953488372093, + "elementary_chinese": 0.20238095238095238, + "elementary_commonsense": 0.25252525252525254, + "elementary_information_and_technology": 0.33613445378151263, + "elementary_mathematics": 0.2391304347826087, + "ethnology": 0.3925925925925926, + "food_science": 0.3776223776223776, + "genetics": 0.2556818181818182, + "global_facts": 0.2751677852348993, + "high_school_biology": 0.3076923076923077, + "high_school_chemistry": 0.25, + "high_school_geography": 0.22033898305084745, + "high_school_mathematics": 0.21951219512195122, + "high_school_physics": 0.2909090909090909, + "high_school_politics": 0.32867132867132864, + "human_sexuality": 0.3333333333333333, + "international_law": 0.2972972972972973, + "journalism": 0.3430232558139535, + "jurisprudence": 0.30170316301703165, + "legal_and_moral_basis": 0.5, + "logical": 0.3252032520325203, + "machine_learning": 0.26229508196721313, + "management": 0.32857142857142857, + "marketing": 0.3388888888888889, + "marxist_theory": 0.3544973544973545, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.32413793103448274, + "philosophy": 0.37142857142857144, + "professional_accounting": 0.28, + "professional_law": 0.3033175355450237, + "professional_medicine": 0.27393617021276595, + "professional_psychology": 0.35344827586206895, + "public_relations": 0.28735632183908044, + "security_study": 0.28888888888888886, + "sociology": 0.30973451327433627, + "sports_science": 0.34545454545454546, + "traditional_chinese_medicine": 0.24864864864864866, + "virology": 0.28402366863905326, + "world_history": 0.35403726708074534, + "world_religions": 0.31875 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.18181818181818182 + }, + "prompt_2": { + "accuracy": 0.2727272727272727 + }, + "prompt_3": { + "accuracy": 0.24242424242424243 + }, + "prompt_4": { + "accuracy": 0.21212121212121213 + }, + "prompt_5": { + "accuracy": 0.24242424242424243 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24545454545454545 + }, + "prompt_2": { + "accuracy": 0.30227272727272725 + }, + "prompt_3": { + "accuracy": 0.2772727272727273 + }, + "prompt_4": { + "accuracy": 0.3409090909090909 + }, + "prompt_5": { + "accuracy": 0.3568181818181818 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3264406779661017 + }, + "prompt_2": { + "accuracy": 0.34067796610169493 + }, + "prompt_3": { + "accuracy": 0.32169491525423727 + }, + "prompt_4": { + "accuracy": 0.3176271186440678 + }, + "prompt_5": { + "accuracy": 0.32508474576271185 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5818997756170531 + }, + "prompt_2": { + "accuracy": 0.5729244577412117 + }, + "prompt_3": { + "accuracy": 0.5740463724756919 + }, + "prompt_4": { + "accuracy": 0.5789080029917726 + }, + "prompt_5": { + "accuracy": 0.5785340314136126 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7172954434100931 + }, + "prompt_2": { + "accuracy": 0.679078882900539 + }, + "prompt_3": { + "accuracy": 0.7143557079862812 + }, + "prompt_4": { + "accuracy": 0.7163155316021558 + }, + "prompt_5": { + "accuracy": 0.7070063694267515 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.32583369095249576, + "rouge2": 0.1294815362830081, + "rougeL": 0.2525251020951568, + "avg_rouge": 0.23594677644355355 + }, + "prompt_2": { + "rouge1": 0.3590237337412753, + "rouge2": 0.1453568305867354, + "rougeL": 0.2765029787232779, + "avg_rouge": 0.26029451435042955 + }, + "prompt_3": { + "rouge1": 0.3471833990838025, + "rouge2": 0.13958296208198523, + "rougeL": 0.26753379642108543, + "avg_rouge": 0.2514333858622911 + }, + "prompt_4": { + "rouge1": 0.34697836939225496, + "rouge2": 0.13728263176601782, + "rougeL": 0.2651241811175288, + "avg_rouge": 0.2497950607586005 + }, + "prompt_5": { + "rouge1": 0.35381877190608735, + "rouge2": 0.13575244353098986, + "rougeL": 0.2702713915849261, + "avg_rouge": 0.25328086900733443 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2387743045728041, + "rouge2": 0.06567214112086292, + "rougeL": 0.1770013332296026, + "avg_rouge": 0.16048259297442322 + }, + "prompt_2": { + "rouge1": 0.23707910182275616, + "rouge2": 0.06484353356425522, + "rougeL": 0.17600484323047638, + "avg_rouge": 0.15930915953916258 + }, + "prompt_3": { + "rouge1": 0.234543244675593, + "rouge2": 0.06408893210306399, + "rougeL": 0.17369553162309, + "avg_rouge": 0.157442569467249 + }, + "prompt_4": { + "rouge1": 0.23446309124608575, + "rouge2": 0.06463566668879009, + "rougeL": 0.17298418761184173, + "avg_rouge": 0.15736098184890587 + }, + "prompt_5": { + "rouge1": 0.23246646200493865, + "rouge2": 0.0635968333016238, + "rougeL": 0.17134428052157763, + "avg_rouge": 0.15580252527604668 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8360091743119266 + }, + "prompt_2": { + "accuracy": 0.8256880733944955 + }, + "prompt_3": { + "accuracy": 0.8176605504587156 + }, + "prompt_4": { + "accuracy": 0.8715596330275229 + }, + "prompt_5": { + "accuracy": 0.7649082568807339 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6931927133269415 + }, + "prompt_2": { + "accuracy": 0.6462128475551294 + }, + "prompt_3": { + "accuracy": 0.6845637583892618 + }, + "prompt_4": { + "accuracy": 0.6931927133269415 + }, + "prompt_5": { + "accuracy": 0.6912751677852349 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.504 + }, + "prompt_2": { + "accuracy": 0.4935 + }, + "prompt_3": { + "accuracy": 0.505 + }, + "prompt_4": { + "accuracy": 0.462 + }, + "prompt_5": { + "accuracy": 0.4965 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.388 + }, + "prompt_2": { + "accuracy": 0.3715 + }, + "prompt_3": { + "accuracy": 0.385 + }, + "prompt_4": { + "accuracy": 0.372 + }, + "prompt_5": { + "accuracy": 0.3685 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5195 + }, + "prompt_2": { + "accuracy": 0.526 + }, + "prompt_3": { + "accuracy": 0.5795 + }, + "prompt_4": { + "accuracy": 0.548 + }, + "prompt_5": { + "accuracy": 0.5205 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5211267605633803 + }, + "prompt_2": { + "accuracy": 0.5492957746478874 + }, + "prompt_3": { + "accuracy": 0.49295774647887325 + }, + "prompt_4": { + "accuracy": 0.43661971830985913 + }, + "prompt_5": { + "accuracy": 0.5774647887323944 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.51985559566787 + }, + "prompt_2": { + "accuracy": 0.5270758122743683 + }, + "prompt_3": { + "accuracy": 0.5451263537906137 + }, + "prompt_4": { + "accuracy": 0.5270758122743683 + }, + "prompt_5": { + "accuracy": 0.49458483754512633 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.46568627450980393 + }, + "prompt_2": { + "accuracy": 0.46568627450980393 + }, + "prompt_3": { + "accuracy": 0.46568627450980393 + }, + "prompt_4": { + "accuracy": 0.39705882352941174 + }, + "prompt_5": { + "accuracy": 0.46568627450980393 + } } }, "five_shot": { @@ -2708,53 +22943,1733 @@ "model_link": "https://huggingface.co/lmsys/vicuna-13b-v1.3", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.4504761904761905, + "language_acc": { + "Malay": 0.38, + "English": 0.6, + "Vietnamese": 0.38, + "Spanish": 0.5266666666666666, + "Indonesian": 0.44, + "Filipino": 0.4066666666666667, + "Chinese": 0.42 + }, + "consistency_score_2": 0.5034920634920635, + "consistency_score_3": 0.3281904761904762, + "consistency_score_4": 0.24247619047619043, + "consistency_score_5": 0.193015873015873, + "consistency_score_6": 0.1619047619047619, + "consistency_score_7": 0.14, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5333333333333333, + "Malay,Vietnamese": 0.43333333333333335, + "Malay,Spanish": 0.4666666666666667, + "Malay,Indonesian": 0.5933333333333334, + "Malay,Filipino": 0.4266666666666667, + "Malay,Chinese": 0.4533333333333333, + "English,Vietnamese": 0.46, + "English,Spanish": 0.68, + "English,Indonesian": 0.56, + "English,Filipino": 0.48, + "English,Chinese": 0.5666666666666667, + "Vietnamese,Spanish": 0.44666666666666666, + "Vietnamese,Indonesian": 0.5, + "Vietnamese,Filipino": 0.4866666666666667, + "Vietnamese,Chinese": 0.4266666666666667, + "Spanish,Indonesian": 0.5133333333333333, + "Spanish,Filipino": 0.4666666666666667, + "Spanish,Chinese": 0.5666666666666667, + "Indonesian,Filipino": 0.52, + "Indonesian,Chinese": 0.5333333333333333, + "Filipino,Chinese": 0.46 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.29333333333333333, + "Malay,English,Spanish": 0.38666666666666666, + "Malay,English,Indonesian": 0.4, + "Malay,English,Filipino": 0.3, + "Malay,English,Chinese": 0.34, + "Malay,Vietnamese,Spanish": 0.26666666666666666, + "Malay,Vietnamese,Indonesian": 0.31333333333333335, + "Malay,Vietnamese,Filipino": 0.26666666666666666, + "Malay,Vietnamese,Chinese": 0.24666666666666667, + "Malay,Spanish,Indonesian": 0.3466666666666667, + "Malay,Spanish,Filipino": 0.26, + "Malay,Spanish,Chinese": 0.32666666666666666, + "Malay,Indonesian,Filipino": 0.32666666666666666, + "Malay,Indonesian,Chinese": 0.3466666666666667, + "Malay,Filipino,Chinese": 0.2733333333333333, + "English,Vietnamese,Spanish": 0.35333333333333333, + "English,Vietnamese,Indonesian": 0.3333333333333333, + "English,Vietnamese,Filipino": 0.30666666666666664, + "English,Vietnamese,Chinese": 0.32, + "English,Spanish,Indonesian": 0.42, + "English,Spanish,Filipino": 0.36666666666666664, + "English,Spanish,Chinese": 0.46, + "English,Indonesian,Filipino": 0.34, + "English,Indonesian,Chinese": 0.3933333333333333, + "English,Filipino,Chinese": 0.32666666666666666, + "Vietnamese,Spanish,Indonesian": 0.3, + "Vietnamese,Spanish,Filipino": 0.3, + "Vietnamese,Spanish,Chinese": 0.30666666666666664, + "Vietnamese,Indonesian,Filipino": 0.35333333333333333, + "Vietnamese,Indonesian,Chinese": 0.3, + "Vietnamese,Filipino,Chinese": 0.2733333333333333, + "Spanish,Indonesian,Filipino": 0.31333333333333335, + "Spanish,Indonesian,Chinese": 0.37333333333333335, + "Spanish,Filipino,Chinese": 0.32666666666666666, + "Indonesian,Filipino,Chinese": 0.32666666666666666 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.22666666666666666, + "Malay,English,Vietnamese,Indonesian": 0.24, + "Malay,English,Vietnamese,Filipino": 0.2, + "Malay,English,Vietnamese,Chinese": 0.21333333333333335, + "Malay,English,Spanish,Indonesian": 0.30666666666666664, + "Malay,English,Spanish,Filipino": 0.24, + "Malay,English,Spanish,Chinese": 0.2866666666666667, + "Malay,English,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Indonesian,Chinese": 0.2866666666666667, + "Malay,English,Filipino,Chinese": 0.22, + "Malay,Vietnamese,Spanish,Indonesian": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Filipino": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "Malay,Vietnamese,Indonesian,Chinese": 0.2, + "Malay,Vietnamese,Filipino,Chinese": 0.18666666666666668, + "Malay,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Malay,Spanish,Indonesian,Chinese": 0.28, + "Malay,Spanish,Filipino,Chinese": 0.22, + "Malay,Indonesian,Filipino,Chinese": 0.22, + "English,Vietnamese,Spanish,Indonesian": 0.26, + "English,Vietnamese,Spanish,Filipino": 0.24666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.2733333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.26, + "English,Vietnamese,Indonesian,Chinese": 0.24666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.22666666666666666, + "English,Spanish,Indonesian,Filipino": 0.2733333333333333, + "English,Spanish,Indonesian,Chinese": 0.32666666666666666, + "English,Spanish,Filipino,Chinese": 0.29333333333333333, + "English,Indonesian,Filipino,Chinese": 0.26666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Vietnamese,Spanish,Indonesian,Chinese": 0.24, + "Vietnamese,Spanish,Filipino,Chinese": 0.22, + "Vietnamese,Indonesian,Filipino,Chinese": 0.24, + "Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.19333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.16666666666666666, + "Malay,English,Vietnamese,Spanish,Chinese": 0.19333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.18, + "Malay,English,Vietnamese,Filipino,Chinese": 0.16, + "Malay,English,Spanish,Indonesian,Filipino": 0.2, + "Malay,English,Spanish,Indonesian,Chinese": 0.25333333333333335, + "Malay,English,Spanish,Filipino,Chinese": 0.20666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.16, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.18666666666666668, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.20666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.21333333333333335, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.2, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.20666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.17333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.18, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.18 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14 + } + }, + "AC3_2": 0.4755109737240234, + "AC3_3": 0.3797311526862998, + "AC3_4": 0.31525903814922207, + "AC3_5": 0.2702412666669569, + "AC3_6": 0.23819891871989718, + "AC3_7": 0.21361290318963008 + }, + "prompt_2": { + "overall_acc": 0.4228571428571429, + "language_acc": { + "Malay": 0.32666666666666666, + "English": 0.58, + "Vietnamese": 0.36, + "Spanish": 0.5266666666666666, + "Indonesian": 0.4066666666666667, + "Filipino": 0.36666666666666664, + "Chinese": 0.3933333333333333 + }, + "consistency_score_2": 0.5212698412698412, + "consistency_score_3": 0.3493333333333334, + "consistency_score_4": 0.2666666666666666, + "consistency_score_5": 0.22126984126984126, + "consistency_score_6": 0.19333333333333336, + "consistency_score_7": 0.17333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.49333333333333335, + "Malay,Vietnamese": 0.48, + "Malay,Spanish": 0.46, + "Malay,Indonesian": 0.64, + "Malay,Filipino": 0.48, + "Malay,Chinese": 0.56, + "English,Vietnamese": 0.4533333333333333, + "English,Spanish": 0.6733333333333333, + "English,Indonesian": 0.5066666666666667, + "English,Filipino": 0.4866666666666667, + "English,Chinese": 0.58, + "Vietnamese,Spanish": 0.54, + "Vietnamese,Indonesian": 0.5066666666666667, + "Vietnamese,Filipino": 0.52, + "Vietnamese,Chinese": 0.43333333333333335, + "Spanish,Indonesian": 0.5333333333333333, + "Spanish,Filipino": 0.5333333333333333, + "Spanish,Chinese": 0.5866666666666667, + "Indonesian,Filipino": 0.4866666666666667, + "Indonesian,Chinese": 0.4866666666666667, + "Filipino,Chinese": 0.5066666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.29333333333333333, + "Malay,English,Spanish": 0.36666666666666664, + "Malay,English,Indonesian": 0.37333333333333335, + "Malay,English,Filipino": 0.3, + "Malay,English,Chinese": 0.3933333333333333, + "Malay,Vietnamese,Spanish": 0.32, + "Malay,Vietnamese,Indonesian": 0.37333333333333335, + "Malay,Vietnamese,Filipino": 0.30666666666666664, + "Malay,Vietnamese,Chinese": 0.30666666666666664, + "Malay,Spanish,Indonesian": 0.36, + "Malay,Spanish,Filipino": 0.31333333333333335, + "Malay,Spanish,Chinese": 0.36, + "Malay,Indonesian,Filipino": 0.36, + "Malay,Indonesian,Chinese": 0.3933333333333333, + "Malay,Filipino,Chinese": 0.3466666666666667, + "English,Vietnamese,Spanish": 0.4, + "English,Vietnamese,Indonesian": 0.31333333333333335, + "English,Vietnamese,Filipino": 0.31333333333333335, + "English,Vietnamese,Chinese": 0.31333333333333335, + "English,Spanish,Indonesian": 0.42, + "English,Spanish,Filipino": 0.3933333333333333, + "English,Spanish,Chinese": 0.4533333333333333, + "English,Indonesian,Filipino": 0.3, + "English,Indonesian,Chinese": 0.37333333333333335, + "English,Filipino,Chinese": 0.3466666666666667, + "Vietnamese,Spanish,Indonesian": 0.3466666666666667, + "Vietnamese,Spanish,Filipino": 0.38, + "Vietnamese,Spanish,Chinese": 0.3466666666666667, + "Vietnamese,Indonesian,Filipino": 0.34, + "Vietnamese,Indonesian,Chinese": 0.29333333333333333, + "Vietnamese,Filipino,Chinese": 0.32, + "Spanish,Indonesian,Filipino": 0.3466666666666667, + "Spanish,Indonesian,Chinese": 0.36, + "Spanish,Filipino,Chinese": 0.38, + "Indonesian,Filipino,Chinese": 0.32 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.25333333333333335, + "Malay,English,Vietnamese,Indonesian": 0.24, + "Malay,English,Vietnamese,Filipino": 0.21333333333333335, + "Malay,English,Vietnamese,Chinese": 0.24666666666666667, + "Malay,English,Spanish,Indonesian": 0.30666666666666664, + "Malay,English,Spanish,Filipino": 0.26, + "Malay,English,Spanish,Chinese": 0.31333333333333335, + "Malay,English,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Indonesian,Chinese": 0.32666666666666666, + "Malay,English,Filipino,Chinese": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Indonesian": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Filipino": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Chinese": 0.26, + "Malay,Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "Malay,Vietnamese,Indonesian,Chinese": 0.25333333333333335, + "Malay,Vietnamese,Filipino,Chinese": 0.23333333333333334, + "Malay,Spanish,Indonesian,Filipino": 0.28, + "Malay,Spanish,Indonesian,Chinese": 0.30666666666666664, + "Malay,Spanish,Filipino,Chinese": 0.2733333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.26666666666666666, + "English,Vietnamese,Spanish,Indonesian": 0.29333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.2866666666666667, + "English,Vietnamese,Spanish,Chinese": 0.28, + "English,Vietnamese,Indonesian,Filipino": 0.22666666666666666, + "English,Vietnamese,Indonesian,Chinese": 0.24666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.24666666666666667, + "English,Spanish,Indonesian,Filipino": 0.28, + "English,Spanish,Indonesian,Chinese": 0.30666666666666664, + "English,Spanish,Filipino,Chinese": 0.30666666666666664, + "English,Indonesian,Filipino,Chinese": 0.24666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.26, + "Vietnamese,Spanish,Indonesian,Chinese": 0.26, + "Vietnamese,Spanish,Filipino,Chinese": 0.28, + "Vietnamese,Indonesian,Filipino,Chinese": 0.24, + "Spanish,Indonesian,Filipino,Chinese": 0.28 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.23333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino": 0.19333333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.22666666666666666, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.19333333333333333, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.22, + "Malay,English,Vietnamese,Filipino,Chinese": 0.19333333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.2733333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.23333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.22, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.20666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.21333333333333335, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.22666666666666666, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.22666666666666666, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.2, + "English,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.21333333333333335, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.18, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.18, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.18666666666666668 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334 + } + }, + "AC3_2": 0.46693438365695117, + "AC3_3": 0.38259496788335906, + "AC3_4": 0.3270718231569854, + "AC3_5": 0.29051890441170214, + "AC3_6": 0.265347758844109, + "AC3_7": 0.2458785942079597 + }, + "prompt_3": { + "overall_acc": 0.4419047619047619, + "language_acc": { + "Malay": 0.36, + "English": 0.6, + "Vietnamese": 0.35333333333333333, + "Spanish": 0.5266666666666666, + "Indonesian": 0.4266666666666667, + "Filipino": 0.38666666666666666, + "Chinese": 0.44 + }, + "consistency_score_2": 0.5133333333333333, + "consistency_score_3": 0.3386666666666666, + "consistency_score_4": 0.25466666666666665, + "consistency_score_5": 0.20571428571428574, + "consistency_score_6": 0.17238095238095238, + "consistency_score_7": 0.14666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.44666666666666666, + "Malay,Vietnamese": 0.47333333333333333, + "Malay,Spanish": 0.4533333333333333, + "Malay,Indonesian": 0.58, + "Malay,Filipino": 0.49333333333333335, + "Malay,Chinese": 0.5133333333333333, + "English,Vietnamese": 0.44, + "English,Spanish": 0.6666666666666666, + "English,Indonesian": 0.54, + "English,Filipino": 0.46, + "English,Chinese": 0.5666666666666667, + "Vietnamese,Spanish": 0.5, + "Vietnamese,Indonesian": 0.52, + "Vietnamese,Filipino": 0.5066666666666667, + "Vietnamese,Chinese": 0.4266666666666667, + "Spanish,Indonesian": 0.5666666666666667, + "Spanish,Filipino": 0.5266666666666666, + "Spanish,Chinese": 0.6, + "Indonesian,Filipino": 0.47333333333333333, + "Indonesian,Chinese": 0.54, + "Filipino,Chinese": 0.4866666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.25333333333333335, + "Malay,English,Spanish": 0.34, + "Malay,English,Indonesian": 0.3466666666666667, + "Malay,English,Filipino": 0.28, + "Malay,English,Chinese": 0.34, + "Malay,Vietnamese,Spanish": 0.3, + "Malay,Vietnamese,Indonesian": 0.3333333333333333, + "Malay,Vietnamese,Filipino": 0.30666666666666664, + "Malay,Vietnamese,Chinese": 0.2866666666666667, + "Malay,Spanish,Indonesian": 0.36, + "Malay,Spanish,Filipino": 0.32, + "Malay,Spanish,Chinese": 0.35333333333333333, + "Malay,Indonesian,Filipino": 0.3333333333333333, + "Malay,Indonesian,Chinese": 0.36666666666666664, + "Malay,Filipino,Chinese": 0.3333333333333333, + "English,Vietnamese,Spanish": 0.36666666666666664, + "English,Vietnamese,Indonesian": 0.32666666666666666, + "English,Vietnamese,Filipino": 0.3, + "English,Vietnamese,Chinese": 0.3, + "English,Spanish,Indonesian": 0.4266666666666667, + "English,Spanish,Filipino": 0.38, + "English,Spanish,Chinese": 0.44666666666666666, + "English,Indonesian,Filipino": 0.30666666666666664, + "English,Indonesian,Chinese": 0.38666666666666666, + "English,Filipino,Chinese": 0.3333333333333333, + "Vietnamese,Spanish,Indonesian": 0.35333333333333333, + "Vietnamese,Spanish,Filipino": 0.35333333333333333, + "Vietnamese,Spanish,Chinese": 0.34, + "Vietnamese,Indonesian,Filipino": 0.34, + "Vietnamese,Indonesian,Chinese": 0.32, + "Vietnamese,Filipino,Chinese": 0.2866666666666667, + "Spanish,Indonesian,Filipino": 0.3333333333333333, + "Spanish,Indonesian,Chinese": 0.4066666666666667, + "Spanish,Filipino,Chinese": 0.37333333333333335, + "Indonesian,Filipino,Chinese": 0.32 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.22666666666666666, + "Malay,English,Vietnamese,Indonesian": 0.22, + "Malay,English,Vietnamese,Filipino": 0.19333333333333333, + "Malay,English,Vietnamese,Chinese": 0.21333333333333335, + "Malay,English,Spanish,Indonesian": 0.2866666666666667, + "Malay,English,Spanish,Filipino": 0.24, + "Malay,English,Spanish,Chinese": 0.28, + "Malay,English,Indonesian,Filipino": 0.22, + "Malay,English,Indonesian,Chinese": 0.28, + "Malay,English,Filipino,Chinese": 0.24, + "Malay,Vietnamese,Spanish,Indonesian": 0.26, + "Malay,Vietnamese,Spanish,Filipino": 0.23333333333333334, + "Malay,Vietnamese,Spanish,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.24, + "Malay,Vietnamese,Indonesian,Chinese": 0.24, + "Malay,Vietnamese,Filipino,Chinese": 0.21333333333333335, + "Malay,Spanish,Indonesian,Filipino": 0.26, + "Malay,Spanish,Indonesian,Chinese": 0.3, + "Malay,Spanish,Filipino,Chinese": 0.2733333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.25333333333333335, + "English,Vietnamese,Spanish,Indonesian": 0.28, + "English,Vietnamese,Spanish,Filipino": 0.2733333333333333, + "English,Vietnamese,Spanish,Chinese": 0.2733333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.25333333333333335, + "English,Vietnamese,Filipino,Chinese": 0.22, + "English,Spanish,Indonesian,Filipino": 0.28, + "English,Spanish,Indonesian,Chinese": 0.32, + "English,Spanish,Filipino,Chinese": 0.30666666666666664, + "English,Indonesian,Filipino,Chinese": 0.25333333333333335, + "Vietnamese,Spanish,Indonesian,Filipino": 0.26, + "Vietnamese,Spanish,Indonesian,Chinese": 0.2733333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.25333333333333335, + "Vietnamese,Indonesian,Filipino,Chinese": 0.23333333333333334, + "Spanish,Indonesian,Filipino,Chinese": 0.28 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.2, + "Malay,English,Vietnamese,Spanish,Filipino": 0.18, + "Malay,English,Vietnamese,Spanish,Chinese": 0.2, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.19333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.16666666666666666, + "Malay,English,Spanish,Indonesian,Filipino": 0.20666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.24666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.22, + "Malay,English,Indonesian,Filipino,Chinese": 0.2, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.2, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.22, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.22, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.22666666666666666, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.21333333333333335, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.19333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.24, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.21333333333333335 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.18, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.16, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.18666666666666668 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667 + } + }, + "AC3_2": 0.4749484878200037, + "AC3_3": 0.3834585976408047, + "AC3_4": 0.32312095519926026, + "AC3_5": 0.28073949575496987, + "AC3_6": 0.24801476555578644, + "AC3_7": 0.2202373246659252 + }, + "prompt_4": { + "overall_acc": 0.43999999999999995, + "language_acc": { + "Malay": 0.3466666666666667, + "English": 0.58, + "Vietnamese": 0.36, + "Spanish": 0.5533333333333333, + "Indonesian": 0.4066666666666667, + "Filipino": 0.3933333333333333, + "Chinese": 0.44 + }, + "consistency_score_2": 0.5206349206349208, + "consistency_score_3": 0.34838095238095246, + "consistency_score_4": 0.2643809523809524, + "consistency_score_5": 0.2165079365079365, + "consistency_score_6": 0.18476190476190477, + "consistency_score_7": 0.16, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.49333333333333335, + "Malay,Vietnamese": 0.48, + "Malay,Spanish": 0.4533333333333333, + "Malay,Indonesian": 0.62, + "Malay,Filipino": 0.4666666666666667, + "Malay,Chinese": 0.5, + "English,Vietnamese": 0.4533333333333333, + "English,Spanish": 0.6733333333333333, + "English,Indonesian": 0.5133333333333333, + "English,Filipino": 0.47333333333333333, + "English,Chinese": 0.5933333333333334, + "Vietnamese,Spanish": 0.4866666666666667, + "Vietnamese,Indonesian": 0.5466666666666666, + "Vietnamese,Filipino": 0.52, + "Vietnamese,Chinese": 0.46, + "Spanish,Indonesian": 0.5466666666666666, + "Spanish,Filipino": 0.5466666666666666, + "Spanish,Chinese": 0.5733333333333334, + "Indonesian,Filipino": 0.5333333333333333, + "Indonesian,Chinese": 0.5333333333333333, + "Filipino,Chinese": 0.4666666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.3, + "Malay,English,Spanish": 0.37333333333333335, + "Malay,English,Indonesian": 0.36666666666666664, + "Malay,English,Filipino": 0.29333333333333333, + "Malay,English,Chinese": 0.36666666666666664, + "Malay,Vietnamese,Spanish": 0.29333333333333333, + "Malay,Vietnamese,Indonesian": 0.37333333333333335, + "Malay,Vietnamese,Filipino": 0.3, + "Malay,Vietnamese,Chinese": 0.2866666666666667, + "Malay,Spanish,Indonesian": 0.36, + "Malay,Spanish,Filipino": 0.3, + "Malay,Spanish,Chinese": 0.32666666666666666, + "Malay,Indonesian,Filipino": 0.37333333333333335, + "Malay,Indonesian,Chinese": 0.38, + "Malay,Filipino,Chinese": 0.30666666666666664, + "English,Vietnamese,Spanish": 0.38, + "English,Vietnamese,Indonesian": 0.34, + "English,Vietnamese,Filipino": 0.3333333333333333, + "English,Vietnamese,Chinese": 0.3333333333333333, + "English,Spanish,Indonesian": 0.41333333333333333, + "English,Spanish,Filipino": 0.4, + "English,Spanish,Chinese": 0.4533333333333333, + "English,Indonesian,Filipino": 0.32, + "English,Indonesian,Chinese": 0.38666666666666666, + "English,Filipino,Chinese": 0.32666666666666666, + "Vietnamese,Spanish,Indonesian": 0.36, + "Vietnamese,Spanish,Filipino": 0.36666666666666664, + "Vietnamese,Spanish,Chinese": 0.3333333333333333, + "Vietnamese,Indonesian,Filipino": 0.37333333333333335, + "Vietnamese,Indonesian,Chinese": 0.3333333333333333, + "Vietnamese,Filipino,Chinese": 0.3, + "Spanish,Indonesian,Filipino": 0.36666666666666664, + "Spanish,Indonesian,Chinese": 0.37333333333333335, + "Spanish,Filipino,Chinese": 0.35333333333333333, + "Indonesian,Filipino,Chinese": 0.3466666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.25333333333333335, + "Malay,English,Vietnamese,Indonesian": 0.26, + "Malay,English,Vietnamese,Filipino": 0.22666666666666666, + "Malay,English,Vietnamese,Chinese": 0.24666666666666667, + "Malay,English,Spanish,Indonesian": 0.31333333333333335, + "Malay,English,Spanish,Filipino": 0.26666666666666666, + "Malay,English,Spanish,Chinese": 0.2866666666666667, + "Malay,English,Indonesian,Filipino": 0.25333333333333335, + "Malay,English,Indonesian,Chinese": 0.30666666666666664, + "Malay,English,Filipino,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Spanish,Indonesian": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Filipino": 0.22, + "Malay,Vietnamese,Spanish,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.26, + "Malay,Vietnamese,Indonesian,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.20666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.26, + "Malay,Spanish,Indonesian,Chinese": 0.28, + "Malay,Spanish,Filipino,Chinese": 0.23333333333333334, + "Malay,Indonesian,Filipino,Chinese": 0.26, + "English,Vietnamese,Spanish,Indonesian": 0.3, + "English,Vietnamese,Spanish,Filipino": 0.2866666666666667, + "English,Vietnamese,Spanish,Chinese": 0.2866666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "English,Vietnamese,Indonesian,Chinese": 0.2733333333333333, + "English,Vietnamese,Filipino,Chinese": 0.24666666666666667, + "English,Spanish,Indonesian,Filipino": 0.29333333333333333, + "English,Spanish,Indonesian,Chinese": 0.31333333333333335, + "English,Spanish,Filipino,Chinese": 0.29333333333333333, + "English,Indonesian,Filipino,Chinese": 0.25333333333333335, + "Vietnamese,Spanish,Indonesian,Filipino": 0.2866666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.26666666666666666, + "Vietnamese,Spanish,Filipino,Chinese": 0.24666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.26, + "Spanish,Indonesian,Filipino,Chinese": 0.26666666666666666 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.23333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino": 0.2, + "Malay,English,Vietnamese,Spanish,Chinese": 0.21333333333333335, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.21333333333333335, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.22, + "Malay,English,Vietnamese,Filipino,Chinese": 0.18666666666666668, + "Malay,English,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.26, + "Malay,English,Spanish,Filipino,Chinese": 0.21333333333333335, + "Malay,English,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.24, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.24, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.22, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.22, + "English,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.21333333333333335 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.2, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.16666666666666666, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.18, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16 + } + }, + "AC3_2": 0.4769332451585479, + "AC3_3": 0.3888668760076939, + "AC3_4": 0.3302974580385597, + "AC3_5": 0.29021276591324124, + "AC3_6": 0.2602439023973695, + "AC3_7": 0.23466666662755553 + }, + "prompt_5": { + "overall_acc": 0.4390476190476191, + "language_acc": { + "Malay": 0.37333333333333335, + "English": 0.6066666666666667, + "Vietnamese": 0.34, + "Spanish": 0.5133333333333333, + "Indonesian": 0.44666666666666666, + "Filipino": 0.36666666666666664, + "Chinese": 0.4266666666666667 + }, + "consistency_score_2": 0.5184126984126985, + "consistency_score_3": 0.3449523809523809, + "consistency_score_4": 0.26038095238095244, + "consistency_score_5": 0.2104761904761905, + "consistency_score_6": 0.1771428571428571, + "consistency_score_7": 0.15333333333333332, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5, + "Malay,Vietnamese": 0.47333333333333333, + "Malay,Spanish": 0.4866666666666667, + "Malay,Indonesian": 0.5733333333333334, + "Malay,Filipino": 0.47333333333333333, + "Malay,Chinese": 0.5266666666666666, + "English,Vietnamese": 0.4866666666666667, + "English,Spanish": 0.68, + "English,Indonesian": 0.5533333333333333, + "English,Filipino": 0.5066666666666667, + "English,Chinese": 0.5866666666666667, + "Vietnamese,Spanish": 0.49333333333333335, + "Vietnamese,Indonesian": 0.5466666666666666, + "Vietnamese,Filipino": 0.5533333333333333, + "Vietnamese,Chinese": 0.4533333333333333, + "Spanish,Indonesian": 0.5333333333333333, + "Spanish,Filipino": 0.49333333333333335, + "Spanish,Chinese": 0.5533333333333333, + "Indonesian,Filipino": 0.4666666666666667, + "Indonesian,Chinese": 0.5133333333333333, + "Filipino,Chinese": 0.43333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.32, + "Malay,English,Spanish": 0.38, + "Malay,English,Indonesian": 0.38, + "Malay,English,Filipino": 0.3, + "Malay,English,Chinese": 0.38, + "Malay,Vietnamese,Spanish": 0.30666666666666664, + "Malay,Vietnamese,Indonesian": 0.34, + "Malay,Vietnamese,Filipino": 0.31333333333333335, + "Malay,Vietnamese,Chinese": 0.30666666666666664, + "Malay,Spanish,Indonesian": 0.35333333333333333, + "Malay,Spanish,Filipino": 0.29333333333333333, + "Malay,Spanish,Chinese": 0.34, + "Malay,Indonesian,Filipino": 0.31333333333333335, + "Malay,Indonesian,Chinese": 0.36666666666666664, + "Malay,Filipino,Chinese": 0.29333333333333333, + "English,Vietnamese,Spanish": 0.3933333333333333, + "English,Vietnamese,Indonesian": 0.36, + "English,Vietnamese,Filipino": 0.3466666666666667, + "English,Vietnamese,Chinese": 0.3466666666666667, + "English,Spanish,Indonesian": 0.44666666666666666, + "English,Spanish,Filipino": 0.38666666666666666, + "English,Spanish,Chinese": 0.44666666666666666, + "English,Indonesian,Filipino": 0.32666666666666666, + "English,Indonesian,Chinese": 0.4066666666666667, + "English,Filipino,Chinese": 0.3333333333333333, + "Vietnamese,Spanish,Indonesian": 0.35333333333333333, + "Vietnamese,Spanish,Filipino": 0.35333333333333333, + "Vietnamese,Spanish,Chinese": 0.32, + "Vietnamese,Indonesian,Filipino": 0.3466666666666667, + "Vietnamese,Indonesian,Chinese": 0.32, + "Vietnamese,Filipino,Chinese": 0.30666666666666664, + "Spanish,Indonesian,Filipino": 0.32666666666666666, + "Spanish,Indonesian,Chinese": 0.36666666666666664, + "Spanish,Filipino,Chinese": 0.30666666666666664, + "Indonesian,Filipino,Chinese": 0.29333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.26666666666666666, + "Malay,English,Vietnamese,Indonesian": 0.26, + "Malay,English,Vietnamese,Filipino": 0.23333333333333334, + "Malay,English,Vietnamese,Chinese": 0.26666666666666666, + "Malay,English,Spanish,Indonesian": 0.31333333333333335, + "Malay,English,Spanish,Filipino": 0.25333333333333335, + "Malay,English,Spanish,Chinese": 0.29333333333333333, + "Malay,English,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Indonesian,Chinese": 0.32, + "Malay,English,Filipino,Chinese": 0.24, + "Malay,Vietnamese,Spanish,Indonesian": 0.24666666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.23333333333333334, + "Malay,Vietnamese,Spanish,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "Malay,Vietnamese,Indonesian,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Filipino,Chinese": 0.22666666666666666, + "Malay,Spanish,Indonesian,Filipino": 0.24666666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.28, + "Malay,Spanish,Filipino,Chinese": 0.22, + "Malay,Indonesian,Filipino,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish,Indonesian": 0.30666666666666664, + "English,Vietnamese,Spanish,Filipino": 0.29333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.28, + "English,Vietnamese,Indonesian,Filipino": 0.26, + "English,Vietnamese,Indonesian,Chinese": 0.28, + "English,Vietnamese,Filipino,Chinese": 0.25333333333333335, + "English,Spanish,Indonesian,Filipino": 0.29333333333333333, + "English,Spanish,Indonesian,Chinese": 0.32666666666666666, + "English,Spanish,Filipino,Chinese": 0.28, + "English,Indonesian,Filipino,Chinese": 0.24666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.26, + "Vietnamese,Spanish,Indonesian,Chinese": 0.25333333333333335, + "Vietnamese,Spanish,Filipino,Chinese": 0.24, + "Vietnamese,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Spanish,Indonesian,Filipino,Chinese": 0.24 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.22666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino": 0.20666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.21333333333333335, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.2, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.22, + "Malay,English,Vietnamese,Filipino,Chinese": 0.2, + "Malay,English,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Malay,English,Spanish,Indonesian,Chinese": 0.26, + "Malay,English,Spanish,Filipino,Chinese": 0.20666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.2, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.18, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.18, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.2, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.23333333333333334, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.22, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.2, + "English,Spanish,Indonesian,Filipino,Chinese": 0.22, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.17333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.18 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332 + } + }, + "AC3_2": 0.47544082349461547, + "AC3_3": 0.38635337123120517, + "AC3_4": 0.32689438733784615, + "AC3_5": 0.2845440580486371, + "AC3_6": 0.252435416165701, + "AC3_7": 0.22728831721779427 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3595779220779221, + "language_acc": { + "English": 0.4715909090909091, + "Vietnamese": 0.2897727272727273, + "Chinese": 0.4090909090909091, + "Indonesian": 0.3125, + "Filipino": 0.25, + "Spanish": 0.4431818181818182, + "Malay": 0.3409090909090909 + }, + "consistency_score_2": 0.4770021645021644, + "consistency_score_3": 0.28685064935064936, + "consistency_score_4": 0.1938311688311688, + "consistency_score_5": 0.13988095238095236, + "consistency_score_6": 0.10470779220779221, + "consistency_score_7": 0.07954545454545454, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4090909090909091, + "English,Chinese": 0.45454545454545453, + "English,Indonesian": 0.4318181818181818, + "English,Filipino": 0.42613636363636365, + "English,Spanish": 0.5795454545454546, + "English,Malay": 0.4772727272727273, + "Vietnamese,Chinese": 0.4318181818181818, + "Vietnamese,Indonesian": 0.45454545454545453, + "Vietnamese,Filipino": 0.5340909090909091, + "Vietnamese,Spanish": 0.45454545454545453, + "Vietnamese,Malay": 0.4318181818181818, + "Chinese,Indonesian": 0.5056818181818182, + "Chinese,Filipino": 0.4943181818181818, + "Chinese,Spanish": 0.5056818181818182, + "Chinese,Malay": 0.5284090909090909, + "Indonesian,Filipino": 0.4431818181818182, + "Indonesian,Spanish": 0.4715909090909091, + "Indonesian,Malay": 0.5909090909090909, + "Filipino,Spanish": 0.42045454545454547, + "Filipino,Malay": 0.48863636363636365, + "Spanish,Malay": 0.48295454545454547 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.23863636363636365, + "English,Vietnamese,Indonesian": 0.23863636363636365, + "English,Vietnamese,Filipino": 0.26704545454545453, + "English,Vietnamese,Spanish": 0.3068181818181818, + "English,Vietnamese,Malay": 0.2556818181818182, + "English,Chinese,Indonesian": 0.26136363636363635, + "English,Chinese,Filipino": 0.2556818181818182, + "English,Chinese,Spanish": 0.3409090909090909, + "English,Chinese,Malay": 0.2840909090909091, + "English,Indonesian,Filipino": 0.23295454545454544, + "English,Indonesian,Spanish": 0.3068181818181818, + "English,Indonesian,Malay": 0.3068181818181818, + "English,Filipino,Spanish": 0.29545454545454547, + "English,Filipino,Malay": 0.26136363636363635, + "English,Spanish,Malay": 0.3409090909090909, + "Vietnamese,Chinese,Indonesian": 0.2784090909090909, + "Vietnamese,Chinese,Filipino": 0.30113636363636365, + "Vietnamese,Chinese,Spanish": 0.29545454545454547, + "Vietnamese,Chinese,Malay": 0.2727272727272727, + "Vietnamese,Indonesian,Filipino": 0.26704545454545453, + "Vietnamese,Indonesian,Spanish": 0.2727272727272727, + "Vietnamese,Indonesian,Malay": 0.30113636363636365, + "Vietnamese,Filipino,Spanish": 0.2897727272727273, + "Vietnamese,Filipino,Malay": 0.2840909090909091, + "Vietnamese,Spanish,Malay": 0.2727272727272727, + "Chinese,Indonesian,Filipino": 0.2897727272727273, + "Chinese,Indonesian,Spanish": 0.30113636363636365, + "Chinese,Indonesian,Malay": 0.35795454545454547, + "Chinese,Filipino,Spanish": 0.2727272727272727, + "Chinese,Filipino,Malay": 0.30113636363636365, + "Chinese,Spanish,Malay": 0.32386363636363635, + "Indonesian,Filipino,Spanish": 0.25, + "Indonesian,Filipino,Malay": 0.3068181818181818, + "Indonesian,Spanish,Malay": 0.3352272727272727, + "Filipino,Spanish,Malay": 0.2727272727272727 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.1590909090909091, + "English,Vietnamese,Chinese,Filipino": 0.17613636363636365, + "English,Vietnamese,Chinese,Spanish": 0.20454545454545456, + "English,Vietnamese,Chinese,Malay": 0.16477272727272727, + "English,Vietnamese,Indonesian,Filipino": 0.1590909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.19318181818181818, + "English,Vietnamese,Indonesian,Malay": 0.1875, + "English,Vietnamese,Filipino,Spanish": 0.2159090909090909, + "English,Vietnamese,Filipino,Malay": 0.17613636363636365, + "English,Vietnamese,Spanish,Malay": 0.21022727272727273, + "English,Chinese,Indonesian,Filipino": 0.16477272727272727, + "English,Chinese,Indonesian,Spanish": 0.20454545454545456, + "English,Chinese,Indonesian,Malay": 0.20454545454545456, + "English,Chinese,Filipino,Spanish": 0.19886363636363635, + "English,Chinese,Filipino,Malay": 0.17045454545454544, + "English,Chinese,Spanish,Malay": 0.22727272727272727, + "English,Indonesian,Filipino,Spanish": 0.18181818181818182, + "English,Indonesian,Filipino,Malay": 0.17613636363636365, + "English,Indonesian,Spanish,Malay": 0.23295454545454544, + "English,Filipino,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Spanish": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Malay": 0.19886363636363635, + "Vietnamese,Chinese,Filipino,Spanish": 0.20454545454545456, + "Vietnamese,Chinese,Filipino,Malay": 0.1875, + "Vietnamese,Chinese,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Indonesian,Filipino,Spanish": 0.18181818181818182, + "Vietnamese,Indonesian,Filipino,Malay": 0.17613636363636365, + "Vietnamese,Indonesian,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Filipino,Spanish,Malay": 0.19886363636363635, + "Chinese,Indonesian,Filipino,Spanish": 0.19318181818181818, + "Chinese,Indonesian,Filipino,Malay": 0.20454545454545456, + "Chinese,Indonesian,Spanish,Malay": 0.23295454545454544, + "Chinese,Filipino,Spanish,Malay": 0.19886363636363635, + "Indonesian,Filipino,Spanish,Malay": 0.1875 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.11363636363636363, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.14204545454545456, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.125, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.14772727272727273, + "English,Vietnamese,Chinese,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Spanish,Malay": 0.14772727272727273, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.1534090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.1534090909090909, + "English,Chinese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino,Malay": 0.125, + "English,Chinese,Indonesian,Spanish,Malay": 0.16477272727272727, + "English,Chinese,Filipino,Spanish,Malay": 0.14204545454545456, + "English,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.125, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.11363636363636363, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.10795454545454546, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454 + } + }, + "AC3_2": 0.41004907926752876, + "AC3_3": 0.3191231482739319, + "AC3_4": 0.2518838597145548, + "AC3_5": 0.20141038534893288, + "AC3_6": 0.16218724453867558, + "AC3_7": 0.13027222312610573 + }, + "prompt_2": { + "overall_acc": 0.33685064935064934, + "language_acc": { + "English": 0.48295454545454547, + "Vietnamese": 0.25, + "Chinese": 0.39204545454545453, + "Indonesian": 0.3068181818181818, + "Filipino": 0.26704545454545453, + "Spanish": 0.3522727272727273, + "Malay": 0.3068181818181818 + }, + "consistency_score_2": 0.49512987012987014, + "consistency_score_3": 0.3125, + "consistency_score_4": 0.22012987012987015, + "consistency_score_5": 0.1623376623376623, + "consistency_score_6": 0.12175324675324674, + "consistency_score_7": 0.09090909090909091, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3693181818181818, + "English,Chinese": 0.5284090909090909, + "English,Indonesian": 0.48863636363636365, + "English,Filipino": 0.4715909090909091, + "English,Spanish": 0.6136363636363636, + "English,Malay": 0.4772727272727273, + "Vietnamese,Chinese": 0.45454545454545453, + "Vietnamese,Indonesian": 0.4659090909090909, + "Vietnamese,Filipino": 0.5284090909090909, + "Vietnamese,Spanish": 0.4659090909090909, + "Vietnamese,Malay": 0.48863636363636365, + "Chinese,Indonesian": 0.5454545454545454, + "Chinese,Filipino": 0.5113636363636364, + "Chinese,Spanish": 0.48863636363636365, + "Chinese,Malay": 0.5113636363636364, + "Indonesian,Filipino": 0.4659090909090909, + "Indonesian,Spanish": 0.4715909090909091, + "Indonesian,Malay": 0.5738636363636364, + "Filipino,Spanish": 0.4602272727272727, + "Filipino,Malay": 0.5454545454545454, + "Spanish,Malay": 0.4715909090909091 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.26136363636363635, + "English,Vietnamese,Indonesian": 0.23863636363636365, + "English,Vietnamese,Filipino": 0.2727272727272727, + "English,Vietnamese,Spanish": 0.3068181818181818, + "English,Vietnamese,Malay": 0.2556818181818182, + "English,Chinese,Indonesian": 0.3352272727272727, + "English,Chinese,Filipino": 0.3181818181818182, + "English,Chinese,Spanish": 0.375, + "English,Chinese,Malay": 0.3352272727272727, + "English,Indonesian,Filipino": 0.2897727272727273, + "English,Indonesian,Spanish": 0.3522727272727273, + "English,Indonesian,Malay": 0.3352272727272727, + "English,Filipino,Spanish": 0.3181818181818182, + "English,Filipino,Malay": 0.32386363636363635, + "English,Spanish,Malay": 0.36363636363636365, + "Vietnamese,Chinese,Indonesian": 0.2897727272727273, + "Vietnamese,Chinese,Filipino": 0.32954545454545453, + "Vietnamese,Chinese,Spanish": 0.2897727272727273, + "Vietnamese,Chinese,Malay": 0.3068181818181818, + "Vietnamese,Indonesian,Filipino": 0.2840909090909091, + "Vietnamese,Indonesian,Spanish": 0.2784090909090909, + "Vietnamese,Indonesian,Malay": 0.32386363636363635, + "Vietnamese,Filipino,Spanish": 0.3125, + "Vietnamese,Filipino,Malay": 0.32954545454545453, + "Vietnamese,Spanish,Malay": 0.2840909090909091, + "Chinese,Indonesian,Filipino": 0.3352272727272727, + "Chinese,Indonesian,Spanish": 0.3125, + "Chinese,Indonesian,Malay": 0.3693181818181818, + "Chinese,Filipino,Spanish": 0.30113636363636365, + "Chinese,Filipino,Malay": 0.3522727272727273, + "Chinese,Spanish,Malay": 0.3181818181818182, + "Indonesian,Filipino,Spanish": 0.2727272727272727, + "Indonesian,Filipino,Malay": 0.3409090909090909, + "Indonesian,Spanish,Malay": 0.32954545454545453, + "Filipino,Spanish,Malay": 0.29545454545454547 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.17613636363636365, + "English,Vietnamese,Chinese,Filipino": 0.21022727272727273, + "English,Vietnamese,Chinese,Spanish": 0.21022727272727273, + "English,Vietnamese,Chinese,Malay": 0.21022727272727273, + "English,Vietnamese,Indonesian,Filipino": 0.18181818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.20454545454545456, + "English,Vietnamese,Indonesian,Malay": 0.1875, + "English,Vietnamese,Filipino,Spanish": 0.22727272727272727, + "English,Vietnamese,Filipino,Malay": 0.21022727272727273, + "English,Vietnamese,Spanish,Malay": 0.2159090909090909, + "English,Chinese,Indonesian,Filipino": 0.23295454545454544, + "English,Chinese,Indonesian,Spanish": 0.23863636363636365, + "English,Chinese,Indonesian,Malay": 0.2556818181818182, + "English,Chinese,Filipino,Spanish": 0.23295454545454544, + "English,Chinese,Filipino,Malay": 0.23863636363636365, + "English,Chinese,Spanish,Malay": 0.26136363636363635, + "English,Indonesian,Filipino,Spanish": 0.2215909090909091, + "English,Indonesian,Filipino,Malay": 0.22727272727272727, + "English,Indonesian,Spanish,Malay": 0.2556818181818182, + "English,Filipino,Spanish,Malay": 0.23863636363636365, + "Vietnamese,Chinese,Indonesian,Filipino": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Spanish": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Malay": 0.2215909090909091, + "Vietnamese,Chinese,Filipino,Spanish": 0.2215909090909091, + "Vietnamese,Chinese,Filipino,Malay": 0.23863636363636365, + "Vietnamese,Chinese,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Indonesian,Filipino,Spanish": 0.19318181818181818, + "Vietnamese,Indonesian,Filipino,Malay": 0.2159090909090909, + "Vietnamese,Indonesian,Spanish,Malay": 0.19886363636363635, + "Vietnamese,Filipino,Spanish,Malay": 0.2215909090909091, + "Chinese,Indonesian,Filipino,Spanish": 0.2159090909090909, + "Chinese,Indonesian,Filipino,Malay": 0.26136363636363635, + "Chinese,Indonesian,Spanish,Malay": 0.23295454545454544, + "Chinese,Filipino,Spanish,Malay": 0.2159090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.19886363636363635 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.14204545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.14204545454545456, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.16477272727272727, + "English,Vietnamese,Chinese,Filipino,Malay": 0.17045454545454544, + "English,Vietnamese,Chinese,Spanish,Malay": 0.17045454545454544, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.14772727272727273, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.1534090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.17613636363636365, + "English,Chinese,Indonesian,Filipino,Spanish": 0.17045454545454544, + "English,Chinese,Indonesian,Filipino,Malay": 0.18181818181818182, + "English,Chinese,Indonesian,Spanish,Malay": 0.19318181818181818, + "English,Chinese,Filipino,Spanish,Malay": 0.17613636363636365, + "English,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.1590909090909091 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.13636363636363635, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091 + } + }, + "AC3_2": 0.40093443137404594, + "AC3_3": 0.32421874995007033, + "AC3_4": 0.26626026256920565, + "AC3_5": 0.21908985319230542, + "AC3_6": 0.1788587518286007, + "AC3_7": 0.14317750557287662 + }, + "prompt_3": { + "overall_acc": 0.3474025974025974, + "language_acc": { + "English": 0.4602272727272727, + "Vietnamese": 0.26136363636363635, + "Chinese": 0.38636363636363635, + "Indonesian": 0.32386363636363635, + "Filipino": 0.2840909090909091, + "Spanish": 0.4034090909090909, + "Malay": 0.3125 + }, + "consistency_score_2": 0.47023809523809534, + "consistency_score_3": 0.27532467532467536, + "consistency_score_4": 0.18051948051948055, + "consistency_score_5": 0.12635281385281383, + "consistency_score_6": 0.09172077922077923, + "consistency_score_7": 0.06818181818181818, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3693181818181818, + "English,Chinese": 0.4943181818181818, + "English,Indonesian": 0.4943181818181818, + "English,Filipino": 0.45454545454545453, + "English,Spanish": 0.5965909090909091, + "English,Malay": 0.45454545454545453, + "Vietnamese,Chinese": 0.4034090909090909, + "Vietnamese,Indonesian": 0.42045454545454547, + "Vietnamese,Filipino": 0.5113636363636364, + "Vietnamese,Spanish": 0.4659090909090909, + "Vietnamese,Malay": 0.3806818181818182, + "Chinese,Indonesian": 0.5, + "Chinese,Filipino": 0.48295454545454547, + "Chinese,Spanish": 0.4602272727272727, + "Chinese,Malay": 0.4772727272727273, + "Indonesian,Filipino": 0.4715909090909091, + "Indonesian,Spanish": 0.45454545454545453, + "Indonesian,Malay": 0.5795454545454546, + "Filipino,Spanish": 0.44886363636363635, + "Filipino,Malay": 0.5113636363636364, + "Spanish,Malay": 0.4431818181818182 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2215909090909091, + "English,Vietnamese,Indonesian": 0.23295454545454544, + "English,Vietnamese,Filipino": 0.25, + "English,Vietnamese,Spanish": 0.29545454545454547, + "English,Vietnamese,Malay": 0.21022727272727273, + "English,Chinese,Indonesian": 0.30113636363636365, + "English,Chinese,Filipino": 0.2840909090909091, + "English,Chinese,Spanish": 0.32954545454545453, + "English,Chinese,Malay": 0.2840909090909091, + "English,Indonesian,Filipino": 0.2784090909090909, + "English,Indonesian,Spanish": 0.32954545454545453, + "English,Indonesian,Malay": 0.3181818181818182, + "English,Filipino,Spanish": 0.30113636363636365, + "English,Filipino,Malay": 0.2727272727272727, + "English,Spanish,Malay": 0.3181818181818182, + "Vietnamese,Chinese,Indonesian": 0.22727272727272727, + "Vietnamese,Chinese,Filipino": 0.26136363636363635, + "Vietnamese,Chinese,Spanish": 0.24431818181818182, + "Vietnamese,Chinese,Malay": 0.2159090909090909, + "Vietnamese,Indonesian,Filipino": 0.25, + "Vietnamese,Indonesian,Spanish": 0.2556818181818182, + "Vietnamese,Indonesian,Malay": 0.26136363636363635, + "Vietnamese,Filipino,Spanish": 0.2840909090909091, + "Vietnamese,Filipino,Malay": 0.2556818181818182, + "Vietnamese,Spanish,Malay": 0.22727272727272727, + "Chinese,Indonesian,Filipino": 0.2897727272727273, + "Chinese,Indonesian,Spanish": 0.2784090909090909, + "Chinese,Indonesian,Malay": 0.3409090909090909, + "Chinese,Filipino,Spanish": 0.26136363636363635, + "Chinese,Filipino,Malay": 0.30113636363636365, + "Chinese,Spanish,Malay": 0.2897727272727273, + "Indonesian,Filipino,Spanish": 0.2556818181818182, + "Indonesian,Filipino,Malay": 0.3352272727272727, + "Indonesian,Spanish,Malay": 0.3125, + "Filipino,Spanish,Malay": 0.26136363636363635 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.14204545454545456, + "English,Vietnamese,Chinese,Filipino": 0.1534090909090909, + "English,Vietnamese,Chinese,Spanish": 0.17045454545454544, + "English,Vietnamese,Chinese,Malay": 0.14772727272727273, + "English,Vietnamese,Indonesian,Filipino": 0.16477272727272727, + "English,Vietnamese,Indonesian,Spanish": 0.19318181818181818, + "English,Vietnamese,Indonesian,Malay": 0.1590909090909091, + "English,Vietnamese,Filipino,Spanish": 0.21022727272727273, + "English,Vietnamese,Filipino,Malay": 0.1590909090909091, + "English,Vietnamese,Spanish,Malay": 0.17613636363636365, + "English,Chinese,Indonesian,Filipino": 0.19318181818181818, + "English,Chinese,Indonesian,Spanish": 0.20454545454545456, + "English,Chinese,Indonesian,Malay": 0.2159090909090909, + "English,Chinese,Filipino,Spanish": 0.19886363636363635, + "English,Chinese,Filipino,Malay": 0.19318181818181818, + "English,Chinese,Spanish,Malay": 0.2159090909090909, + "English,Indonesian,Filipino,Spanish": 0.20454545454545456, + "English,Indonesian,Filipino,Malay": 0.20454545454545456, + "English,Indonesian,Spanish,Malay": 0.2215909090909091, + "English,Filipino,Spanish,Malay": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian,Filipino": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Filipino,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Indonesian,Filipino,Spanish": 0.17045454545454544, + "Vietnamese,Indonesian,Filipino,Malay": 0.1590909090909091, + "Vietnamese,Indonesian,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Filipino,Spanish,Malay": 0.16477272727272727, + "Chinese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "Chinese,Indonesian,Filipino,Malay": 0.2215909090909091, + "Chinese,Indonesian,Spanish,Malay": 0.2159090909090909, + "Chinese,Filipino,Spanish,Malay": 0.17613636363636365, + "Indonesian,Filipino,Spanish,Malay": 0.19886363636363635 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.11363636363636363, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.10795454545454546, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.125, + "English,Vietnamese,Chinese,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Chinese,Spanish,Malay": 0.125, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.14772727272727273, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.13068181818181818, + "English,Vietnamese,Filipino,Spanish,Malay": 0.14204545454545456, + "English,Chinese,Indonesian,Filipino,Spanish": 0.13068181818181818, + "English,Chinese,Indonesian,Filipino,Malay": 0.14772727272727273, + "English,Chinese,Indonesian,Spanish,Malay": 0.1590909090909091, + "English,Chinese,Filipino,Spanish,Malay": 0.14204545454545456, + "English,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.125, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818 + } + }, + "AC3_2": 0.3995934574532557, + "AC3_3": 0.3071922862878988, + "AC3_4": 0.23758406410732627, + "AC3_5": 0.18530783889522337, + "AC3_6": 0.14512566912219504, + "AC3_7": 0.11399147724529805 + }, + "prompt_4": { + "overall_acc": 0.34415584415584416, + "language_acc": { + "English": 0.4147727272727273, + "Vietnamese": 0.2897727272727273, + "Chinese": 0.3806818181818182, + "Indonesian": 0.30113636363636365, + "Filipino": 0.29545454545454547, + "Spanish": 0.4090909090909091, + "Malay": 0.3181818181818182 + }, + "consistency_score_2": 0.45995670995671, + "consistency_score_3": 0.2649350649350649, + "consistency_score_4": 0.17435064935064937, + "consistency_score_5": 0.12662337662337664, + "consistency_score_6": 0.09821428571428573, + "consistency_score_7": 0.07954545454545454, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3693181818181818, + "English,Chinese": 0.4715909090909091, + "English,Indonesian": 0.4318181818181818, + "English,Filipino": 0.4375, + "English,Spanish": 0.5568181818181818, + "English,Malay": 0.4715909090909091, + "Vietnamese,Chinese": 0.42045454545454547, + "Vietnamese,Indonesian": 0.4431818181818182, + "Vietnamese,Filipino": 0.4715909090909091, + "Vietnamese,Spanish": 0.44886363636363635, + "Vietnamese,Malay": 0.45454545454545453, + "Chinese,Indonesian": 0.4772727272727273, + "Chinese,Filipino": 0.45454545454545453, + "Chinese,Spanish": 0.45454545454545453, + "Chinese,Malay": 0.5, + "Indonesian,Filipino": 0.4147727272727273, + "Indonesian,Spanish": 0.44886363636363635, + "Indonesian,Malay": 0.5965909090909091, + "Filipino,Spanish": 0.3693181818181818, + "Filipino,Malay": 0.5227272727272727, + "Spanish,Malay": 0.4431818181818182 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2215909090909091, + "English,Vietnamese,Indonesian": 0.2159090909090909, + "English,Vietnamese,Filipino": 0.23295454545454544, + "English,Vietnamese,Spanish": 0.26704545454545453, + "English,Vietnamese,Malay": 0.2556818181818182, + "English,Chinese,Indonesian": 0.25, + "English,Chinese,Filipino": 0.26136363636363635, + "English,Chinese,Spanish": 0.30113636363636365, + "English,Chinese,Malay": 0.2727272727272727, + "English,Indonesian,Filipino": 0.23295454545454544, + "English,Indonesian,Spanish": 0.30113636363636365, + "English,Indonesian,Malay": 0.30113636363636365, + "English,Filipino,Spanish": 0.2784090909090909, + "English,Filipino,Malay": 0.2784090909090909, + "English,Spanish,Malay": 0.3068181818181818, + "Vietnamese,Chinese,Indonesian": 0.2556818181818182, + "Vietnamese,Chinese,Filipino": 0.26704545454545453, + "Vietnamese,Chinese,Spanish": 0.25, + "Vietnamese,Chinese,Malay": 0.26704545454545453, + "Vietnamese,Indonesian,Filipino": 0.22727272727272727, + "Vietnamese,Indonesian,Spanish": 0.25, + "Vietnamese,Indonesian,Malay": 0.30113636363636365, + "Vietnamese,Filipino,Spanish": 0.23863636363636365, + "Vietnamese,Filipino,Malay": 0.2784090909090909, + "Vietnamese,Spanish,Malay": 0.26704545454545453, + "Chinese,Indonesian,Filipino": 0.24431818181818182, + "Chinese,Indonesian,Spanish": 0.26136363636363635, + "Chinese,Indonesian,Malay": 0.32386363636363635, + "Chinese,Filipino,Spanish": 0.22727272727272727, + "Chinese,Filipino,Malay": 0.29545454545454547, + "Chinese,Spanish,Malay": 0.2784090909090909, + "Indonesian,Filipino,Spanish": 0.20454545454545456, + "Indonesian,Filipino,Malay": 0.3068181818181818, + "Indonesian,Spanish,Malay": 0.29545454545454547, + "Filipino,Spanish,Malay": 0.2556818181818182 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.14204545454545456, + "English,Vietnamese,Chinese,Filipino": 0.1590909090909091, + "English,Vietnamese,Chinese,Spanish": 0.16477272727272727, + "English,Vietnamese,Chinese,Malay": 0.1590909090909091, + "English,Vietnamese,Indonesian,Filipino": 0.14204545454545456, + "English,Vietnamese,Indonesian,Spanish": 0.18181818181818182, + "English,Vietnamese,Indonesian,Malay": 0.17613636363636365, + "English,Vietnamese,Filipino,Spanish": 0.18181818181818182, + "English,Vietnamese,Filipino,Malay": 0.17613636363636365, + "English,Vietnamese,Spanish,Malay": 0.19318181818181818, + "English,Chinese,Indonesian,Filipino": 0.1590909090909091, + "English,Chinese,Indonesian,Spanish": 0.1875, + "English,Chinese,Indonesian,Malay": 0.17613636363636365, + "English,Chinese,Filipino,Spanish": 0.17613636363636365, + "English,Chinese,Filipino,Malay": 0.17045454545454544, + "English,Chinese,Spanish,Malay": 0.19886363636363635, + "English,Indonesian,Filipino,Spanish": 0.18181818181818182, + "English,Indonesian,Filipino,Malay": 0.19318181818181818, + "English,Indonesian,Spanish,Malay": 0.20454545454545456, + "English,Filipino,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Malay": 0.18181818181818182, + "Vietnamese,Chinese,Filipino,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Filipino,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Spanish,Malay": 0.1875, + "Vietnamese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "Vietnamese,Indonesian,Filipino,Malay": 0.17613636363636365, + "Vietnamese,Indonesian,Spanish,Malay": 0.19318181818181818, + "Vietnamese,Filipino,Spanish,Malay": 0.17613636363636365, + "Chinese,Indonesian,Filipino,Spanish": 0.1590909090909091, + "Chinese,Indonesian,Filipino,Malay": 0.18181818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.1875, + "Chinese,Filipino,Spanish,Malay": 0.17045454545454544, + "Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.125, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.11363636363636363, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Chinese,Spanish,Malay": 0.13068181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.13068181818181818, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.125, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.14772727272727273, + "English,Vietnamese,Filipino,Spanish,Malay": 0.14772727272727273, + "English,Chinese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Chinese,Indonesian,Spanish,Malay": 0.13636363636363635, + "English,Chinese,Filipino,Spanish,Malay": 0.13068181818181818, + "English,Indonesian,Filipino,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.125, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.125 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454 + } + }, + "AC3_2": 0.39371799124641715, + "AC3_3": 0.2993935701333285, + "AC3_4": 0.231448576425855, + "AC3_5": 0.18513210923071569, + "AC3_6": 0.15281782434291216, + "AC3_7": 0.12922326712380297 + }, + "prompt_5": { + "overall_acc": 0.3457792207792208, + "language_acc": { + "English": 0.4431818181818182, + "Vietnamese": 0.25, + "Chinese": 0.375, + "Indonesian": 0.3352272727272727, + "Filipino": 0.29545454545454547, + "Spanish": 0.375, + "Malay": 0.3465909090909091 + }, + "consistency_score_2": 0.4734848484848485, + "consistency_score_3": 0.2811688311688312, + "consistency_score_4": 0.19204545454545455, + "consistency_score_5": 0.14312770562770563, + "consistency_score_6": 0.11201298701298702, + "consistency_score_7": 0.09090909090909091, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.38636363636363635, + "English,Chinese": 0.5056818181818182, + "English,Indonesian": 0.48295454545454547, + "English,Filipino": 0.4375, + "English,Spanish": 0.5795454545454546, + "English,Malay": 0.4772727272727273, + "Vietnamese,Chinese": 0.4090909090909091, + "Vietnamese,Indonesian": 0.42613636363636365, + "Vietnamese,Filipino": 0.5397727272727273, + "Vietnamese,Spanish": 0.4659090909090909, + "Vietnamese,Malay": 0.4318181818181818, + "Chinese,Indonesian": 0.5056818181818182, + "Chinese,Filipino": 0.4772727272727273, + "Chinese,Spanish": 0.4943181818181818, + "Chinese,Malay": 0.4772727272727273, + "Indonesian,Filipino": 0.4375, + "Indonesian,Spanish": 0.4659090909090909, + "Indonesian,Malay": 0.5511363636363636, + "Filipino,Spanish": 0.44886363636363635, + "Filipino,Malay": 0.4772727272727273, + "Spanish,Malay": 0.4659090909090909 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.23295454545454544, + "English,Vietnamese,Indonesian": 0.23863636363636365, + "English,Vietnamese,Filipino": 0.26704545454545453, + "English,Vietnamese,Spanish": 0.2897727272727273, + "English,Vietnamese,Malay": 0.2556818181818182, + "English,Chinese,Indonesian": 0.32386363636363635, + "English,Chinese,Filipino": 0.2840909090909091, + "English,Chinese,Spanish": 0.32954545454545453, + "English,Chinese,Malay": 0.2897727272727273, + "English,Indonesian,Filipino": 0.26136363636363635, + "English,Indonesian,Spanish": 0.32386363636363635, + "English,Indonesian,Malay": 0.3125, + "English,Filipino,Spanish": 0.3068181818181818, + "English,Filipino,Malay": 0.2556818181818182, + "English,Spanish,Malay": 0.3125, + "Vietnamese,Chinese,Indonesian": 0.24431818181818182, + "Vietnamese,Chinese,Filipino": 0.29545454545454547, + "Vietnamese,Chinese,Spanish": 0.26704545454545453, + "Vietnamese,Chinese,Malay": 0.25, + "Vietnamese,Indonesian,Filipino": 0.2784090909090909, + "Vietnamese,Indonesian,Spanish": 0.25, + "Vietnamese,Indonesian,Malay": 0.26704545454545453, + "Vietnamese,Filipino,Spanish": 0.29545454545454547, + "Vietnamese,Filipino,Malay": 0.2897727272727273, + "Vietnamese,Spanish,Malay": 0.26136363636363635, + "Chinese,Indonesian,Filipino": 0.2897727272727273, + "Chinese,Indonesian,Spanish": 0.29545454545454547, + "Chinese,Indonesian,Malay": 0.3125, + "Chinese,Filipino,Spanish": 0.2784090909090909, + "Chinese,Filipino,Malay": 0.2784090909090909, + "Chinese,Spanish,Malay": 0.2897727272727273, + "Indonesian,Filipino,Spanish": 0.2556818181818182, + "Indonesian,Filipino,Malay": 0.2840909090909091, + "Indonesian,Spanish,Malay": 0.29545454545454547, + "Filipino,Spanish,Malay": 0.2784090909090909 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.17045454545454544, + "English,Vietnamese,Chinese,Filipino": 0.1875, + "English,Vietnamese,Chinese,Spanish": 0.1875, + "English,Vietnamese,Chinese,Malay": 0.17045454545454544, + "English,Vietnamese,Indonesian,Filipino": 0.18181818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.1875, + "English,Vietnamese,Indonesian,Malay": 0.17613636363636365, + "English,Vietnamese,Filipino,Spanish": 0.2159090909090909, + "English,Vietnamese,Filipino,Malay": 0.1875, + "English,Vietnamese,Spanish,Malay": 0.19318181818181818, + "English,Chinese,Indonesian,Filipino": 0.19318181818181818, + "English,Chinese,Indonesian,Spanish": 0.22727272727272727, + "English,Chinese,Indonesian,Malay": 0.21022727272727273, + "English,Chinese,Filipino,Spanish": 0.20454545454545456, + "English,Chinese,Filipino,Malay": 0.17613636363636365, + "English,Chinese,Spanish,Malay": 0.2215909090909091, + "English,Indonesian,Filipino,Spanish": 0.20454545454545456, + "English,Indonesian,Filipino,Malay": 0.1875, + "English,Indonesian,Spanish,Malay": 0.21022727272727273, + "English,Filipino,Spanish,Malay": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian,Filipino": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian,Spanish": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Filipino,Spanish": 0.21022727272727273, + "Vietnamese,Chinese,Filipino,Malay": 0.19318181818181818, + "Vietnamese,Chinese,Spanish,Malay": 0.1875, + "Vietnamese,Indonesian,Filipino,Spanish": 0.1875, + "Vietnamese,Indonesian,Filipino,Malay": 0.1875, + "Vietnamese,Indonesian,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Filipino,Spanish,Malay": 0.19886363636363635, + "Chinese,Indonesian,Filipino,Spanish": 0.19318181818181818, + "Chinese,Indonesian,Filipino,Malay": 0.19318181818181818, + "Chinese,Indonesian,Spanish,Malay": 0.19886363636363635, + "Chinese,Filipino,Spanish,Malay": 0.18181818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.17613636363636365 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.14204545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.14204545454545456, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.125, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.1590909090909091, + "English,Vietnamese,Chinese,Filipino,Malay": 0.13636363636363635, + "English,Vietnamese,Chinese,Spanish,Malay": 0.14772727272727273, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.13636363636363635, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.13636363636363635, + "English,Vietnamese,Filipino,Spanish,Malay": 0.1590909090909091, + "English,Chinese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "English,Chinese,Indonesian,Filipino,Malay": 0.13068181818181818, + "English,Chinese,Indonesian,Spanish,Malay": 0.1590909090909091, + "English,Chinese,Filipino,Spanish,Malay": 0.14204545454545456, + "English,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.125, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.10795454545454546, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.11931818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091 + } + }, + "AC3_2": 0.39967875580566425, + "AC3_3": 0.31014480077373685, + "AC3_4": 0.2469404274710296, + "AC3_5": 0.202454020757935, + "AC3_6": 0.16921110800393285, + "AC3_7": 0.14396755657399377 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5048543689320388 + }, + "prompt_2": { + "accuracy": 0.47572815533980584 + }, + "prompt_3": { + "accuracy": 0.4174757281553398 + }, + "prompt_4": { + "accuracy": 0.4368932038834951 + }, + "prompt_5": { + "accuracy": 0.5048543689320388 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3523809523809524 + }, + "prompt_2": { + "accuracy": 0.34285714285714286 + }, + "prompt_3": { + "accuracy": 0.3619047619047619 + }, + "prompt_4": { + "accuracy": 0.37142857142857144 + }, + "prompt_5": { + "accuracy": 0.34285714285714286 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6261682242990654 + }, + "prompt_2": { + "accuracy": 0.5514018691588785 + }, + "prompt_3": { + "accuracy": 0.5233644859813084 + }, + "prompt_4": { + "accuracy": 0.5887850467289719 + }, + "prompt_5": { + "accuracy": 0.5887850467289719 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.41, + "category_acc": { + "brand": 0.5, + "demographics": 0.2, + "biology": 0.2, + "history": 0.2, + "literature": 0.2, + "politics": 0.7, + "culture": 0.5, + "film": 0.4, + "law": 0.5, + "geography": 0.7 + } + }, + "prompt_2": { + "accuracy": 0.47, + "category_acc": { + "brand": 0.7, + "demographics": 0.2, + "biology": 0.3, + "history": 0.26666666666666666, + "literature": 0.3, + "politics": 0.7, + "culture": 0.6, + "film": 0.4, + "law": 0.5, + "geography": 0.7 + } + }, + "prompt_3": { + "accuracy": 0.37, + "category_acc": { + "brand": 0.5, + "demographics": 0.2, + "biology": 0.1, + "history": 0.4, + "literature": 0.2, + "politics": 0.4, + "culture": 0.2, + "film": 0.3, + "law": 0.6, + "geography": 0.7 + } + }, + "prompt_4": { + "accuracy": 0.45, + "category_acc": { + "brand": 0.6, + "demographics": 0.2, + "biology": 0.4, + "history": 0.4, + "literature": 0.2, + "politics": 0.5, + "culture": 0.5, + "film": 0.6, + "law": 0.5, + "geography": 0.5 + } + }, + "prompt_5": { + "accuracy": 0.44, + "category_acc": { + "brand": 0.4, + "demographics": 0.2, + "biology": 0.2, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.6, + "culture": 0.6, + "film": 0.5, + "law": 0.5, + "geography": 0.8 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.23114191212021232 + }, + "prompt_2": { + "bleu_score": 0.20285199024756406 + }, + "prompt_3": { + "bleu_score": 0.20618902370150952 + }, + "prompt_4": { + "bleu_score": 0.23467767402430645 + }, + "prompt_5": { + "bleu_score": 0.20671523069201078 + } }, "indommlu": { "prompt_1": -1, @@ -2764,179 +24679,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.27432167196415835 + }, + "prompt_2": { + "bleu_score": 0.27598679208417676 + }, + "prompt_3": { + "bleu_score": 0.2251444671046283 + }, + "prompt_4": { + "bleu_score": 0.27347223860585895 + }, + "prompt_5": { + "bleu_score": 0.26079340912892385 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.14722673241590406 + }, + "prompt_2": { + "bleu_score": 0.14399792877850104 + }, + "prompt_3": { + "bleu_score": 0.10762950846287851 + }, + "prompt_4": { + "bleu_score": 0.1527037738689188 + }, + "prompt_5": { + "bleu_score": 0.13998911077194784 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.18625252225832042 + }, + "prompt_2": { + "bleu_score": 0.19098443238075288 + }, + "prompt_3": { + "bleu_score": 0.17375534516558966 + }, + "prompt_4": { + "bleu_score": 0.18263631888380907 + }, + "prompt_5": { + "bleu_score": 0.1750583058226331 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.26150676487948166 + }, + "prompt_2": { + "bleu_score": 0.26653421085493356 + }, + "prompt_3": { + "bleu_score": 0.25224612376703553 + }, + "prompt_4": { + "bleu_score": 0.26372483339844877 + }, + "prompt_5": { + "bleu_score": 0.24360404017317555 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4982497082847141 + }, + "prompt_2": { + "accuracy": 0.4982497082847141 + }, + "prompt_3": { + "accuracy": 0.5029171528588098 + }, + "prompt_4": { + "accuracy": 0.49941656942823803 + }, + "prompt_5": { + "accuracy": 0.4970828471411902 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5090454057919199, + "category_acc": { + "high_school_european_history": 0.6585365853658537, + "business_ethics": 0.5454545454545454, + "clinical_knowledge": 0.5454545454545454, + "medical_genetics": 0.6161616161616161, + "high_school_us_history": 0.7536945812807881, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.690677966101695, + "virology": 0.45454545454545453, + "high_school_microeconomics": 0.510548523206751, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.42424242424242425, + "high_school_biology": 0.5728155339805825, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.38434163701067614, + "philosophy": 0.5516129032258065, + "professional_medicine": 0.4907749077490775, + "nutrition": 0.5475409836065573, + "global_facts": 0.29292929292929293, + "machine_learning": 0.3963963963963964, + "security_studies": 0.5737704918032787, + "public_relations": 0.6055045871559633, + "professional_psychology": 0.5171849427168577, + "prehistory": 0.5820433436532507, + "anatomy": 0.5, + "human_sexuality": 0.6, + "college_medicine": 0.48255813953488375, + "high_school_government_and_politics": 0.75, + "college_chemistry": 0.41414141414141414, + "logical_fallacies": 0.6358024691358025, + "high_school_geography": 0.6446700507614214, + "elementary_mathematics": 0.29708222811671087, + "human_aging": 0.5900900900900901, + "college_mathematics": 0.3434343434343434, + "high_school_psychology": 0.7022058823529411, + "formal_logic": 0.352, + "high_school_statistics": 0.3627906976744186, + "international_law": 0.5916666666666667, + "high_school_mathematics": 0.26022304832713755, + "high_school_computer_science": 0.48484848484848486, + "conceptual_physics": 0.4358974358974359, + "miscellaneous": 0.7161125319693095, + "high_school_chemistry": 0.3910891089108911, + "marketing": 0.7939914163090128, + "professional_law": 0.40574037834311805, + "management": 0.6568627450980392, + "college_physics": 0.2079207920792079, + "jurisprudence": 0.7102803738317757, + "world_religions": 0.7352941176470589, + "sociology": 0.71, + "us_foreign_policy": 0.7474747474747475, + "high_school_macroeconomics": 0.4755784061696658, + "computer_security": 0.5858585858585859, + "moral_scenarios": 0.26286353467561524, + "moral_disputes": 0.5565217391304348, + "electrical_engineering": 0.4583333333333333, + "astronomy": 0.5231788079470199, + "college_biology": 0.5524475524475524 + } + }, + "prompt_2": { + "accuracy": 0.5036825169824812, + "category_acc": { + "high_school_european_history": 0.6524390243902439, + "business_ethics": 0.5151515151515151, + "clinical_knowledge": 0.5037878787878788, + "medical_genetics": 0.6161616161616161, + "high_school_us_history": 0.7093596059113301, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.690677966101695, + "virology": 0.4666666666666667, + "high_school_microeconomics": 0.5274261603375527, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.40404040404040403, + "high_school_biology": 0.5598705501618123, + "abstract_algebra": 0.31313131313131315, + "professional_accounting": 0.38434163701067614, + "philosophy": 0.5580645161290323, + "professional_medicine": 0.46494464944649444, + "nutrition": 0.5475409836065573, + "global_facts": 0.2727272727272727, + "machine_learning": 0.38738738738738737, + "security_studies": 0.5204918032786885, + "public_relations": 0.5963302752293578, + "professional_psychology": 0.5188216039279869, + "prehistory": 0.5603715170278638, + "anatomy": 0.4925373134328358, + "human_sexuality": 0.6230769230769231, + "college_medicine": 0.4476744186046512, + "high_school_government_and_politics": 0.734375, + "college_chemistry": 0.36363636363636365, + "logical_fallacies": 0.6049382716049383, + "high_school_geography": 0.6294416243654822, + "elementary_mathematics": 0.3050397877984085, + "human_aging": 0.5855855855855856, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.6764705882352942, + "formal_logic": 0.328, + "high_school_statistics": 0.3395348837209302, + "international_law": 0.6, + "high_school_mathematics": 0.24907063197026022, + "high_school_computer_science": 0.46464646464646464, + "conceptual_physics": 0.4444444444444444, + "miscellaneous": 0.7148337595907929, + "high_school_chemistry": 0.38613861386138615, + "marketing": 0.7982832618025751, + "professional_law": 0.40378343118069143, + "management": 0.6568627450980392, + "college_physics": 0.18811881188118812, + "jurisprudence": 0.6542056074766355, + "world_religions": 0.7529411764705882, + "sociology": 0.71, + "us_foreign_policy": 0.7373737373737373, + "high_school_macroeconomics": 0.45758354755784064, + "computer_security": 0.5656565656565656, + "moral_scenarios": 0.28859060402684567, + "moral_disputes": 0.5826086956521739, + "electrical_engineering": 0.4652777777777778, + "astronomy": 0.4966887417218543, + "college_biology": 0.5804195804195804 + } + }, + "prompt_3": { + "accuracy": 0.501394351090454, + "category_acc": { + "high_school_european_history": 0.6463414634146342, + "business_ethics": 0.47474747474747475, + "clinical_knowledge": 0.5151515151515151, + "medical_genetics": 0.5757575757575758, + "high_school_us_history": 0.7241379310344828, + "high_school_physics": 0.30666666666666664, + "high_school_world_history": 0.6567796610169492, + "virology": 0.48484848484848486, + "high_school_microeconomics": 0.5147679324894515, + "econometrics": 0.26548672566371684, + "college_computer_science": 0.43434343434343436, + "high_school_biology": 0.5631067961165048, + "abstract_algebra": 0.31313131313131315, + "professional_accounting": 0.3879003558718861, + "philosophy": 0.5516129032258065, + "professional_medicine": 0.47232472324723246, + "nutrition": 0.5344262295081967, + "global_facts": 0.25252525252525254, + "machine_learning": 0.3783783783783784, + "security_studies": 0.5, + "public_relations": 0.6055045871559633, + "professional_psychology": 0.5106382978723404, + "prehistory": 0.5603715170278638, + "anatomy": 0.5298507462686567, + "human_sexuality": 0.6307692307692307, + "college_medicine": 0.46511627906976744, + "high_school_government_and_politics": 0.7291666666666666, + "college_chemistry": 0.42424242424242425, + "logical_fallacies": 0.6234567901234568, + "high_school_geography": 0.6294416243654822, + "elementary_mathematics": 0.3103448275862069, + "human_aging": 0.5675675675675675, + "college_mathematics": 0.31313131313131315, + "high_school_psychology": 0.6727941176470589, + "formal_logic": 0.304, + "high_school_statistics": 0.3488372093023256, + "international_law": 0.6166666666666667, + "high_school_mathematics": 0.25650557620817843, + "high_school_computer_science": 0.494949494949495, + "conceptual_physics": 0.43162393162393164, + "miscellaneous": 0.7122762148337596, + "high_school_chemistry": 0.4158415841584158, + "marketing": 0.7854077253218884, + "professional_law": 0.3998695368558382, + "management": 0.6274509803921569, + "college_physics": 0.18811881188118812, + "jurisprudence": 0.6635514018691588, + "world_religions": 0.7235294117647059, + "sociology": 0.71, + "us_foreign_policy": 0.7474747474747475, + "high_school_macroeconomics": 0.4524421593830334, + "computer_security": 0.5858585858585859, + "moral_scenarios": 0.2807606263982103, + "moral_disputes": 0.5681159420289855, + "electrical_engineering": 0.4722222222222222, + "astronomy": 0.48344370860927155, + "college_biology": 0.5664335664335665 + } + }, + "prompt_4": { + "accuracy": 0.5013228459063283, + "category_acc": { + "high_school_european_history": 0.6524390243902439, + "business_ethics": 0.5454545454545454, + "clinical_knowledge": 0.5378787878787878, + "medical_genetics": 0.5959595959595959, + "high_school_us_history": 0.7438423645320197, + "high_school_physics": 0.26, + "high_school_world_history": 0.6991525423728814, + "virology": 0.46060606060606063, + "high_school_microeconomics": 0.48945147679324896, + "econometrics": 0.26548672566371684, + "college_computer_science": 0.3939393939393939, + "high_school_biology": 0.5469255663430421, + "abstract_algebra": 0.31313131313131315, + "professional_accounting": 0.3879003558718861, + "philosophy": 0.5580645161290323, + "professional_medicine": 0.46863468634686345, + "nutrition": 0.5344262295081967, + "global_facts": 0.25252525252525254, + "machine_learning": 0.36936936936936937, + "security_studies": 0.5409836065573771, + "public_relations": 0.5871559633027523, + "professional_psychology": 0.5155482815057283, + "prehistory": 0.5603715170278638, + "anatomy": 0.5, + "human_sexuality": 0.6230769230769231, + "college_medicine": 0.47093023255813954, + "high_school_government_and_politics": 0.6875, + "college_chemistry": 0.35353535353535354, + "logical_fallacies": 0.5617283950617284, + "high_school_geography": 0.6446700507614214, + "elementary_mathematics": 0.28116710875331563, + "human_aging": 0.5585585585585585, + "college_mathematics": 0.3333333333333333, + "high_school_psychology": 0.6801470588235294, + "formal_logic": 0.36, + "high_school_statistics": 0.35348837209302325, + "international_law": 0.6166666666666667, + "high_school_mathematics": 0.2379182156133829, + "high_school_computer_science": 0.48484848484848486, + "conceptual_physics": 0.4230769230769231, + "miscellaneous": 0.7071611253196931, + "high_school_chemistry": 0.38613861386138615, + "marketing": 0.7939914163090128, + "professional_law": 0.40769732550554466, + "management": 0.6470588235294118, + "college_physics": 0.16831683168316833, + "jurisprudence": 0.6728971962616822, + "world_religions": 0.7294117647058823, + "sociology": 0.715, + "us_foreign_policy": 0.696969696969697, + "high_school_macroeconomics": 0.4910025706940874, + "computer_security": 0.5858585858585859, + "moral_scenarios": 0.2807606263982103, + "moral_disputes": 0.5565217391304348, + "electrical_engineering": 0.4305555555555556, + "astronomy": 0.5231788079470199, + "college_biology": 0.5944055944055944 + } + }, + "prompt_5": { + "accuracy": 0.4993922059349303, + "category_acc": { + "high_school_european_history": 0.6646341463414634, + "business_ethics": 0.5050505050505051, + "clinical_knowledge": 0.49242424242424243, + "medical_genetics": 0.5959595959595959, + "high_school_us_history": 0.7241379310344828, + "high_school_physics": 0.25333333333333335, + "high_school_world_history": 0.690677966101695, + "virology": 0.46060606060606063, + "high_school_microeconomics": 0.4936708860759494, + "econometrics": 0.26548672566371684, + "college_computer_science": 0.3333333333333333, + "high_school_biology": 0.5436893203883495, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.40569395017793597, + "philosophy": 0.5483870967741935, + "professional_medicine": 0.4833948339483395, + "nutrition": 0.5377049180327869, + "global_facts": 0.31313131313131315, + "machine_learning": 0.40540540540540543, + "security_studies": 0.5491803278688525, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.4877250409165303, + "prehistory": 0.5386996904024768, + "anatomy": 0.5149253731343284, + "human_sexuality": 0.5923076923076923, + "college_medicine": 0.46511627906976744, + "high_school_government_and_politics": 0.6927083333333334, + "college_chemistry": 0.35353535353535354, + "logical_fallacies": 0.5617283950617284, + "high_school_geography": 0.6395939086294417, + "elementary_mathematics": 0.3076923076923077, + "human_aging": 0.581081081081081, + "college_mathematics": 0.31313131313131315, + "high_school_psychology": 0.6709558823529411, + "formal_logic": 0.32, + "high_school_statistics": 0.33488372093023255, + "international_law": 0.6166666666666667, + "high_school_mathematics": 0.2527881040892193, + "high_school_computer_science": 0.42424242424242425, + "conceptual_physics": 0.4230769230769231, + "miscellaneous": 0.69693094629156, + "high_school_chemistry": 0.3712871287128713, + "marketing": 0.7811158798283262, + "professional_law": 0.4142204827136334, + "management": 0.6274509803921569, + "college_physics": 0.2079207920792079, + "jurisprudence": 0.6915887850467289, + "world_religions": 0.7352941176470589, + "sociology": 0.73, + "us_foreign_policy": 0.7171717171717171, + "high_school_macroeconomics": 0.4704370179948586, + "computer_security": 0.5959595959595959, + "moral_scenarios": 0.29082774049217003, + "moral_disputes": 0.5623188405797102, + "electrical_engineering": 0.4722222222222222, + "astronomy": 0.5298013245033113, + "college_biology": 0.5594405594405595 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3558692421991085 + }, + "prompt_2": { + "accuracy": 0.3588410104011887 + }, + "prompt_3": { + "accuracy": 0.3588410104011887 + }, + "prompt_4": { + "accuracy": 0.35661218424962854 + }, + "prompt_5": { + "accuracy": 0.36701337295690933 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.33748443337484435, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.25, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.4523809523809524, + "college_physics": 0.25, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.08695652173913043, + "discrete_mathematics": 0.09523809523809523, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.5172413793103449, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.4583333333333333, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.23076923076923078, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.25, + "college_economics": 0.36666666666666664, + "business_administration": 0.42105263157894735, + "marxism": 0.5, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.4411764705882353, + "teacher_qualification": 0.46938775510204084, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.37037037037037035, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.23529411764705882, + "legal_professional": 0.25, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.48, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.2692307692307692, + "sports_science": 0.20833333333333334, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.25, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.4117647058823529, + "accountant": 0.2777777777777778, + "fire_engineer": 0.5833333333333334, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.46296296296296297, + "physician": 0.3333333333333333 + } + }, + "prompt_2": { + "accuracy": 0.35367372353673726, + "category_acc": { + "computer_network": 0.125, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.42857142857142855, + "college_physics": 0.25, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.5517241379310345, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.375, + "high_school_chemistry": 0.375, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.31666666666666665, + "business_administration": 0.39473684210526316, + "marxism": 0.5, + "mao_zedong_thought": 0.3793103448275862, + "education_science": 0.5294117647058824, + "teacher_qualification": 0.5510204081632653, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.4230769230769231, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.3333333333333333, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.23684210526315788, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.25, + "high_school_chinese": 0.25, + "high_school_history": 0.52, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.28846153846153844, + "sports_science": 0.20833333333333334, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.43137254901960786, + "accountant": 0.2777777777777778, + "fire_engineer": 0.5, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.42592592592592593, + "physician": 0.35185185185185186 + } + }, + "prompt_3": { + "accuracy": 0.3437110834371108, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.40476190476190477, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.4827586206896552, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.375, + "high_school_chemistry": 0.375, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.3333333333333333, + "business_administration": 0.39473684210526316, + "marxism": 0.5416666666666666, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.4411764705882353, + "teacher_qualification": 0.5510204081632653, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.5, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.4074074074074074, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.25, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.25, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.44, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.23076923076923078, + "sports_science": 0.3333333333333333, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.25, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.47058823529411764, + "accountant": 0.2777777777777778, + "fire_engineer": 0.4722222222222222, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.35185185185185186, + "physician": 0.3333333333333333 + } + }, + "prompt_4": { + "accuracy": 0.34184308841843086, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.38095238095238093, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.1724137931034483, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.4827586206896552, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.5, + "high_school_chemistry": 0.375, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.23076923076923078, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.48, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.35, + "business_administration": 0.5, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.3793103448275862, + "education_science": 0.47058823529411764, + "teacher_qualification": 0.5102040816326531, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.2857142857142857, + "ideological_and_moral_cultivation": 0.4583333333333333, + "logic": 0.3333333333333333, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.25, + "art_studies": 0.2631578947368421, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.48, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.3269230769230769, + "sports_science": 0.20833333333333334, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.25, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.3148148148148148, + "fire_engineer": 0.4722222222222222, + "environmental_impact_assessment_engineer": 0.3611111111111111, + "tax_accountant": 0.4074074074074074, + "physician": 0.2962962962962963 + } + }, + "prompt_5": { + "accuracy": 0.34869240348692404, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.40476190476190477, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.4827586206896552, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.375, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.31666666666666665, + "business_administration": 0.4473684210526316, + "marxism": 0.5, + "mao_zedong_thought": 0.3793103448275862, + "education_science": 0.47058823529411764, + "teacher_qualification": 0.5102040816326531, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.375, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.3333333333333333, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.25, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.48, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.28846153846153844, + "sports_science": 0.2916666666666667, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.375, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.45098039215686275, + "accountant": 0.2962962962962963, + "fire_engineer": 0.4722222222222222, + "environmental_impact_assessment_engineer": 0.3611111111111111, + "tax_accountant": 0.4074074074074074, + "physician": 0.3333333333333333 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.30824372759856633 + }, + "prompt_2": { + "accuracy": 0.3225806451612903 + }, + "prompt_3": { + "accuracy": 0.2939068100358423 + }, + "prompt_4": { + "accuracy": 0.2974910394265233 + }, + "prompt_5": { + "accuracy": 0.30824372759856633 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.350716629252288, + "category_acc": { + "agronomy": 0.35502958579881655, + "anatomy": 0.23648648648648649, + "ancient_chinese": 0.2804878048780488, + "arts": 0.3125, + "astronomy": 0.3090909090909091, + "business_ethics": 0.45454545454545453, + "chinese_civil_service_exam": 0.30625, + "chinese_driving_rule": 0.4198473282442748, + "chinese_food_culture": 0.2867647058823529, + "chinese_foreign_policy": 0.3925233644859813, + "chinese_history": 0.3653250773993808, + "chinese_literature": 0.28921568627450983, + "chinese_teacher_qualification": 0.41899441340782123, + "clinical_knowledge": 0.28270042194092826, + "college_actuarial_science": 0.18867924528301888, + "college_education": 0.4392523364485981, + "college_engineering_hydrology": 0.33962264150943394, + "college_law": 0.2962962962962963, + "college_mathematics": 0.2, + "college_medical_statistics": 0.3490566037735849, + "college_medicine": 0.336996336996337, + "computer_science": 0.36764705882352944, + "computer_security": 0.4269005847953216, + "conceptual_physics": 0.38095238095238093, + "construction_project_management": 0.35251798561151076, + "economics": 0.29559748427672955, + "education": 0.4294478527607362, + "electrical_engineering": 0.37790697674418605, + "elementary_chinese": 0.24603174603174602, + "elementary_commonsense": 0.31313131313131315, + "elementary_information_and_technology": 0.47478991596638653, + "elementary_mathematics": 0.2391304347826087, + "ethnology": 0.34814814814814815, + "food_science": 0.35664335664335667, + "genetics": 0.36363636363636365, + "global_facts": 0.3087248322147651, + "high_school_biology": 0.3136094674556213, + "high_school_chemistry": 0.19696969696969696, + "high_school_geography": 0.3389830508474576, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.3356643356643357, + "human_sexuality": 0.3968253968253968, + "international_law": 0.32972972972972975, + "journalism": 0.4011627906976744, + "jurisprudence": 0.35036496350364965, + "legal_and_moral_basis": 0.6448598130841121, + "logical": 0.36585365853658536, + "machine_learning": 0.38524590163934425, + "management": 0.37142857142857144, + "marketing": 0.4777777777777778, + "marxist_theory": 0.4021164021164021, + "modern_chinese": 0.35344827586206895, + "nutrition": 0.4068965517241379, + "philosophy": 0.44761904761904764, + "professional_accounting": 0.4, + "professional_law": 0.27488151658767773, + "professional_medicine": 0.26063829787234044, + "professional_psychology": 0.41810344827586204, + "public_relations": 0.41954022988505746, + "security_study": 0.3925925925925926, + "sociology": 0.3805309734513274, + "sports_science": 0.4, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.35502958579881655, + "world_history": 0.33540372670807456, + "world_religions": 0.3625 + } + }, + "prompt_2": { + "accuracy": 0.3393196339146952, + "category_acc": { + "agronomy": 0.3431952662721893, + "anatomy": 0.22297297297297297, + "ancient_chinese": 0.2865853658536585, + "arts": 0.29375, + "astronomy": 0.2909090909090909, + "business_ethics": 0.45454545454545453, + "chinese_civil_service_exam": 0.29375, + "chinese_driving_rule": 0.40458015267175573, + "chinese_food_culture": 0.27205882352941174, + "chinese_foreign_policy": 0.34579439252336447, + "chinese_history": 0.3498452012383901, + "chinese_literature": 0.28921568627450983, + "chinese_teacher_qualification": 0.39106145251396646, + "clinical_knowledge": 0.25316455696202533, + "college_actuarial_science": 0.19811320754716982, + "college_education": 0.4205607476635514, + "college_engineering_hydrology": 0.3584905660377358, + "college_law": 0.3148148148148148, + "college_mathematics": 0.20952380952380953, + "college_medical_statistics": 0.3584905660377358, + "college_medicine": 0.32234432234432236, + "computer_science": 0.35294117647058826, + "computer_security": 0.4152046783625731, + "conceptual_physics": 0.29931972789115646, + "construction_project_management": 0.30935251798561153, + "economics": 0.31446540880503143, + "education": 0.3987730061349693, + "electrical_engineering": 0.3546511627906977, + "elementary_chinese": 0.2777777777777778, + "elementary_commonsense": 0.30303030303030304, + "elementary_information_and_technology": 0.46638655462184875, + "elementary_mathematics": 0.22608695652173913, + "ethnology": 0.3333333333333333, + "food_science": 0.34265734265734266, + "genetics": 0.36363636363636365, + "global_facts": 0.28859060402684567, + "high_school_biology": 0.30177514792899407, + "high_school_chemistry": 0.21212121212121213, + "high_school_geography": 0.3135593220338983, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.38461538461538464, + "human_sexuality": 0.35714285714285715, + "international_law": 0.3567567567567568, + "journalism": 0.3546511627906977, + "jurisprudence": 0.36253041362530414, + "legal_and_moral_basis": 0.6448598130841121, + "logical": 0.34146341463414637, + "machine_learning": 0.319672131147541, + "management": 0.3904761904761905, + "marketing": 0.45, + "marxist_theory": 0.4074074074074074, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.35172413793103446, + "philosophy": 0.3904761904761905, + "professional_accounting": 0.38285714285714284, + "professional_law": 0.2796208530805687, + "professional_medicine": 0.23670212765957446, + "professional_psychology": 0.3879310344827586, + "public_relations": 0.39655172413793105, + "security_study": 0.37037037037037035, + "sociology": 0.4026548672566372, + "sports_science": 0.3878787878787879, + "traditional_chinese_medicine": 0.2648648648648649, + "virology": 0.3668639053254438, + "world_history": 0.34782608695652173, + "world_religions": 0.29375 + } + }, + "prompt_3": { + "accuracy": 0.3381108616819202, + "category_acc": { + "agronomy": 0.33727810650887574, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.2804878048780488, + "arts": 0.28125, + "astronomy": 0.3090909090909091, + "business_ethics": 0.4019138755980861, + "chinese_civil_service_exam": 0.33125, + "chinese_driving_rule": 0.37404580152671757, + "chinese_food_culture": 0.27941176470588236, + "chinese_foreign_policy": 0.3364485981308411, + "chinese_history": 0.3591331269349845, + "chinese_literature": 0.25980392156862747, + "chinese_teacher_qualification": 0.4022346368715084, + "clinical_knowledge": 0.2616033755274262, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.42990654205607476, + "college_engineering_hydrology": 0.3584905660377358, + "college_law": 0.3148148148148148, + "college_mathematics": 0.20952380952380953, + "college_medical_statistics": 0.3584905660377358, + "college_medicine": 0.3333333333333333, + "computer_science": 0.3382352941176471, + "computer_security": 0.38596491228070173, + "conceptual_physics": 0.3197278911564626, + "construction_project_management": 0.35251798561151076, + "economics": 0.2578616352201258, + "education": 0.39263803680981596, + "electrical_engineering": 0.3488372093023256, + "elementary_chinese": 0.28174603174603174, + "elementary_commonsense": 0.25757575757575757, + "elementary_information_and_technology": 0.47478991596638653, + "elementary_mathematics": 0.24347826086956523, + "ethnology": 0.32592592592592595, + "food_science": 0.3916083916083916, + "genetics": 0.3522727272727273, + "global_facts": 0.30201342281879195, + "high_school_biology": 0.2958579881656805, + "high_school_chemistry": 0.2878787878787879, + "high_school_geography": 0.3220338983050847, + "high_school_mathematics": 0.21951219512195122, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.35664335664335667, + "human_sexuality": 0.373015873015873, + "international_law": 0.3675675675675676, + "journalism": 0.3488372093023256, + "jurisprudence": 0.35036496350364965, + "legal_and_moral_basis": 0.6121495327102804, + "logical": 0.35772357723577236, + "machine_learning": 0.36065573770491804, + "management": 0.38571428571428573, + "marketing": 0.45555555555555555, + "marxist_theory": 0.37566137566137564, + "modern_chinese": 0.3103448275862069, + "nutrition": 0.3931034482758621, + "philosophy": 0.42857142857142855, + "professional_accounting": 0.37714285714285717, + "professional_law": 0.2843601895734597, + "professional_medicine": 0.2526595744680851, + "professional_psychology": 0.35344827586206895, + "public_relations": 0.39655172413793105, + "security_study": 0.35555555555555557, + "sociology": 0.4026548672566372, + "sports_science": 0.38181818181818183, + "traditional_chinese_medicine": 0.24864864864864866, + "virology": 0.33727810650887574, + "world_history": 0.32919254658385094, + "world_religions": 0.30625 + } + }, + "prompt_4": { + "accuracy": 0.34450008634087376, + "category_acc": { + "agronomy": 0.33727810650887574, + "anatomy": 0.22297297297297297, + "ancient_chinese": 0.25609756097560976, + "arts": 0.30625, + "astronomy": 0.2727272727272727, + "business_ethics": 0.42105263157894735, + "chinese_civil_service_exam": 0.28125, + "chinese_driving_rule": 0.4198473282442748, + "chinese_food_culture": 0.27941176470588236, + "chinese_foreign_policy": 0.42990654205607476, + "chinese_history": 0.34674922600619196, + "chinese_literature": 0.30392156862745096, + "chinese_teacher_qualification": 0.41899441340782123, + "clinical_knowledge": 0.27848101265822783, + "college_actuarial_science": 0.18867924528301888, + "college_education": 0.411214953271028, + "college_engineering_hydrology": 0.3584905660377358, + "college_law": 0.28703703703703703, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.3490566037735849, + "college_medicine": 0.32234432234432236, + "computer_science": 0.36764705882352944, + "computer_security": 0.42105263157894735, + "conceptual_physics": 0.32653061224489793, + "construction_project_management": 0.3669064748201439, + "economics": 0.3081761006289308, + "education": 0.4233128834355828, + "electrical_engineering": 0.4127906976744186, + "elementary_chinese": 0.2777777777777778, + "elementary_commonsense": 0.29797979797979796, + "elementary_information_and_technology": 0.46638655462184875, + "elementary_mathematics": 0.20869565217391303, + "ethnology": 0.32592592592592595, + "food_science": 0.34965034965034963, + "genetics": 0.32954545454545453, + "global_facts": 0.30201342281879195, + "high_school_biology": 0.28402366863905326, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.3305084745762712, + "high_school_mathematics": 0.3170731707317073, + "high_school_physics": 0.23636363636363636, + "high_school_politics": 0.34265734265734266, + "human_sexuality": 0.38095238095238093, + "international_law": 0.32972972972972975, + "journalism": 0.36046511627906974, + "jurisprudence": 0.34549878345498786, + "legal_and_moral_basis": 0.6355140186915887, + "logical": 0.35772357723577236, + "machine_learning": 0.3360655737704918, + "management": 0.35714285714285715, + "marketing": 0.4722222222222222, + "marxist_theory": 0.4021164021164021, + "modern_chinese": 0.33620689655172414, + "nutrition": 0.3448275862068966, + "philosophy": 0.4095238095238095, + "professional_accounting": 0.44, + "professional_law": 0.2843601895734597, + "professional_medicine": 0.2632978723404255, + "professional_psychology": 0.3922413793103448, + "public_relations": 0.43103448275862066, + "security_study": 0.37037037037037035, + "sociology": 0.42035398230088494, + "sports_science": 0.37575757575757573, + "traditional_chinese_medicine": 0.23243243243243245, + "virology": 0.34911242603550297, + "world_history": 0.36024844720496896, + "world_religions": 0.3375 + } + }, + "prompt_5": { + "accuracy": 0.3501985840096702, + "category_acc": { + "agronomy": 0.378698224852071, + "anatomy": 0.22972972972972974, + "ancient_chinese": 0.3170731707317073, + "arts": 0.3, + "astronomy": 0.3090909090909091, + "business_ethics": 0.4258373205741627, + "chinese_civil_service_exam": 0.2875, + "chinese_driving_rule": 0.4122137404580153, + "chinese_food_culture": 0.25735294117647056, + "chinese_foreign_policy": 0.37383177570093457, + "chinese_history": 0.37770897832817335, + "chinese_literature": 0.25980392156862747, + "chinese_teacher_qualification": 0.3854748603351955, + "clinical_knowledge": 0.26582278481012656, + "college_actuarial_science": 0.19811320754716982, + "college_education": 0.38317757009345793, + "college_engineering_hydrology": 0.37735849056603776, + "college_law": 0.35185185185185186, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.37735849056603776, + "college_medicine": 0.3443223443223443, + "computer_science": 0.35784313725490197, + "computer_security": 0.40350877192982454, + "conceptual_physics": 0.3401360544217687, + "construction_project_management": 0.3597122302158273, + "economics": 0.3270440251572327, + "education": 0.3987730061349693, + "electrical_engineering": 0.36627906976744184, + "elementary_chinese": 0.2698412698412698, + "elementary_commonsense": 0.30303030303030304, + "elementary_information_and_technology": 0.48739495798319327, + "elementary_mathematics": 0.28695652173913044, + "ethnology": 0.37037037037037035, + "food_science": 0.35664335664335667, + "genetics": 0.375, + "global_facts": 0.3087248322147651, + "high_school_biology": 0.30177514792899407, + "high_school_chemistry": 0.26515151515151514, + "high_school_geography": 0.3474576271186441, + "high_school_mathematics": 0.23170731707317074, + "high_school_physics": 0.3, + "high_school_politics": 0.34265734265734266, + "human_sexuality": 0.3412698412698413, + "international_law": 0.34054054054054056, + "journalism": 0.37790697674418605, + "jurisprudence": 0.36982968369829683, + "legal_and_moral_basis": 0.6261682242990654, + "logical": 0.3821138211382114, + "machine_learning": 0.4016393442622951, + "management": 0.3904761904761905, + "marketing": 0.4444444444444444, + "marxist_theory": 0.4021164021164021, + "modern_chinese": 0.31896551724137934, + "nutrition": 0.3931034482758621, + "philosophy": 0.41904761904761906, + "professional_accounting": 0.38857142857142857, + "professional_law": 0.3127962085308057, + "professional_medicine": 0.23138297872340424, + "professional_psychology": 0.4224137931034483, + "public_relations": 0.41379310344827586, + "security_study": 0.35555555555555557, + "sociology": 0.3893805309734513, + "sports_science": 0.3878787878787879, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.3668639053254438, + "world_history": 0.3416149068322981, + "world_religions": 0.36875 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3333333333333333 + }, + "prompt_2": { + "accuracy": 0.3333333333333333 + }, + "prompt_3": { + "accuracy": 0.36363636363636365 + }, + "prompt_4": { + "accuracy": 0.3333333333333333 + }, + "prompt_5": { + "accuracy": 0.30303030303030304 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.38636363636363635 + }, + "prompt_2": { + "accuracy": 0.39545454545454545 + }, + "prompt_3": { + "accuracy": 0.3477272727272727 + }, + "prompt_4": { + "accuracy": 0.4863636363636364 + }, + "prompt_5": { + "accuracy": 0.47045454545454546 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.35898305084745763 + }, + "prompt_2": { + "accuracy": 0.3423728813559322 + }, + "prompt_3": { + "accuracy": 0.3488135593220339 + }, + "prompt_4": { + "accuracy": 0.3583050847457627 + }, + "prompt_5": { + "accuracy": 0.34915254237288135 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6488406881077038 + }, + "prompt_2": { + "accuracy": 0.6069558713537772 + }, + "prompt_3": { + "accuracy": 0.6009723261032162 + }, + "prompt_4": { + "accuracy": 0.6275243081525804 + }, + "prompt_5": { + "accuracy": 0.6443530291697831 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7976482116609506 + }, + "prompt_2": { + "accuracy": 0.8045075943165115 + }, + "prompt_3": { + "accuracy": 0.8030377266046056 + }, + "prompt_4": { + "accuracy": 0.7981381675649192 + }, + "prompt_5": { + "accuracy": 0.7981381675649192 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.36514596746927236, + "rouge2": 0.14774258589554132, + "rougeL": 0.2829121839685572, + "avg_rouge": 0.26526691244445694 + }, + "prompt_2": { + "rouge1": 0.3949660517234636, + "rouge2": 0.16270112090747404, + "rougeL": 0.30626271945860906, + "avg_rouge": 0.2879766306965156 + }, + "prompt_3": { + "rouge1": 0.371148843600123, + "rouge2": 0.14903391549746833, + "rougeL": 0.28611285679570436, + "avg_rouge": 0.2687652052977652 + }, + "prompt_4": { + "rouge1": 0.37677350306949225, + "rouge2": 0.15117839678269593, + "rougeL": 0.29155632704918555, + "avg_rouge": 0.27316940896712455 + }, + "prompt_5": { + "rouge1": 0.3753284127775681, + "rouge2": 0.14176872240660032, + "rougeL": 0.29122643602548925, + "avg_rouge": 0.26944119040321923 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.233468362410829, + "rouge2": 0.06259556349630206, + "rougeL": 0.17239415348887777, + "avg_rouge": 0.15615269313200295 + }, + "prompt_2": { + "rouge1": 0.23321869621528424, + "rouge2": 0.06225244128997574, + "rougeL": 0.17256552490988328, + "avg_rouge": 0.15601222080504776 + }, + "prompt_3": { + "rouge1": 0.23244899430145272, + "rouge2": 0.060566929788044545, + "rougeL": 0.17170240672577713, + "avg_rouge": 0.15490611027175813 + }, + "prompt_4": { + "rouge1": 0.23320770463635543, + "rouge2": 0.061685228966028974, + "rougeL": 0.17258091901231234, + "avg_rouge": 0.15582461753823226 + }, + "prompt_5": { + "rouge1": 0.23424663128524303, + "rouge2": 0.0634272343445468, + "rougeL": 0.17351811684274057, + "avg_rouge": 0.15706399415751013 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.9059633027522935 + }, + "prompt_2": { + "accuracy": 0.8772935779816514 + }, + "prompt_3": { + "accuracy": 0.9013761467889908 + }, + "prompt_4": { + "accuracy": 0.9094036697247706 + }, + "prompt_5": { + "accuracy": 0.7465596330275229 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7459252157238735 + }, + "prompt_2": { + "accuracy": 0.7401725790987536 + }, + "prompt_3": { + "accuracy": 0.7372962607861937 + }, + "prompt_4": { + "accuracy": 0.7123681687440077 + }, + "prompt_5": { + "accuracy": 0.700862895493768 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.526 + }, + "prompt_2": { + "accuracy": 0.587 + }, + "prompt_3": { + "accuracy": 0.5495 + }, + "prompt_4": { + "accuracy": 0.5785 + }, + "prompt_5": { + "accuracy": 0.6195 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.483 + }, + "prompt_2": { + "accuracy": 0.4455 + }, + "prompt_3": { + "accuracy": 0.4665 + }, + "prompt_4": { + "accuracy": 0.411 + }, + "prompt_5": { + "accuracy": 0.406 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.52 + }, + "prompt_2": { + "accuracy": 0.5395 + }, + "prompt_3": { + "accuracy": 0.492 + }, + "prompt_4": { + "accuracy": 0.5635 + }, + "prompt_5": { + "accuracy": 0.5265 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5492957746478874 + }, + "prompt_2": { + "accuracy": 0.5352112676056338 + }, + "prompt_3": { + "accuracy": 0.5633802816901409 + }, + "prompt_4": { + "accuracy": 0.4507042253521127 + }, + "prompt_5": { + "accuracy": 0.5211267605633803 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5126353790613718 + }, + "prompt_2": { + "accuracy": 0.49458483754512633 + }, + "prompt_3": { + "accuracy": 0.48014440433212996 + }, + "prompt_4": { + "accuracy": 0.5992779783393501 + }, + "prompt_5": { + "accuracy": 0.5018050541516246 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.47058823529411764 + }, + "prompt_2": { + "accuracy": 0.6102941176470589 + }, + "prompt_3": { + "accuracy": 0.5735294117647058 + }, + "prompt_4": { + "accuracy": 0.5612745098039216 + }, + "prompt_5": { + "accuracy": 0.5 + } } }, "five_shot": { @@ -3046,53 +26151,1733 @@ "model_link": "https://huggingface.co/huggyllama/llama-7b", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.2876190476190476, + "language_acc": { + "Malay": 0.32666666666666666, + "English": 0.3333333333333333, + "Vietnamese": 0.25333333333333335, + "Spanish": 0.30666666666666664, + "Indonesian": 0.26666666666666666, + "Filipino": 0.24666666666666667, + "Chinese": 0.28 + }, + "consistency_score_2": 0.52, + "consistency_score_3": 0.3358095238095237, + "consistency_score_4": 0.23542857142857138, + "consistency_score_5": 0.17206349206349206, + "consistency_score_6": 0.12952380952380954, + "consistency_score_7": 0.1, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5266666666666666, + "Malay,Vietnamese": 0.5666666666666667, + "Malay,Spanish": 0.56, + "Malay,Indonesian": 0.7533333333333333, + "Malay,Filipino": 0.6133333333333333, + "Malay,Chinese": 0.49333333333333335, + "English,Vietnamese": 0.4266666666666667, + "English,Spanish": 0.58, + "English,Indonesian": 0.5333333333333333, + "English,Filipino": 0.4666666666666667, + "English,Chinese": 0.4866666666666667, + "Vietnamese,Spanish": 0.44666666666666666, + "Vietnamese,Indonesian": 0.5666666666666667, + "Vietnamese,Filipino": 0.5133333333333333, + "Vietnamese,Chinese": 0.42, + "Spanish,Indonesian": 0.5666666666666667, + "Spanish,Filipino": 0.47333333333333333, + "Spanish,Chinese": 0.4533333333333333, + "Indonesian,Filipino": 0.6066666666666667, + "Indonesian,Chinese": 0.44666666666666666, + "Filipino,Chinese": 0.42 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.32666666666666666, + "Malay,English,Spanish": 0.38, + "Malay,English,Indonesian": 0.44, + "Malay,English,Filipino": 0.35333333333333333, + "Malay,English,Chinese": 0.32, + "Malay,Vietnamese,Spanish": 0.3333333333333333, + "Malay,Vietnamese,Indonesian": 0.48, + "Malay,Vietnamese,Filipino": 0.4, + "Malay,Vietnamese,Chinese": 0.32, + "Malay,Spanish,Indonesian": 0.4666666666666667, + "Malay,Spanish,Filipino": 0.36666666666666664, + "Malay,Spanish,Chinese": 0.31333333333333335, + "Malay,Indonesian,Filipino": 0.5066666666666667, + "Malay,Indonesian,Chinese": 0.38666666666666666, + "Malay,Filipino,Chinese": 0.32, + "English,Vietnamese,Spanish": 0.29333333333333333, + "English,Vietnamese,Indonesian": 0.3333333333333333, + "English,Vietnamese,Filipino": 0.26666666666666666, + "English,Vietnamese,Chinese": 0.24, + "English,Spanish,Indonesian": 0.38, + "English,Spanish,Filipino": 0.32, + "English,Spanish,Chinese": 0.32666666666666666, + "English,Indonesian,Filipino": 0.3466666666666667, + "English,Indonesian,Chinese": 0.29333333333333333, + "English,Filipino,Chinese": 0.26, + "Vietnamese,Spanish,Indonesian": 0.3333333333333333, + "Vietnamese,Spanish,Filipino": 0.2866666666666667, + "Vietnamese,Spanish,Chinese": 0.24, + "Vietnamese,Indonesian,Filipino": 0.3933333333333333, + "Vietnamese,Indonesian,Chinese": 0.29333333333333333, + "Vietnamese,Filipino,Chinese": 0.23333333333333334, + "Spanish,Indonesian,Filipino": 0.36, + "Spanish,Indonesian,Chinese": 0.29333333333333333, + "Spanish,Filipino,Chinese": 0.24, + "Indonesian,Filipino,Chinese": 0.30666666666666664 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.24666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.29333333333333333, + "Malay,English,Vietnamese,Filipino": 0.23333333333333334, + "Malay,English,Vietnamese,Chinese": 0.20666666666666667, + "Malay,English,Spanish,Indonesian": 0.3333333333333333, + "Malay,English,Spanish,Filipino": 0.26, + "Malay,English,Spanish,Chinese": 0.24, + "Malay,English,Indonesian,Filipino": 0.3, + "Malay,English,Indonesian,Chinese": 0.26, + "Malay,English,Filipino,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Indonesian": 0.3, + "Malay,Vietnamese,Spanish,Filipino": 0.23333333333333334, + "Malay,Vietnamese,Spanish,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.35333333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.2733333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.21333333333333335, + "Malay,Spanish,Indonesian,Filipino": 0.31333333333333335, + "Malay,Spanish,Indonesian,Chinese": 0.25333333333333335, + "Malay,Spanish,Filipino,Chinese": 0.19333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.26666666666666666, + "English,Vietnamese,Spanish,Indonesian": 0.25333333333333335, + "English,Vietnamese,Spanish,Filipino": 0.20666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.17333333333333334, + "English,Vietnamese,Indonesian,Filipino": 0.21333333333333335, + "English,Vietnamese,Indonesian,Chinese": 0.2, + "English,Vietnamese,Filipino,Chinese": 0.15333333333333332, + "English,Spanish,Indonesian,Filipino": 0.26, + "English,Spanish,Indonesian,Chinese": 0.24, + "English,Spanish,Filipino,Chinese": 0.20666666666666667, + "English,Indonesian,Filipino,Chinese": 0.2, + "Vietnamese,Spanish,Indonesian,Filipino": 0.24666666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.18, + "Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, + "Vietnamese,Indonesian,Filipino,Chinese": 0.2, + "Spanish,Indonesian,Filipino,Chinese": 0.2 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.23333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino": 0.18, + "Malay,English,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.2, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.18666666666666668, + "Malay,English,Vietnamese,Filipino,Chinese": 0.14, + "Malay,English,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.20666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.16666666666666666, + "Malay,English,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.22, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.18, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.16, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.12, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.13333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.12, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1 + } + }, + "AC3_2": 0.3703773584447056, + "AC3_3": 0.30985174510910163, + "AC3_4": 0.25891998746480427, + "AC3_5": 0.21531702178951845, + "AC3_6": 0.17861274185759501, + "AC3_7": 0.1484029483646626 + }, + "prompt_2": { + "overall_acc": 0.2666666666666667, + "language_acc": { + "Malay": 0.26666666666666666, + "English": 0.31333333333333335, + "Vietnamese": 0.22666666666666666, + "Spanish": 0.2866666666666667, + "Indonesian": 0.28, + "Filipino": 0.26, + "Chinese": 0.23333333333333334 + }, + "consistency_score_2": 0.4165079365079365, + "consistency_score_3": 0.2083809523809524, + "consistency_score_4": 0.11733333333333333, + "consistency_score_5": 0.07079365079365078, + "consistency_score_6": 0.04380952380952381, + "consistency_score_7": 0.02666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.3466666666666667, + "Malay,Vietnamese": 0.44, + "Malay,Spanish": 0.3933333333333333, + "Malay,Indonesian": 0.5933333333333334, + "Malay,Filipino": 0.48, + "Malay,Chinese": 0.4066666666666667, + "English,Vietnamese": 0.36666666666666664, + "English,Spanish": 0.42, + "English,Indonesian": 0.38666666666666666, + "English,Filipino": 0.35333333333333333, + "English,Chinese": 0.38666666666666666, + "Vietnamese,Spanish": 0.3933333333333333, + "Vietnamese,Indonesian": 0.42, + "Vietnamese,Filipino": 0.44666666666666666, + "Vietnamese,Chinese": 0.4, + "Spanish,Indonesian": 0.4066666666666667, + "Spanish,Filipino": 0.42, + "Spanish,Chinese": 0.4533333333333333, + "Indonesian,Filipino": 0.5, + "Indonesian,Chinese": 0.35333333333333333, + "Filipino,Chinese": 0.38 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.18, + "Malay,English,Spanish": 0.16, + "Malay,English,Indonesian": 0.22666666666666666, + "Malay,English,Filipino": 0.18666666666666668, + "Malay,English,Chinese": 0.16, + "Malay,Vietnamese,Spanish": 0.21333333333333335, + "Malay,Vietnamese,Indonesian": 0.29333333333333333, + "Malay,Vietnamese,Filipino": 0.26, + "Malay,Vietnamese,Chinese": 0.20666666666666667, + "Malay,Spanish,Indonesian": 0.24, + "Malay,Spanish,Filipino": 0.22666666666666666, + "Malay,Spanish,Chinese": 0.21333333333333335, + "Malay,Indonesian,Filipino": 0.36, + "Malay,Indonesian,Chinese": 0.26, + "Malay,Filipino,Chinese": 0.21333333333333335, + "English,Vietnamese,Spanish": 0.18666666666666668, + "English,Vietnamese,Indonesian": 0.18, + "English,Vietnamese,Filipino": 0.16666666666666666, + "English,Vietnamese,Chinese": 0.18, + "English,Spanish,Indonesian": 0.18666666666666668, + "English,Spanish,Filipino": 0.19333333333333333, + "English,Spanish,Chinese": 0.21333333333333335, + "English,Indonesian,Filipino": 0.19333333333333333, + "English,Indonesian,Chinese": 0.16666666666666666, + "English,Filipino,Chinese": 0.15333333333333332, + "Vietnamese,Spanish,Indonesian": 0.20666666666666667, + "Vietnamese,Spanish,Filipino": 0.22666666666666666, + "Vietnamese,Spanish,Chinese": 0.20666666666666667, + "Vietnamese,Indonesian,Filipino": 0.25333333333333335, + "Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "Vietnamese,Filipino,Chinese": 0.2, + "Spanish,Indonesian,Filipino": 0.23333333333333334, + "Spanish,Indonesian,Chinese": 0.19333333333333333, + "Spanish,Filipino,Chinese": 0.19333333333333333, + "Indonesian,Filipino,Chinese": 0.19333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.1, + "Malay,English,Vietnamese,Indonesian": 0.12666666666666668, + "Malay,English,Vietnamese,Filipino": 0.11333333333333333, + "Malay,English,Vietnamese,Chinese": 0.08666666666666667, + "Malay,English,Spanish,Indonesian": 0.10666666666666667, + "Malay,English,Spanish,Filipino": 0.1, + "Malay,English,Spanish,Chinese": 0.10666666666666667, + "Malay,English,Indonesian,Filipino": 0.14, + "Malay,English,Indonesian,Chinese": 0.12, + "Malay,English,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Indonesian": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.14, + "Malay,Vietnamese,Spanish,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.2, + "Malay,Vietnamese,Indonesian,Chinese": 0.12, + "Malay,Vietnamese,Filipino,Chinese": 0.12666666666666668, + "Malay,Spanish,Indonesian,Filipino": 0.17333333333333334, + "Malay,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,Indonesian,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish,Indonesian": 0.1, + "English,Vietnamese,Spanish,Filipino": 0.1, + "English,Vietnamese,Spanish,Chinese": 0.10666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.10666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.08666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.08, + "English,Spanish,Indonesian,Filipino": 0.10666666666666667, + "English,Spanish,Indonesian,Chinese": 0.11333333333333333, + "English,Spanish,Filipino,Chinese": 0.10666666666666667, + "English,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.14, + "Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, + "Vietnamese,Indonesian,Filipino,Chinese": 0.1, + "Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.06, + "Malay,English,Vietnamese,Spanish,Chinese": 0.06, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.09333333333333334, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.06, + "Malay,English,Vietnamese,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.07333333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.07333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.11333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.08, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.06, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.06, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.06, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.04666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.04666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.04, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.04, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.04, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667 + } + }, + "AC3_2": 0.32515489462402863, + "AC3_3": 0.2339481421587185, + "AC3_4": 0.16296296292052467, + "AC3_5": 0.11188460329077109, + "AC3_6": 0.07525562369764262, + "AC3_7": 0.04848484846831956 + }, + "prompt_3": { + "overall_acc": 0.2676190476190476, + "language_acc": { + "Malay": 0.28, + "English": 0.2733333333333333, + "Vietnamese": 0.25333333333333335, + "Spanish": 0.26666666666666666, + "Indonesian": 0.26666666666666666, + "Filipino": 0.28, + "Chinese": 0.25333333333333335 + }, + "consistency_score_2": 0.41968253968253966, + "consistency_score_3": 0.21714285714285714, + "consistency_score_4": 0.1278095238095238, + "consistency_score_5": 0.08285714285714288, + "consistency_score_6": 0.05904761904761905, + "consistency_score_7": 0.04666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.3333333333333333, + "Malay,Vietnamese": 0.36666666666666664, + "Malay,Spanish": 0.38, + "Malay,Indonesian": 0.5333333333333333, + "Malay,Filipino": 0.4066666666666667, + "Malay,Chinese": 0.36, + "English,Vietnamese": 0.49333333333333335, + "English,Spanish": 0.5466666666666666, + "English,Indonesian": 0.3466666666666667, + "English,Filipino": 0.4266666666666667, + "English,Chinese": 0.36666666666666664, + "Vietnamese,Spanish": 0.4533333333333333, + "Vietnamese,Indonesian": 0.32666666666666666, + "Vietnamese,Filipino": 0.47333333333333333, + "Vietnamese,Chinese": 0.4, + "Spanish,Indonesian": 0.36666666666666664, + "Spanish,Filipino": 0.5533333333333333, + "Spanish,Chinese": 0.49333333333333335, + "Indonesian,Filipino": 0.37333333333333335, + "Indonesian,Chinese": 0.38666666666666666, + "Filipino,Chinese": 0.4266666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.19333333333333333, + "Malay,English,Spanish": 0.22, + "Malay,English,Indonesian": 0.2, + "Malay,English,Filipino": 0.18666666666666668, + "Malay,English,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish": 0.2, + "Malay,Vietnamese,Indonesian": 0.2, + "Malay,Vietnamese,Filipino": 0.22, + "Malay,Vietnamese,Chinese": 0.18, + "Malay,Spanish,Indonesian": 0.23333333333333334, + "Malay,Spanish,Filipino": 0.24666666666666667, + "Malay,Spanish,Chinese": 0.2, + "Malay,Indonesian,Filipino": 0.24, + "Malay,Indonesian,Chinese": 0.23333333333333334, + "Malay,Filipino,Chinese": 0.18, + "English,Vietnamese,Spanish": 0.3, + "English,Vietnamese,Indonesian": 0.2, + "English,Vietnamese,Filipino": 0.26, + "English,Vietnamese,Chinese": 0.19333333333333333, + "English,Spanish,Indonesian": 0.22666666666666666, + "English,Spanish,Filipino": 0.31333333333333335, + "English,Spanish,Chinese": 0.26, + "English,Indonesian,Filipino": 0.17333333333333334, + "English,Indonesian,Chinese": 0.18, + "English,Filipino,Chinese": 0.18666666666666668, + "Vietnamese,Spanish,Indonesian": 0.17333333333333334, + "Vietnamese,Spanish,Filipino": 0.3, + "Vietnamese,Spanish,Chinese": 0.23333333333333334, + "Vietnamese,Indonesian,Filipino": 0.18, + "Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "Vietnamese,Filipino,Chinese": 0.24, + "Spanish,Indonesian,Filipino": 0.23333333333333334, + "Spanish,Indonesian,Chinese": 0.22, + "Spanish,Filipino,Chinese": 0.3, + "Indonesian,Filipino,Chinese": 0.19333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.14666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.14, + "Malay,English,Vietnamese,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Chinese": 0.08, + "Malay,English,Spanish,Indonesian": 0.13333333333333333, + "Malay,English,Spanish,Filipino": 0.15333333333333332, + "Malay,English,Spanish,Chinese": 0.10666666666666667, + "Malay,English,Indonesian,Filipino": 0.10666666666666667, + "Malay,English,Indonesian,Chinese": 0.10666666666666667, + "Malay,English,Filipino,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Filipino": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.12, + "Malay,Vietnamese,Indonesian,Filipino": 0.12, + "Malay,Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.12666666666666668, + "Malay,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Malay,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.12666666666666668, + "English,Vietnamese,Spanish,Filipino": 0.2, + "English,Vietnamese,Spanish,Chinese": 0.14666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.10666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.10666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.1, + "English,Spanish,Indonesian,Filipino": 0.14, + "English,Spanish,Indonesian,Chinese": 0.14, + "English,Spanish,Filipino,Chinese": 0.16666666666666666, + "English,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.12, + "Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Indonesian,Filipino,Chinese": 0.1, + "Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.1, + "Malay,English,Vietnamese,Spanish,Filipino": 0.10666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.08, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.08, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.08666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Filipino,Chinese": 0.06666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.06, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.08666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.08666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.08666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.06, + "English,Spanish,Indonesian,Filipino,Chinese": 0.1, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667 + } + }, + "AC3_2": 0.326828989284901, + "AC3_3": 0.23975301707094626, + "AC3_4": 0.17299844018012414, + "AC3_5": 0.12653726704464097, + "AC3_6": 0.09674857695219621, + "AC3_7": 0.07947474744946005 + }, + "prompt_4": { + "overall_acc": 0.29047619047619044, + "language_acc": { + "Malay": 0.25333333333333335, + "English": 0.4, + "Vietnamese": 0.2866666666666667, + "Spanish": 0.2866666666666667, + "Indonesian": 0.24666666666666667, + "Filipino": 0.26666666666666666, + "Chinese": 0.29333333333333333 + }, + "consistency_score_2": 0.49492063492063504, + "consistency_score_3": 0.304952380952381, + "consistency_score_4": 0.20495238095238102, + "consistency_score_5": 0.14476190476190473, + "consistency_score_6": 0.10571428571428569, + "consistency_score_7": 0.08, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.31333333333333335, + "Malay,Vietnamese": 0.6733333333333333, + "Malay,Spanish": 0.6, + "Malay,Indonesian": 0.9066666666666666, + "Malay,Filipino": 0.7933333333333333, + "Malay,Chinese": 0.34, + "English,Vietnamese": 0.3466666666666667, + "English,Spanish": 0.44666666666666666, + "English,Indonesian": 0.32666666666666666, + "English,Filipino": 0.32, + "English,Chinese": 0.42, + "Vietnamese,Spanish": 0.4866666666666667, + "Vietnamese,Indonesian": 0.68, + "Vietnamese,Filipino": 0.5666666666666667, + "Vietnamese,Chinese": 0.31333333333333335, + "Spanish,Indonesian": 0.5533333333333333, + "Spanish,Filipino": 0.5266666666666666, + "Spanish,Chinese": 0.4, + "Indonesian,Filipino": 0.7666666666666667, + "Indonesian,Chinese": 0.3466666666666667, + "Filipino,Chinese": 0.26666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.25333333333333335, + "Malay,English,Spanish": 0.25333333333333335, + "Malay,English,Indonesian": 0.3, + "Malay,English,Filipino": 0.25333333333333335, + "Malay,English,Chinese": 0.16, + "Malay,Vietnamese,Spanish": 0.4266666666666667, + "Malay,Vietnamese,Indonesian": 0.6333333333333333, + "Malay,Vietnamese,Filipino": 0.5333333333333333, + "Malay,Vietnamese,Chinese": 0.23333333333333334, + "Malay,Spanish,Indonesian": 0.54, + "Malay,Spanish,Filipino": 0.48, + "Malay,Spanish,Chinese": 0.23333333333333334, + "Malay,Indonesian,Filipino": 0.7466666666666667, + "Malay,Indonesian,Chinese": 0.31333333333333335, + "Malay,Filipino,Chinese": 0.22666666666666666, + "English,Vietnamese,Spanish": 0.23333333333333334, + "English,Vietnamese,Indonesian": 0.26666666666666666, + "English,Vietnamese,Filipino": 0.21333333333333335, + "English,Vietnamese,Chinese": 0.16666666666666666, + "English,Spanish,Indonesian": 0.24666666666666667, + "English,Spanish,Filipino": 0.2, + "English,Spanish,Chinese": 0.22666666666666666, + "English,Indonesian,Filipino": 0.25333333333333335, + "English,Indonesian,Chinese": 0.16666666666666666, + "English,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Indonesian": 0.4066666666666667, + "Vietnamese,Spanish,Filipino": 0.36, + "Vietnamese,Spanish,Chinese": 0.19333333333333333, + "Vietnamese,Indonesian,Filipino": 0.5266666666666666, + "Vietnamese,Indonesian,Chinese": 0.24, + "Vietnamese,Filipino,Chinese": 0.16666666666666666, + "Spanish,Indonesian,Filipino": 0.4533333333333333, + "Spanish,Indonesian,Chinese": 0.22, + "Spanish,Filipino,Chinese": 0.18, + "Indonesian,Filipino,Chinese": 0.22 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.20666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.25333333333333335, + "Malay,English,Vietnamese,Filipino": 0.20666666666666667, + "Malay,English,Vietnamese,Chinese": 0.12, + "Malay,English,Spanish,Indonesian": 0.24, + "Malay,English,Spanish,Filipino": 0.19333333333333333, + "Malay,English,Spanish,Chinese": 0.14, + "Malay,English,Indonesian,Filipino": 0.25333333333333335, + "Malay,English,Indonesian,Chinese": 0.15333333333333332, + "Malay,English,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian": 0.4, + "Malay,Vietnamese,Spanish,Filipino": 0.34, + "Malay,Vietnamese,Spanish,Chinese": 0.16, + "Malay,Vietnamese,Indonesian,Filipino": 0.5133333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Filipino,Chinese": 0.16666666666666666, + "Malay,Spanish,Indonesian,Filipino": 0.44666666666666666, + "Malay,Spanish,Indonesian,Chinese": 0.22, + "Malay,Spanish,Filipino,Chinese": 0.16666666666666666, + "Malay,Indonesian,Filipino,Chinese": 0.22, + "English,Vietnamese,Spanish,Indonesian": 0.20666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.16666666666666666, + "English,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "English,Vietnamese,Indonesian,Filipino": 0.20666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.12666666666666668, + "English,Vietnamese,Filipino,Chinese": 0.09333333333333334, + "English,Spanish,Indonesian,Filipino": 0.19333333333333333, + "English,Spanish,Indonesian,Chinese": 0.13333333333333333, + "English,Spanish,Filipino,Chinese": 0.10666666666666667, + "English,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Vietnamese,Spanish,Indonesian,Filipino": 0.3333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.16, + "Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Spanish,Indonesian,Filipino,Chinese": 0.16 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.20666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.16, + "Malay,English,Vietnamese,Spanish,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.20666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.12, + "Malay,English,Vietnamese,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.32666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.16, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.08, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.08, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08 + } + }, + "AC3_2": 0.36608923273623434, + "AC3_3": 0.29753831149985743, + "AC3_4": 0.24033247280991443, + "AC3_5": 0.19322705007543337, + "AC3_6": 0.15501373622461015, + "AC3_7": 0.12544987143143382 + }, + "prompt_5": { + "overall_acc": 0.3009523809523809, + "language_acc": { + "Malay": 0.28, + "English": 0.30666666666666664, + "Vietnamese": 0.2733333333333333, + "Spanish": 0.38, + "Indonesian": 0.2733333333333333, + "Filipino": 0.25333333333333335, + "Chinese": 0.34 + }, + "consistency_score_2": 0.473968253968254, + "consistency_score_3": 0.27657142857142863, + "consistency_score_4": 0.1765714285714286, + "consistency_score_5": 0.1187301587301587, + "consistency_score_6": 0.08285714285714285, + "consistency_score_7": 0.06, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.44, + "Malay,Vietnamese": 0.5866666666666667, + "Malay,Spanish": 0.43333333333333335, + "Malay,Indonesian": 0.8133333333333334, + "Malay,Filipino": 0.5866666666666667, + "Malay,Chinese": 0.3333333333333333, + "English,Vietnamese": 0.4266666666666667, + "English,Spanish": 0.4866666666666667, + "English,Indonesian": 0.41333333333333333, + "English,Filipino": 0.42, + "English,Chinese": 0.4666666666666667, + "Vietnamese,Spanish": 0.5133333333333333, + "Vietnamese,Indonesian": 0.5933333333333334, + "Vietnamese,Filipino": 0.5066666666666667, + "Vietnamese,Chinese": 0.35333333333333333, + "Spanish,Indonesian": 0.44666666666666666, + "Spanish,Filipino": 0.43333333333333335, + "Spanish,Chinese": 0.4533333333333333, + "Indonesian,Filipino": 0.5666666666666667, + "Indonesian,Chinese": 0.36, + "Filipino,Chinese": 0.32 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.2866666666666667, + "Malay,English,Spanish": 0.26666666666666666, + "Malay,English,Indonesian": 0.36666666666666664, + "Malay,English,Filipino": 0.2866666666666667, + "Malay,English,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Spanish": 0.31333333333333335, + "Malay,Vietnamese,Indonesian": 0.5066666666666667, + "Malay,Vietnamese,Filipino": 0.4066666666666667, + "Malay,Vietnamese,Chinese": 0.23333333333333334, + "Malay,Spanish,Indonesian": 0.37333333333333335, + "Malay,Spanish,Filipino": 0.3, + "Malay,Spanish,Chinese": 0.2, + "Malay,Indonesian,Filipino": 0.5, + "Malay,Indonesian,Chinese": 0.29333333333333333, + "Malay,Filipino,Chinese": 0.2, + "English,Vietnamese,Spanish": 0.2866666666666667, + "English,Vietnamese,Indonesian": 0.26, + "English,Vietnamese,Filipino": 0.24, + "English,Vietnamese,Chinese": 0.22, + "English,Spanish,Indonesian": 0.25333333333333335, + "English,Spanish,Filipino": 0.26, + "English,Spanish,Chinese": 0.2733333333333333, + "English,Indonesian,Filipino": 0.25333333333333335, + "English,Indonesian,Chinese": 0.20666666666666667, + "English,Filipino,Chinese": 0.18666666666666668, + "Vietnamese,Spanish,Indonesian": 0.32, + "Vietnamese,Spanish,Filipino": 0.28, + "Vietnamese,Spanish,Chinese": 0.23333333333333334, + "Vietnamese,Indonesian,Filipino": 0.38, + "Vietnamese,Indonesian,Chinese": 0.23333333333333334, + "Vietnamese,Filipino,Chinese": 0.16, + "Spanish,Indonesian,Filipino": 0.2866666666666667, + "Spanish,Indonesian,Chinese": 0.22, + "Spanish,Filipino,Chinese": 0.18, + "Indonesian,Filipino,Chinese": 0.20666666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.18666666666666668, + "Malay,English,Vietnamese,Indonesian": 0.24666666666666667, + "Malay,English,Vietnamese,Filipino": 0.19333333333333333, + "Malay,English,Vietnamese,Chinese": 0.14, + "Malay,English,Spanish,Indonesian": 0.23333333333333334, + "Malay,English,Spanish,Filipino": 0.19333333333333333, + "Malay,English,Spanish,Chinese": 0.14, + "Malay,English,Indonesian,Filipino": 0.22666666666666666, + "Malay,English,Indonesian,Chinese": 0.18666666666666668, + "Malay,English,Filipino,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.2733333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.22, + "Malay,Vietnamese,Spanish,Chinese": 0.16, + "Malay,Vietnamese,Indonesian,Filipino": 0.3466666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Filipino,Chinese": 0.15333333333333332, + "Malay,Spanish,Indonesian,Filipino": 0.25333333333333335, + "Malay,Spanish,Indonesian,Chinese": 0.18, + "Malay,Spanish,Filipino,Chinese": 0.13333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.17333333333333334, + "English,Vietnamese,Spanish,Indonesian": 0.18, + "English,Vietnamese,Spanish,Filipino": 0.17333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "English,Vietnamese,Indonesian,Filipino": 0.17333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.13333333333333333, + "English,Vietnamese,Filipino,Chinese": 0.08666666666666667, + "English,Spanish,Indonesian,Filipino": 0.18666666666666668, + "English,Spanish,Indonesian,Chinese": 0.14, + "English,Spanish,Filipino,Chinese": 0.12666666666666668, + "English,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.2, + "Vietnamese,Spanish,Indonesian,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Filipino,Chinese": 0.10666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.14, + "Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.16666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino": 0.13333333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.16, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.12666666666666668, + "Malay,English,Vietnamese,Filipino,Chinese": 0.08, + "Malay,English,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,English,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.18, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.12666666666666668, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.07333333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.11333333333333333, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.06666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.06666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.08, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + } + }, + "AC3_2": 0.36814576378854713, + "AC3_3": 0.2882472671947179, + "AC3_4": 0.2225631089624526, + "AC3_5": 0.17028167995366275, + "AC3_6": 0.12993973764781788, + "AC3_7": 0.1000527704208297 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.26136363636363635, + "language_acc": { + "English": 0.26704545454545453, + "Vietnamese": 0.26136363636363635, + "Chinese": 0.2556818181818182, + "Indonesian": 0.20454545454545456, + "Filipino": 0.26136363636363635, + "Spanish": 0.32386363636363635, + "Malay": 0.2556818181818182 + }, + "consistency_score_2": 0.46049783549783546, + "consistency_score_3": 0.2527597402597403, + "consistency_score_4": 0.15032467532467533, + "consistency_score_5": 0.09415584415584417, + "consistency_score_6": 0.060876623376623376, + "consistency_score_7": 0.03977272727272727, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3806818181818182, + "English,Chinese": 0.4147727272727273, + "English,Indonesian": 0.4715909090909091, + "English,Filipino": 0.38636363636363635, + "English,Spanish": 0.42613636363636365, + "English,Malay": 0.44886363636363635, + "Vietnamese,Chinese": 0.4375, + "Vietnamese,Indonesian": 0.4602272727272727, + "Vietnamese,Filipino": 0.5454545454545454, + "Vietnamese,Spanish": 0.39204545454545453, + "Vietnamese,Malay": 0.5284090909090909, + "Chinese,Indonesian": 0.4318181818181818, + "Chinese,Filipino": 0.45454545454545453, + "Chinese,Spanish": 0.42045454545454547, + "Chinese,Malay": 0.4602272727272727, + "Indonesian,Filipino": 0.5397727272727273, + "Indonesian,Spanish": 0.44886363636363635, + "Indonesian,Malay": 0.6079545454545454, + "Filipino,Spanish": 0.44886363636363635, + "Filipino,Malay": 0.5397727272727273, + "Spanish,Malay": 0.42613636363636365 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.19886363636363635, + "English,Vietnamese,Indonesian": 0.2159090909090909, + "English,Vietnamese,Filipino": 0.2215909090909091, + "English,Vietnamese,Spanish": 0.1875, + "English,Vietnamese,Malay": 0.23295454545454544, + "English,Chinese,Indonesian": 0.23295454545454544, + "English,Chinese,Filipino": 0.23295454545454544, + "English,Chinese,Spanish": 0.2215909090909091, + "English,Chinese,Malay": 0.24431818181818182, + "English,Indonesian,Filipino": 0.2556818181818182, + "English,Indonesian,Spanish": 0.23863636363636365, + "English,Indonesian,Malay": 0.3125, + "English,Filipino,Spanish": 0.22727272727272727, + "English,Filipino,Malay": 0.24431818181818182, + "English,Spanish,Malay": 0.2215909090909091, + "Vietnamese,Chinese,Indonesian": 0.23295454545454544, + "Vietnamese,Chinese,Filipino": 0.2784090909090909, + "Vietnamese,Chinese,Spanish": 0.19886363636363635, + "Vietnamese,Chinese,Malay": 0.2556818181818182, + "Vietnamese,Indonesian,Filipino": 0.30113636363636365, + "Vietnamese,Indonesian,Spanish": 0.19886363636363635, + "Vietnamese,Indonesian,Malay": 0.3181818181818182, + "Vietnamese,Filipino,Spanish": 0.25, + "Vietnamese,Filipino,Malay": 0.3409090909090909, + "Vietnamese,Spanish,Malay": 0.2159090909090909, + "Chinese,Indonesian,Filipino": 0.29545454545454547, + "Chinese,Indonesian,Spanish": 0.2215909090909091, + "Chinese,Indonesian,Malay": 0.3068181818181818, + "Chinese,Filipino,Spanish": 0.22727272727272727, + "Chinese,Filipino,Malay": 0.30113636363636365, + "Chinese,Spanish,Malay": 0.22727272727272727, + "Indonesian,Filipino,Spanish": 0.2727272727272727, + "Indonesian,Filipino,Malay": 0.35795454545454547, + "Indonesian,Spanish,Malay": 0.29545454545454547, + "Filipino,Spanish,Malay": 0.26136363636363635 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.10795454545454546, + "English,Vietnamese,Chinese,Filipino": 0.13636363636363635, + "English,Vietnamese,Chinese,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Malay": 0.125, + "English,Vietnamese,Indonesian,Filipino": 0.13068181818181818, + "English,Vietnamese,Indonesian,Spanish": 0.09659090909090909, + "English,Vietnamese,Indonesian,Malay": 0.17045454545454544, + "English,Vietnamese,Filipino,Spanish": 0.13068181818181818, + "English,Vietnamese,Filipino,Malay": 0.14204545454545456, + "English,Vietnamese,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino": 0.1590909090909091, + "English,Chinese,Indonesian,Spanish": 0.14204545454545456, + "English,Chinese,Indonesian,Malay": 0.17045454545454544, + "English,Chinese,Filipino,Spanish": 0.14204545454545456, + "English,Chinese,Filipino,Malay": 0.16477272727272727, + "English,Chinese,Spanish,Malay": 0.13068181818181818, + "English,Indonesian,Filipino,Spanish": 0.1590909090909091, + "English,Indonesian,Filipino,Malay": 0.1875, + "English,Indonesian,Spanish,Malay": 0.17045454545454544, + "English,Filipino,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Filipino": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Spanish": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Malay": 0.18181818181818182, + "Vietnamese,Chinese,Filipino,Spanish": 0.13068181818181818, + "Vietnamese,Chinese,Filipino,Malay": 0.19318181818181818, + "Vietnamese,Chinese,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "Vietnamese,Indonesian,Filipino,Malay": 0.2215909090909091, + "Vietnamese,Indonesian,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.17045454545454544, + "Chinese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "Chinese,Indonesian,Filipino,Malay": 0.2159090909090909, + "Chinese,Indonesian,Spanish,Malay": 0.1534090909090909, + "Chinese,Filipino,Spanish,Malay": 0.1534090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.19886363636363635 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Chinese,Filipino,Malay": 0.09659090909090909, + "English,Vietnamese,Chinese,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino,Malay": 0.125, + "English,Chinese,Indonesian,Spanish,Malay": 0.10227272727272728, + "English,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.125, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + } + }, + "AC3_2": 0.3334639497970652, + "AC3_3": 0.25698969481466943, + "AC3_4": 0.19086965868607209, + "AC3_5": 0.1384391863454519, + "AC3_6": 0.09875200363319699, + "AC3_7": 0.0690394510919965 + }, + "prompt_2": { + "overall_acc": 0.2711038961038961, + "language_acc": { + "English": 0.2840909090909091, + "Vietnamese": 0.2556818181818182, + "Chinese": 0.26704545454545453, + "Indonesian": 0.2897727272727273, + "Filipino": 0.24431818181818182, + "Spanish": 0.2784090909090909, + "Malay": 0.2784090909090909 + }, + "consistency_score_2": 0.37689393939393934, + "consistency_score_3": 0.17061688311688308, + "consistency_score_4": 0.08506493506493507, + "consistency_score_5": 0.04464285714285713, + "consistency_score_6": 0.025162337662337664, + "consistency_score_7": 0.017045454545454544, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3806818181818182, + "English,Chinese": 0.30113636363636365, + "English,Indonesian": 0.3522727272727273, + "English,Filipino": 0.3409090909090909, + "English,Spanish": 0.4375, + "English,Malay": 0.3068181818181818, + "Vietnamese,Chinese": 0.42045454545454547, + "Vietnamese,Indonesian": 0.4090909090909091, + "Vietnamese,Filipino": 0.3806818181818182, + "Vietnamese,Spanish": 0.4772727272727273, + "Vietnamese,Malay": 0.38636363636363635, + "Chinese,Indonesian": 0.3409090909090909, + "Chinese,Filipino": 0.30113636363636365, + "Chinese,Spanish": 0.375, + "Chinese,Malay": 0.3409090909090909, + "Indonesian,Filipino": 0.4147727272727273, + "Indonesian,Spanish": 0.36363636363636365, + "Indonesian,Malay": 0.48863636363636365, + "Filipino,Spanish": 0.3409090909090909, + "Filipino,Malay": 0.44886363636363635, + "Spanish,Malay": 0.3068181818181818 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.1590909090909091, + "English,Vietnamese,Indonesian": 0.1590909090909091, + "English,Vietnamese,Filipino": 0.1534090909090909, + "English,Vietnamese,Spanish": 0.25, + "English,Vietnamese,Malay": 0.14772727272727273, + "English,Chinese,Indonesian": 0.125, + "English,Chinese,Filipino": 0.11931818181818182, + "English,Chinese,Spanish": 0.16477272727272727, + "English,Chinese,Malay": 0.11931818181818182, + "English,Indonesian,Filipino": 0.16477272727272727, + "English,Indonesian,Spanish": 0.17613636363636365, + "English,Indonesian,Malay": 0.17045454545454544, + "English,Filipino,Spanish": 0.17613636363636365, + "English,Filipino,Malay": 0.13636363636363635, + "English,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian": 0.17045454545454544, + "Vietnamese,Chinese,Filipino": 0.16477272727272727, + "Vietnamese,Chinese,Spanish": 0.22727272727272727, + "Vietnamese,Chinese,Malay": 0.16477272727272727, + "Vietnamese,Indonesian,Filipino": 0.19886363636363635, + "Vietnamese,Indonesian,Spanish": 0.19886363636363635, + "Vietnamese,Indonesian,Malay": 0.23295454545454544, + "Vietnamese,Filipino,Spanish": 0.1875, + "Vietnamese,Filipino,Malay": 0.19886363636363635, + "Vietnamese,Spanish,Malay": 0.18181818181818182, + "Chinese,Indonesian,Filipino": 0.125, + "Chinese,Indonesian,Spanish": 0.14772727272727273, + "Chinese,Indonesian,Malay": 0.17613636363636365, + "Chinese,Filipino,Spanish": 0.1534090909090909, + "Chinese,Filipino,Malay": 0.1534090909090909, + "Chinese,Spanish,Malay": 0.13636363636363635, + "Indonesian,Filipino,Spanish": 0.17613636363636365, + "Indonesian,Filipino,Malay": 0.26136363636363635, + "Indonesian,Spanish,Malay": 0.19318181818181818, + "Filipino,Spanish,Malay": 0.1534090909090909 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.0625, + "English,Vietnamese,Chinese,Filipino": 0.06818181818181818, + "English,Vietnamese,Chinese,Spanish": 0.10795454545454546, + "English,Vietnamese,Chinese,Malay": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino": 0.08522727272727272, + "English,Vietnamese,Indonesian,Spanish": 0.11363636363636363, + "English,Vietnamese,Indonesian,Malay": 0.07386363636363637, + "English,Vietnamese,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Spanish,Malay": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino": 0.05113636363636364, + "English,Chinese,Indonesian,Spanish": 0.07386363636363637, + "English,Chinese,Indonesian,Malay": 0.0625, + "English,Chinese,Filipino,Spanish": 0.06818181818181818, + "English,Chinese,Filipino,Malay": 0.045454545454545456, + "English,Chinese,Spanish,Malay": 0.06818181818181818, + "English,Indonesian,Filipino,Spanish": 0.09659090909090909, + "English,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Filipino,Spanish,Malay": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Spanish": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Filipino,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Filipino,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "Vietnamese,Indonesian,Filipino,Malay": 0.14204545454545456, + "Vietnamese,Indonesian,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Filipino,Spanish,Malay": 0.10227272727272728, + "Chinese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "Indonesian,Filipino,Spanish,Malay": 0.11931818181818182 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.03977272727272727, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.028409090909090908, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino,Malay": 0.022727272727272728, + "English,Vietnamese,Chinese,Spanish,Malay": 0.03977272727272727, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.0625, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "English,Chinese,Indonesian,Filipino,Malay": 0.03409090909090909, + "English,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "English,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, + "English,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0625, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + } + }, + "AC3_2": 0.31536344652563514, + "AC3_3": 0.2094304996256451, + "AC3_4": 0.1294972119585747, + "AC3_5": 0.07666177008222064, + "AC3_6": 0.046050524802104476, + "AC3_7": 0.032074263753273484 + }, + "prompt_3": { + "overall_acc": 0.23214285714285715, + "language_acc": { + "English": 0.25, + "Vietnamese": 0.22727272727272727, + "Chinese": 0.26704545454545453, + "Indonesian": 0.21022727272727273, + "Filipino": 0.2215909090909091, + "Spanish": 0.2784090909090909, + "Malay": 0.17045454545454544 + }, + "consistency_score_2": 0.34794372294372294, + "consistency_score_3": 0.14659090909090908, + "consistency_score_4": 0.06996753246753247, + "consistency_score_5": 0.03706709956709956, + "consistency_score_6": 0.022727272727272728, + "consistency_score_7": 0.017045454545454544, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3409090909090909, + "English,Chinese": 0.2727272727272727, + "English,Indonesian": 0.3352272727272727, + "English,Filipino": 0.32386363636363635, + "English,Spanish": 0.3522727272727273, + "English,Malay": 0.32954545454545453, + "Vietnamese,Chinese": 0.36363636363636365, + "Vietnamese,Indonesian": 0.32386363636363635, + "Vietnamese,Filipino": 0.32386363636363635, + "Vietnamese,Spanish": 0.4375, + "Vietnamese,Malay": 0.32954545454545453, + "Chinese,Indonesian": 0.35795454545454547, + "Chinese,Filipino": 0.2897727272727273, + "Chinese,Spanish": 0.35795454545454547, + "Chinese,Malay": 0.39204545454545453, + "Indonesian,Filipino": 0.3352272727272727, + "Indonesian,Spanish": 0.36363636363636365, + "Indonesian,Malay": 0.4431818181818182, + "Filipino,Spanish": 0.3125, + "Filipino,Malay": 0.4090909090909091, + "Spanish,Malay": 0.3125 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.14204545454545456, + "English,Vietnamese,Indonesian": 0.13068181818181818, + "English,Vietnamese,Filipino": 0.11931818181818182, + "English,Vietnamese,Spanish": 0.21022727272727273, + "English,Vietnamese,Malay": 0.125, + "English,Chinese,Indonesian": 0.11363636363636363, + "English,Chinese,Filipino": 0.07386363636363637, + "English,Chinese,Spanish": 0.14204545454545456, + "English,Chinese,Malay": 0.11931818181818182, + "English,Indonesian,Filipino": 0.13068181818181818, + "English,Indonesian,Spanish": 0.14204545454545456, + "English,Indonesian,Malay": 0.16477272727272727, + "English,Filipino,Spanish": 0.13636363636363635, + "English,Filipino,Malay": 0.16477272727272727, + "English,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian": 0.13636363636363635, + "Vietnamese,Chinese,Filipino": 0.10227272727272728, + "Vietnamese,Chinese,Spanish": 0.1875, + "Vietnamese,Chinese,Malay": 0.1534090909090909, + "Vietnamese,Indonesian,Filipino": 0.11931818181818182, + "Vietnamese,Indonesian,Spanish": 0.16477272727272727, + "Vietnamese,Indonesian,Malay": 0.16477272727272727, + "Vietnamese,Filipino,Spanish": 0.14772727272727273, + "Vietnamese,Filipino,Malay": 0.1590909090909091, + "Vietnamese,Spanish,Malay": 0.14204545454545456, + "Chinese,Indonesian,Filipino": 0.125, + "Chinese,Indonesian,Spanish": 0.16477272727272727, + "Chinese,Indonesian,Malay": 0.19318181818181818, + "Chinese,Filipino,Spanish": 0.10227272727272728, + "Chinese,Filipino,Malay": 0.16477272727272727, + "Chinese,Spanish,Malay": 0.1590909090909091, + "Indonesian,Filipino,Spanish": 0.14204545454545456, + "Indonesian,Filipino,Malay": 0.2159090909090909, + "Indonesian,Spanish,Malay": 0.18181818181818182, + "Filipino,Spanish,Malay": 0.1590909090909091 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.07954545454545454, + "English,Vietnamese,Chinese,Filipino": 0.028409090909090908, + "English,Vietnamese,Chinese,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Malay": 0.0625, + "English,Vietnamese,Indonesian,Filipino": 0.045454545454545456, + "English,Vietnamese,Indonesian,Spanish": 0.08522727272727272, + "English,Vietnamese,Indonesian,Malay": 0.06818181818181818, + "English,Vietnamese,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Filipino,Malay": 0.056818181818181816, + "English,Vietnamese,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino": 0.03409090909090909, + "English,Chinese,Indonesian,Spanish": 0.07386363636363637, + "English,Chinese,Indonesian,Malay": 0.05113636363636364, + "English,Chinese,Filipino,Spanish": 0.03409090909090909, + "English,Chinese,Filipino,Malay": 0.05113636363636364, + "English,Chinese,Spanish,Malay": 0.0625, + "English,Indonesian,Filipino,Spanish": 0.0625, + "English,Indonesian,Filipino,Malay": 0.09659090909090909, + "English,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Filipino,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Filipino": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Spanish": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Filipino,Spanish": 0.056818181818181816, + "Vietnamese,Chinese,Filipino,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Spanish,Malay": 0.07954545454545454, + "Vietnamese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "Vietnamese,Indonesian,Filipino,Malay": 0.08522727272727272, + "Vietnamese,Indonesian,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Filipino,Spanish,Malay": 0.08522727272727272, + "Chinese,Indonesian,Filipino,Spanish": 0.0625, + "Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "Chinese,Indonesian,Spanish,Malay": 0.09659090909090909, + "Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "Indonesian,Filipino,Spanish,Malay": 0.08522727272727272 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.03977272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino,Malay": 0.022727272727272728, + "English,Vietnamese,Chinese,Spanish,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.03409090909090909, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.03977272727272727, + "English,Vietnamese,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino,Spanish": 0.022727272727272728, + "English,Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, + "English,Chinese,Indonesian,Spanish,Malay": 0.028409090909090908, + "English,Chinese,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + } + }, + "AC3_2": 0.27848480805433795, + "AC3_3": 0.17970424341590321, + "AC3_4": 0.10752667533095456, + "AC3_5": 0.06392677671710101, + "AC3_6": 0.041401273869106255, + "AC3_7": 0.03175895764197816 + }, + "prompt_4": { + "overall_acc": 0.25892857142857145, + "language_acc": { + "English": 0.2840909090909091, + "Vietnamese": 0.2784090909090909, + "Chinese": 0.2215909090909091, + "Indonesian": 0.2215909090909091, + "Filipino": 0.26704545454545453, + "Spanish": 0.2840909090909091, + "Malay": 0.2556818181818182 + }, + "consistency_score_2": 0.48322510822510817, + "consistency_score_3": 0.28863636363636375, + "consistency_score_4": 0.18506493506493504, + "consistency_score_5": 0.1209415584415584, + "consistency_score_6": 0.07873376623376624, + "consistency_score_7": 0.05113636363636364, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4147727272727273, + "English,Chinese": 0.4034090909090909, + "English,Indonesian": 0.5056818181818182, + "English,Filipino": 0.3522727272727273, + "English,Spanish": 0.4375, + "English,Malay": 0.6022727272727273, + "Vietnamese,Chinese": 0.375, + "Vietnamese,Indonesian": 0.5909090909090909, + "Vietnamese,Filipino": 0.5284090909090909, + "Vietnamese,Spanish": 0.4659090909090909, + "Vietnamese,Malay": 0.5909090909090909, + "Chinese,Indonesian": 0.375, + "Chinese,Filipino": 0.3522727272727273, + "Chinese,Spanish": 0.36363636363636365, + "Chinese,Malay": 0.4147727272727273, + "Indonesian,Filipino": 0.5852272727272727, + "Indonesian,Spanish": 0.5113636363636364, + "Indonesian,Malay": 0.75, + "Filipino,Spanish": 0.4147727272727273, + "Filipino,Malay": 0.5568181818181818, + "Spanish,Malay": 0.5568181818181818 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.17045454545454544, + "English,Vietnamese,Indonesian": 0.30113636363636365, + "English,Vietnamese,Filipino": 0.21022727272727273, + "English,Vietnamese,Spanish": 0.23295454545454544, + "English,Vietnamese,Malay": 0.3522727272727273, + "English,Chinese,Indonesian": 0.22727272727272727, + "English,Chinese,Filipino": 0.1590909090909091, + "English,Chinese,Spanish": 0.21022727272727273, + "English,Chinese,Malay": 0.2727272727272727, + "English,Indonesian,Filipino": 0.2897727272727273, + "English,Indonesian,Spanish": 0.29545454545454547, + "English,Indonesian,Malay": 0.4659090909090909, + "English,Filipino,Spanish": 0.19886363636363635, + "English,Filipino,Malay": 0.3125, + "English,Spanish,Malay": 0.3465909090909091, + "Vietnamese,Chinese,Indonesian": 0.22727272727272727, + "Vietnamese,Chinese,Filipino": 0.2159090909090909, + "Vietnamese,Chinese,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Malay": 0.2556818181818182, + "Vietnamese,Indonesian,Filipino": 0.3806818181818182, + "Vietnamese,Indonesian,Spanish": 0.3522727272727273, + "Vietnamese,Indonesian,Malay": 0.48295454545454547, + "Vietnamese,Filipino,Spanish": 0.2784090909090909, + "Vietnamese,Filipino,Malay": 0.36363636363636365, + "Vietnamese,Spanish,Malay": 0.3522727272727273, + "Chinese,Indonesian,Filipino": 0.24431818181818182, + "Chinese,Indonesian,Spanish": 0.19886363636363635, + "Chinese,Indonesian,Malay": 0.3068181818181818, + "Chinese,Filipino,Spanish": 0.17613636363636365, + "Chinese,Filipino,Malay": 0.25, + "Chinese,Spanish,Malay": 0.23863636363636365, + "Indonesian,Filipino,Spanish": 0.3125, + "Indonesian,Filipino,Malay": 0.4659090909090909, + "Indonesian,Spanish,Malay": 0.45454545454545453, + "Filipino,Spanish,Malay": 0.3068181818181818 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino": 0.08522727272727272, + "English,Vietnamese,Chinese,Spanish": 0.11363636363636363, + "English,Vietnamese,Chinese,Malay": 0.1534090909090909, + "English,Vietnamese,Indonesian,Filipino": 0.18181818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.20454545454545456, + "English,Vietnamese,Indonesian,Malay": 0.2897727272727273, + "English,Vietnamese,Filipino,Spanish": 0.125, + "English,Vietnamese,Filipino,Malay": 0.19318181818181818, + "English,Vietnamese,Spanish,Malay": 0.2159090909090909, + "English,Chinese,Indonesian,Filipino": 0.13636363636363635, + "English,Chinese,Indonesian,Spanish": 0.14204545454545456, + "English,Chinese,Indonesian,Malay": 0.21022727272727273, + "English,Chinese,Filipino,Spanish": 0.09090909090909091, + "English,Chinese,Filipino,Malay": 0.1534090909090909, + "English,Chinese,Spanish,Malay": 0.18181818181818182, + "English,Indonesian,Filipino,Spanish": 0.18181818181818182, + "English,Indonesian,Filipino,Malay": 0.2840909090909091, + "English,Indonesian,Spanish,Malay": 0.2897727272727273, + "English,Filipino,Spanish,Malay": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Filipino": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Spanish": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Malay": 0.19886363636363635, + "Vietnamese,Chinese,Filipino,Spanish": 0.11931818181818182, + "Vietnamese,Chinese,Filipino,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Indonesian,Filipino,Spanish": 0.2215909090909091, + "Vietnamese,Indonesian,Filipino,Malay": 0.3068181818181818, + "Vietnamese,Indonesian,Spanish,Malay": 0.3125, + "Vietnamese,Filipino,Spanish,Malay": 0.2215909090909091, + "Chinese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "Chinese,Indonesian,Filipino,Malay": 0.21022727272727273, + "Chinese,Indonesian,Spanish,Malay": 0.18181818181818182, + "Chinese,Filipino,Spanish,Malay": 0.14204545454545456, + "Indonesian,Filipino,Spanish,Malay": 0.2840909090909091 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.056818181818181816, + "English,Vietnamese,Chinese,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Spanish,Malay": 0.10227272727272728, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.17613636363636365, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.19886363636363635, + "English,Vietnamese,Filipino,Spanish,Malay": 0.11931818181818182, + "English,Chinese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "English,Chinese,Indonesian,Filipino,Malay": 0.13636363636363635, + "English,Chinese,Indonesian,Spanish,Malay": 0.14204545454545456, + "English,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Indonesian,Filipino,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.19886363636363635, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.125 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + } + }, + "AC3_2": 0.3371829591754409, + "AC3_3": 0.2729765786634588, + "AC3_4": 0.2158527030070119, + "AC3_5": 0.16487332107991856, + "AC3_6": 0.12075034337083275, + "AC3_7": 0.08540575913475919 + }, + "prompt_5": { + "overall_acc": 0.2654220779220779, + "language_acc": { + "English": 0.30113636363636365, + "Vietnamese": 0.29545454545454547, + "Chinese": 0.22727272727272727, + "Indonesian": 0.22727272727272727, + "Filipino": 0.2556818181818182, + "Spanish": 0.29545454545454547, + "Malay": 0.2556818181818182 + }, + "consistency_score_2": 0.44101731601731614, + "consistency_score_3": 0.24058441558441562, + "consistency_score_4": 0.14659090909090908, + "consistency_score_5": 0.0963203463203463, + "consistency_score_6": 0.06574675324675323, + "consistency_score_7": 0.045454545454545456, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.39204545454545453, + "English,Chinese": 0.3806818181818182, + "English,Indonesian": 0.4659090909090909, + "English,Filipino": 0.3806818181818182, + "English,Spanish": 0.45454545454545453, + "English,Malay": 0.4602272727272727, + "Vietnamese,Chinese": 0.3181818181818182, + "Vietnamese,Indonesian": 0.4431818181818182, + "Vietnamese,Filipino": 0.5170454545454546, + "Vietnamese,Spanish": 0.375, + "Vietnamese,Malay": 0.5, + "Chinese,Indonesian": 0.3693181818181818, + "Chinese,Filipino": 0.3181818181818182, + "Chinese,Spanish": 0.3522727272727273, + "Chinese,Malay": 0.35795454545454547, + "Indonesian,Filipino": 0.5681818181818182, + "Indonesian,Spanish": 0.5, + "Indonesian,Malay": 0.6420454545454546, + "Filipino,Spanish": 0.4034090909090909, + "Filipino,Malay": 0.5852272727272727, + "Spanish,Malay": 0.4772727272727273 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.1590909090909091, + "English,Vietnamese,Indonesian": 0.24431818181818182, + "English,Vietnamese,Filipino": 0.2159090909090909, + "English,Vietnamese,Spanish": 0.20454545454545456, + "English,Vietnamese,Malay": 0.24431818181818182, + "English,Chinese,Indonesian": 0.2215909090909091, + "English,Chinese,Filipino": 0.1534090909090909, + "English,Chinese,Spanish": 0.19318181818181818, + "English,Chinese,Malay": 0.1875, + "English,Indonesian,Filipino": 0.26136363636363635, + "English,Indonesian,Spanish": 0.2727272727272727, + "English,Indonesian,Malay": 0.32954545454545453, + "English,Filipino,Spanish": 0.21022727272727273, + "English,Filipino,Malay": 0.2556818181818182, + "English,Spanish,Malay": 0.26704545454545453, + "Vietnamese,Chinese,Indonesian": 0.17045454545454544, + "Vietnamese,Chinese,Filipino": 0.1534090909090909, + "Vietnamese,Chinese,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Malay": 0.1875, + "Vietnamese,Indonesian,Filipino": 0.29545454545454547, + "Vietnamese,Indonesian,Spanish": 0.26704545454545453, + "Vietnamese,Indonesian,Malay": 0.32386363636363635, + "Vietnamese,Filipino,Spanish": 0.2215909090909091, + "Vietnamese,Filipino,Malay": 0.3409090909090909, + "Vietnamese,Spanish,Malay": 0.2556818181818182, + "Chinese,Indonesian,Filipino": 0.21022727272727273, + "Chinese,Indonesian,Spanish": 0.21022727272727273, + "Chinese,Indonesian,Malay": 0.2556818181818182, + "Chinese,Filipino,Spanish": 0.14772727272727273, + "Chinese,Filipino,Malay": 0.21022727272727273, + "Chinese,Spanish,Malay": 0.1875, + "Indonesian,Filipino,Spanish": 0.3125, + "Indonesian,Filipino,Malay": 0.4147727272727273, + "Indonesian,Spanish,Malay": 0.36363636363636365, + "Filipino,Spanish,Malay": 0.3068181818181818 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.11363636363636363, + "English,Vietnamese,Chinese,Filipino": 0.056818181818181816, + "English,Vietnamese,Chinese,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Malay": 0.10795454545454546, + "English,Vietnamese,Indonesian,Filipino": 0.1590909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.17045454545454544, + "English,Vietnamese,Indonesian,Malay": 0.1875, + "English,Vietnamese,Filipino,Spanish": 0.11931818181818182, + "English,Vietnamese,Filipino,Malay": 0.14772727272727273, + "English,Vietnamese,Spanish,Malay": 0.14772727272727273, + "English,Chinese,Indonesian,Filipino": 0.125, + "English,Chinese,Indonesian,Spanish": 0.13636363636363635, + "English,Chinese,Indonesian,Malay": 0.1534090909090909, + "English,Chinese,Filipino,Spanish": 0.08522727272727272, + "English,Chinese,Filipino,Malay": 0.11931818181818182, + "English,Chinese,Spanish,Malay": 0.11363636363636363, + "English,Indonesian,Filipino,Spanish": 0.17045454545454544, + "English,Indonesian,Filipino,Malay": 0.21022727272727273, + "English,Indonesian,Spanish,Malay": 0.21022727272727273, + "English,Filipino,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.125, + "Vietnamese,Chinese,Indonesian,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Filipino,Spanish": 0.08522727272727272, + "Vietnamese,Chinese,Filipino,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Indonesian,Filipino,Spanish": 0.18181818181818182, + "Vietnamese,Indonesian,Filipino,Malay": 0.23295454545454544, + "Vietnamese,Indonesian,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Filipino,Spanish,Malay": 0.17613636363636365, + "Chinese,Indonesian,Filipino,Spanish": 0.125, + "Chinese,Indonesian,Filipino,Malay": 0.17045454545454544, + "Chinese,Indonesian,Spanish,Malay": 0.1590909090909091, + "Chinese,Filipino,Spanish,Malay": 0.13068181818181818, + "Indonesian,Filipino,Spanish,Malay": 0.26704545454545453 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.09090909090909091, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.125, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.13068181818181818, + "English,Vietnamese,Filipino,Spanish,Malay": 0.09659090909090909, + "English,Chinese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "English,Chinese,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Spanish,Malay": 0.10227272727272728, + "English,Chinese,Filipino,Spanish,Malay": 0.07954545454545454, + "English,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + } + }, + "AC3_2": 0.33139638985196856, + "AC3_3": 0.2523936601878616, + "AC3_4": 0.1888700837710515, + "AC3_5": 0.14134668618420174, + "AC3_6": 0.10538817796664901, + "AC3_7": 0.07761690004624106 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.30097087378640774 + }, + "prompt_2": { + "accuracy": 0.30097087378640774 + }, + "prompt_3": { + "accuracy": 0.2524271844660194 + }, + "prompt_4": { + "accuracy": 0.2621359223300971 + }, + "prompt_5": { + "accuracy": 0.2524271844660194 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.22857142857142856 + }, + "prompt_2": { + "accuracy": 0.26666666666666666 + }, + "prompt_3": { + "accuracy": 0.22857142857142856 + }, + "prompt_4": { + "accuracy": 0.21904761904761905 + }, + "prompt_5": { + "accuracy": 0.23809523809523808 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3364485981308411 + }, + "prompt_2": { + "accuracy": 0.18691588785046728 + }, + "prompt_3": { + "accuracy": 0.2336448598130841 + }, + "prompt_4": { + "accuracy": 0.2897196261682243 + }, + "prompt_5": { + "accuracy": 0.2897196261682243 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.34, + "category_acc": { + "brand": 0.6, + "demographics": 0.4, + "biology": 0.5, + "history": 0.4666666666666667, + "literature": 0.3, + "politics": 0.6, + "culture": 0.0, + "film": 0.3, + "law": 0.1, + "geography": 0.1 + } + }, + "prompt_2": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.2, + "demographics": 0.0, + "biology": 0.4, + "history": 0.3333333333333333, + "literature": 0.6, + "politics": 0.3, + "culture": 0.5, + "film": 0.1, + "law": 0.5, + "geography": 0.1 + } + }, + "prompt_3": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.3, + "history": 0.26666666666666666, + "literature": 0.3, + "politics": 0.7, + "culture": 0.2, + "film": 0.3, + "law": 0.2, + "geography": 0.2 + } + }, + "prompt_4": { + "accuracy": 0.39, + "category_acc": { + "brand": 0.7, + "demographics": 0.4, + "biology": 0.2, + "history": 0.26666666666666666, + "literature": 0.4, + "politics": 0.7, + "culture": 0.4, + "film": 0.3, + "law": 0.3, + "geography": 0.3 + } + }, + "prompt_5": { + "accuracy": 0.3, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.3, + "history": 0.2, + "literature": 0.3, + "politics": 0.7, + "culture": 0.2, + "film": 0.3, + "law": 0.2, + "geography": 0.1 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.06613108306758435 + }, + "prompt_2": { + "bleu_score": 0.07390527306844838 + }, + "prompt_3": { + "bleu_score": 0.07260635552063222 + }, + "prompt_4": { + "bleu_score": 0.06446905230287721 + }, + "prompt_5": { + "bleu_score": 0.05080385202957262 + } }, "indommlu": { "prompt_1": -1, @@ -3102,179 +27887,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.05796852492790938 + }, + "prompt_2": { + "bleu_score": 0.05838116957393706 + }, + "prompt_3": { + "bleu_score": 0.05334620997421614 + }, + "prompt_4": { + "bleu_score": 0.0729294336876194 + }, + "prompt_5": { + "bleu_score": 0.0565648095306441 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.022937848919210767 + }, + "prompt_2": { + "bleu_score": 0.024457977823295552 + }, + "prompt_3": { + "bleu_score": 0.02225284052793872 + }, + "prompt_4": { + "bleu_score": 0.02752159526708015 + }, + "prompt_5": { + "bleu_score": 0.023340309267861667 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.04039321007020438 + }, + "prompt_2": { + "bleu_score": 0.048201400507552596 + }, + "prompt_3": { + "bleu_score": 0.03547587624246876 + }, + "prompt_4": { + "bleu_score": 0.052883641461521864 + }, + "prompt_5": { + "bleu_score": 0.0426839455500516 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.05361503908462424 + }, + "prompt_2": { + "bleu_score": 0.054976111754401445 + }, + "prompt_3": { + "bleu_score": 0.04937043891109955 + }, + "prompt_4": { + "bleu_score": 0.06599850972882783 + }, + "prompt_5": { + "bleu_score": 0.05392316238305716 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2847141190198366 + }, + "prompt_2": { + "accuracy": 0.2765460910151692 + }, + "prompt_3": { + "accuracy": 0.28588098016336055 + }, + "prompt_4": { + "accuracy": 0.3162193698949825 + }, + "prompt_5": { + "accuracy": 0.30921820303383896 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2813013943510905, + "category_acc": { + "high_school_european_history": 0.32926829268292684, + "business_ethics": 0.25252525252525254, + "clinical_knowledge": 0.26515151515151514, + "medical_genetics": 0.3434343434343434, + "high_school_us_history": 0.3448275862068966, + "high_school_physics": 0.24666666666666667, + "high_school_world_history": 0.326271186440678, + "virology": 0.24242424242424243, + "high_school_microeconomics": 0.24472573839662448, + "econometrics": 0.20353982300884957, + "college_computer_science": 0.29292929292929293, + "high_school_biology": 0.2977346278317152, + "abstract_algebra": 0.25252525252525254, + "professional_accounting": 0.23843416370106763, + "philosophy": 0.2645161290322581, + "professional_medicine": 0.2066420664206642, + "nutrition": 0.3639344262295082, + "global_facts": 0.21212121212121213, + "machine_learning": 0.27927927927927926, + "security_studies": 0.2540983606557377, + "public_relations": 0.21100917431192662, + "professional_psychology": 0.26677577741407527, + "prehistory": 0.2848297213622291, + "anatomy": 0.26119402985074625, + "human_sexuality": 0.26153846153846155, + "college_medicine": 0.29651162790697677, + "high_school_government_and_politics": 0.34375, + "college_chemistry": 0.1717171717171717, + "logical_fallacies": 0.3333333333333333, + "high_school_geography": 0.24873096446700507, + "elementary_mathematics": 0.2493368700265252, + "human_aging": 0.25225225225225223, + "college_mathematics": 0.25252525252525254, + "high_school_psychology": 0.27205882352941174, + "formal_logic": 0.28, + "high_school_statistics": 0.2651162790697674, + "international_law": 0.4, + "high_school_mathematics": 0.23048327137546468, + "high_school_computer_science": 0.32323232323232326, + "conceptual_physics": 0.29914529914529914, + "miscellaneous": 0.2595907928388747, + "high_school_chemistry": 0.26732673267326734, + "marketing": 0.38197424892703863, + "professional_law": 0.294194390084801, + "management": 0.23529411764705882, + "college_physics": 0.26732673267326734, + "jurisprudence": 0.308411214953271, + "world_religions": 0.3764705882352941, + "sociology": 0.31, + "us_foreign_policy": 0.47474747474747475, + "high_school_macroeconomics": 0.2544987146529563, + "computer_security": 0.35353535353535354, + "moral_scenarios": 0.2695749440715884, + "moral_disputes": 0.28695652173913044, + "electrical_engineering": 0.2777777777777778, + "astronomy": 0.2913907284768212, + "college_biology": 0.32867132867132864 + } + }, + "prompt_2": { + "accuracy": 0.2772255988559171, + "category_acc": { + "high_school_european_history": 0.3902439024390244, + "business_ethics": 0.2828282828282828, + "clinical_knowledge": 0.2878787878787879, + "medical_genetics": 0.31313131313131315, + "high_school_us_history": 0.3251231527093596, + "high_school_physics": 0.3, + "high_school_world_history": 0.3389830508474576, + "virology": 0.2606060606060606, + "high_school_microeconomics": 0.24472573839662448, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.26262626262626265, + "high_school_biology": 0.2459546925566343, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.28113879003558717, + "philosophy": 0.2967741935483871, + "professional_medicine": 0.28044280442804426, + "nutrition": 0.3180327868852459, + "global_facts": 0.2727272727272727, + "machine_learning": 0.22522522522522523, + "security_studies": 0.3483606557377049, + "public_relations": 0.26605504587155965, + "professional_psychology": 0.28477905073649756, + "prehistory": 0.28173374613003094, + "anatomy": 0.27611940298507465, + "human_sexuality": 0.24615384615384617, + "college_medicine": 0.29651162790697677, + "high_school_government_and_politics": 0.3229166666666667, + "college_chemistry": 0.26262626262626265, + "logical_fallacies": 0.36419753086419754, + "high_school_geography": 0.2436548223350254, + "elementary_mathematics": 0.23076923076923078, + "human_aging": 0.27927927927927926, + "college_mathematics": 0.23232323232323232, + "high_school_psychology": 0.24080882352941177, + "formal_logic": 0.384, + "high_school_statistics": 0.2930232558139535, + "international_law": 0.39166666666666666, + "high_school_mathematics": 0.25650557620817843, + "high_school_computer_science": 0.35353535353535354, + "conceptual_physics": 0.24786324786324787, + "miscellaneous": 0.23785166240409208, + "high_school_chemistry": 0.26732673267326734, + "marketing": 0.24892703862660945, + "professional_law": 0.27332028701891714, + "management": 0.16666666666666666, + "college_physics": 0.26732673267326734, + "jurisprudence": 0.2897196261682243, + "world_religions": 0.3352941176470588, + "sociology": 0.3, + "us_foreign_policy": 0.3434343434343434, + "high_school_macroeconomics": 0.2570694087403599, + "computer_security": 0.31313131313131315, + "moral_scenarios": 0.22818791946308725, + "moral_disputes": 0.3101449275362319, + "electrical_engineering": 0.3194444444444444, + "astronomy": 0.2913907284768212, + "college_biology": 0.2937062937062937 + } + }, + "prompt_3": { + "accuracy": 0.27908473364318914, + "category_acc": { + "high_school_european_history": 0.3902439024390244, + "business_ethics": 0.20202020202020202, + "clinical_knowledge": 0.2537878787878788, + "medical_genetics": 0.32323232323232326, + "high_school_us_history": 0.3054187192118227, + "high_school_physics": 0.28, + "high_school_world_history": 0.3135593220338983, + "virology": 0.18787878787878787, + "high_school_microeconomics": 0.2489451476793249, + "econometrics": 0.2920353982300885, + "college_computer_science": 0.18181818181818182, + "high_school_biology": 0.33980582524271846, + "abstract_algebra": 0.30303030303030304, + "professional_accounting": 0.2846975088967972, + "philosophy": 0.2838709677419355, + "professional_medicine": 0.24723247232472326, + "nutrition": 0.3377049180327869, + "global_facts": 0.2727272727272727, + "machine_learning": 0.22522522522522523, + "security_studies": 0.30327868852459017, + "public_relations": 0.22018348623853212, + "professional_psychology": 0.3044189852700491, + "prehistory": 0.2786377708978328, + "anatomy": 0.3283582089552239, + "human_sexuality": 0.3, + "college_medicine": 0.313953488372093, + "high_school_government_and_politics": 0.390625, + "college_chemistry": 0.23232323232323232, + "logical_fallacies": 0.2777777777777778, + "high_school_geography": 0.29441624365482233, + "elementary_mathematics": 0.26525198938992045, + "human_aging": 0.2072072072072072, + "college_mathematics": 0.30303030303030304, + "high_school_psychology": 0.25919117647058826, + "formal_logic": 0.384, + "high_school_statistics": 0.2744186046511628, + "international_law": 0.375, + "high_school_mathematics": 0.26765799256505574, + "high_school_computer_science": 0.30303030303030304, + "conceptual_physics": 0.18803418803418803, + "miscellaneous": 0.24936061381074168, + "high_school_chemistry": 0.21782178217821782, + "marketing": 0.31759656652360513, + "professional_law": 0.27266797129810827, + "management": 0.21568627450980393, + "college_physics": 0.25742574257425743, + "jurisprudence": 0.27102803738317754, + "world_religions": 0.2647058823529412, + "sociology": 0.29, + "us_foreign_policy": 0.35353535353535354, + "high_school_macroeconomics": 0.2750642673521851, + "computer_security": 0.31313131313131315, + "moral_scenarios": 0.24608501118568232, + "moral_disputes": 0.3130434782608696, + "electrical_engineering": 0.3263888888888889, + "astronomy": 0.33774834437086093, + "college_biology": 0.2727272727272727 + } + }, + "prompt_4": { + "accuracy": 0.29831962817304253, + "category_acc": { + "high_school_european_history": 0.4024390243902439, + "business_ethics": 0.30303030303030304, + "clinical_knowledge": 0.30303030303030304, + "medical_genetics": 0.40404040404040403, + "high_school_us_history": 0.33497536945812806, + "high_school_physics": 0.26, + "high_school_world_history": 0.326271186440678, + "virology": 0.23636363636363636, + "high_school_microeconomics": 0.270042194092827, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.29292929292929293, + "high_school_biology": 0.3042071197411003, + "abstract_algebra": 0.20202020202020202, + "professional_accounting": 0.2277580071174377, + "philosophy": 0.29354838709677417, + "professional_medicine": 0.2140221402214022, + "nutrition": 0.380327868852459, + "global_facts": 0.26262626262626265, + "machine_learning": 0.25225225225225223, + "security_studies": 0.22950819672131148, + "public_relations": 0.21100917431192662, + "professional_psychology": 0.2945990180032733, + "prehistory": 0.3126934984520124, + "anatomy": 0.3283582089552239, + "human_sexuality": 0.3153846153846154, + "college_medicine": 0.2441860465116279, + "high_school_government_and_politics": 0.3541666666666667, + "college_chemistry": 0.24242424242424243, + "logical_fallacies": 0.3765432098765432, + "high_school_geography": 0.29949238578680204, + "elementary_mathematics": 0.26790450928381965, + "human_aging": 0.22072072072072071, + "college_mathematics": 0.31313131313131315, + "high_school_psychology": 0.30330882352941174, + "formal_logic": 0.288, + "high_school_statistics": 0.29767441860465116, + "international_law": 0.4583333333333333, + "high_school_mathematics": 0.2788104089219331, + "high_school_computer_science": 0.3333333333333333, + "conceptual_physics": 0.3034188034188034, + "miscellaneous": 0.3631713554987212, + "high_school_chemistry": 0.26732673267326734, + "marketing": 0.3776824034334764, + "professional_law": 0.30267449445531636, + "management": 0.29411764705882354, + "college_physics": 0.2376237623762376, + "jurisprudence": 0.27102803738317754, + "world_religions": 0.4588235294117647, + "sociology": 0.325, + "us_foreign_policy": 0.36363636363636365, + "high_school_macroeconomics": 0.2570694087403599, + "computer_security": 0.30303030303030304, + "moral_scenarios": 0.23154362416107382, + "moral_disputes": 0.3159420289855073, + "electrical_engineering": 0.2777777777777778, + "astronomy": 0.31125827814569534, + "college_biology": 0.3776223776223776 + } + }, + "prompt_5": { + "accuracy": 0.3021809081158384, + "category_acc": { + "high_school_european_history": 0.3902439024390244, + "business_ethics": 0.29292929292929293, + "clinical_knowledge": 0.3106060606060606, + "medical_genetics": 0.35353535353535354, + "high_school_us_history": 0.3448275862068966, + "high_school_physics": 0.3, + "high_school_world_history": 0.3516949152542373, + "virology": 0.2787878787878788, + "high_school_microeconomics": 0.23628691983122363, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.2828282828282828, + "high_school_biology": 0.2977346278317152, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.298932384341637, + "philosophy": 0.3096774193548387, + "professional_medicine": 0.22878228782287824, + "nutrition": 0.380327868852459, + "global_facts": 0.1717171717171717, + "machine_learning": 0.21621621621621623, + "security_studies": 0.25, + "public_relations": 0.13761467889908258, + "professional_psychology": 0.3011456628477905, + "prehistory": 0.3126934984520124, + "anatomy": 0.26119402985074625, + "human_sexuality": 0.25384615384615383, + "college_medicine": 0.28488372093023256, + "high_school_government_and_politics": 0.3177083333333333, + "college_chemistry": 0.29292929292929293, + "logical_fallacies": 0.345679012345679, + "high_school_geography": 0.27918781725888325, + "elementary_mathematics": 0.2546419098143236, + "human_aging": 0.26576576576576577, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.2977941176470588, + "formal_logic": 0.336, + "high_school_statistics": 0.3209302325581395, + "international_law": 0.4666666666666667, + "high_school_mathematics": 0.2825278810408922, + "high_school_computer_science": 0.29292929292929293, + "conceptual_physics": 0.2606837606837607, + "miscellaneous": 0.29539641943734013, + "high_school_chemistry": 0.26732673267326734, + "marketing": 0.4206008583690987, + "professional_law": 0.3111545988258317, + "management": 0.23529411764705882, + "college_physics": 0.26732673267326734, + "jurisprudence": 0.35514018691588783, + "world_religions": 0.3941176470588235, + "sociology": 0.35, + "us_foreign_policy": 0.43434343434343436, + "high_school_macroeconomics": 0.3059125964010283, + "computer_security": 0.37373737373737376, + "moral_scenarios": 0.2785234899328859, + "moral_disputes": 0.37681159420289856, + "electrical_engineering": 0.25, + "astronomy": 0.2582781456953642, + "college_biology": 0.35664335664335667 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.274888558692422 + }, + "prompt_2": { + "accuracy": 0.25928677563150077 + }, + "prompt_3": { + "accuracy": 0.2659732540861813 + }, + "prompt_4": { + "accuracy": 0.2763744427934621 + }, + "prompt_5": { + "accuracy": 0.27340267459138184 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.27459526774595266, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.23809523809523808, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.09523809523809523, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.125, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.38461538461538464, + "middle_school_physics": 0.5416666666666666, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.3, + "business_administration": 0.39473684210526316, + "marxism": 0.375, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.17647058823529413, + "teacher_qualification": 0.22448979591836735, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.3333333333333333, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.23684210526315788, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.25, + "high_school_history": 0.36, + "middle_school_history": 0.18518518518518517, + "civil_servant": 0.23076923076923078, + "sports_science": 0.2916666666666667, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.375, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.11764705882352941, + "accountant": 0.2777777777777778, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.16666666666666666, + "tax_accountant": 0.25925925925925924, + "physician": 0.2777777777777778 + } + }, + "prompt_2": { + "accuracy": 0.2646326276463263, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.2619047619047619, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.16666666666666666, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.26666666666666666, + "business_administration": 0.3684210526315789, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.2413793103448276, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.1836734693877551, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.375, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.14285714285714285, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.37037037037037035, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.21428571428571427, + "art_studies": 0.23684210526315788, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.44, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.2692307692307692, + "sports_science": 0.2916666666666667, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.17647058823529413, + "accountant": 0.2962962962962963, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.2222222222222222, + "physician": 0.14814814814814814 + } + }, + "prompt_3": { + "accuracy": 0.25965130759651306, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.25, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.21428571428571427, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.1724137931034483, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.19047619047619047, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.13043478260869565, + "high_school_physics": 0.25, + "high_school_chemistry": 0.08333333333333333, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.15384615384615385, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.35, + "business_administration": 0.3157894736842105, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.24489795918367346, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.14285714285714285, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.37037037037037035, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.14285714285714285, + "art_studies": 0.2894736842105263, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.16666666666666666, + "high_school_history": 0.4, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.3076923076923077, + "sports_science": 0.25, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.19607843137254902, + "accountant": 0.2962962962962963, + "fire_engineer": 0.1111111111111111, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.2222222222222222, + "physician": 0.2037037037037037 + } + }, + "prompt_4": { + "accuracy": 0.25529265255292655, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.25, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.21428571428571427, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.09523809523809523, + "electrical_engineer": 0.40476190476190477, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.08695652173913043, + "high_school_physics": 0.125, + "high_school_chemistry": 0.125, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.23076923076923078, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.24, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.13333333333333333, + "business_administration": 0.3684210526315789, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.4827586206896552, + "education_science": 0.17647058823529413, + "teacher_qualification": 0.10204081632653061, + "high_school_politics": 0.25, + "high_school_geography": 0.375, + "middle_school_politics": 0.23076923076923078, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.17857142857142858, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.18518518518518517, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.39285714285714285, + "art_studies": 0.2631578947368421, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.25, + "high_school_chinese": 0.25, + "high_school_history": 0.32, + "middle_school_history": 0.18518518518518517, + "civil_servant": 0.3076923076923077, + "sports_science": 0.20833333333333334, + "plant_protection": 0.18518518518518517, + "basic_medicine": 0.375, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.21568627450980393, + "accountant": 0.2222222222222222, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.2037037037037037, + "physician": 0.2222222222222222 + } + }, + "prompt_5": { + "accuracy": 0.263387297633873, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.16666666666666666, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.2619047619047619, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.047619047619047616, + "electrical_engineer": 0.35714285714285715, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.08333333333333333, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.19230769230769232, + "middle_school_physics": 0.4583333333333333, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.35714285714285715, + "college_economics": 0.26666666666666666, + "business_administration": 0.3157894736842105, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.29411764705882354, + "teacher_qualification": 0.1836734693877551, + "high_school_politics": 0.25, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.23076923076923078, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.3333333333333333, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.14285714285714285, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.24, + "middle_school_history": 0.14814814814814814, + "civil_servant": 0.3269230769230769, + "sports_science": 0.2916666666666667, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.19607843137254902, + "accountant": 0.2962962962962963, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.2222222222222222, + "physician": 0.2222222222222222 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3118279569892473 + }, + "prompt_2": { + "accuracy": 0.2867383512544803 + }, + "prompt_3": { + "accuracy": 0.26523297491039427 + }, + "prompt_4": { + "accuracy": 0.2867383512544803 + }, + "prompt_5": { + "accuracy": 0.2939068100358423 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2733552063546883, + "category_acc": { + "agronomy": 0.2603550295857988, + "anatomy": 0.23648648648648649, + "ancient_chinese": 0.3719512195121951, + "arts": 0.26875, + "astronomy": 0.24848484848484848, + "business_ethics": 0.2822966507177033, + "chinese_civil_service_exam": 0.2375, + "chinese_driving_rule": 0.32061068702290074, + "chinese_food_culture": 0.25735294117647056, + "chinese_foreign_policy": 0.2336448598130841, + "chinese_history": 0.22910216718266255, + "chinese_literature": 0.3088235294117647, + "chinese_teacher_qualification": 0.2569832402234637, + "clinical_knowledge": 0.25316455696202533, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.3364485981308411, + "college_engineering_hydrology": 0.24528301886792453, + "college_law": 0.26851851851851855, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.2641509433962264, + "college_medicine": 0.26373626373626374, + "computer_science": 0.29901960784313725, + "computer_security": 0.29239766081871343, + "conceptual_physics": 0.2857142857142857, + "construction_project_management": 0.28776978417266186, + "economics": 0.27044025157232704, + "education": 0.2822085889570552, + "electrical_engineering": 0.3488372093023256, + "elementary_chinese": 0.24603174603174602, + "elementary_commonsense": 0.21717171717171718, + "elementary_information_and_technology": 0.24369747899159663, + "elementary_mathematics": 0.29130434782608694, + "ethnology": 0.2962962962962963, + "food_science": 0.42657342657342656, + "genetics": 0.23295454545454544, + "global_facts": 0.2953020134228188, + "high_school_biology": 0.30177514792899407, + "high_school_chemistry": 0.25, + "high_school_geography": 0.2033898305084746, + "high_school_mathematics": 0.23170731707317074, + "high_school_physics": 0.33636363636363636, + "high_school_politics": 0.1888111888111888, + "human_sexuality": 0.30952380952380953, + "international_law": 0.21621621621621623, + "journalism": 0.22674418604651161, + "jurisprudence": 0.2944038929440389, + "legal_and_moral_basis": 0.34579439252336447, + "logical": 0.2764227642276423, + "machine_learning": 0.2459016393442623, + "management": 0.24761904761904763, + "marketing": 0.2722222222222222, + "marxist_theory": 0.2857142857142857, + "modern_chinese": 0.22413793103448276, + "nutrition": 0.27586206896551724, + "philosophy": 0.3238095238095238, + "professional_accounting": 0.26285714285714284, + "professional_law": 0.23222748815165878, + "professional_medicine": 0.28191489361702127, + "professional_psychology": 0.27586206896551724, + "public_relations": 0.27586206896551724, + "security_study": 0.32592592592592595, + "sociology": 0.2743362831858407, + "sports_science": 0.30303030303030304, + "traditional_chinese_medicine": 0.2756756756756757, + "virology": 0.28402366863905326, + "world_history": 0.2608695652173913, + "world_religions": 0.28125 + } + }, + "prompt_2": { + "accuracy": 0.26385771024002763, + "category_acc": { + "agronomy": 0.27218934911242604, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.34146341463414637, + "arts": 0.23125, + "astronomy": 0.21818181818181817, + "business_ethics": 0.24401913875598086, + "chinese_civil_service_exam": 0.25625, + "chinese_driving_rule": 0.29770992366412213, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.27102803738317754, + "chinese_history": 0.29102167182662536, + "chinese_literature": 0.2647058823529412, + "chinese_teacher_qualification": 0.2681564245810056, + "clinical_knowledge": 0.3037974683544304, + "college_actuarial_science": 0.3113207547169811, + "college_education": 0.205607476635514, + "college_engineering_hydrology": 0.22641509433962265, + "college_law": 0.18518518518518517, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.2641509433962264, + "college_medicine": 0.31135531135531136, + "computer_science": 0.27941176470588236, + "computer_security": 0.2631578947368421, + "conceptual_physics": 0.2653061224489796, + "construction_project_management": 0.2733812949640288, + "economics": 0.22012578616352202, + "education": 0.27607361963190186, + "electrical_engineering": 0.3023255813953488, + "elementary_chinese": 0.24603174603174602, + "elementary_commonsense": 0.22727272727272727, + "elementary_information_and_technology": 0.226890756302521, + "elementary_mathematics": 0.2782608695652174, + "ethnology": 0.2518518518518518, + "food_science": 0.34965034965034963, + "genetics": 0.2215909090909091, + "global_facts": 0.30201342281879195, + "high_school_biology": 0.24260355029585798, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.2288135593220339, + "high_school_mathematics": 0.2926829268292683, + "high_school_physics": 0.33636363636363636, + "high_school_politics": 0.20279720279720279, + "human_sexuality": 0.2619047619047619, + "international_law": 0.2702702702702703, + "journalism": 0.22674418604651161, + "jurisprudence": 0.2725060827250608, + "legal_and_moral_basis": 0.3177570093457944, + "logical": 0.21951219512195122, + "machine_learning": 0.23770491803278687, + "management": 0.22857142857142856, + "marketing": 0.2611111111111111, + "marxist_theory": 0.2698412698412698, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.27586206896551724, + "philosophy": 0.21904761904761905, + "professional_accounting": 0.26285714285714284, + "professional_law": 0.3033175355450237, + "professional_medicine": 0.24202127659574468, + "professional_psychology": 0.22413793103448276, + "public_relations": 0.2471264367816092, + "security_study": 0.3111111111111111, + "sociology": 0.27876106194690264, + "sports_science": 0.2787878787878788, + "traditional_chinese_medicine": 0.2864864864864865, + "virology": 0.3076923076923077, + "world_history": 0.2236024844720497, + "world_religions": 0.26875 + } + }, + "prompt_3": { + "accuracy": 0.2646347781039544, + "category_acc": { + "agronomy": 0.23668639053254437, + "anatomy": 0.23648648648648649, + "ancient_chinese": 0.3048780487804878, + "arts": 0.2375, + "astronomy": 0.2545454545454545, + "business_ethics": 0.2727272727272727, + "chinese_civil_service_exam": 0.25625, + "chinese_driving_rule": 0.32061068702290074, + "chinese_food_culture": 0.29411764705882354, + "chinese_foreign_policy": 0.2336448598130841, + "chinese_history": 0.28173374613003094, + "chinese_literature": 0.2696078431372549, + "chinese_teacher_qualification": 0.18994413407821228, + "clinical_knowledge": 0.24050632911392406, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.3364485981308411, + "college_engineering_hydrology": 0.2641509433962264, + "college_law": 0.17592592592592593, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.3018867924528302, + "college_medicine": 0.27106227106227104, + "computer_science": 0.2549019607843137, + "computer_security": 0.27485380116959063, + "conceptual_physics": 0.2925170068027211, + "construction_project_management": 0.2517985611510791, + "economics": 0.23270440251572327, + "education": 0.26380368098159507, + "electrical_engineering": 0.3313953488372093, + "elementary_chinese": 0.2619047619047619, + "elementary_commonsense": 0.21212121212121213, + "elementary_information_and_technology": 0.25210084033613445, + "elementary_mathematics": 0.26521739130434785, + "ethnology": 0.25925925925925924, + "food_science": 0.3706293706293706, + "genetics": 0.2215909090909091, + "global_facts": 0.3087248322147651, + "high_school_biology": 0.28402366863905326, + "high_school_chemistry": 0.21212121212121213, + "high_school_geography": 0.2627118644067797, + "high_school_mathematics": 0.3048780487804878, + "high_school_physics": 0.3, + "high_school_politics": 0.25874125874125875, + "human_sexuality": 0.2619047619047619, + "international_law": 0.23783783783783785, + "journalism": 0.2616279069767442, + "jurisprudence": 0.25304136253041365, + "legal_and_moral_basis": 0.35046728971962615, + "logical": 0.24390243902439024, + "machine_learning": 0.3114754098360656, + "management": 0.2, + "marketing": 0.19444444444444445, + "marxist_theory": 0.2857142857142857, + "modern_chinese": 0.22413793103448276, + "nutrition": 0.25517241379310346, + "philosophy": 0.29523809523809524, + "professional_accounting": 0.3142857142857143, + "professional_law": 0.24644549763033174, + "professional_medicine": 0.2473404255319149, + "professional_psychology": 0.2629310344827586, + "public_relations": 0.28160919540229884, + "security_study": 0.26666666666666666, + "sociology": 0.2610619469026549, + "sports_science": 0.26666666666666666, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.3431952662721893, + "world_history": 0.2670807453416149, + "world_religions": 0.24375 + } + }, + "prompt_4": { + "accuracy": 0.2759454325677776, + "category_acc": { + "agronomy": 0.25443786982248523, + "anatomy": 0.2702702702702703, + "ancient_chinese": 0.3170731707317073, + "arts": 0.275, + "astronomy": 0.2727272727272727, + "business_ethics": 0.23444976076555024, + "chinese_civil_service_exam": 0.23125, + "chinese_driving_rule": 0.32061068702290074, + "chinese_food_culture": 0.27941176470588236, + "chinese_foreign_policy": 0.24299065420560748, + "chinese_history": 0.24458204334365324, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.329608938547486, + "clinical_knowledge": 0.28270042194092826, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.308411214953271, + "college_engineering_hydrology": 0.24528301886792453, + "college_law": 0.3055555555555556, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.32075471698113206, + "college_medicine": 0.26373626373626374, + "computer_science": 0.3137254901960784, + "computer_security": 0.2573099415204678, + "conceptual_physics": 0.2857142857142857, + "construction_project_management": 0.26618705035971224, + "economics": 0.2893081761006289, + "education": 0.25766871165644173, + "electrical_engineering": 0.3313953488372093, + "elementary_chinese": 0.2698412698412698, + "elementary_commonsense": 0.21212121212121213, + "elementary_information_and_technology": 0.2647058823529412, + "elementary_mathematics": 0.26521739130434785, + "ethnology": 0.3111111111111111, + "food_science": 0.3986013986013986, + "genetics": 0.20454545454545456, + "global_facts": 0.28187919463087246, + "high_school_biology": 0.3431952662721893, + "high_school_chemistry": 0.30303030303030304, + "high_school_geography": 0.2457627118644068, + "high_school_mathematics": 0.23170731707317074, + "high_school_physics": 0.3090909090909091, + "high_school_politics": 0.23076923076923078, + "human_sexuality": 0.2857142857142857, + "international_law": 0.2648648648648649, + "journalism": 0.29069767441860467, + "jurisprudence": 0.24330900243309003, + "legal_and_moral_basis": 0.37850467289719625, + "logical": 0.23577235772357724, + "machine_learning": 0.22950819672131148, + "management": 0.2619047619047619, + "marketing": 0.2833333333333333, + "marxist_theory": 0.31216931216931215, + "modern_chinese": 0.20689655172413793, + "nutrition": 0.30344827586206896, + "philosophy": 0.3142857142857143, + "professional_accounting": 0.2571428571428571, + "professional_law": 0.2559241706161137, + "professional_medicine": 0.2765957446808511, + "professional_psychology": 0.23275862068965517, + "public_relations": 0.2988505747126437, + "security_study": 0.3037037037037037, + "sociology": 0.3274336283185841, + "sports_science": 0.2727272727272727, + "traditional_chinese_medicine": 0.22702702702702704, + "virology": 0.3254437869822485, + "world_history": 0.2608695652173913, + "world_religions": 0.25 + } + }, + "prompt_5": { + "accuracy": 0.2712830253842169, + "category_acc": { + "agronomy": 0.23668639053254437, + "anatomy": 0.28378378378378377, + "ancient_chinese": 0.3170731707317073, + "arts": 0.2375, + "astronomy": 0.2787878787878788, + "business_ethics": 0.27751196172248804, + "chinese_civil_service_exam": 0.24375, + "chinese_driving_rule": 0.31297709923664124, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.205607476635514, + "chinese_history": 0.25696594427244585, + "chinese_literature": 0.27941176470588236, + "chinese_teacher_qualification": 0.22346368715083798, + "clinical_knowledge": 0.22362869198312235, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.35514018691588783, + "college_engineering_hydrology": 0.2830188679245283, + "college_law": 0.24074074074074073, + "college_mathematics": 0.29523809523809524, + "college_medical_statistics": 0.3490566037735849, + "college_medicine": 0.27472527472527475, + "computer_science": 0.30392156862745096, + "computer_security": 0.28654970760233917, + "conceptual_physics": 0.2925170068027211, + "construction_project_management": 0.2949640287769784, + "economics": 0.27044025157232704, + "education": 0.26993865030674846, + "electrical_engineering": 0.32558139534883723, + "elementary_chinese": 0.25, + "elementary_commonsense": 0.24242424242424243, + "elementary_information_and_technology": 0.2689075630252101, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.31851851851851853, + "food_science": 0.38461538461538464, + "genetics": 0.2159090909090909, + "global_facts": 0.28187919463087246, + "high_school_biology": 0.2781065088757396, + "high_school_chemistry": 0.26515151515151514, + "high_school_geography": 0.2033898305084746, + "high_school_mathematics": 0.2804878048780488, + "high_school_physics": 0.3181818181818182, + "high_school_politics": 0.22377622377622378, + "human_sexuality": 0.30952380952380953, + "international_law": 0.25405405405405407, + "journalism": 0.27325581395348836, + "jurisprudence": 0.2871046228710462, + "legal_and_moral_basis": 0.3317757009345794, + "logical": 0.2601626016260163, + "machine_learning": 0.26229508196721313, + "management": 0.24761904761904763, + "marketing": 0.26666666666666666, + "marxist_theory": 0.2698412698412698, + "modern_chinese": 0.2672413793103448, + "nutrition": 0.2689655172413793, + "philosophy": 0.2571428571428571, + "professional_accounting": 0.26857142857142857, + "professional_law": 0.23696682464454977, + "professional_medicine": 0.2553191489361702, + "professional_psychology": 0.2413793103448276, + "public_relations": 0.26436781609195403, + "security_study": 0.3111111111111111, + "sociology": 0.26548672566371684, + "sports_science": 0.2909090909090909, + "traditional_chinese_medicine": 0.2810810810810811, + "virology": 0.28994082840236685, + "world_history": 0.2732919254658385, + "world_religions": 0.24375 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.12121212121212122 + }, + "prompt_2": { + "accuracy": 0.15151515151515152 + }, + "prompt_3": { + "accuracy": 0.21212121212121213 + }, + "prompt_4": { + "accuracy": 0.09090909090909091 + }, + "prompt_5": { + "accuracy": 0.12121212121212122 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.12045454545454545 + }, + "prompt_2": { + "accuracy": 0.10681818181818181 + }, + "prompt_3": { + "accuracy": 0.1 + }, + "prompt_4": { + "accuracy": 0.16590909090909092 + }, + "prompt_5": { + "accuracy": 0.17727272727272728 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3183050847457627 + }, + "prompt_2": { + "accuracy": 0.32237288135593223 + }, + "prompt_3": { + "accuracy": 0.3271186440677966 + }, + "prompt_4": { + "accuracy": 0.3101694915254237 + }, + "prompt_5": { + "accuracy": 0.3247457627118644 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3451757666417352 + }, + "prompt_2": { + "accuracy": 0.3343305908750935 + }, + "prompt_3": { + "accuracy": 0.3395661929693343 + }, + "prompt_4": { + "accuracy": 0.3298429319371728 + }, + "prompt_5": { + "accuracy": 0.3365744203440538 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.43067123958843706 + }, + "prompt_2": { + "accuracy": 0.42626163645271925 + }, + "prompt_3": { + "accuracy": 0.46300832925036745 + }, + "prompt_4": { + "accuracy": 0.4429201371876531 + }, + "prompt_5": { + "accuracy": 0.458108770210681 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.24951239610416812, + "rouge2": 0.0778377335520455, + "rougeL": 0.19146771978168034, + "avg_rouge": 0.17293928314596466 + }, + "prompt_2": { + "rouge1": 0.23380448107747587, + "rouge2": 0.07380094831292337, + "rougeL": 0.17925792619854372, + "avg_rouge": 0.16228778519631434 + }, + "prompt_3": { + "rouge1": 0.22283258328620426, + "rouge2": 0.0736652239408231, + "rougeL": 0.1729814705348942, + "avg_rouge": 0.15649309258730718 + }, + "prompt_4": { + "rouge1": 0.24459098434458346, + "rouge2": 0.07716054553519137, + "rougeL": 0.18419980000823907, + "avg_rouge": 0.16865044329600465 + }, + "prompt_5": { + "rouge1": 0.24829476161030356, + "rouge2": 0.07769239727730436, + "rougeL": 0.18768219194312416, + "avg_rouge": 0.17122311694357736 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.22348626598014815, + "rouge2": 0.06002656719941176, + "rougeL": 0.16359196603794834, + "avg_rouge": 0.14903493307250273 + }, + "prompt_2": { + "rouge1": 0.22389865165462386, + "rouge2": 0.06011539614950573, + "rougeL": 0.16434962147848067, + "avg_rouge": 0.14945455642753677 + }, + "prompt_3": { + "rouge1": 0.2241403525660097, + "rouge2": 0.060523650055637396, + "rougeL": 0.16377682981747274, + "avg_rouge": 0.1494802774797066 + }, + "prompt_4": { + "rouge1": 0.2237100726521081, + "rouge2": 0.060536823647126946, + "rougeL": 0.1648987975831146, + "avg_rouge": 0.14971523129411654 + }, + "prompt_5": { + "rouge1": 0.20996971401463865, + "rouge2": 0.05503358450515352, + "rougeL": 0.15486258975753345, + "avg_rouge": 0.13995529609244187 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49770642201834864 + }, + "prompt_2": { + "accuracy": 0.5447247706422018 + }, + "prompt_3": { + "accuracy": 0.5458715596330275 + }, + "prompt_4": { + "accuracy": 0.5068807339449541 + }, + "prompt_5": { + "accuracy": 0.7970183486238532 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.44870565675934804 + }, + "prompt_2": { + "accuracy": 0.4813039309683605 + }, + "prompt_3": { + "accuracy": 0.3595397890699904 + }, + "prompt_4": { + "accuracy": 0.4582933844678811 + }, + "prompt_5": { + "accuracy": 0.5522531160115053 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.578 + }, + "prompt_2": { + "accuracy": 0.492 + }, + "prompt_3": { + "accuracy": 0.5 + }, + "prompt_4": { + "accuracy": 0.6005 + }, + "prompt_5": { + "accuracy": 0.503 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3465 + }, + "prompt_2": { + "accuracy": 0.354 + }, + "prompt_3": { + "accuracy": 0.3615 + }, + "prompt_4": { + "accuracy": 0.349 + }, + "prompt_5": { + "accuracy": 0.35 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5215 + }, + "prompt_2": { + "accuracy": 0.521 + }, + "prompt_3": { + "accuracy": 0.502 + }, + "prompt_4": { + "accuracy": 0.5275 + }, + "prompt_5": { + "accuracy": 0.501 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4507042253521127 + }, + "prompt_2": { + "accuracy": 0.5774647887323944 + }, + "prompt_3": { + "accuracy": 0.5492957746478874 + }, + "prompt_4": { + "accuracy": 0.43661971830985913 + }, + "prompt_5": { + "accuracy": 0.5352112676056338 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5090252707581228 + }, + "prompt_2": { + "accuracy": 0.48375451263537905 + }, + "prompt_3": { + "accuracy": 0.48014440433212996 + }, + "prompt_4": { + "accuracy": 0.5126353790613718 + }, + "prompt_5": { + "accuracy": 0.4729241877256318 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4681372549019608 + }, + "prompt_2": { + "accuracy": 0.4117647058823529 + }, + "prompt_3": { + "accuracy": 0.47058823529411764 + }, + "prompt_4": { + "accuracy": 0.5784313725490197 + }, + "prompt_5": { + "accuracy": 0.47058823529411764 + } } }, "five_shot": { @@ -3384,53 +29359,1733 @@ "model_link": "https://huggingface.co/huggyllama/llama-13b", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3457142857142857, + "language_acc": { + "Malay": 0.3466666666666667, + "English": 0.48, + "Vietnamese": 0.26666666666666666, + "Spanish": 0.32, + "Indonesian": 0.3466666666666667, + "Filipino": 0.30666666666666664, + "Chinese": 0.35333333333333333 + }, + "consistency_score_2": 0.40126984126984133, + "consistency_score_3": 0.20647619047619048, + "consistency_score_4": 0.12457142857142856, + "consistency_score_5": 0.08412698412698412, + "consistency_score_6": 0.060952380952380945, + "consistency_score_7": 0.04666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.4, + "Malay,Vietnamese": 0.5133333333333333, + "Malay,Spanish": 0.36666666666666664, + "Malay,Indonesian": 0.5933333333333334, + "Malay,Filipino": 0.4533333333333333, + "Malay,Chinese": 0.48, + "English,Vietnamese": 0.2733333333333333, + "English,Spanish": 0.42, + "English,Indonesian": 0.3933333333333333, + "English,Filipino": 0.43333333333333335, + "English,Chinese": 0.3933333333333333, + "Vietnamese,Spanish": 0.24666666666666667, + "Vietnamese,Indonesian": 0.44, + "Vietnamese,Filipino": 0.34, + "Vietnamese,Chinese": 0.41333333333333333, + "Spanish,Indonesian": 0.36, + "Spanish,Filipino": 0.37333333333333335, + "Spanish,Chinese": 0.37333333333333335, + "Indonesian,Filipino": 0.4066666666666667, + "Indonesian,Chinese": 0.38, + "Filipino,Chinese": 0.37333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.18, + "Malay,English,Spanish": 0.22, + "Malay,English,Indonesian": 0.24666666666666667, + "Malay,English,Filipino": 0.22, + "Malay,English,Chinese": 0.26, + "Malay,Vietnamese,Spanish": 0.16666666666666666, + "Malay,Vietnamese,Indonesian": 0.3466666666666667, + "Malay,Vietnamese,Filipino": 0.24, + "Malay,Vietnamese,Chinese": 0.26666666666666666, + "Malay,Spanish,Indonesian": 0.25333333333333335, + "Malay,Spanish,Filipino": 0.21333333333333335, + "Malay,Spanish,Chinese": 0.24, + "Malay,Indonesian,Filipino": 0.30666666666666664, + "Malay,Indonesian,Chinese": 0.29333333333333333, + "Malay,Filipino,Chinese": 0.24, + "English,Vietnamese,Spanish": 0.12, + "English,Vietnamese,Indonesian": 0.15333333333333332, + "English,Vietnamese,Filipino": 0.14, + "English,Vietnamese,Chinese": 0.16, + "English,Spanish,Indonesian": 0.19333333333333333, + "English,Spanish,Filipino": 0.21333333333333335, + "English,Spanish,Chinese": 0.18666666666666668, + "English,Indonesian,Filipino": 0.19333333333333333, + "English,Indonesian,Chinese": 0.2, + "English,Filipino,Chinese": 0.20666666666666667, + "Vietnamese,Spanish,Indonesian": 0.14, + "Vietnamese,Spanish,Filipino": 0.11333333333333333, + "Vietnamese,Spanish,Chinese": 0.16, + "Vietnamese,Indonesian,Filipino": 0.2, + "Vietnamese,Indonesian,Chinese": 0.22666666666666666, + "Vietnamese,Filipino,Chinese": 0.19333333333333333, + "Spanish,Indonesian,Filipino": 0.18, + "Spanish,Indonesian,Chinese": 0.19333333333333333, + "Spanish,Filipino,Chinese": 0.16666666666666666, + "Indonesian,Filipino,Chinese": 0.19333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.1, + "Malay,English,Vietnamese,Indonesian": 0.13333333333333333, + "Malay,English,Vietnamese,Filipino": 0.11333333333333333, + "Malay,English,Vietnamese,Chinese": 0.12666666666666668, + "Malay,English,Spanish,Indonesian": 0.15333333333333332, + "Malay,English,Spanish,Filipino": 0.14, + "Malay,English,Spanish,Chinese": 0.14, + "Malay,English,Indonesian,Filipino": 0.14666666666666667, + "Malay,English,Indonesian,Chinese": 0.17333333333333334, + "Malay,English,Filipino,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Indonesian": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Filipino": 0.1, + "Malay,Vietnamese,Spanish,Chinese": 0.14, + "Malay,Vietnamese,Indonesian,Filipino": 0.18, + "Malay,Vietnamese,Indonesian,Chinese": 0.18, + "Malay,Vietnamese,Filipino,Chinese": 0.15333333333333332, + "Malay,Spanish,Indonesian,Filipino": 0.16, + "Malay,Spanish,Indonesian,Chinese": 0.17333333333333334, + "Malay,Spanish,Filipino,Chinese": 0.13333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish,Indonesian": 0.08, + "English,Vietnamese,Spanish,Filipino": 0.07333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.08666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.09333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.10666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.1, + "English,Spanish,Indonesian,Filipino": 0.12, + "English,Spanish,Indonesian,Chinese": 0.10666666666666667, + "English,Spanish,Filipino,Chinese": 0.10666666666666667, + "English,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.07333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.08666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.1 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.08, + "Malay,English,Vietnamese,Spanish,Filipino": 0.07333333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.08, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.08666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.1, + "Malay,English,Vietnamese,Filipino,Chinese": 0.08666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.10666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.1, + "Malay,English,Spanish,Filipino,Chinese": 0.08666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.1, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.05333333333333334, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.06666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.06, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.05333333333333334, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.06, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.06666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667 + } + }, + "AC3_2": 0.3714261428681322, + "AC3_3": 0.25854038333661183, + "AC3_4": 0.18314875885708082, + "AC3_5": 0.1353239079659855, + "AC3_6": 0.10363332215584795, + "AC3_7": 0.08223300968778041 + }, + "prompt_2": { + "overall_acc": 0.3142857142857142, + "language_acc": { + "Malay": 0.25333333333333335, + "English": 0.36, + "Vietnamese": 0.31333333333333335, + "Spanish": 0.38666666666666666, + "Indonesian": 0.26666666666666666, + "Filipino": 0.32, + "Chinese": 0.3 + }, + "consistency_score_2": 0.4546031746031747, + "consistency_score_3": 0.252, + "consistency_score_4": 0.14990476190476193, + "consistency_score_5": 0.09301587301587301, + "consistency_score_6": 0.060000000000000005, + "consistency_score_7": 0.04, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.41333333333333333, + "Malay,Vietnamese": 0.48, + "Malay,Spanish": 0.6466666666666666, + "Malay,Indonesian": 0.66, + "Malay,Filipino": 0.5933333333333334, + "Malay,Chinese": 0.2733333333333333, + "English,Vietnamese": 0.4066666666666667, + "English,Spanish": 0.41333333333333333, + "English,Indonesian": 0.4266666666666667, + "English,Filipino": 0.4, + "English,Chinese": 0.36, + "Vietnamese,Spanish": 0.4666666666666667, + "Vietnamese,Indonesian": 0.46, + "Vietnamese,Filipino": 0.47333333333333333, + "Vietnamese,Chinese": 0.32, + "Spanish,Indonesian": 0.62, + "Spanish,Filipino": 0.64, + "Spanish,Chinese": 0.3466666666666667, + "Indonesian,Filipino": 0.6266666666666667, + "Indonesian,Chinese": 0.2733333333333333, + "Filipino,Chinese": 0.24666666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.24666666666666667, + "Malay,English,Spanish": 0.28, + "Malay,English,Indonesian": 0.29333333333333333, + "Malay,English,Filipino": 0.25333333333333335, + "Malay,English,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish": 0.3333333333333333, + "Malay,Vietnamese,Indonesian": 0.3466666666666667, + "Malay,Vietnamese,Filipino": 0.3333333333333333, + "Malay,Vietnamese,Chinese": 0.15333333333333332, + "Malay,Spanish,Indonesian": 0.4866666666666667, + "Malay,Spanish,Filipino": 0.48, + "Malay,Spanish,Chinese": 0.17333333333333334, + "Malay,Indonesian,Filipino": 0.47333333333333333, + "Malay,Indonesian,Chinese": 0.18, + "Malay,Filipino,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish": 0.23333333333333334, + "English,Vietnamese,Indonesian": 0.24, + "English,Vietnamese,Filipino": 0.24, + "English,Vietnamese,Chinese": 0.16, + "English,Spanish,Indonesian": 0.2866666666666667, + "English,Spanish,Filipino": 0.26666666666666666, + "English,Spanish,Chinese": 0.17333333333333334, + "English,Indonesian,Filipino": 0.28, + "English,Indonesian,Chinese": 0.12666666666666668, + "English,Filipino,Chinese": 0.1, + "Vietnamese,Spanish,Indonesian": 0.34, + "Vietnamese,Spanish,Filipino": 0.34, + "Vietnamese,Spanish,Chinese": 0.14, + "Vietnamese,Indonesian,Filipino": 0.3333333333333333, + "Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "Vietnamese,Filipino,Chinese": 0.12, + "Spanish,Indonesian,Filipino": 0.4866666666666667, + "Spanish,Indonesian,Chinese": 0.18, + "Spanish,Filipino,Chinese": 0.17333333333333334, + "Indonesian,Filipino,Chinese": 0.16 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.17333333333333334, + "Malay,English,Vietnamese,Indonesian": 0.16666666666666666, + "Malay,English,Vietnamese,Filipino": 0.18, + "Malay,English,Vietnamese,Chinese": 0.08666666666666667, + "Malay,English,Spanish,Indonesian": 0.22666666666666666, + "Malay,English,Spanish,Filipino": 0.2, + "Malay,English,Spanish,Chinese": 0.09333333333333334, + "Malay,English,Indonesian,Filipino": 0.20666666666666667, + "Malay,English,Indonesian,Chinese": 0.08, + "Malay,English,Filipino,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Spanish,Indonesian": 0.2733333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.2733333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.08, + "Malay,Vietnamese,Indonesian,Filipino": 0.2733333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.08666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.38666666666666666, + "Malay,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.18666666666666668, + "English,Vietnamese,Spanish,Filipino": 0.19333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.09333333333333334, + "English,Vietnamese,Indonesian,Filipino": 0.18, + "English,Vietnamese,Indonesian,Chinese": 0.08666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.08, + "English,Spanish,Indonesian,Filipino": 0.21333333333333335, + "English,Spanish,Indonesian,Chinese": 0.09333333333333334, + "English,Spanish,Filipino,Chinese": 0.07333333333333333, + "English,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.26666666666666666, + "Vietnamese,Spanish,Indonesian,Chinese": 0.09333333333333334, + "Vietnamese,Spanish,Filipino,Chinese": 0.08666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.14666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Spanish,Chinese": 0.06, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.14, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.06, + "Malay,English,Vietnamese,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.17333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.06666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.04666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.08, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.06666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.06666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.06, + "English,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.05333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.04666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.04, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04 + } + }, + "AC3_2": 0.3716409106512051, + "AC3_3": 0.279717457064631, + "AC3_4": 0.20298962420153663, + "AC3_5": 0.14354748910850437, + "AC3_6": 0.10076335875170445, + "AC3_7": 0.07096774191545265 + }, + "prompt_3": { + "overall_acc": 0.3314285714285714, + "language_acc": { + "Malay": 0.29333333333333333, + "English": 0.4533333333333333, + "Vietnamese": 0.28, + "Spanish": 0.4, + "Indonesian": 0.2733333333333333, + "Filipino": 0.32, + "Chinese": 0.3 + }, + "consistency_score_2": 0.4371428571428571, + "consistency_score_3": 0.23447619047619042, + "consistency_score_4": 0.136, + "consistency_score_5": 0.08317460317460316, + "consistency_score_6": 0.05238095238095238, + "consistency_score_7": 0.03333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.43333333333333335, + "Malay,Vietnamese": 0.42, + "Malay,Spanish": 0.6133333333333333, + "Malay,Indonesian": 0.7533333333333333, + "Malay,Filipino": 0.5866666666666667, + "Malay,Chinese": 0.35333333333333333, + "English,Vietnamese": 0.36666666666666664, + "English,Spanish": 0.38, + "English,Indonesian": 0.35333333333333333, + "English,Filipino": 0.36, + "English,Chinese": 0.4, + "Vietnamese,Spanish": 0.36, + "Vietnamese,Indonesian": 0.41333333333333333, + "Vietnamese,Filipino": 0.41333333333333333, + "Vietnamese,Chinese": 0.32, + "Spanish,Indonesian": 0.6333333333333333, + "Spanish,Filipino": 0.5866666666666667, + "Spanish,Chinese": 0.2733333333333333, + "Indonesian,Filipino": 0.6133333333333333, + "Indonesian,Chinese": 0.29333333333333333, + "Filipino,Chinese": 0.25333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.22, + "Malay,English,Spanish": 0.26, + "Malay,English,Indonesian": 0.32, + "Malay,English,Filipino": 0.26, + "Malay,English,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Spanish": 0.26666666666666666, + "Malay,Vietnamese,Indonesian": 0.3466666666666667, + "Malay,Vietnamese,Filipino": 0.2733333333333333, + "Malay,Vietnamese,Chinese": 0.18, + "Malay,Spanish,Indonesian": 0.5266666666666666, + "Malay,Spanish,Filipino": 0.42, + "Malay,Spanish,Chinese": 0.18, + "Malay,Indonesian,Filipino": 0.48, + "Malay,Indonesian,Chinese": 0.22666666666666666, + "Malay,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish": 0.16666666666666666, + "English,Vietnamese,Indonesian": 0.18, + "English,Vietnamese,Filipino": 0.18, + "English,Vietnamese,Chinese": 0.16, + "English,Spanish,Indonesian": 0.23333333333333334, + "English,Spanish,Filipino": 0.24, + "English,Spanish,Chinese": 0.15333333333333332, + "English,Indonesian,Filipino": 0.22, + "English,Indonesian,Chinese": 0.14, + "English,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Indonesian": 0.2733333333333333, + "Vietnamese,Spanish,Filipino": 0.24, + "Vietnamese,Spanish,Chinese": 0.11333333333333333, + "Vietnamese,Indonesian,Filipino": 0.3, + "Vietnamese,Indonesian,Chinese": 0.15333333333333332, + "Vietnamese,Filipino,Chinese": 0.14, + "Spanish,Indonesian,Filipino": 0.43333333333333335, + "Spanish,Indonesian,Chinese": 0.15333333333333332, + "Spanish,Filipino,Chinese": 0.12, + "Indonesian,Filipino,Chinese": 0.14666666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.13333333333333333, + "Malay,English,Vietnamese,Indonesian": 0.17333333333333334, + "Malay,English,Vietnamese,Filipino": 0.14666666666666667, + "Malay,English,Vietnamese,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Indonesian": 0.22, + "Malay,English,Spanish,Filipino": 0.20666666666666667, + "Malay,English,Spanish,Chinese": 0.1, + "Malay,English,Indonesian,Filipino": 0.21333333333333335, + "Malay,English,Indonesian,Chinese": 0.12666666666666668, + "Malay,English,Filipino,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Filipino": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.24666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Filipino,Chinese": 0.1, + "Malay,Spanish,Indonesian,Filipino": 0.36, + "Malay,Spanish,Indonesian,Chinese": 0.12, + "Malay,Spanish,Filipino,Chinese": 0.1, + "Malay,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.12, + "English,Vietnamese,Spanish,Filipino": 0.11333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.06666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.12666666666666668, + "English,Vietnamese,Indonesian,Chinese": 0.07333333333333333, + "English,Vietnamese,Filipino,Chinese": 0.07333333333333333, + "English,Spanish,Indonesian,Filipino": 0.17333333333333334, + "English,Spanish,Indonesian,Chinese": 0.07333333333333333, + "English,Spanish,Filipino,Chinese": 0.08666666666666667, + "English,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.2, + "Vietnamese,Spanish,Indonesian,Chinese": 0.08666666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.06, + "Vietnamese,Indonesian,Filipino,Chinese": 0.1, + "Spanish,Indonesian,Filipino,Chinese": 0.08 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.12, + "Malay,English,Vietnamese,Spanish,Filipino": 0.1, + "Malay,English,Vietnamese,Spanish,Chinese": 0.06, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.06666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.17333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.06666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.08, + "Malay,English,Indonesian,Filipino,Chinese": 0.08, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.18, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.08, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.08, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.09333333333333334, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.04, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.05333333333333334, + "English,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.09333333333333334, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.04666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.04, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 + } + }, + "AC3_2": 0.37701540090686736, + "AC3_3": 0.2746473048512127, + "AC3_4": 0.1928606356555616, + "AC3_5": 0.1329774666054238, + "AC3_6": 0.09046437431177197, + "AC3_7": 0.06057441251603052 + }, + "prompt_4": { + "overall_acc": 0.3238095238095238, + "language_acc": { + "Malay": 0.2733333333333333, + "English": 0.44666666666666666, + "Vietnamese": 0.26666666666666666, + "Spanish": 0.36666666666666664, + "Indonesian": 0.26, + "Filipino": 0.3466666666666667, + "Chinese": 0.30666666666666664 + }, + "consistency_score_2": 0.4222222222222222, + "consistency_score_3": 0.2302857142857143, + "consistency_score_4": 0.14590476190476193, + "consistency_score_5": 0.10190476190476191, + "consistency_score_6": 0.07619047619047618, + "consistency_score_7": 0.06, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.42, + "Malay,Vietnamese": 0.48, + "Malay,Spanish": 0.3466666666666667, + "Malay,Indonesian": 0.6133333333333333, + "Malay,Filipino": 0.4, + "Malay,Chinese": 0.46, + "English,Vietnamese": 0.36, + "English,Spanish": 0.42, + "English,Indonesian": 0.4266666666666667, + "English,Filipino": 0.44666666666666666, + "English,Chinese": 0.36666666666666664, + "Vietnamese,Spanish": 0.26666666666666666, + "Vietnamese,Indonesian": 0.48, + "Vietnamese,Filipino": 0.4, + "Vietnamese,Chinese": 0.4666666666666667, + "Spanish,Indonesian": 0.38, + "Spanish,Filipino": 0.38, + "Spanish,Chinese": 0.32666666666666666, + "Indonesian,Filipino": 0.43333333333333335, + "Indonesian,Chinese": 0.5266666666666666, + "Filipino,Chinese": 0.4666666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.23333333333333334, + "Malay,English,Spanish": 0.21333333333333335, + "Malay,English,Indonesian": 0.30666666666666664, + "Malay,English,Filipino": 0.22666666666666666, + "Malay,English,Chinese": 0.22, + "Malay,Vietnamese,Spanish": 0.16666666666666666, + "Malay,Vietnamese,Indonesian": 0.36, + "Malay,Vietnamese,Filipino": 0.22, + "Malay,Vietnamese,Chinese": 0.3, + "Malay,Spanish,Indonesian": 0.25333333333333335, + "Malay,Spanish,Filipino": 0.20666666666666667, + "Malay,Spanish,Chinese": 0.19333333333333333, + "Malay,Indonesian,Filipino": 0.29333333333333333, + "Malay,Indonesian,Chinese": 0.34, + "Malay,Filipino,Chinese": 0.24666666666666667, + "English,Vietnamese,Spanish": 0.12, + "English,Vietnamese,Indonesian": 0.22666666666666666, + "English,Vietnamese,Filipino": 0.18666666666666668, + "English,Vietnamese,Chinese": 0.21333333333333335, + "English,Spanish,Indonesian": 0.20666666666666667, + "English,Spanish,Filipino": 0.22, + "English,Spanish,Chinese": 0.17333333333333334, + "English,Indonesian,Filipino": 0.22666666666666666, + "English,Indonesian,Chinese": 0.26666666666666666, + "English,Filipino,Chinese": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian": 0.16, + "Vietnamese,Spanish,Filipino": 0.13333333333333333, + "Vietnamese,Spanish,Chinese": 0.16666666666666666, + "Vietnamese,Indonesian,Filipino": 0.24, + "Vietnamese,Indonesian,Chinese": 0.32666666666666666, + "Vietnamese,Filipino,Chinese": 0.26, + "Spanish,Indonesian,Filipino": 0.22, + "Spanish,Indonesian,Chinese": 0.22, + "Spanish,Filipino,Chinese": 0.19333333333333333, + "Indonesian,Filipino,Chinese": 0.29333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.1, + "Malay,English,Vietnamese,Indonesian": 0.19333333333333333, + "Malay,English,Vietnamese,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Chinese": 0.16, + "Malay,English,Spanish,Indonesian": 0.16666666666666666, + "Malay,English,Spanish,Filipino": 0.15333333333333332, + "Malay,English,Spanish,Chinese": 0.12, + "Malay,English,Indonesian,Filipino": 0.18, + "Malay,English,Indonesian,Chinese": 0.18666666666666668, + "Malay,English,Filipino,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.1, + "Malay,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Indonesian,Filipino": 0.18666666666666668, + "Malay,Vietnamese,Indonesian,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.16666666666666666, + "Malay,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,Spanish,Indonesian,Chinese": 0.16, + "Malay,Spanish,Filipino,Chinese": 0.14666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.09333333333333334, + "English,Vietnamese,Spanish,Filipino": 0.08666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.09333333333333334, + "English,Vietnamese,Indonesian,Filipino": 0.12, + "English,Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "English,Vietnamese,Filipino,Chinese": 0.13333333333333333, + "English,Spanish,Indonesian,Filipino": 0.13333333333333333, + "English,Spanish,Indonesian,Chinese": 0.13333333333333333, + "English,Spanish,Filipino,Chinese": 0.11333333333333333, + "English,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino": 0.09333333333333334, + "Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.08666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.07333333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.08, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.10666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.1, + "Malay,English,Spanish,Indonesian,Filipino": 0.12, + "Malay,English,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.08, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.06666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.08, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.07333333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.1, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.06, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.06666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + } + }, + "AC3_2": 0.3665248226459055, + "AC3_3": 0.2691548396115924, + "AC3_4": 0.2011663383497884, + "AC3_5": 0.15502290397977786, + "AC3_6": 0.12335600903945577, + "AC3_7": 0.10124069476270404 + }, + "prompt_5": { + "overall_acc": 0.3380952380952381, + "language_acc": { + "Malay": 0.32666666666666666, + "English": 0.44, + "Vietnamese": 0.26666666666666666, + "Spanish": 0.36, + "Indonesian": 0.2733333333333333, + "Filipino": 0.3466666666666667, + "Chinese": 0.35333333333333333 + }, + "consistency_score_2": 0.47015873015873016, + "consistency_score_3": 0.287047619047619, + "consistency_score_4": 0.1986666666666667, + "consistency_score_5": 0.14888888888888888, + "consistency_score_6": 0.11714285714285713, + "consistency_score_7": 0.09333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.44, + "Malay,Vietnamese": 0.5333333333333333, + "Malay,Spanish": 0.38666666666666666, + "Malay,Indonesian": 0.6333333333333333, + "Malay,Filipino": 0.5266666666666666, + "Malay,Chinese": 0.56, + "English,Vietnamese": 0.37333333333333335, + "English,Spanish": 0.46, + "English,Indonesian": 0.4266666666666667, + "English,Filipino": 0.5066666666666667, + "English,Chinese": 0.4066666666666667, + "Vietnamese,Spanish": 0.3, + "Vietnamese,Indonesian": 0.5733333333333334, + "Vietnamese,Filipino": 0.54, + "Vietnamese,Chinese": 0.5, + "Spanish,Indonesian": 0.42, + "Spanish,Filipino": 0.4, + "Spanish,Chinese": 0.35333333333333333, + "Indonesian,Filipino": 0.5133333333333333, + "Indonesian,Chinese": 0.5533333333333333, + "Filipino,Chinese": 0.4666666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.26, + "Malay,English,Spanish": 0.25333333333333335, + "Malay,English,Indonesian": 0.32, + "Malay,English,Filipino": 0.32, + "Malay,English,Chinese": 0.2866666666666667, + "Malay,Vietnamese,Spanish": 0.21333333333333335, + "Malay,Vietnamese,Indonesian": 0.4266666666666667, + "Malay,Vietnamese,Filipino": 0.36666666666666664, + "Malay,Vietnamese,Chinese": 0.34, + "Malay,Spanish,Indonesian": 0.29333333333333333, + "Malay,Spanish,Filipino": 0.24666666666666667, + "Malay,Spanish,Chinese": 0.24, + "Malay,Indonesian,Filipino": 0.37333333333333335, + "Malay,Indonesian,Chinese": 0.4066666666666667, + "Malay,Filipino,Chinese": 0.3333333333333333, + "English,Vietnamese,Spanish": 0.19333333333333333, + "English,Vietnamese,Indonesian": 0.26666666666666666, + "English,Vietnamese,Filipino": 0.3, + "English,Vietnamese,Chinese": 0.22666666666666666, + "English,Spanish,Indonesian": 0.25333333333333335, + "English,Spanish,Filipino": 0.28, + "English,Spanish,Chinese": 0.22, + "English,Indonesian,Filipino": 0.30666666666666664, + "English,Indonesian,Chinese": 0.2733333333333333, + "English,Filipino,Chinese": 0.2733333333333333, + "Vietnamese,Spanish,Indonesian": 0.22666666666666666, + "Vietnamese,Spanish,Filipino": 0.22666666666666666, + "Vietnamese,Spanish,Chinese": 0.18666666666666668, + "Vietnamese,Indonesian,Filipino": 0.38666666666666666, + "Vietnamese,Indonesian,Chinese": 0.38, + "Vietnamese,Filipino,Chinese": 0.32666666666666666, + "Spanish,Indonesian,Filipino": 0.26, + "Spanish,Indonesian,Chinese": 0.24, + "Spanish,Filipino,Chinese": 0.21333333333333335, + "Indonesian,Filipino,Chinese": 0.32666666666666666 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian": 0.22666666666666666, + "Malay,English,Vietnamese,Filipino": 0.23333333333333334, + "Malay,English,Vietnamese,Chinese": 0.18666666666666668, + "Malay,English,Spanish,Indonesian": 0.20666666666666667, + "Malay,English,Spanish,Filipino": 0.19333333333333333, + "Malay,English,Spanish,Chinese": 0.18666666666666668, + "Malay,English,Indonesian,Filipino": 0.26, + "Malay,English,Indonesian,Chinese": 0.22666666666666666, + "Malay,English,Filipino,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Indonesian": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Filipino": 0.16666666666666666, + "Malay,Vietnamese,Spanish,Chinese": 0.14, + "Malay,Vietnamese,Indonesian,Filipino": 0.3, + "Malay,Vietnamese,Indonesian,Chinese": 0.29333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.24, + "Malay,Spanish,Indonesian,Filipino": 0.2, + "Malay,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Malay,Spanish,Filipino,Chinese": 0.17333333333333334, + "Malay,Indonesian,Filipino,Chinese": 0.26, + "English,Vietnamese,Spanish,Indonesian": 0.16, + "English,Vietnamese,Spanish,Filipino": 0.17333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "English,Vietnamese,Indonesian,Filipino": 0.22666666666666666, + "English,Vietnamese,Indonesian,Chinese": 0.19333333333333333, + "English,Vietnamese,Filipino,Chinese": 0.19333333333333333, + "English,Spanish,Indonesian,Filipino": 0.20666666666666667, + "English,Spanish,Indonesian,Chinese": 0.16666666666666666, + "English,Spanish,Filipino,Chinese": 0.16666666666666666, + "English,Indonesian,Filipino,Chinese": 0.20666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.17333333333333334, + "Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Vietnamese,Spanish,Filipino,Chinese": 0.14, + "Vietnamese,Indonesian,Filipino,Chinese": 0.26, + "Spanish,Indonesian,Filipino,Chinese": 0.16 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.14666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.14, + "Malay,English,Vietnamese,Spanish,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.20666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "Malay,English,Vietnamese,Filipino,Chinese": 0.16666666666666666, + "Malay,English,Spanish,Indonesian,Filipino": 0.17333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Filipino,Chinese": 0.14666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.20666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.14, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.16666666666666666, + "English,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.13333333333333333, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334 + } + }, + "AC3_2": 0.3933378221186936, + "AC3_3": 0.31048721738453433, + "AC3_4": 0.25027206051642087, + "AC3_5": 0.20673620160900352, + "AC3_6": 0.1739988045045161, + "AC3_7": 0.14628403234284074 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.28490259740259744, + "language_acc": { + "English": 0.3352272727272727, + "Vietnamese": 0.2840909090909091, + "Chinese": 0.2840909090909091, + "Indonesian": 0.2727272727272727, + "Filipino": 0.30113636363636365, + "Spanish": 0.25, + "Malay": 0.26704545454545453 + }, + "consistency_score_2": 0.3330627705627706, + "consistency_score_3": 0.14139610389610388, + "consistency_score_4": 0.07564935064935065, + "consistency_score_5": 0.04816017316017315, + "consistency_score_6": 0.033279220779220776, + "consistency_score_7": 0.022727272727272728, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.30113636363636365, + "English,Chinese": 0.3806818181818182, + "English,Indonesian": 0.3352272727272727, + "English,Filipino": 0.36363636363636365, + "English,Spanish": 0.3125, + "English,Malay": 0.26136363636363635, + "Vietnamese,Chinese": 0.3522727272727273, + "Vietnamese,Indonesian": 0.3181818181818182, + "Vietnamese,Filipino": 0.35795454545454547, + "Vietnamese,Spanish": 0.2784090909090909, + "Vietnamese,Malay": 0.3125, + "Chinese,Indonesian": 0.3522727272727273, + "Chinese,Filipino": 0.3977272727272727, + "Chinese,Spanish": 0.3352272727272727, + "Chinese,Malay": 0.35795454545454547, + "Indonesian,Filipino": 0.3522727272727273, + "Indonesian,Spanish": 0.25, + "Indonesian,Malay": 0.38636363636363635, + "Filipino,Spanish": 0.29545454545454547, + "Filipino,Malay": 0.4034090909090909, + "Spanish,Malay": 0.2897727272727273 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.16477272727272727, + "English,Vietnamese,Indonesian": 0.11363636363636363, + "English,Vietnamese,Filipino": 0.14204545454545456, + "English,Vietnamese,Spanish": 0.09659090909090909, + "English,Vietnamese,Malay": 0.08522727272727272, + "English,Chinese,Indonesian": 0.1534090909090909, + "English,Chinese,Filipino": 0.1875, + "English,Chinese,Spanish": 0.14772727272727273, + "English,Chinese,Malay": 0.13636363636363635, + "English,Indonesian,Filipino": 0.1534090909090909, + "English,Indonesian,Spanish": 0.10795454545454546, + "English,Indonesian,Malay": 0.14204545454545456, + "English,Filipino,Spanish": 0.11931818181818182, + "English,Filipino,Malay": 0.14204545454545456, + "English,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian": 0.13636363636363635, + "Vietnamese,Chinese,Filipino": 0.19886363636363635, + "Vietnamese,Chinese,Spanish": 0.11363636363636363, + "Vietnamese,Chinese,Malay": 0.1534090909090909, + "Vietnamese,Indonesian,Filipino": 0.14204545454545456, + "Vietnamese,Indonesian,Spanish": 0.09659090909090909, + "Vietnamese,Indonesian,Malay": 0.1534090909090909, + "Vietnamese,Filipino,Spanish": 0.11363636363636363, + "Vietnamese,Filipino,Malay": 0.17045454545454544, + "Vietnamese,Spanish,Malay": 0.125, + "Chinese,Indonesian,Filipino": 0.18181818181818182, + "Chinese,Indonesian,Spanish": 0.13068181818181818, + "Chinese,Indonesian,Malay": 0.19318181818181818, + "Chinese,Filipino,Spanish": 0.125, + "Chinese,Filipino,Malay": 0.20454545454545456, + "Chinese,Spanish,Malay": 0.14772727272727273, + "Indonesian,Filipino,Spanish": 0.11931818181818182, + "Indonesian,Filipino,Malay": 0.19318181818181818, + "Indonesian,Spanish,Malay": 0.125, + "Filipino,Spanish,Malay": 0.13068181818181818 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino": 0.10227272727272728, + "English,Vietnamese,Chinese,Spanish": 0.06818181818181818, + "English,Vietnamese,Chinese,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino": 0.06818181818181818, + "English,Vietnamese,Indonesian,Spanish": 0.03409090909090909, + "English,Vietnamese,Indonesian,Malay": 0.0625, + "English,Vietnamese,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Filipino,Malay": 0.0625, + "English,Vietnamese,Spanish,Malay": 0.03409090909090909, + "English,Chinese,Indonesian,Filipino": 0.11363636363636363, + "English,Chinese,Indonesian,Spanish": 0.056818181818181816, + "English,Chinese,Indonesian,Malay": 0.09659090909090909, + "English,Chinese,Filipino,Spanish": 0.07386363636363637, + "English,Chinese,Filipino,Malay": 0.10227272727272728, + "English,Chinese,Spanish,Malay": 0.07386363636363637, + "English,Indonesian,Filipino,Spanish": 0.06818181818181818, + "English,Indonesian,Filipino,Malay": 0.10227272727272728, + "English,Indonesian,Spanish,Malay": 0.0625, + "English,Filipino,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Filipino": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Spanish": 0.0625, + "Vietnamese,Chinese,Indonesian,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Filipino,Spanish": 0.056818181818181816, + "Vietnamese,Chinese,Filipino,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "Vietnamese,Indonesian,Filipino,Malay": 0.09090909090909091, + "Vietnamese,Indonesian,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Filipino,Spanish,Malay": 0.056818181818181816, + "Chinese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "Chinese,Indonesian,Filipino,Malay": 0.125, + "Chinese,Indonesian,Spanish,Malay": 0.07954545454545454, + "Chinese,Filipino,Spanish,Malay": 0.07386363636363637, + "Indonesian,Filipino,Spanish,Malay": 0.07954545454545454 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "English,Chinese,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + } + }, + "AC3_2": 0.30710603970739425, + "AC3_3": 0.1889947922920426, + "AC3_4": 0.11955390396468958, + "AC3_5": 0.08239262765627324, + "AC3_6": 0.059596971886914894, + "AC3_7": 0.04209642598775508 + }, + "prompt_2": { + "overall_acc": 0.2922077922077922, + "language_acc": { + "English": 0.3409090909090909, + "Vietnamese": 0.3125, + "Chinese": 0.2897727272727273, + "Indonesian": 0.2784090909090909, + "Filipino": 0.2556818181818182, + "Spanish": 0.32954545454545453, + "Malay": 0.23863636363636365 + }, + "consistency_score_2": 0.3871753246753247, + "consistency_score_3": 0.18165584415584418, + "consistency_score_4": 0.09366883116883115, + "consistency_score_5": 0.05113636363636363, + "consistency_score_6": 0.029220779220779227, + "consistency_score_7": 0.017045454545454544, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3409090909090909, + "English,Chinese": 0.32954545454545453, + "English,Indonesian": 0.4147727272727273, + "English,Filipino": 0.38636363636363635, + "English,Spanish": 0.4090909090909091, + "English,Malay": 0.375, + "Vietnamese,Chinese": 0.3068181818181818, + "Vietnamese,Indonesian": 0.3068181818181818, + "Vietnamese,Filipino": 0.2556818181818182, + "Vietnamese,Spanish": 0.3352272727272727, + "Vietnamese,Malay": 0.32386363636363635, + "Chinese,Indonesian": 0.20454545454545456, + "Chinese,Filipino": 0.17045454545454544, + "Chinese,Spanish": 0.23295454545454544, + "Chinese,Malay": 0.2784090909090909, + "Indonesian,Filipino": 0.6363636363636364, + "Indonesian,Spanish": 0.5397727272727273, + "Indonesian,Malay": 0.6761363636363636, + "Filipino,Spanish": 0.5284090909090909, + "Filipino,Malay": 0.6136363636363636, + "Spanish,Malay": 0.4659090909090909 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.13636363636363635, + "English,Vietnamese,Indonesian": 0.14204545454545456, + "English,Vietnamese,Filipino": 0.14772727272727273, + "English,Vietnamese,Spanish": 0.1534090909090909, + "English,Vietnamese,Malay": 0.14204545454545456, + "English,Chinese,Indonesian": 0.09659090909090909, + "English,Chinese,Filipino": 0.09090909090909091, + "English,Chinese,Spanish": 0.11363636363636363, + "English,Chinese,Malay": 0.11363636363636363, + "English,Indonesian,Filipino": 0.2897727272727273, + "English,Indonesian,Spanish": 0.2727272727272727, + "English,Indonesian,Malay": 0.2784090909090909, + "English,Filipino,Spanish": 0.2556818181818182, + "English,Filipino,Malay": 0.2556818181818182, + "English,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian": 0.07954545454545454, + "Vietnamese,Chinese,Filipino": 0.056818181818181816, + "Vietnamese,Chinese,Spanish": 0.09090909090909091, + "Vietnamese,Chinese,Malay": 0.11363636363636363, + "Vietnamese,Indonesian,Filipino": 0.17613636363636365, + "Vietnamese,Indonesian,Spanish": 0.17613636363636365, + "Vietnamese,Indonesian,Malay": 0.2159090909090909, + "Vietnamese,Filipino,Spanish": 0.14772727272727273, + "Vietnamese,Filipino,Malay": 0.17613636363636365, + "Vietnamese,Spanish,Malay": 0.17613636363636365, + "Chinese,Indonesian,Filipino": 0.08522727272727272, + "Chinese,Indonesian,Spanish": 0.09659090909090909, + "Chinese,Indonesian,Malay": 0.13636363636363635, + "Chinese,Filipino,Spanish": 0.06818181818181818, + "Chinese,Filipino,Malay": 0.10227272727272728, + "Chinese,Spanish,Malay": 0.09659090909090909, + "Indonesian,Filipino,Spanish": 0.4090909090909091, + "Indonesian,Filipino,Malay": 0.5056818181818182, + "Indonesian,Spanish,Malay": 0.3806818181818182, + "Filipino,Spanish,Malay": 0.3693181818181818 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino": 0.03977272727272727, + "English,Vietnamese,Chinese,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino": 0.10795454545454546, + "English,Vietnamese,Indonesian,Spanish": 0.10227272727272728, + "English,Vietnamese,Indonesian,Malay": 0.10795454545454546, + "English,Vietnamese,Filipino,Spanish": 0.10227272727272728, + "English,Vietnamese,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Spanish,Malay": 0.09659090909090909, + "English,Chinese,Indonesian,Filipino": 0.05113636363636364, + "English,Chinese,Indonesian,Spanish": 0.05113636363636364, + "English,Chinese,Indonesian,Malay": 0.06818181818181818, + "English,Chinese,Filipino,Spanish": 0.03977272727272727, + "English,Chinese,Filipino,Malay": 0.05113636363636364, + "English,Chinese,Spanish,Malay": 0.03977272727272727, + "English,Indonesian,Filipino,Spanish": 0.2159090909090909, + "English,Indonesian,Filipino,Malay": 0.22727272727272727, + "English,Indonesian,Spanish,Malay": 0.1875, + "English,Filipino,Spanish,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian,Filipino": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Spanish": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Filipino,Spanish": 0.03409090909090909, + "Vietnamese,Chinese,Filipino,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "Vietnamese,Indonesian,Filipino,Malay": 0.14772727272727273, + "Vietnamese,Indonesian,Spanish,Malay": 0.125, + "Vietnamese,Filipino,Spanish,Malay": 0.11931818181818182, + "Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "Chinese,Indonesian,Filipino,Malay": 0.06818181818181818, + "Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "Chinese,Filipino,Spanish,Malay": 0.056818181818181816, + "Indonesian,Filipino,Spanish,Malay": 0.32386363636363635 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.03977272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino,Malay": 0.028409090909090908, + "English,Vietnamese,Chinese,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "English,Chinese,Indonesian,Filipino,Malay": 0.03977272727272727, + "English,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "English,Chinese,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.028409090909090908, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + } + }, + "AC3_2": 0.3330540426824391, + "AC3_3": 0.2240359845231356, + "AC3_4": 0.14186276488027125, + "AC3_5": 0.08704061893016173, + "AC3_6": 0.053128689475796936, + "AC3_7": 0.03221188259516737 + }, + "prompt_3": { + "overall_acc": 0.2865259740259741, + "language_acc": { + "English": 0.3181818181818182, + "Vietnamese": 0.26704545454545453, + "Chinese": 0.2556818181818182, + "Indonesian": 0.2897727272727273, + "Filipino": 0.29545454545454547, + "Spanish": 0.3068181818181818, + "Malay": 0.2727272727272727 + }, + "consistency_score_2": 0.38149350649350644, + "consistency_score_3": 0.1720779220779221, + "consistency_score_4": 0.08084415584415586, + "consistency_score_5": 0.03733766233766234, + "consistency_score_6": 0.016233766233766232, + "consistency_score_7": 0.005681818181818182, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3352272727272727, + "English,Chinese": 0.35795454545454547, + "English,Indonesian": 0.4431818181818182, + "English,Filipino": 0.36363636363636365, + "English,Spanish": 0.4375, + "English,Malay": 0.4659090909090909, + "Vietnamese,Chinese": 0.26136363636363635, + "Vietnamese,Indonesian": 0.30113636363636365, + "Vietnamese,Filipino": 0.21022727272727273, + "Vietnamese,Spanish": 0.24431818181818182, + "Vietnamese,Malay": 0.35795454545454547, + "Chinese,Indonesian": 0.23863636363636365, + "Chinese,Filipino": 0.14204545454545456, + "Chinese,Spanish": 0.2215909090909091, + "Chinese,Malay": 0.2727272727272727, + "Indonesian,Filipino": 0.6306818181818182, + "Indonesian,Spanish": 0.5340909090909091, + "Indonesian,Malay": 0.6534090909090909, + "Filipino,Spanish": 0.5511363636363636, + "Filipino,Malay": 0.5511363636363636, + "Spanish,Malay": 0.4375 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.10227272727272728, + "English,Vietnamese,Indonesian": 0.13636363636363635, + "English,Vietnamese,Filipino": 0.10227272727272728, + "English,Vietnamese,Spanish": 0.13068181818181818, + "English,Vietnamese,Malay": 0.16477272727272727, + "English,Chinese,Indonesian": 0.14204545454545456, + "English,Chinese,Filipino": 0.045454545454545456, + "English,Chinese,Spanish": 0.125, + "English,Chinese,Malay": 0.14204545454545456, + "English,Indonesian,Filipino": 0.2784090909090909, + "English,Indonesian,Spanish": 0.29545454545454547, + "English,Indonesian,Malay": 0.3125, + "English,Filipino,Spanish": 0.26704545454545453, + "English,Filipino,Malay": 0.2556818181818182, + "English,Spanish,Malay": 0.25, + "Vietnamese,Chinese,Indonesian": 0.07954545454545454, + "Vietnamese,Chinese,Filipino": 0.022727272727272728, + "Vietnamese,Chinese,Spanish": 0.06818181818181818, + "Vietnamese,Chinese,Malay": 0.09659090909090909, + "Vietnamese,Indonesian,Filipino": 0.1534090909090909, + "Vietnamese,Indonesian,Spanish": 0.14204545454545456, + "Vietnamese,Indonesian,Malay": 0.2215909090909091, + "Vietnamese,Filipino,Spanish": 0.13068181818181818, + "Vietnamese,Filipino,Malay": 0.14772727272727273, + "Vietnamese,Spanish,Malay": 0.11931818181818182, + "Chinese,Indonesian,Filipino": 0.07954545454545454, + "Chinese,Indonesian,Spanish": 0.10227272727272728, + "Chinese,Indonesian,Malay": 0.1590909090909091, + "Chinese,Filipino,Spanish": 0.056818181818181816, + "Chinese,Filipino,Malay": 0.07954545454545454, + "Chinese,Spanish,Malay": 0.09659090909090909, + "Indonesian,Filipino,Spanish": 0.39204545454545453, + "Indonesian,Filipino,Malay": 0.44886363636363635, + "Indonesian,Spanish,Malay": 0.3522727272727273, + "Filipino,Spanish,Malay": 0.32386363636363635 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino": 0.005681818181818182, + "English,Vietnamese,Chinese,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino": 0.07954545454545454, + "English,Vietnamese,Indonesian,Spanish": 0.08522727272727272, + "English,Vietnamese,Indonesian,Malay": 0.10227272727272728, + "English,Vietnamese,Filipino,Spanish": 0.07386363636363637, + "English,Vietnamese,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino": 0.03409090909090909, + "English,Chinese,Indonesian,Spanish": 0.0625, + "English,Chinese,Indonesian,Malay": 0.08522727272727272, + "English,Chinese,Filipino,Spanish": 0.022727272727272728, + "English,Chinese,Filipino,Malay": 0.028409090909090908, + "English,Chinese,Spanish,Malay": 0.0625, + "English,Indonesian,Filipino,Spanish": 0.2215909090909091, + "English,Indonesian,Filipino,Malay": 0.21022727272727273, + "English,Indonesian,Spanish,Malay": 0.20454545454545456, + "English,Filipino,Spanish,Malay": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Filipino": 0.011363636363636364, + "Vietnamese,Chinese,Indonesian,Spanish": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Malay": 0.0625, + "Vietnamese,Chinese,Filipino,Spanish": 0.011363636363636364, + "Vietnamese,Chinese,Filipino,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "Vietnamese,Indonesian,Filipino,Malay": 0.13068181818181818, + "Vietnamese,Indonesian,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Filipino,Spanish,Malay": 0.07954545454545454, + "Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "Chinese,Indonesian,Filipino,Malay": 0.0625, + "Chinese,Indonesian,Spanish,Malay": 0.07386363636363637, + "Chinese,Filipino,Spanish,Malay": 0.03977272727272727, + "Indonesian,Filipino,Spanish,Malay": 0.2727272727272727 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.005681818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.03409090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.005681818181818182, + "English,Vietnamese,Chinese,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.0625, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Indonesian,Filipino,Spanish": 0.022727272727272728, + "English,Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, + "English,Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Indonesian,Filipino,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.011363636363636364, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 + } + }, + "AC3_2": 0.3272593141483563, + "AC3_3": 0.21502126187703077, + "AC3_4": 0.12610688027052067, + "AC3_5": 0.06606613935401069, + "AC3_6": 0.030726645997998493, + "AC3_7": 0.011142676763863496 + }, + "prompt_4": { + "overall_acc": 0.28652597402597396, + "language_acc": { + "English": 0.3068181818181818, + "Vietnamese": 0.26136363636363635, + "Chinese": 0.3181818181818182, + "Indonesian": 0.29545454545454547, + "Filipino": 0.32386363636363635, + "Spanish": 0.2159090909090909, + "Malay": 0.2840909090909091 + }, + "consistency_score_2": 0.3601190476190476, + "consistency_score_3": 0.16542207792207791, + "consistency_score_4": 0.08198051948051945, + "consistency_score_5": 0.039772727272727265, + "consistency_score_6": 0.018668831168831168, + "consistency_score_7": 0.011363636363636364, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3181818181818182, + "English,Chinese": 0.4715909090909091, + "English,Indonesian": 0.4034090909090909, + "English,Filipino": 0.4318181818181818, + "English,Spanish": 0.2215909090909091, + "English,Malay": 0.38636363636363635, + "Vietnamese,Chinese": 0.35795454545454547, + "Vietnamese,Indonesian": 0.3465909090909091, + "Vietnamese,Filipino": 0.3181818181818182, + "Vietnamese,Spanish": 0.24431818181818182, + "Vietnamese,Malay": 0.3465909090909091, + "Chinese,Indonesian": 0.4318181818181818, + "Chinese,Filipino": 0.4090909090909091, + "Chinese,Spanish": 0.30113636363636365, + "Chinese,Malay": 0.4602272727272727, + "Indonesian,Filipino": 0.4090909090909091, + "Indonesian,Spanish": 0.25, + "Indonesian,Malay": 0.5170454545454546, + "Filipino,Spanish": 0.22727272727272727, + "Filipino,Malay": 0.4602272727272727, + "Spanish,Malay": 0.25 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.17613636363636365, + "English,Vietnamese,Indonesian": 0.1590909090909091, + "English,Vietnamese,Filipino": 0.1534090909090909, + "English,Vietnamese,Spanish": 0.07954545454545454, + "English,Vietnamese,Malay": 0.14772727272727273, + "English,Chinese,Indonesian": 0.26704545454545453, + "English,Chinese,Filipino": 0.23863636363636365, + "English,Chinese,Spanish": 0.13068181818181818, + "English,Chinese,Malay": 0.2556818181818182, + "English,Indonesian,Filipino": 0.24431818181818182, + "English,Indonesian,Spanish": 0.09659090909090909, + "English,Indonesian,Malay": 0.23863636363636365, + "English,Filipino,Spanish": 0.09090909090909091, + "English,Filipino,Malay": 0.23863636363636365, + "English,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian": 0.1875, + "Vietnamese,Chinese,Filipino": 0.1590909090909091, + "Vietnamese,Chinese,Spanish": 0.10227272727272728, + "Vietnamese,Chinese,Malay": 0.18181818181818182, + "Vietnamese,Indonesian,Filipino": 0.17045454545454544, + "Vietnamese,Indonesian,Spanish": 0.07954545454545454, + "Vietnamese,Indonesian,Malay": 0.2215909090909091, + "Vietnamese,Filipino,Spanish": 0.07386363636363637, + "Vietnamese,Filipino,Malay": 0.1875, + "Vietnamese,Spanish,Malay": 0.07954545454545454, + "Chinese,Indonesian,Filipino": 0.22727272727272727, + "Chinese,Indonesian,Spanish": 0.11363636363636363, + "Chinese,Indonesian,Malay": 0.3068181818181818, + "Chinese,Filipino,Spanish": 0.11363636363636363, + "Chinese,Filipino,Malay": 0.2556818181818182, + "Chinese,Spanish,Malay": 0.13068181818181818, + "Indonesian,Filipino,Spanish": 0.09090909090909091, + "Indonesian,Filipino,Malay": 0.2897727272727273, + "Indonesian,Spanish,Malay": 0.125, + "Filipino,Spanish,Malay": 0.09090909090909091 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.11363636363636363, + "English,Vietnamese,Chinese,Filipino": 0.09090909090909091, + "English,Vietnamese,Chinese,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Malay": 0.10227272727272728, + "English,Vietnamese,Indonesian,Filipino": 0.09659090909090909, + "English,Vietnamese,Indonesian,Spanish": 0.03977272727272727, + "English,Vietnamese,Indonesian,Malay": 0.10227272727272728, + "English,Vietnamese,Filipino,Spanish": 0.03409090909090909, + "English,Vietnamese,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Spanish,Malay": 0.028409090909090908, + "English,Chinese,Indonesian,Filipino": 0.1534090909090909, + "English,Chinese,Indonesian,Spanish": 0.0625, + "English,Chinese,Indonesian,Malay": 0.17613636363636365, + "English,Chinese,Filipino,Spanish": 0.0625, + "English,Chinese,Filipino,Malay": 0.14772727272727273, + "English,Chinese,Spanish,Malay": 0.06818181818181818, + "English,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Indonesian,Filipino,Malay": 0.17045454545454544, + "English,Indonesian,Spanish,Malay": 0.03977272727272727, + "English,Filipino,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Indonesian,Filipino": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Spanish": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Filipino,Spanish": 0.03977272727272727, + "Vietnamese,Chinese,Filipino,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "Vietnamese,Indonesian,Filipino,Malay": 0.13636363636363635, + "Vietnamese,Indonesian,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Filipino,Spanish,Malay": 0.03977272727272727, + "Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "Chinese,Indonesian,Filipino,Malay": 0.18181818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.07954545454545454, + "Chinese,Filipino,Spanish,Malay": 0.056818181818181816, + "Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.022727272727272728, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "English,Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "English,Chinese,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364 + } + }, + "AC3_2": 0.3191347877569835, + "AC3_3": 0.20974853984760414, + "AC3_4": 0.1274851249725563, + "AC3_5": 0.06984961553716412, + "AC3_6": 0.03505370957679837, + "AC3_7": 0.02186029228892121 + }, + "prompt_5": { + "overall_acc": 0.28165584415584416, + "language_acc": { + "English": 0.2556818181818182, + "Vietnamese": 0.2727272727272727, + "Chinese": 0.3181818181818182, + "Indonesian": 0.29545454545454547, + "Filipino": 0.26704545454545453, + "Spanish": 0.2840909090909091, + "Malay": 0.2784090909090909 + }, + "consistency_score_2": 0.42261904761904756, + "consistency_score_3": 0.22581168831168832, + "consistency_score_4": 0.13360389610389609, + "consistency_score_5": 0.08279220779220776, + "consistency_score_6": 0.05275974025974026, + "consistency_score_7": 0.03409090909090909, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4659090909090909, + "English,Chinese": 0.5170454545454546, + "English,Indonesian": 0.4318181818181818, + "English,Filipino": 0.3693181818181818, + "English,Spanish": 0.3409090909090909, + "English,Malay": 0.38636363636363635, + "Vietnamese,Chinese": 0.5056818181818182, + "Vietnamese,Indonesian": 0.48295454545454547, + "Vietnamese,Filipino": 0.42045454545454547, + "Vietnamese,Spanish": 0.3806818181818182, + "Vietnamese,Malay": 0.5568181818181818, + "Chinese,Indonesian": 0.45454545454545453, + "Chinese,Filipino": 0.3352272727272727, + "Chinese,Spanish": 0.36363636363636365, + "Chinese,Malay": 0.4602272727272727, + "Indonesian,Filipino": 0.4147727272727273, + "Indonesian,Spanish": 0.32954545454545453, + "Indonesian,Malay": 0.5625, + "Filipino,Spanish": 0.3125, + "Filipino,Malay": 0.4090909090909091, + "Spanish,Malay": 0.375 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.3125, + "English,Vietnamese,Indonesian": 0.2556818181818182, + "English,Vietnamese,Filipino": 0.20454545454545456, + "English,Vietnamese,Spanish": 0.1875, + "English,Vietnamese,Malay": 0.2897727272727273, + "English,Chinese,Indonesian": 0.2897727272727273, + "English,Chinese,Filipino": 0.20454545454545456, + "English,Chinese,Spanish": 0.19886363636363635, + "English,Chinese,Malay": 0.26704545454545453, + "English,Indonesian,Filipino": 0.21022727272727273, + "English,Indonesian,Spanish": 0.17613636363636365, + "English,Indonesian,Malay": 0.2556818181818182, + "English,Filipino,Spanish": 0.14204545454545456, + "English,Filipino,Malay": 0.1875, + "English,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian": 0.29545454545454547, + "Vietnamese,Chinese,Filipino": 0.20454545454545456, + "Vietnamese,Chinese,Spanish": 0.21022727272727273, + "Vietnamese,Chinese,Malay": 0.3352272727272727, + "Vietnamese,Indonesian,Filipino": 0.24431818181818182, + "Vietnamese,Indonesian,Spanish": 0.21022727272727273, + "Vietnamese,Indonesian,Malay": 0.35795454545454547, + "Vietnamese,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Filipino,Malay": 0.26704545454545453, + "Vietnamese,Spanish,Malay": 0.24431818181818182, + "Chinese,Indonesian,Filipino": 0.21022727272727273, + "Chinese,Indonesian,Spanish": 0.16477272727272727, + "Chinese,Indonesian,Malay": 0.32386363636363635, + "Chinese,Filipino,Spanish": 0.125, + "Chinese,Filipino,Malay": 0.19318181818181818, + "Chinese,Spanish,Malay": 0.21022727272727273, + "Indonesian,Filipino,Spanish": 0.1534090909090909, + "Indonesian,Filipino,Malay": 0.26136363636363635, + "Indonesian,Spanish,Malay": 0.2159090909090909, + "Filipino,Spanish,Malay": 0.16477272727272727 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.19318181818181818, + "English,Vietnamese,Chinese,Filipino": 0.13068181818181818, + "English,Vietnamese,Chinese,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Malay": 0.2159090909090909, + "English,Vietnamese,Indonesian,Filipino": 0.125, + "English,Vietnamese,Indonesian,Spanish": 0.11931818181818182, + "English,Vietnamese,Indonesian,Malay": 0.1875, + "English,Vietnamese,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Filipino,Malay": 0.13068181818181818, + "English,Vietnamese,Spanish,Malay": 0.13068181818181818, + "English,Chinese,Indonesian,Filipino": 0.14772727272727273, + "English,Chinese,Indonesian,Spanish": 0.125, + "English,Chinese,Indonesian,Malay": 0.21022727272727273, + "English,Chinese,Filipino,Spanish": 0.07386363636363637, + "English,Chinese,Filipino,Malay": 0.10795454545454546, + "English,Chinese,Spanish,Malay": 0.11363636363636363, + "English,Indonesian,Filipino,Spanish": 0.10227272727272728, + "English,Indonesian,Filipino,Malay": 0.14204545454545456, + "English,Indonesian,Spanish,Malay": 0.11931818181818182, + "English,Filipino,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Filipino": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Spanish": 0.125, + "Vietnamese,Chinese,Indonesian,Malay": 0.23295454545454544, + "Vietnamese,Chinese,Filipino,Spanish": 0.09090909090909091, + "Vietnamese,Chinese,Filipino,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "Vietnamese,Indonesian,Filipino,Malay": 0.19318181818181818, + "Vietnamese,Indonesian,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Filipino,Spanish,Malay": 0.11363636363636363, + "Chinese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "Chinese,Indonesian,Filipino,Malay": 0.1590909090909091, + "Chinese,Indonesian,Spanish,Malay": 0.125, + "Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "Indonesian,Filipino,Spanish,Malay": 0.10227272727272728 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.09659090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.09090909090909091, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.1590909090909091, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "English,Chinese,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + } + }, + "AC3_2": 0.33803029466689244, + "AC3_3": 0.2506610871040151, + "AC3_4": 0.1812374977986228, + "AC3_5": 0.12796835677511467, + "AC3_6": 0.0888719896342493, + "AC3_7": 0.06082028509408187 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4563106796116505 + }, + "prompt_2": { + "accuracy": 0.33980582524271846 + }, + "prompt_3": { + "accuracy": 0.2621359223300971 + }, + "prompt_4": { + "accuracy": 0.36893203883495146 + }, + "prompt_5": { + "accuracy": 0.4077669902912621 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3047619047619048 + }, + "prompt_2": { + "accuracy": 0.3238095238095238 + }, + "prompt_3": { + "accuracy": 0.3523809523809524 + }, + "prompt_4": { + "accuracy": 0.26666666666666666 + }, + "prompt_5": { + "accuracy": 0.34285714285714286 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4672897196261682 + }, + "prompt_2": { + "accuracy": 0.27102803738317754 + }, + "prompt_3": { + "accuracy": 0.308411214953271 + }, + "prompt_4": { + "accuracy": 0.29906542056074764 + }, + "prompt_5": { + "accuracy": 0.42990654205607476 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.3, + "history": 0.26666666666666666, + "literature": 0.1, + "politics": 0.7, + "culture": 0.3, + "film": 0.3, + "law": 0.1, + "geography": 0.5 + } + }, + "prompt_2": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.1, + "history": 0.3333333333333333, + "literature": 0.4, + "politics": 0.6, + "culture": 0.4, + "film": 0.3, + "law": 0.2, + "geography": 0.3 + } + }, + "prompt_3": { + "accuracy": 0.34, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.2, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.5, + "culture": 0.3, + "film": 0.2, + "law": 0.5, + "geography": 0.5 + } + }, + "prompt_4": { + "accuracy": 0.37, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.4, + "history": 0.26666666666666666, + "literature": 0.5, + "politics": 0.8, + "culture": 0.4, + "film": 0.1, + "law": 0.3, + "geography": 0.2 + } + }, + "prompt_5": { + "accuracy": 0.38, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.5, + "history": 0.2, + "literature": 0.4, + "politics": 0.7, + "culture": 0.4, + "film": 0.2, + "law": 0.2, + "geography": 0.6 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.06969720932343763 + }, + "prompt_2": { + "bleu_score": 0.2621274965360322 + }, + "prompt_3": { + "bleu_score": 0.25324253330052143 + }, + "prompt_4": { + "bleu_score": 0.06785809734091724 + }, + "prompt_5": { + "bleu_score": 0.050878707870262926 + } }, "indommlu": { "prompt_1": -1, @@ -3440,179 +31095,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.07003183714753798 + }, + "prompt_2": { + "bleu_score": 0.06743279892244594 + }, + "prompt_3": { + "bleu_score": 0.065805194170142 + }, + "prompt_4": { + "bleu_score": 0.09243853790160195 + }, + "prompt_5": { + "bleu_score": 0.0635046502775598 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.03458197044096182 + }, + "prompt_2": { + "bleu_score": 0.03214726190019902 + }, + "prompt_3": { + "bleu_score": 0.030266400390973575 + }, + "prompt_4": { + "bleu_score": 0.044546316113437116 + }, + "prompt_5": { + "bleu_score": 0.03115076671412243 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.042403480989990555 + }, + "prompt_2": { + "bleu_score": 0.042444921762706304 + }, + "prompt_3": { + "bleu_score": 0.04069350796820028 + }, + "prompt_4": { + "bleu_score": 0.055348471502305034 + }, + "prompt_5": { + "bleu_score": 0.042780709788686505 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.06797457642076879 + }, + "prompt_2": { + "bleu_score": 0.062340692539453194 + }, + "prompt_3": { + "bleu_score": 0.062330986982318745 + }, + "prompt_4": { + "bleu_score": 0.08890251302727019 + }, + "prompt_5": { + "bleu_score": 0.061522179783370115 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3792298716452742 + }, + "prompt_2": { + "accuracy": 0.34655775962660446 + }, + "prompt_3": { + "accuracy": 0.3313885647607935 + }, + "prompt_4": { + "accuracy": 0.37456242707117854 + }, + "prompt_5": { + "accuracy": 0.39323220536756126 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.38240972470504114, + "category_acc": { + "high_school_european_history": 0.45121951219512196, + "business_ethics": 0.40404040404040403, + "clinical_knowledge": 0.45454545454545453, + "medical_genetics": 0.40404040404040403, + "high_school_us_history": 0.4482758620689655, + "high_school_physics": 0.25333333333333335, + "high_school_world_history": 0.4576271186440678, + "virology": 0.38181818181818183, + "high_school_microeconomics": 0.4050632911392405, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.37373737373737376, + "high_school_biology": 0.43042071197411, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.3167259786476868, + "philosophy": 0.45483870967741935, + "professional_medicine": 0.39114391143911437, + "nutrition": 0.28524590163934427, + "global_facts": 0.30303030303030304, + "machine_learning": 0.36936936936936937, + "security_studies": 0.28688524590163933, + "public_relations": 0.3486238532110092, + "professional_psychology": 0.3862520458265139, + "prehistory": 0.44272445820433437, + "anatomy": 0.3880597014925373, + "human_sexuality": 0.3923076923076923, + "college_medicine": 0.4186046511627907, + "high_school_government_and_politics": 0.4895833333333333, + "college_chemistry": 0.29292929292929293, + "logical_fallacies": 0.4506172839506173, + "high_school_geography": 0.4720812182741117, + "elementary_mathematics": 0.30238726790450926, + "human_aging": 0.3918918918918919, + "college_mathematics": 0.24242424242424243, + "high_school_psychology": 0.4889705882352941, + "formal_logic": 0.416, + "high_school_statistics": 0.29767441860465116, + "international_law": 0.5083333333333333, + "high_school_mathematics": 0.22304832713754646, + "high_school_computer_science": 0.3939393939393939, + "conceptual_physics": 0.3504273504273504, + "miscellaneous": 0.45524296675191817, + "high_school_chemistry": 0.2722772277227723, + "marketing": 0.6094420600858369, + "professional_law": 0.3222439660795825, + "management": 0.47058823529411764, + "college_physics": 0.2079207920792079, + "jurisprudence": 0.4953271028037383, + "world_religions": 0.4764705882352941, + "sociology": 0.565, + "us_foreign_policy": 0.5656565656565656, + "high_school_macroeconomics": 0.4190231362467866, + "computer_security": 0.45454545454545453, + "moral_scenarios": 0.24720357941834453, + "moral_disputes": 0.4144927536231884, + "electrical_engineering": 0.2916666666666667, + "astronomy": 0.4768211920529801, + "college_biology": 0.4195804195804196 + } + }, + "prompt_2": { + "accuracy": 0.3589560243117626, + "category_acc": { + "high_school_european_history": 0.45121951219512196, + "business_ethics": 0.42424242424242425, + "clinical_knowledge": 0.4621212121212121, + "medical_genetics": 0.43434343434343436, + "high_school_us_history": 0.39901477832512317, + "high_school_physics": 0.30666666666666664, + "high_school_world_history": 0.4025423728813559, + "virology": 0.3393939393939394, + "high_school_microeconomics": 0.38396624472573837, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.3838383838383838, + "high_school_biology": 0.43042071197411, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.298932384341637, + "philosophy": 0.36451612903225805, + "professional_medicine": 0.5055350553505535, + "nutrition": 0.36065573770491804, + "global_facts": 0.21212121212121213, + "machine_learning": 0.27927927927927926, + "security_studies": 0.4385245901639344, + "public_relations": 0.30275229357798167, + "professional_psychology": 0.33060556464811786, + "prehistory": 0.3591331269349845, + "anatomy": 0.3283582089552239, + "human_sexuality": 0.4076923076923077, + "college_medicine": 0.38953488372093026, + "high_school_government_and_politics": 0.4375, + "college_chemistry": 0.3434343434343434, + "logical_fallacies": 0.36419753086419754, + "high_school_geography": 0.3553299492385787, + "elementary_mathematics": 0.2519893899204244, + "human_aging": 0.2972972972972973, + "college_mathematics": 0.3333333333333333, + "high_school_psychology": 0.47058823529411764, + "formal_logic": 0.336, + "high_school_statistics": 0.37209302325581395, + "international_law": 0.38333333333333336, + "high_school_mathematics": 0.27137546468401486, + "high_school_computer_science": 0.29292929292929293, + "conceptual_physics": 0.2692307692307692, + "miscellaneous": 0.36445012787723785, + "high_school_chemistry": 0.30198019801980197, + "marketing": 0.5107296137339056, + "professional_law": 0.33463796477495106, + "management": 0.4215686274509804, + "college_physics": 0.2871287128712871, + "jurisprudence": 0.37383177570093457, + "world_religions": 0.4470588235294118, + "sociology": 0.445, + "us_foreign_policy": 0.5050505050505051, + "high_school_macroeconomics": 0.34704370179948585, + "computer_security": 0.3939393939393939, + "moral_scenarios": 0.2740492170022371, + "moral_disputes": 0.34782608695652173, + "electrical_engineering": 0.3819444444444444, + "astronomy": 0.3708609271523179, + "college_biology": 0.3006993006993007 + } + }, + "prompt_3": { + "accuracy": 0.35774043618162316, + "category_acc": { + "high_school_european_history": 0.4573170731707317, + "business_ethics": 0.3838383838383838, + "clinical_knowledge": 0.42803030303030304, + "medical_genetics": 0.3939393939393939, + "high_school_us_history": 0.3694581280788177, + "high_school_physics": 0.32, + "high_school_world_history": 0.3940677966101695, + "virology": 0.3333333333333333, + "high_school_microeconomics": 0.33755274261603374, + "econometrics": 0.23008849557522124, + "college_computer_science": 0.36363636363636365, + "high_school_biology": 0.4174757281553398, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.23487544483985764, + "philosophy": 0.4, + "professional_medicine": 0.4944649446494465, + "nutrition": 0.4262295081967213, + "global_facts": 0.26262626262626265, + "machine_learning": 0.22522522522522523, + "security_studies": 0.430327868852459, + "public_relations": 0.3853211009174312, + "professional_psychology": 0.3453355155482815, + "prehistory": 0.37770897832817335, + "anatomy": 0.3283582089552239, + "human_sexuality": 0.3923076923076923, + "college_medicine": 0.37790697674418605, + "high_school_government_and_politics": 0.46875, + "college_chemistry": 0.45454545454545453, + "logical_fallacies": 0.42592592592592593, + "high_school_geography": 0.467005076142132, + "elementary_mathematics": 0.23076923076923078, + "human_aging": 0.3108108108108108, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.43566176470588236, + "formal_logic": 0.304, + "high_school_statistics": 0.37209302325581395, + "international_law": 0.44166666666666665, + "high_school_mathematics": 0.2527881040892193, + "high_school_computer_science": 0.30303030303030304, + "conceptual_physics": 0.25213675213675213, + "miscellaneous": 0.38618925831202044, + "high_school_chemistry": 0.297029702970297, + "marketing": 0.463519313304721, + "professional_law": 0.31180691454664056, + "management": 0.4803921568627451, + "college_physics": 0.2871287128712871, + "jurisprudence": 0.4205607476635514, + "world_religions": 0.4647058823529412, + "sociology": 0.43, + "us_foreign_policy": 0.46464646464646464, + "high_school_macroeconomics": 0.34704370179948585, + "computer_security": 0.3939393939393939, + "moral_scenarios": 0.2662192393736018, + "moral_disputes": 0.32753623188405795, + "electrical_engineering": 0.3472222222222222, + "astronomy": 0.39072847682119205, + "college_biology": 0.3916083916083916 + } + }, + "prompt_4": { + "accuracy": 0.40486235252055774, + "category_acc": { + "high_school_european_history": 0.5182926829268293, + "business_ethics": 0.43434343434343436, + "clinical_knowledge": 0.4621212121212121, + "medical_genetics": 0.45454545454545453, + "high_school_us_history": 0.47783251231527096, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.423728813559322, + "virology": 0.34545454545454546, + "high_school_microeconomics": 0.4008438818565401, + "econometrics": 0.21238938053097345, + "college_computer_science": 0.35353535353535354, + "high_school_biology": 0.47249190938511326, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.35587188612099646, + "philosophy": 0.4967741935483871, + "professional_medicine": 0.4317343173431734, + "nutrition": 0.26885245901639343, + "global_facts": 0.2828282828282828, + "machine_learning": 0.36036036036036034, + "security_studies": 0.3401639344262295, + "public_relations": 0.4036697247706422, + "professional_psychology": 0.397708674304419, + "prehistory": 0.46439628482972134, + "anatomy": 0.44029850746268656, + "human_sexuality": 0.5153846153846153, + "college_medicine": 0.4069767441860465, + "high_school_government_and_politics": 0.5, + "college_chemistry": 0.3333333333333333, + "logical_fallacies": 0.46296296296296297, + "high_school_geography": 0.5532994923857868, + "elementary_mathematics": 0.30238726790450926, + "human_aging": 0.3963963963963964, + "college_mathematics": 0.32323232323232326, + "high_school_psychology": 0.5643382352941176, + "formal_logic": 0.384, + "high_school_statistics": 0.27906976744186046, + "international_law": 0.5916666666666667, + "high_school_mathematics": 0.2899628252788104, + "high_school_computer_science": 0.4444444444444444, + "conceptual_physics": 0.3504273504273504, + "miscellaneous": 0.5613810741687979, + "high_school_chemistry": 0.26732673267326734, + "marketing": 0.6695278969957081, + "professional_law": 0.30724070450097846, + "management": 0.45098039215686275, + "college_physics": 0.24752475247524752, + "jurisprudence": 0.5327102803738317, + "world_religions": 0.5764705882352941, + "sociology": 0.555, + "us_foreign_policy": 0.6060606060606061, + "high_school_macroeconomics": 0.39588688946015427, + "computer_security": 0.47474747474747475, + "moral_scenarios": 0.24496644295302014, + "moral_disputes": 0.42318840579710143, + "electrical_engineering": 0.4027777777777778, + "astronomy": 0.44370860927152317, + "college_biology": 0.4195804195804196 + } + }, + "prompt_5": { + "accuracy": 0.39949946371111905, + "category_acc": { + "high_school_european_history": 0.49390243902439024, + "business_ethics": 0.45454545454545453, + "clinical_knowledge": 0.4621212121212121, + "medical_genetics": 0.46464646464646464, + "high_school_us_history": 0.4630541871921182, + "high_school_physics": 0.31333333333333335, + "high_school_world_history": 0.3898305084745763, + "virology": 0.4, + "high_school_microeconomics": 0.4050632911392405, + "econometrics": 0.23008849557522124, + "college_computer_science": 0.35353535353535354, + "high_school_biology": 0.459546925566343, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.3309608540925267, + "philosophy": 0.49032258064516127, + "professional_medicine": 0.3726937269372694, + "nutrition": 0.3475409836065574, + "global_facts": 0.3333333333333333, + "machine_learning": 0.35135135135135137, + "security_studies": 0.29918032786885246, + "public_relations": 0.44036697247706424, + "professional_psychology": 0.3878887070376432, + "prehistory": 0.4674922600619195, + "anatomy": 0.4626865671641791, + "human_sexuality": 0.5, + "college_medicine": 0.436046511627907, + "high_school_government_and_politics": 0.5052083333333334, + "college_chemistry": 0.2828282828282828, + "logical_fallacies": 0.4382716049382716, + "high_school_geography": 0.5583756345177665, + "elementary_mathematics": 0.2891246684350133, + "human_aging": 0.34684684684684686, + "college_mathematics": 0.36363636363636365, + "high_school_psychology": 0.5441176470588235, + "formal_logic": 0.344, + "high_school_statistics": 0.29767441860465116, + "international_law": 0.5166666666666667, + "high_school_mathematics": 0.20817843866171004, + "high_school_computer_science": 0.3333333333333333, + "conceptual_physics": 0.33760683760683763, + "miscellaneous": 0.5485933503836317, + "high_school_chemistry": 0.3217821782178218, + "marketing": 0.6351931330472103, + "professional_law": 0.3150684931506849, + "management": 0.46078431372549017, + "college_physics": 0.26732673267326734, + "jurisprudence": 0.48598130841121495, + "world_religions": 0.5823529411764706, + "sociology": 0.5, + "us_foreign_policy": 0.5757575757575758, + "high_school_macroeconomics": 0.4190231362467866, + "computer_security": 0.5050505050505051, + "moral_scenarios": 0.23937360178970918, + "moral_disputes": 0.4260869565217391, + "electrical_engineering": 0.4375, + "astronomy": 0.4304635761589404, + "college_biology": 0.44755244755244755 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2711738484398217 + }, + "prompt_2": { + "accuracy": 0.28454680534918275 + }, + "prompt_3": { + "accuracy": 0.2934621099554235 + }, + "prompt_4": { + "accuracy": 0.287518573551263 + }, + "prompt_5": { + "accuracy": 0.2704309063893016 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.25840597758405975, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.25, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.30952380952380953, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.047619047619047616, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.13043478260869565, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.25, + "high_school_biology": 0.08333333333333333, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.15384615384615385, + "middle_school_physics": 0.16666666666666666, + "middle_school_chemistry": 0.24, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.26666666666666666, + "business_administration": 0.3684210526315789, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.16326530612244897, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.25, + "middle_school_politics": 0.19230769230769232, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.10714285714285714, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.37037037037037035, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.25, + "art_studies": 0.2894736842105263, + "professional_tour_guide": 0.23529411764705882, + "legal_professional": 0.25, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.2, + "middle_school_history": 0.4074074074074074, + "civil_servant": 0.19230769230769232, + "sports_science": 0.20833333333333334, + "plant_protection": 0.4444444444444444, + "basic_medicine": 0.25, + "clinical_medicine": 0.18518518518518517, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.24074074074074073, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.2777777777777778, + "physician": 0.24074074074074073 + } + }, + "prompt_2": { + "accuracy": 0.2833125778331258, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.375, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.35714285714285715, + "college_physics": 0.125, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.08695652173913043, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.5416666666666666, + "high_school_chemistry": 0.4166666666666667, + "high_school_biology": 0.16666666666666666, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.23076923076923078, + "middle_school_physics": 0.16666666666666666, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.26666666666666666, + "business_administration": 0.2894736842105263, + "marxism": 0.375, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.24489795918367346, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.25, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.5294117647058824, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.25, + "logic": 0.2222222222222222, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.2631578947368421, + "professional_tour_guide": 0.23529411764705882, + "legal_professional": 0.25, + "high_school_chinese": 0.08333333333333333, + "high_school_history": 0.28, + "middle_school_history": 0.18518518518518517, + "civil_servant": 0.34615384615384615, + "sports_science": 0.25, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.18518518518518517, + "urban_and_rural_planner": 0.3137254901960784, + "accountant": 0.2037037037037037, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.35185185185185186, + "physician": 0.25925925925925924 + } + }, + "prompt_3": { + "accuracy": 0.29514321295143214, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.35714285714285715, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.13043478260869565, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.40476190476190477, + "metrology_engineer": 0.3103448275862069, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.4166666666666667, + "high_school_chemistry": 0.4583333333333333, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.19230769230769232, + "middle_school_physics": 0.25, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.25, + "business_administration": 0.47368421052631576, + "marxism": 0.375, + "mao_zedong_thought": 0.3793103448275862, + "education_science": 0.4411764705882353, + "teacher_qualification": 0.30612244897959184, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.2962962962962963, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.2631578947368421, + "professional_tour_guide": 0.23529411764705882, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.36, + "middle_school_history": 0.2222222222222222, + "civil_servant": 0.3076923076923077, + "sports_science": 0.20833333333333334, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.25, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.39215686274509803, + "accountant": 0.25925925925925924, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.2962962962962963, + "physician": 0.3148148148148148 + } + }, + "prompt_4": { + "accuracy": 0.28829389788293897, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.25, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.30952380952380953, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.13043478260869565, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.4583333333333333, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.16666666666666666, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.25, + "college_economics": 0.2833333333333333, + "business_administration": 0.4473684210526316, + "marxism": 0.5, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.22448979591836735, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.5185185185185185, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.17647058823529413, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.2, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.3076923076923077, + "sports_science": 0.20833333333333334, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.25, + "clinical_medicine": 0.18518518518518517, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.14814814814814814, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.25925925925925924, + "physician": 0.3148148148148148 + } + }, + "prompt_5": { + "accuracy": 0.26650062266500624, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.20833333333333334, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.30952380952380953, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.13043478260869565, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.4166666666666667, + "high_school_chemistry": 0.375, + "high_school_biology": 0.16666666666666666, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.23076923076923078, + "middle_school_physics": 0.125, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.25, + "college_economics": 0.23333333333333334, + "business_administration": 0.34210526315789475, + "marxism": 0.375, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.3235294117647059, + "teacher_qualification": 0.24489795918367346, + "high_school_politics": 0.08333333333333333, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.19230769230769232, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.37037037037037035, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.21052631578947367, + "professional_tour_guide": 0.23529411764705882, + "legal_professional": 0.10714285714285714, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.24, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.17307692307692307, + "sports_science": 0.2916666666666667, + "plant_protection": 0.5185185185185185, + "basic_medicine": 0.16666666666666666, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.24074074074074073, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.3611111111111111, + "tax_accountant": 0.25925925925925924, + "physician": 0.14814814814814814 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.25448028673835127 + }, + "prompt_2": { + "accuracy": 0.25806451612903225 + }, + "prompt_3": { + "accuracy": 0.2616487455197133 + }, + "prompt_4": { + "accuracy": 0.27956989247311825 + }, + "prompt_5": { + "accuracy": 0.3225806451612903 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2680020721809705, + "category_acc": { + "agronomy": 0.27218934911242604, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.3048780487804878, + "arts": 0.2875, + "astronomy": 0.2606060606060606, + "business_ethics": 0.2822966507177033, + "chinese_civil_service_exam": 0.25625, + "chinese_driving_rule": 0.2824427480916031, + "chinese_food_culture": 0.20588235294117646, + "chinese_foreign_policy": 0.2336448598130841, + "chinese_history": 0.25386996904024767, + "chinese_literature": 0.2647058823529412, + "chinese_teacher_qualification": 0.2681564245810056, + "clinical_knowledge": 0.24050632911392406, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.2897196261682243, + "college_engineering_hydrology": 0.29245283018867924, + "college_law": 0.26851851851851855, + "college_mathematics": 0.2, + "college_medical_statistics": 0.32075471698113206, + "college_medicine": 0.28205128205128205, + "computer_science": 0.27941176470588236, + "computer_security": 0.2631578947368421, + "conceptual_physics": 0.2857142857142857, + "construction_project_management": 0.2805755395683453, + "economics": 0.25157232704402516, + "education": 0.3496932515337423, + "electrical_engineering": 0.3546511627906977, + "elementary_chinese": 0.25396825396825395, + "elementary_commonsense": 0.21717171717171718, + "elementary_information_and_technology": 0.25630252100840334, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.3037037037037037, + "food_science": 0.35664335664335667, + "genetics": 0.2784090909090909, + "global_facts": 0.3288590604026846, + "high_school_biology": 0.2485207100591716, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.211864406779661, + "high_school_mathematics": 0.27439024390243905, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.24475524475524477, + "human_sexuality": 0.2857142857142857, + "international_law": 0.2810810810810811, + "journalism": 0.26744186046511625, + "jurisprudence": 0.23114355231143552, + "legal_and_moral_basis": 0.3037383177570093, + "logical": 0.3008130081300813, + "machine_learning": 0.21311475409836064, + "management": 0.24761904761904763, + "marketing": 0.24444444444444444, + "marxist_theory": 0.25396825396825395, + "modern_chinese": 0.1896551724137931, + "nutrition": 0.31724137931034485, + "philosophy": 0.2857142857142857, + "professional_accounting": 0.26285714285714284, + "professional_law": 0.25118483412322273, + "professional_medicine": 0.28191489361702127, + "professional_psychology": 0.23275862068965517, + "public_relations": 0.3045977011494253, + "security_study": 0.24444444444444444, + "sociology": 0.3008849557522124, + "sports_science": 0.26666666666666666, + "traditional_chinese_medicine": 0.24324324324324326, + "virology": 0.30177514792899407, + "world_history": 0.2422360248447205, + "world_religions": 0.2625 + } + }, + "prompt_2": { + "accuracy": 0.28121222586772576, + "category_acc": { + "agronomy": 0.28402366863905326, + "anatomy": 0.30405405405405406, + "ancient_chinese": 0.2804878048780488, + "arts": 0.28125, + "astronomy": 0.296969696969697, + "business_ethics": 0.2535885167464115, + "chinese_civil_service_exam": 0.225, + "chinese_driving_rule": 0.31297709923664124, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.2897196261682243, + "chinese_history": 0.3126934984520124, + "chinese_literature": 0.28431372549019607, + "chinese_teacher_qualification": 0.33519553072625696, + "clinical_knowledge": 0.24472573839662448, + "college_actuarial_science": 0.22641509433962265, + "college_education": 0.22429906542056074, + "college_engineering_hydrology": 0.3584905660377358, + "college_law": 0.2962962962962963, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.3584905660377358, + "college_medicine": 0.23443223443223443, + "computer_science": 0.27450980392156865, + "computer_security": 0.29239766081871343, + "conceptual_physics": 0.22448979591836735, + "construction_project_management": 0.2733812949640288, + "economics": 0.2893081761006289, + "education": 0.3128834355828221, + "electrical_engineering": 0.3023255813953488, + "elementary_chinese": 0.20238095238095238, + "elementary_commonsense": 0.26262626262626265, + "elementary_information_and_technology": 0.3403361344537815, + "elementary_mathematics": 0.2217391304347826, + "ethnology": 0.31851851851851853, + "food_science": 0.32867132867132864, + "genetics": 0.30113636363636365, + "global_facts": 0.2483221476510067, + "high_school_biology": 0.2485207100591716, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.2627118644067797, + "high_school_mathematics": 0.23170731707317074, + "high_school_physics": 0.21818181818181817, + "high_school_politics": 0.27972027972027974, + "human_sexuality": 0.35714285714285715, + "international_law": 0.2918918918918919, + "journalism": 0.32558139534883723, + "jurisprudence": 0.26763990267639903, + "legal_and_moral_basis": 0.4158878504672897, + "logical": 0.2682926829268293, + "machine_learning": 0.2459016393442623, + "management": 0.26666666666666666, + "marketing": 0.28888888888888886, + "marxist_theory": 0.3439153439153439, + "modern_chinese": 0.22413793103448276, + "nutrition": 0.2827586206896552, + "philosophy": 0.3047619047619048, + "professional_accounting": 0.34285714285714286, + "professional_law": 0.2843601895734597, + "professional_medicine": 0.26063829787234044, + "professional_psychology": 0.23706896551724138, + "public_relations": 0.27586206896551724, + "security_study": 0.24444444444444444, + "sociology": 0.3407079646017699, + "sports_science": 0.26666666666666666, + "traditional_chinese_medicine": 0.2648648648648649, + "virology": 0.28402366863905326, + "world_history": 0.2732919254658385, + "world_religions": 0.2875 + } + }, + "prompt_3": { + "accuracy": 0.284061474702124, + "category_acc": { + "agronomy": 0.2603550295857988, + "anatomy": 0.22297297297297297, + "ancient_chinese": 0.27439024390243905, + "arts": 0.225, + "astronomy": 0.3151515151515151, + "business_ethics": 0.2583732057416268, + "chinese_civil_service_exam": 0.18125, + "chinese_driving_rule": 0.3282442748091603, + "chinese_food_culture": 0.2867647058823529, + "chinese_foreign_policy": 0.2897196261682243, + "chinese_history": 0.30959752321981426, + "chinese_literature": 0.28921568627450983, + "chinese_teacher_qualification": 0.2737430167597765, + "clinical_knowledge": 0.23628691983122363, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.22429906542056074, + "college_engineering_hydrology": 0.36792452830188677, + "college_law": 0.3148148148148148, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.33962264150943394, + "college_medicine": 0.26373626373626374, + "computer_science": 0.29411764705882354, + "computer_security": 0.2982456140350877, + "conceptual_physics": 0.2653061224489796, + "construction_project_management": 0.2949640287769784, + "economics": 0.32075471698113206, + "education": 0.26993865030674846, + "electrical_engineering": 0.3313953488372093, + "elementary_chinese": 0.23809523809523808, + "elementary_commonsense": 0.25757575757575757, + "elementary_information_and_technology": 0.35294117647058826, + "elementary_mathematics": 0.23478260869565218, + "ethnology": 0.2814814814814815, + "food_science": 0.34265734265734266, + "genetics": 0.2897727272727273, + "global_facts": 0.2684563758389262, + "high_school_biology": 0.27218934911242604, + "high_school_chemistry": 0.2196969696969697, + "high_school_geography": 0.2457627118644068, + "high_school_mathematics": 0.2804878048780488, + "high_school_physics": 0.22727272727272727, + "high_school_politics": 0.27972027972027974, + "human_sexuality": 0.3888888888888889, + "international_law": 0.2756756756756757, + "journalism": 0.313953488372093, + "jurisprudence": 0.2749391727493917, + "legal_and_moral_basis": 0.4158878504672897, + "logical": 0.2845528455284553, + "machine_learning": 0.26229508196721313, + "management": 0.28095238095238095, + "marketing": 0.34444444444444444, + "marxist_theory": 0.3492063492063492, + "modern_chinese": 0.25, + "nutrition": 0.32413793103448274, + "philosophy": 0.29523809523809524, + "professional_accounting": 0.3314285714285714, + "professional_law": 0.27014218009478674, + "professional_medicine": 0.26595744680851063, + "professional_psychology": 0.25, + "public_relations": 0.3160919540229885, + "security_study": 0.23703703703703705, + "sociology": 0.28761061946902655, + "sports_science": 0.26666666666666666, + "traditional_chinese_medicine": 0.24324324324324326, + "virology": 0.28402366863905326, + "world_history": 0.2919254658385093, + "world_religions": 0.31875 + } + }, + "prompt_4": { + "accuracy": 0.2729235019858401, + "category_acc": { + "agronomy": 0.2603550295857988, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.2682926829268293, + "arts": 0.275, + "astronomy": 0.28484848484848485, + "business_ethics": 0.2727272727272727, + "chinese_civil_service_exam": 0.23125, + "chinese_driving_rule": 0.31297709923664124, + "chinese_food_culture": 0.25, + "chinese_foreign_policy": 0.2523364485981308, + "chinese_history": 0.28792569659442724, + "chinese_literature": 0.2647058823529412, + "chinese_teacher_qualification": 0.2681564245810056, + "clinical_knowledge": 0.22362869198312235, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.308411214953271, + "college_engineering_hydrology": 0.32075471698113206, + "college_law": 0.3425925925925926, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.31868131868131866, + "computer_science": 0.27941176470588236, + "computer_security": 0.2982456140350877, + "conceptual_physics": 0.2789115646258503, + "construction_project_management": 0.26618705035971224, + "economics": 0.2830188679245283, + "education": 0.3128834355828221, + "electrical_engineering": 0.25, + "elementary_chinese": 0.23809523809523808, + "elementary_commonsense": 0.23232323232323232, + "elementary_information_and_technology": 0.2689075630252101, + "elementary_mathematics": 0.2826086956521739, + "ethnology": 0.2814814814814815, + "food_science": 0.34265734265734266, + "genetics": 0.32386363636363635, + "global_facts": 0.31543624161073824, + "high_school_biology": 0.24260355029585798, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.288135593220339, + "high_school_mathematics": 0.22560975609756098, + "high_school_physics": 0.21818181818181817, + "high_school_politics": 0.24475524475524477, + "human_sexuality": 0.3968253968253968, + "international_law": 0.2702702702702703, + "journalism": 0.23255813953488372, + "jurisprudence": 0.25790754257907544, + "legal_and_moral_basis": 0.32242990654205606, + "logical": 0.2845528455284553, + "machine_learning": 0.2459016393442623, + "management": 0.23809523809523808, + "marketing": 0.28888888888888886, + "marxist_theory": 0.26455026455026454, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.32413793103448274, + "philosophy": 0.3142857142857143, + "professional_accounting": 0.3028571428571429, + "professional_law": 0.2843601895734597, + "professional_medicine": 0.24202127659574468, + "professional_psychology": 0.2672413793103448, + "public_relations": 0.23563218390804597, + "security_study": 0.3111111111111111, + "sociology": 0.26991150442477874, + "sports_science": 0.28484848484848485, + "traditional_chinese_medicine": 0.24864864864864866, + "virology": 0.28994082840236685, + "world_history": 0.21739130434782608, + "world_religions": 0.2625 + } + }, + "prompt_5": { + "accuracy": 0.2603177344154723, + "category_acc": { + "agronomy": 0.26627218934911245, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.2804878048780488, + "arts": 0.20625, + "astronomy": 0.30303030303030304, + "business_ethics": 0.22966507177033493, + "chinese_civil_service_exam": 0.25625, + "chinese_driving_rule": 0.22900763358778625, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.2616822429906542, + "chinese_history": 0.25077399380804954, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.24022346368715083, + "clinical_knowledge": 0.26582278481012656, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.29906542056074764, + "college_engineering_hydrology": 0.32075471698113206, + "college_law": 0.35185185185185186, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.29304029304029305, + "computer_science": 0.25, + "computer_security": 0.2573099415204678, + "conceptual_physics": 0.2857142857142857, + "construction_project_management": 0.2446043165467626, + "economics": 0.2893081761006289, + "education": 0.26993865030674846, + "electrical_engineering": 0.27325581395348836, + "elementary_chinese": 0.24603174603174602, + "elementary_commonsense": 0.18686868686868688, + "elementary_information_and_technology": 0.31932773109243695, + "elementary_mathematics": 0.23478260869565218, + "ethnology": 0.3037037037037037, + "food_science": 0.2867132867132867, + "genetics": 0.25, + "global_facts": 0.2483221476510067, + "high_school_biology": 0.1952662721893491, + "high_school_chemistry": 0.2803030303030303, + "high_school_geography": 0.2627118644067797, + "high_school_mathematics": 0.3170731707317073, + "high_school_physics": 0.23636363636363636, + "high_school_politics": 0.23076923076923078, + "human_sexuality": 0.29365079365079366, + "international_law": 0.22702702702702704, + "journalism": 0.3081395348837209, + "jurisprudence": 0.26277372262773724, + "legal_and_moral_basis": 0.3317757009345794, + "logical": 0.23577235772357724, + "machine_learning": 0.2540983606557377, + "management": 0.2761904761904762, + "marketing": 0.24444444444444444, + "marxist_theory": 0.24338624338624337, + "modern_chinese": 0.25, + "nutrition": 0.2620689655172414, + "philosophy": 0.2571428571428571, + "professional_accounting": 0.2742857142857143, + "professional_law": 0.27014218009478674, + "professional_medicine": 0.24468085106382978, + "professional_psychology": 0.23706896551724138, + "public_relations": 0.27586206896551724, + "security_study": 0.23703703703703705, + "sociology": 0.26548672566371684, + "sports_science": 0.24242424242424243, + "traditional_chinese_medicine": 0.25405405405405407, + "virology": 0.2603550295857988, + "world_history": 0.21739130434782608, + "world_religions": 0.20625 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.30303030303030304 + }, + "prompt_2": { + "accuracy": 0.42424242424242425 + }, + "prompt_3": { + "accuracy": 0.2727272727272727 + }, + "prompt_4": { + "accuracy": 0.30303030303030304 + }, + "prompt_5": { + "accuracy": 0.21212121212121213 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.1318181818181818 + }, + "prompt_2": { + "accuracy": 0.11363636363636363 + }, + "prompt_3": { + "accuracy": 0.10454545454545454 + }, + "prompt_4": { + "accuracy": 0.26136363636363635 + }, + "prompt_5": { + "accuracy": 0.19545454545454546 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.35322033898305083 + }, + "prompt_2": { + "accuracy": 0.3342372881355932 + }, + "prompt_3": { + "accuracy": 0.328135593220339 + }, + "prompt_4": { + "accuracy": 0.3325423728813559 + }, + "prompt_5": { + "accuracy": 0.33491525423728813 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3335826477187734 + }, + "prompt_2": { + "accuracy": 0.3545250560957367 + }, + "prompt_3": { + "accuracy": 0.39715781600598354 + }, + "prompt_4": { + "accuracy": 0.3605086013462977 + }, + "prompt_5": { + "accuracy": 0.3556469708302169 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6295933365997061 + }, + "prompt_2": { + "accuracy": 0.5551200391964723 + }, + "prompt_3": { + "accuracy": 0.5908868201861832 + }, + "prompt_4": { + "accuracy": 0.6354728074473297 + }, + "prompt_5": { + "accuracy": 0.6643802057814797 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2609319550832433, + "rouge2": 0.09358952325864871, + "rougeL": 0.2038893253639436, + "avg_rouge": 0.18613693456861188 + }, + "prompt_2": { + "rouge1": 0.24324491788750943, + "rouge2": 0.09386877967387884, + "rougeL": 0.18886733918598472, + "avg_rouge": 0.1753270122491243 + }, + "prompt_3": { + "rouge1": 0.22720844667540735, + "rouge2": 0.08337107854855111, + "rougeL": 0.1763041008880063, + "avg_rouge": 0.1622945420373216 + }, + "prompt_4": { + "rouge1": 0.27169487508441365, + "rouge2": 0.09687097548820628, + "rougeL": 0.2103021914619331, + "avg_rouge": 0.19295601401151766 + }, + "prompt_5": { + "rouge1": 0.2707602539030733, + "rouge2": 0.08937029934630224, + "rougeL": 0.20597808832307926, + "avg_rouge": 0.1887028805241516 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.22630391800005498, + "rouge2": 0.06146314515305739, + "rougeL": 0.16765033762719583, + "avg_rouge": 0.15180580026010274 + }, + "prompt_2": { + "rouge1": 0.23481044574230941, + "rouge2": 0.06400175627232876, + "rougeL": 0.17277359759711033, + "avg_rouge": 0.1571952665372495 + }, + "prompt_3": { + "rouge1": 0.2352924673823909, + "rouge2": 0.06417462507199803, + "rougeL": 0.1733172217541741, + "avg_rouge": 0.15759477140285436 + }, + "prompt_4": { + "rouge1": 0.22806790033054172, + "rouge2": 0.06204092220616914, + "rougeL": 0.16781780362581905, + "avg_rouge": 0.1526422087208433 + }, + "prompt_5": { + "rouge1": 0.22367964952399105, + "rouge2": 0.06013676104190433, + "rougeL": 0.1648641161184796, + "avg_rouge": 0.14956017556145831 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6502293577981652 + }, + "prompt_2": { + "accuracy": 0.5011467889908257 + }, + "prompt_3": { + "accuracy": 0.5722477064220184 + }, + "prompt_4": { + "accuracy": 0.6685779816513762 + }, + "prompt_5": { + "accuracy": 0.6490825688073395 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6826462128475551 + }, + "prompt_2": { + "accuracy": 0.5215723873441994 + }, + "prompt_3": { + "accuracy": 0.6030680728667306 + }, + "prompt_4": { + "accuracy": 0.6682646212847555 + }, + "prompt_5": { + "accuracy": 0.6778523489932886 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4755 + }, + "prompt_2": { + "accuracy": 0.51 + }, + "prompt_3": { + "accuracy": 0.502 + }, + "prompt_4": { + "accuracy": 0.484 + }, + "prompt_5": { + "accuracy": 0.4355 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3555 + }, + "prompt_2": { + "accuracy": 0.3605 + }, + "prompt_3": { + "accuracy": 0.352 + }, + "prompt_4": { + "accuracy": 0.3475 + }, + "prompt_5": { + "accuracy": 0.3415 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4945 + }, + "prompt_2": { + "accuracy": 0.4775 + }, + "prompt_3": { + "accuracy": 0.4975 + }, + "prompt_4": { + "accuracy": 0.4875 + }, + "prompt_5": { + "accuracy": 0.513 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49295774647887325 + }, + "prompt_2": { + "accuracy": 0.43661971830985913 + }, + "prompt_3": { + "accuracy": 0.38028169014084506 + }, + "prompt_4": { + "accuracy": 0.43661971830985913 + }, + "prompt_5": { + "accuracy": 0.4507042253521127 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.51985559566787 + }, + "prompt_2": { + "accuracy": 0.5054151624548736 + }, + "prompt_3": { + "accuracy": 0.5342960288808665 + }, + "prompt_4": { + "accuracy": 0.5270758122743683 + }, + "prompt_5": { + "accuracy": 0.5234657039711191 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6642156862745098 + }, + "prompt_2": { + "accuracy": 0.6299019607843137 + }, + "prompt_3": { + "accuracy": 0.6127450980392157 + }, + "prompt_4": { + "accuracy": 0.5784313725490197 + }, + "prompt_5": { + "accuracy": 0.6299019607843137 + } } }, "five_shot": { @@ -3722,53 +32567,1733 @@ "model_link": "https://huggingface.co/meta-llama/Llama-2-7b-hf", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.29714285714285715, + "language_acc": { + "Malay": 0.31333333333333335, + "English": 0.3466666666666667, + "Vietnamese": 0.2866666666666667, + "Spanish": 0.29333333333333333, + "Indonesian": 0.28, + "Filipino": 0.3, + "Chinese": 0.26 + }, + "consistency_score_2": 0.673015873015873, + "consistency_score_3": 0.535809523809524, + "consistency_score_4": 0.4502857142857143, + "consistency_score_5": 0.3892063492063492, + "consistency_score_6": 0.34285714285714286, + "consistency_score_7": 0.30666666666666664, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.6133333333333333, + "Malay,Vietnamese": 0.6733333333333333, + "Malay,Spanish": 0.66, + "Malay,Indonesian": 0.74, + "Malay,Filipino": 0.7333333333333333, + "Malay,Chinese": 0.6466666666666666, + "English,Vietnamese": 0.6133333333333333, + "English,Spanish": 0.64, + "English,Indonesian": 0.56, + "English,Filipino": 0.6133333333333333, + "English,Chinese": 0.62, + "Vietnamese,Spanish": 0.7266666666666667, + "Vietnamese,Indonesian": 0.68, + "Vietnamese,Filipino": 0.7733333333333333, + "Vietnamese,Chinese": 0.6533333333333333, + "Spanish,Indonesian": 0.7, + "Spanish,Filipino": 0.72, + "Spanish,Chinese": 0.6666666666666666, + "Indonesian,Filipino": 0.7066666666666667, + "Indonesian,Chinese": 0.6866666666666666, + "Filipino,Chinese": 0.7066666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.48, + "Malay,English,Spanish": 0.4866666666666667, + "Malay,English,Indonesian": 0.47333333333333333, + "Malay,English,Filipino": 0.5066666666666667, + "Malay,English,Chinese": 0.46, + "Malay,Vietnamese,Spanish": 0.5533333333333333, + "Malay,Vietnamese,Indonesian": 0.5733333333333334, + "Malay,Vietnamese,Filipino": 0.6133333333333333, + "Malay,Vietnamese,Chinese": 0.52, + "Malay,Spanish,Indonesian": 0.5666666666666667, + "Malay,Spanish,Filipino": 0.58, + "Malay,Spanish,Chinese": 0.5133333333333333, + "Malay,Indonesian,Filipino": 0.6133333333333333, + "Malay,Indonesian,Chinese": 0.56, + "Malay,Filipino,Chinese": 0.5666666666666667, + "English,Vietnamese,Spanish": 0.5333333333333333, + "English,Vietnamese,Indonesian": 0.46, + "English,Vietnamese,Filipino": 0.5266666666666666, + "English,Vietnamese,Chinese": 0.47333333333333333, + "English,Spanish,Indonesian": 0.4666666666666667, + "English,Spanish,Filipino": 0.52, + "English,Spanish,Chinese": 0.49333333333333335, + "English,Indonesian,Filipino": 0.47333333333333333, + "English,Indonesian,Chinese": 0.46, + "English,Filipino,Chinese": 0.49333333333333335, + "Vietnamese,Spanish,Indonesian": 0.5666666666666667, + "Vietnamese,Spanish,Filipino": 0.6333333333333333, + "Vietnamese,Spanish,Chinese": 0.5733333333333334, + "Vietnamese,Indonesian,Filipino": 0.6, + "Vietnamese,Indonesian,Chinese": 0.5466666666666666, + "Vietnamese,Filipino,Chinese": 0.5866666666666667, + "Spanish,Indonesian,Filipino": 0.58, + "Spanish,Indonesian,Chinese": 0.5533333333333333, + "Spanish,Filipino,Chinese": 0.5733333333333334, + "Indonesian,Filipino,Chinese": 0.5733333333333334 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.43333333333333335, + "Malay,English,Vietnamese,Indonesian": 0.41333333333333333, + "Malay,English,Vietnamese,Filipino": 0.44666666666666666, + "Malay,English,Vietnamese,Chinese": 0.3933333333333333, + "Malay,English,Spanish,Indonesian": 0.42, + "Malay,English,Spanish,Filipino": 0.4533333333333333, + "Malay,English,Spanish,Chinese": 0.4066666666666667, + "Malay,English,Indonesian,Filipino": 0.44, + "Malay,English,Indonesian,Chinese": 0.3933333333333333, + "Malay,English,Filipino,Chinese": 0.4266666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.48, + "Malay,Vietnamese,Spanish,Filipino": 0.5066666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.4533333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.5266666666666666, + "Malay,Vietnamese,Indonesian,Chinese": 0.4666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.4866666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.5, + "Malay,Spanish,Indonesian,Chinese": 0.46, + "Malay,Spanish,Filipino,Chinese": 0.47333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.5, + "English,Vietnamese,Spanish,Indonesian": 0.4066666666666667, + "English,Vietnamese,Spanish,Filipino": 0.47333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.44, + "English,Vietnamese,Indonesian,Filipino": 0.42, + "English,Vietnamese,Indonesian,Chinese": 0.38666666666666666, + "English,Vietnamese,Filipino,Chinese": 0.42, + "English,Spanish,Indonesian,Filipino": 0.41333333333333333, + "English,Spanish,Indonesian,Chinese": 0.3933333333333333, + "English,Spanish,Filipino,Chinese": 0.43333333333333335, + "English,Indonesian,Filipino,Chinese": 0.4, + "Vietnamese,Spanish,Indonesian,Filipino": 0.5066666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.48, + "Vietnamese,Spanish,Filipino,Chinese": 0.5133333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.5, + "Spanish,Indonesian,Filipino,Chinese": 0.49333333333333335 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.37333333333333335, + "Malay,English,Vietnamese,Spanish,Filipino": 0.4066666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.36666666666666664, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.3933333333333333, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.3466666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.37333333333333335, + "Malay,English,Spanish,Indonesian,Filipino": 0.3933333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.35333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.38666666666666666, + "Malay,English,Indonesian,Filipino,Chinese": 0.37333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.44, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.4066666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.42, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.44, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.4266666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.37333333333333335, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.35333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.3933333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.35333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.36, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.44 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.35333333333333333, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.32, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.3466666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.3333333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.34, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.38, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.32666666666666666 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.30666666666666664 + } + }, + "AC3_2": 0.41226626772115527, + "AC3_3": 0.3822834928161481, + "AC3_4": 0.3580253385278963, + "AC3_5": 0.3370001321036682, + "AC3_6": 0.3183673468890306, + "AC3_7": 0.3018296529468579 + }, + "prompt_2": { + "overall_acc": 0.2809523809523809, + "language_acc": { + "Malay": 0.24, + "English": 0.3333333333333333, + "Vietnamese": 0.2866666666666667, + "Spanish": 0.3, + "Indonesian": 0.2733333333333333, + "Filipino": 0.26, + "Chinese": 0.2733333333333333 + }, + "consistency_score_2": 0.73015873015873, + "consistency_score_3": 0.6106666666666667, + "consistency_score_4": 0.5281904761904762, + "consistency_score_5": 0.46412698412698417, + "consistency_score_6": 0.4133333333333334, + "consistency_score_7": 0.37333333333333335, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.6933333333333334, + "Malay,Vietnamese": 0.7266666666666667, + "Malay,Spanish": 0.7, + "Malay,Indonesian": 0.76, + "Malay,Filipino": 0.7333333333333333, + "Malay,Chinese": 0.6866666666666666, + "English,Vietnamese": 0.7333333333333333, + "English,Spanish": 0.76, + "English,Indonesian": 0.7533333333333333, + "English,Filipino": 0.7, + "English,Chinese": 0.72, + "Vietnamese,Spanish": 0.7533333333333333, + "Vietnamese,Indonesian": 0.74, + "Vietnamese,Filipino": 0.7333333333333333, + "Vietnamese,Chinese": 0.6933333333333334, + "Spanish,Indonesian": 0.7333333333333333, + "Spanish,Filipino": 0.7666666666666667, + "Spanish,Chinese": 0.7466666666666667, + "Indonesian,Filipino": 0.7466666666666667, + "Indonesian,Chinese": 0.7333333333333333, + "Filipino,Chinese": 0.72 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.6, + "Malay,English,Spanish": 0.5866666666666667, + "Malay,English,Indonesian": 0.6133333333333333, + "Malay,English,Filipino": 0.5733333333333334, + "Malay,English,Chinese": 0.56, + "Malay,Vietnamese,Spanish": 0.6133333333333333, + "Malay,Vietnamese,Indonesian": 0.6133333333333333, + "Malay,Vietnamese,Filipino": 0.6133333333333333, + "Malay,Vietnamese,Chinese": 0.5666666666666667, + "Malay,Spanish,Indonesian": 0.6066666666666667, + "Malay,Spanish,Filipino": 0.6066666666666667, + "Malay,Spanish,Chinese": 0.5866666666666667, + "Malay,Indonesian,Filipino": 0.6266666666666667, + "Malay,Indonesian,Chinese": 0.6, + "Malay,Filipino,Chinese": 0.58, + "English,Vietnamese,Spanish": 0.6533333333333333, + "English,Vietnamese,Indonesian": 0.6333333333333333, + "English,Vietnamese,Filipino": 0.6, + "English,Vietnamese,Chinese": 0.5933333333333334, + "English,Spanish,Indonesian": 0.6333333333333333, + "English,Spanish,Filipino": 0.6266666666666667, + "English,Spanish,Chinese": 0.6266666666666667, + "English,Indonesian,Filipino": 0.6133333333333333, + "English,Indonesian,Chinese": 0.62, + "English,Filipino,Chinese": 0.58, + "Vietnamese,Spanish,Indonesian": 0.6333333333333333, + "Vietnamese,Spanish,Filipino": 0.6533333333333333, + "Vietnamese,Spanish,Chinese": 0.62, + "Vietnamese,Indonesian,Filipino": 0.6266666666666667, + "Vietnamese,Indonesian,Chinese": 0.5933333333333334, + "Vietnamese,Filipino,Chinese": 0.5866666666666667, + "Spanish,Indonesian,Filipino": 0.6466666666666666, + "Spanish,Indonesian,Chinese": 0.6266666666666667, + "Spanish,Filipino,Chinese": 0.64, + "Indonesian,Filipino,Chinese": 0.62 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.5333333333333333, + "Malay,English,Vietnamese,Indonesian": 0.54, + "Malay,English,Vietnamese,Filipino": 0.5, + "Malay,English,Vietnamese,Chinese": 0.49333333333333335, + "Malay,English,Spanish,Indonesian": 0.5333333333333333, + "Malay,English,Spanish,Filipino": 0.5066666666666667, + "Malay,English,Spanish,Chinese": 0.49333333333333335, + "Malay,English,Indonesian,Filipino": 0.5266666666666666, + "Malay,English,Indonesian,Chinese": 0.5133333333333333, + "Malay,English,Filipino,Chinese": 0.48, + "Malay,Vietnamese,Spanish,Indonesian": 0.5333333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.54, + "Malay,Vietnamese,Spanish,Chinese": 0.52, + "Malay,Vietnamese,Indonesian,Filipino": 0.54, + "Malay,Vietnamese,Indonesian,Chinese": 0.5, + "Malay,Vietnamese,Filipino,Chinese": 0.5, + "Malay,Spanish,Indonesian,Filipino": 0.5333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.52, + "Malay,Spanish,Filipino,Chinese": 0.52, + "Malay,Indonesian,Filipino,Chinese": 0.5266666666666666, + "English,Vietnamese,Spanish,Indonesian": 0.5733333333333334, + "English,Vietnamese,Spanish,Filipino": 0.56, + "English,Vietnamese,Spanish,Chinese": 0.5466666666666666, + "English,Vietnamese,Indonesian,Filipino": 0.54, + "English,Vietnamese,Indonesian,Chinese": 0.5333333333333333, + "English,Vietnamese,Filipino,Chinese": 0.5066666666666667, + "English,Spanish,Indonesian,Filipino": 0.5533333333333333, + "English,Spanish,Indonesian,Chinese": 0.54, + "English,Spanish,Filipino,Chinese": 0.5333333333333333, + "English,Indonesian,Filipino,Chinese": 0.5266666666666666, + "Vietnamese,Spanish,Indonesian,Filipino": 0.56, + "Vietnamese,Spanish,Indonesian,Chinese": 0.5333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.5533333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.5133333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.56 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.4866666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.46, + "Malay,English,Vietnamese,Spanish,Chinese": 0.4533333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.4666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.4533333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.4266666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.4666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.4533333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.43333333333333335, + "Malay,English,Indonesian,Filipino,Chinese": 0.4533333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.47333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.4533333333333333, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.4666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.44666666666666666, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.4666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.5, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.4866666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.48, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.46, + "English,Spanish,Indonesian,Filipino,Chinese": 0.48, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.48 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.4266666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.41333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.4, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.4, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.4066666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.41333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.43333333333333335 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.37333333333333335 + } + }, + "AC3_2": 0.4057710996085194, + "AC3_3": 0.38484654271835556, + "AC3_4": 0.36679894175360994, + "AC3_5": 0.35002333267767555, + "AC3_6": 0.33452217644930293, + "AC3_7": 0.3206210576899584 + }, + "prompt_3": { + "overall_acc": 0.26571428571428574, + "language_acc": { + "Malay": 0.25333333333333335, + "English": 0.30666666666666664, + "Vietnamese": 0.26, + "Spanish": 0.29333333333333333, + "Indonesian": 0.26, + "Filipino": 0.23333333333333334, + "Chinese": 0.25333333333333335 + }, + "consistency_score_2": 0.7025396825396826, + "consistency_score_3": 0.5756190476190477, + "consistency_score_4": 0.49352380952380953, + "consistency_score_5": 0.43238095238095237, + "consistency_score_6": 0.38285714285714284, + "consistency_score_7": 0.34, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.6733333333333333, + "Malay,Vietnamese": 0.6666666666666666, + "Malay,Spanish": 0.7133333333333334, + "Malay,Indonesian": 0.7866666666666666, + "Malay,Filipino": 0.7, + "Malay,Chinese": 0.7133333333333334, + "English,Vietnamese": 0.6533333333333333, + "English,Spanish": 0.7066666666666667, + "English,Indonesian": 0.7, + "English,Filipino": 0.68, + "English,Chinese": 0.62, + "Vietnamese,Spanish": 0.6533333333333333, + "Vietnamese,Indonesian": 0.7, + "Vietnamese,Filipino": 0.7066666666666667, + "Vietnamese,Chinese": 0.6866666666666666, + "Spanish,Indonesian": 0.7333333333333333, + "Spanish,Filipino": 0.6933333333333334, + "Spanish,Chinese": 0.7266666666666667, + "Indonesian,Filipino": 0.7533333333333333, + "Indonesian,Chinese": 0.78, + "Filipino,Chinese": 0.7066666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.5133333333333333, + "Malay,English,Spanish": 0.56, + "Malay,English,Indonesian": 0.6, + "Malay,English,Filipino": 0.5466666666666666, + "Malay,English,Chinese": 0.5333333333333333, + "Malay,Vietnamese,Spanish": 0.5533333333333333, + "Malay,Vietnamese,Indonesian": 0.6, + "Malay,Vietnamese,Filipino": 0.5533333333333333, + "Malay,Vietnamese,Chinese": 0.5466666666666666, + "Malay,Spanish,Indonesian": 0.6466666666666666, + "Malay,Spanish,Filipino": 0.5733333333333334, + "Malay,Spanish,Chinese": 0.62, + "Malay,Indonesian,Filipino": 0.6333333333333333, + "Malay,Indonesian,Chinese": 0.66, + "Malay,Filipino,Chinese": 0.5733333333333334, + "English,Vietnamese,Spanish": 0.5466666666666666, + "English,Vietnamese,Indonesian": 0.54, + "English,Vietnamese,Filipino": 0.54, + "English,Vietnamese,Chinese": 0.5, + "English,Spanish,Indonesian": 0.5933333333333334, + "English,Spanish,Filipino": 0.56, + "English,Spanish,Chinese": 0.56, + "English,Indonesian,Filipino": 0.58, + "English,Indonesian,Chinese": 0.5666666666666667, + "English,Filipino,Chinese": 0.52, + "Vietnamese,Spanish,Indonesian": 0.5666666666666667, + "Vietnamese,Spanish,Filipino": 0.56, + "Vietnamese,Spanish,Chinese": 0.56, + "Vietnamese,Indonesian,Filipino": 0.5933333333333334, + "Vietnamese,Indonesian,Chinese": 0.6, + "Vietnamese,Filipino,Chinese": 0.5666666666666667, + "Spanish,Indonesian,Filipino": 0.6133333333333333, + "Spanish,Indonesian,Chinese": 0.64, + "Spanish,Filipino,Chinese": 0.6, + "Indonesian,Filipino,Chinese": 0.6266666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.4533333333333333, + "Malay,English,Vietnamese,Indonesian": 0.48, + "Malay,English,Vietnamese,Filipino": 0.44666666666666666, + "Malay,English,Vietnamese,Chinese": 0.43333333333333335, + "Malay,English,Spanish,Indonesian": 0.5266666666666666, + "Malay,English,Spanish,Filipino": 0.46, + "Malay,English,Spanish,Chinese": 0.48, + "Malay,English,Indonesian,Filipino": 0.5066666666666667, + "Malay,English,Indonesian,Chinese": 0.5133333333333333, + "Malay,English,Filipino,Chinese": 0.4533333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.5133333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.47333333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.4866666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.52, + "Malay,Vietnamese,Indonesian,Chinese": 0.5266666666666666, + "Malay,Vietnamese,Filipino,Chinese": 0.4666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.5466666666666666, + "Malay,Spanish,Indonesian,Chinese": 0.5733333333333334, + "Malay,Spanish,Filipino,Chinese": 0.5133333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.56, + "English,Vietnamese,Spanish,Indonesian": 0.47333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.47333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.46, + "English,Vietnamese,Indonesian,Filipino": 0.48, + "English,Vietnamese,Indonesian,Chinese": 0.46, + "English,Vietnamese,Filipino,Chinese": 0.44, + "English,Spanish,Indonesian,Filipino": 0.5, + "English,Spanish,Indonesian,Chinese": 0.5066666666666667, + "English,Spanish,Filipino,Chinese": 0.48, + "English,Indonesian,Filipino,Chinese": 0.4866666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.5066666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.5133333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.4866666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.52, + "Spanish,Indonesian,Filipino,Chinese": 0.5533333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.4266666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.3933333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.3933333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.4266666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.42, + "Malay,English,Vietnamese,Filipino,Chinese": 0.38, + "Malay,English,Spanish,Indonesian,Filipino": 0.44666666666666666, + "Malay,English,Spanish,Indonesian,Chinese": 0.46, + "Malay,English,Spanish,Filipino,Chinese": 0.41333333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.44666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.4533333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.4666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.42, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.46, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.5, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.4266666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.42, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.4066666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.41333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.44666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.46 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.38, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.38, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.3466666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.37333333333333335, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.4066666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.41333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.38 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.34 + } + }, + "AC3_2": 0.3855906322786779, + "AC3_3": 0.36359002550741637, + "AC3_4": 0.34544190376068024, + "AC3_5": 0.3291522119972858, + "AC3_6": 0.3137067337464706, + "AC3_7": 0.2983018867432049 + }, + "prompt_4": { + "overall_acc": 0.2857142857142857, + "language_acc": { + "Malay": 0.24666666666666667, + "English": 0.36, + "Vietnamese": 0.24666666666666667, + "Spanish": 0.32, + "Indonesian": 0.23333333333333334, + "Filipino": 0.2866666666666667, + "Chinese": 0.30666666666666664 + }, + "consistency_score_2": 0.5819047619047618, + "consistency_score_3": 0.4314285714285715, + "consistency_score_4": 0.348952380952381, + "consistency_score_5": 0.2949206349206349, + "consistency_score_6": 0.2561904761904762, + "consistency_score_7": 0.22666666666666666, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.6266666666666667, + "Malay,Vietnamese": 0.5466666666666666, + "Malay,Spanish": 0.58, + "Malay,Indonesian": 0.7066666666666667, + "Malay,Filipino": 0.5866666666666667, + "Malay,Chinese": 0.6, + "English,Vietnamese": 0.5333333333333333, + "English,Spanish": 0.5533333333333333, + "English,Indonesian": 0.5666666666666667, + "English,Filipino": 0.5466666666666666, + "English,Chinese": 0.5666666666666667, + "Vietnamese,Spanish": 0.58, + "Vietnamese,Indonesian": 0.6, + "Vietnamese,Filipino": 0.5866666666666667, + "Vietnamese,Chinese": 0.5733333333333334, + "Spanish,Indonesian": 0.62, + "Spanish,Filipino": 0.5466666666666666, + "Spanish,Chinese": 0.58, + "Indonesian,Filipino": 0.6133333333333333, + "Indonesian,Chinese": 0.5866666666666667, + "Filipino,Chinese": 0.52 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.4066666666666667, + "Malay,English,Spanish": 0.41333333333333333, + "Malay,English,Indonesian": 0.5, + "Malay,English,Filipino": 0.44, + "Malay,English,Chinese": 0.47333333333333333, + "Malay,Vietnamese,Spanish": 0.4066666666666667, + "Malay,Vietnamese,Indonesian": 0.4866666666666667, + "Malay,Vietnamese,Filipino": 0.41333333333333333, + "Malay,Vietnamese,Chinese": 0.4266666666666667, + "Malay,Spanish,Indonesian": 0.49333333333333335, + "Malay,Spanish,Filipino": 0.3933333333333333, + "Malay,Spanish,Chinese": 0.43333333333333335, + "Malay,Indonesian,Filipino": 0.5066666666666667, + "Malay,Indonesian,Chinese": 0.5, + "Malay,Filipino,Chinese": 0.4, + "English,Vietnamese,Spanish": 0.3933333333333333, + "English,Vietnamese,Indonesian": 0.4266666666666667, + "English,Vietnamese,Filipino": 0.4066666666666667, + "English,Vietnamese,Chinese": 0.41333333333333333, + "English,Spanish,Indonesian": 0.42, + "English,Spanish,Filipino": 0.38, + "English,Spanish,Chinese": 0.42, + "English,Indonesian,Filipino": 0.44, + "English,Indonesian,Chinese": 0.44666666666666666, + "English,Filipino,Chinese": 0.4, + "Vietnamese,Spanish,Indonesian": 0.44666666666666666, + "Vietnamese,Spanish,Filipino": 0.41333333333333333, + "Vietnamese,Spanish,Chinese": 0.43333333333333335, + "Vietnamese,Indonesian,Filipino": 0.44666666666666666, + "Vietnamese,Indonesian,Chinese": 0.44666666666666666, + "Vietnamese,Filipino,Chinese": 0.4066666666666667, + "Spanish,Indonesian,Filipino": 0.42, + "Spanish,Indonesian,Chinese": 0.4533333333333333, + "Spanish,Filipino,Chinese": 0.38666666666666666, + "Indonesian,Filipino,Chinese": 0.4066666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.32, + "Malay,English,Vietnamese,Indonesian": 0.38, + "Malay,English,Vietnamese,Filipino": 0.34, + "Malay,English,Vietnamese,Chinese": 0.3466666666666667, + "Malay,English,Spanish,Indonesian": 0.36666666666666664, + "Malay,English,Spanish,Filipino": 0.31333333333333335, + "Malay,English,Spanish,Chinese": 0.36, + "Malay,English,Indonesian,Filipino": 0.4066666666666667, + "Malay,English,Indonesian,Chinese": 0.41333333333333333, + "Malay,English,Filipino,Chinese": 0.36, + "Malay,Vietnamese,Spanish,Indonesian": 0.36666666666666664, + "Malay,Vietnamese,Spanish,Filipino": 0.31333333333333335, + "Malay,Vietnamese,Spanish,Chinese": 0.3466666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.38, + "Malay,Vietnamese,Indonesian,Chinese": 0.4, + "Malay,Vietnamese,Filipino,Chinese": 0.3333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.35333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.3933333333333333, + "Malay,Spanish,Filipino,Chinese": 0.32, + "Malay,Indonesian,Filipino,Chinese": 0.37333333333333335, + "English,Vietnamese,Spanish,Indonesian": 0.34, + "English,Vietnamese,Spanish,Filipino": 0.30666666666666664, + "English,Vietnamese,Spanish,Chinese": 0.3333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.3466666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.35333333333333333, + "English,Vietnamese,Filipino,Chinese": 0.32, + "English,Spanish,Indonesian,Filipino": 0.32666666666666666, + "English,Spanish,Indonesian,Chinese": 0.36, + "English,Spanish,Filipino,Chinese": 0.31333333333333335, + "English,Indonesian,Filipino,Chinese": 0.34, + "Vietnamese,Spanish,Indonesian,Filipino": 0.34, + "Vietnamese,Spanish,Indonesian,Chinese": 0.36666666666666664, + "Vietnamese,Spanish,Filipino,Chinese": 0.32, + "Vietnamese,Indonesian,Filipino,Chinese": 0.3333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.32666666666666666 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.3, + "Malay,English,Vietnamese,Spanish,Filipino": 0.26, + "Malay,English,Vietnamese,Spanish,Chinese": 0.2866666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.32, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.32666666666666666, + "Malay,English,Vietnamese,Filipino,Chinese": 0.29333333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.29333333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.3333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.2866666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.3333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.2866666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.32666666666666666, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.2733333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.31333333333333335, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.3, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.2733333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.29333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.26, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.28, + "English,Spanish,Indonesian,Filipino,Chinese": 0.2733333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.28 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.2733333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.24, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.2733333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.26, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666 + } + }, + "AC3_2": 0.38325231295567025, + "AC3_3": 0.34376778595092594, + "AC3_4": 0.3141828159340327, + "AC3_5": 0.2902444739014302, + "AC3_6": 0.2701481295007384, + "AC3_7": 0.25278810403988333 + }, + "prompt_5": { + "overall_acc": 0.31047619047619046, + "language_acc": { + "Malay": 0.3, + "English": 0.42, + "Vietnamese": 0.29333333333333333, + "Spanish": 0.30666666666666664, + "Indonesian": 0.32666666666666666, + "Filipino": 0.2733333333333333, + "Chinese": 0.25333333333333335 + }, + "consistency_score_2": 0.7165079365079365, + "consistency_score_3": 0.5979047619047618, + "consistency_score_4": 0.5215238095238096, + "consistency_score_5": 0.46476190476190465, + "consistency_score_6": 0.41904761904761906, + "consistency_score_7": 0.38, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.7266666666666667, + "Malay,Vietnamese": 0.74, + "Malay,Spanish": 0.7666666666666667, + "Malay,Indonesian": 0.8533333333333334, + "Malay,Filipino": 0.8066666666666666, + "Malay,Chinese": 0.7333333333333333, + "English,Vietnamese": 0.6533333333333333, + "English,Spanish": 0.7266666666666667, + "English,Indonesian": 0.7, + "English,Filipino": 0.7, + "English,Chinese": 0.6333333333333333, + "Vietnamese,Spanish": 0.6666666666666666, + "Vietnamese,Indonesian": 0.6866666666666666, + "Vietnamese,Filipino": 0.7133333333333334, + "Vietnamese,Chinese": 0.64, + "Spanish,Indonesian": 0.7466666666666667, + "Spanish,Filipino": 0.6933333333333334, + "Spanish,Chinese": 0.7266666666666667, + "Indonesian,Filipino": 0.7466666666666667, + "Indonesian,Chinese": 0.68, + "Filipino,Chinese": 0.7066666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.58, + "Malay,English,Spanish": 0.62, + "Malay,English,Indonesian": 0.6466666666666666, + "Malay,English,Filipino": 0.64, + "Malay,English,Chinese": 0.5666666666666667, + "Malay,Vietnamese,Spanish": 0.6133333333333333, + "Malay,Vietnamese,Indonesian": 0.6533333333333333, + "Malay,Vietnamese,Filipino": 0.66, + "Malay,Vietnamese,Chinese": 0.5933333333333334, + "Malay,Spanish,Indonesian": 0.6866666666666666, + "Malay,Spanish,Filipino": 0.6533333333333333, + "Malay,Spanish,Chinese": 0.6333333333333333, + "Malay,Indonesian,Filipino": 0.7266666666666667, + "Malay,Indonesian,Chinese": 0.64, + "Malay,Filipino,Chinese": 0.6266666666666667, + "English,Vietnamese,Spanish": 0.5533333333333333, + "English,Vietnamese,Indonesian": 0.5533333333333333, + "English,Vietnamese,Filipino": 0.5733333333333334, + "English,Vietnamese,Chinese": 0.5066666666666667, + "English,Spanish,Indonesian": 0.6, + "English,Spanish,Filipino": 0.5733333333333334, + "English,Spanish,Chinese": 0.5533333333333333, + "English,Indonesian,Filipino": 0.6, + "English,Indonesian,Chinese": 0.52, + "English,Filipino,Chinese": 0.5466666666666666, + "Vietnamese,Spanish,Indonesian": 0.5866666666666667, + "Vietnamese,Spanish,Filipino": 0.58, + "Vietnamese,Spanish,Chinese": 0.5466666666666666, + "Vietnamese,Indonesian,Filipino": 0.6, + "Vietnamese,Indonesian,Chinese": 0.54, + "Vietnamese,Filipino,Chinese": 0.56, + "Spanish,Indonesian,Filipino": 0.62, + "Spanish,Indonesian,Chinese": 0.6, + "Spanish,Filipino,Chinese": 0.5866666666666667, + "Indonesian,Filipino,Chinese": 0.5866666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.52, + "Malay,English,Vietnamese,Indonesian": 0.5266666666666666, + "Malay,English,Vietnamese,Filipino": 0.54, + "Malay,English,Vietnamese,Chinese": 0.47333333333333333, + "Malay,English,Spanish,Indonesian": 0.56, + "Malay,English,Spanish,Filipino": 0.5533333333333333, + "Malay,English,Spanish,Chinese": 0.5266666666666666, + "Malay,English,Indonesian,Filipino": 0.5866666666666667, + "Malay,English,Indonesian,Chinese": 0.5, + "Malay,English,Filipino,Chinese": 0.5066666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.5666666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.5666666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.5266666666666666, + "Malay,Vietnamese,Indonesian,Filipino": 0.5866666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.52, + "Malay,Vietnamese,Filipino,Chinese": 0.5333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.6066666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.5733333333333334, + "Malay,Spanish,Filipino,Chinese": 0.5533333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.5666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.5, + "English,Vietnamese,Spanish,Filipino": 0.5, + "English,Vietnamese,Spanish,Chinese": 0.46, + "English,Vietnamese,Indonesian,Filipino": 0.5, + "English,Vietnamese,Indonesian,Chinese": 0.44666666666666666, + "English,Vietnamese,Filipino,Chinese": 0.46, + "English,Spanish,Indonesian,Filipino": 0.52, + "English,Spanish,Indonesian,Chinese": 0.4866666666666667, + "English,Spanish,Filipino,Chinese": 0.48, + "English,Indonesian,Filipino,Chinese": 0.47333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.5266666666666666, + "Vietnamese,Spanish,Indonesian,Chinese": 0.5, + "Vietnamese,Spanish,Filipino,Chinese": 0.49333333333333335, + "Vietnamese,Indonesian,Filipino,Chinese": 0.4866666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.5266666666666666 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.48, + "Malay,English,Vietnamese,Spanish,Filipino": 0.4866666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.44666666666666666, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.4866666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.4266666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.44, + "Malay,English,Spanish,Indonesian,Filipino": 0.5133333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.47333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.4666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.46, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.52, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.4866666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.4866666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.47333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.5133333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.4533333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.4266666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.42, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.4066666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.44, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.4533333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.44666666666666666, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.41333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.41333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.3933333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.43333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.44666666666666666, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.38666666666666666 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.38 + } + }, + "AC3_2": 0.4332270552312388, + "AC3_3": 0.40871661220275685, + "AC3_4": 0.38923251347144505, + "AC3_5": 0.3722662922182727, + "AC3_6": 0.35668282974098686, + "AC3_7": 0.34173793098498967 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.30681818181818177, + "language_acc": { + "English": 0.32386363636363635, + "Vietnamese": 0.32386363636363635, + "Chinese": 0.2897727272727273, + "Indonesian": 0.32386363636363635, + "Filipino": 0.26136363636363635, + "Spanish": 0.3409090909090909, + "Malay": 0.2840909090909091 + }, + "consistency_score_2": 0.46753246753246747, + "consistency_score_3": 0.27581168831168834, + "consistency_score_4": 0.175, + "consistency_score_5": 0.11282467532467531, + "consistency_score_6": 0.07224025974025973, + "consistency_score_7": 0.045454545454545456, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3522727272727273, + "English,Chinese": 0.45454545454545453, + "English,Indonesian": 0.4602272727272727, + "English,Filipino": 0.4147727272727273, + "English,Spanish": 0.5227272727272727, + "English,Malay": 0.4943181818181818, + "Vietnamese,Chinese": 0.3068181818181818, + "Vietnamese,Indonesian": 0.4602272727272727, + "Vietnamese,Filipino": 0.42045454545454547, + "Vietnamese,Spanish": 0.30113636363636365, + "Vietnamese,Malay": 0.4090909090909091, + "Chinese,Indonesian": 0.4090909090909091, + "Chinese,Filipino": 0.48295454545454547, + "Chinese,Spanish": 0.5568181818181818, + "Chinese,Malay": 0.4943181818181818, + "Indonesian,Filipino": 0.5511363636363636, + "Indonesian,Spanish": 0.5227272727272727, + "Indonesian,Malay": 0.6022727272727273, + "Filipino,Spanish": 0.4715909090909091, + "Filipino,Malay": 0.5909090909090909, + "Spanish,Malay": 0.5397727272727273 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.19318181818181818, + "English,Vietnamese,Indonesian": 0.2215909090909091, + "English,Vietnamese,Filipino": 0.21022727272727273, + "English,Vietnamese,Spanish": 0.2159090909090909, + "English,Vietnamese,Malay": 0.2556818181818182, + "English,Chinese,Indonesian": 0.22727272727272727, + "English,Chinese,Filipino": 0.23295454545454544, + "English,Chinese,Spanish": 0.3181818181818182, + "English,Chinese,Malay": 0.2784090909090909, + "English,Indonesian,Filipino": 0.2727272727272727, + "English,Indonesian,Spanish": 0.3125, + "English,Indonesian,Malay": 0.3465909090909091, + "English,Filipino,Spanish": 0.2727272727272727, + "English,Filipino,Malay": 0.3125, + "English,Spanish,Malay": 0.32954545454545453, + "Vietnamese,Chinese,Indonesian": 0.19886363636363635, + "Vietnamese,Chinese,Filipino": 0.20454545454545456, + "Vietnamese,Chinese,Spanish": 0.1875, + "Vietnamese,Chinese,Malay": 0.2159090909090909, + "Vietnamese,Indonesian,Filipino": 0.2840909090909091, + "Vietnamese,Indonesian,Spanish": 0.22727272727272727, + "Vietnamese,Indonesian,Malay": 0.2840909090909091, + "Vietnamese,Filipino,Spanish": 0.1875, + "Vietnamese,Filipino,Malay": 0.2840909090909091, + "Vietnamese,Spanish,Malay": 0.2159090909090909, + "Chinese,Indonesian,Filipino": 0.2840909090909091, + "Chinese,Indonesian,Spanish": 0.30113636363636365, + "Chinese,Indonesian,Malay": 0.3181818181818182, + "Chinese,Filipino,Spanish": 0.3125, + "Chinese,Filipino,Malay": 0.3465909090909091, + "Chinese,Spanish,Malay": 0.3409090909090909, + "Indonesian,Filipino,Spanish": 0.32954545454545453, + "Indonesian,Filipino,Malay": 0.39204545454545453, + "Indonesian,Spanish,Malay": 0.39204545454545453, + "Filipino,Spanish,Malay": 0.3465909090909091 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.13068181818181818, + "English,Vietnamese,Chinese,Filipino": 0.125, + "English,Vietnamese,Chinese,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Malay": 0.14772727272727273, + "English,Vietnamese,Indonesian,Filipino": 0.14772727272727273, + "English,Vietnamese,Indonesian,Spanish": 0.1534090909090909, + "English,Vietnamese,Indonesian,Malay": 0.17613636363636365, + "English,Vietnamese,Filipino,Spanish": 0.125, + "English,Vietnamese,Filipino,Malay": 0.17613636363636365, + "English,Vietnamese,Spanish,Malay": 0.1534090909090909, + "English,Chinese,Indonesian,Filipino": 0.1590909090909091, + "English,Chinese,Indonesian,Spanish": 0.18181818181818182, + "English,Chinese,Indonesian,Malay": 0.1875, + "English,Chinese,Filipino,Spanish": 0.19318181818181818, + "English,Chinese,Filipino,Malay": 0.1875, + "English,Chinese,Spanish,Malay": 0.21022727272727273, + "English,Indonesian,Filipino,Spanish": 0.1875, + "English,Indonesian,Filipino,Malay": 0.2159090909090909, + "English,Indonesian,Spanish,Malay": 0.24431818181818182, + "English,Filipino,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Spanish": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Filipino,Spanish": 0.125, + "Vietnamese,Chinese,Filipino,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Indonesian,Filipino,Spanish": 0.14772727272727273, + "Vietnamese,Indonesian,Filipino,Malay": 0.19886363636363635, + "Vietnamese,Indonesian,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Filipino,Spanish,Malay": 0.14772727272727273, + "Chinese,Indonesian,Filipino,Spanish": 0.20454545454545456, + "Chinese,Indonesian,Filipino,Malay": 0.23295454545454544, + "Chinese,Indonesian,Spanish,Malay": 0.22727272727272727, + "Chinese,Filipino,Spanish,Malay": 0.24431818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.2556818181818182 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.09090909090909091, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.11931818181818182, + "English,Vietnamese,Filipino,Spanish,Malay": 0.09659090909090909, + "English,Chinese,Indonesian,Filipino,Spanish": 0.125, + "English,Chinese,Indonesian,Filipino,Malay": 0.13068181818181818, + "English,Chinese,Indonesian,Spanish,Malay": 0.14204545454545456, + "English,Chinese,Filipino,Spanish,Malay": 0.14772727272727273, + "English,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + } + }, + "AC3_2": 0.3704974270533544, + "AC3_3": 0.29048988064620784, + "AC3_4": 0.22287735844430845, + "AC3_5": 0.16498153679908859, + "AC3_6": 0.11694568811397997, + "AC3_7": 0.07917888560802194 + }, + "prompt_2": { + "overall_acc": 0.30357142857142855, + "language_acc": { + "English": 0.36363636363636365, + "Vietnamese": 0.30113636363636365, + "Chinese": 0.2840909090909091, + "Indonesian": 0.32386363636363635, + "Filipino": 0.2897727272727273, + "Spanish": 0.30113636363636365, + "Malay": 0.26136363636363635 + }, + "consistency_score_2": 0.5173160173160173, + "consistency_score_3": 0.3378246753246753, + "consistency_score_4": 0.2448051948051948, + "consistency_score_5": 0.1877705627705628, + "consistency_score_6": 0.148538961038961, + "consistency_score_7": 0.11931818181818182, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.39204545454545453, + "English,Chinese": 0.44886363636363635, + "English,Indonesian": 0.4602272727272727, + "English,Filipino": 0.48863636363636365, + "English,Spanish": 0.5454545454545454, + "English,Malay": 0.4943181818181818, + "Vietnamese,Chinese": 0.42045454545454547, + "Vietnamese,Indonesian": 0.4772727272727273, + "Vietnamese,Filipino": 0.5170454545454546, + "Vietnamese,Spanish": 0.44886363636363635, + "Vietnamese,Malay": 0.48863636363636365, + "Chinese,Indonesian": 0.42045454545454547, + "Chinese,Filipino": 0.44886363636363635, + "Chinese,Spanish": 0.45454545454545453, + "Chinese,Malay": 0.4602272727272727, + "Indonesian,Filipino": 0.6420454545454546, + "Indonesian,Spanish": 0.6193181818181818, + "Indonesian,Malay": 0.6931818181818182, + "Filipino,Spanish": 0.5681818181818182, + "Filipino,Malay": 0.7215909090909091, + "Spanish,Malay": 0.6534090909090909 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.23863636363636365, + "English,Vietnamese,Indonesian": 0.2556818181818182, + "English,Vietnamese,Filipino": 0.2784090909090909, + "English,Vietnamese,Spanish": 0.2784090909090909, + "English,Vietnamese,Malay": 0.26704545454545453, + "English,Chinese,Indonesian": 0.23863636363636365, + "English,Chinese,Filipino": 0.26704545454545453, + "English,Chinese,Spanish": 0.29545454545454547, + "English,Chinese,Malay": 0.24431818181818182, + "English,Indonesian,Filipino": 0.3465909090909091, + "English,Indonesian,Spanish": 0.35795454545454547, + "English,Indonesian,Malay": 0.36363636363636365, + "English,Filipino,Spanish": 0.35795454545454547, + "English,Filipino,Malay": 0.39204545454545453, + "English,Spanish,Malay": 0.3806818181818182, + "Vietnamese,Chinese,Indonesian": 0.2556818181818182, + "Vietnamese,Chinese,Filipino": 0.2784090909090909, + "Vietnamese,Chinese,Spanish": 0.26704545454545453, + "Vietnamese,Chinese,Malay": 0.26704545454545453, + "Vietnamese,Indonesian,Filipino": 0.36363636363636365, + "Vietnamese,Indonesian,Spanish": 0.32386363636363635, + "Vietnamese,Indonesian,Malay": 0.35795454545454547, + "Vietnamese,Filipino,Spanish": 0.3352272727272727, + "Vietnamese,Filipino,Malay": 0.39204545454545453, + "Vietnamese,Spanish,Malay": 0.3522727272727273, + "Chinese,Indonesian,Filipino": 0.32954545454545453, + "Chinese,Indonesian,Spanish": 0.32954545454545453, + "Chinese,Indonesian,Malay": 0.3465909090909091, + "Chinese,Filipino,Spanish": 0.32386363636363635, + "Chinese,Filipino,Malay": 0.36363636363636365, + "Chinese,Spanish,Malay": 0.3522727272727273, + "Indonesian,Filipino,Spanish": 0.45454545454545453, + "Indonesian,Filipino,Malay": 0.5454545454545454, + "Indonesian,Spanish,Malay": 0.5170454545454546, + "Filipino,Spanish,Malay": 0.5056818181818182 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.17045454545454544, + "English,Vietnamese,Chinese,Filipino": 0.1875, + "English,Vietnamese,Chinese,Spanish": 0.1875, + "English,Vietnamese,Chinese,Malay": 0.17045454545454544, + "English,Vietnamese,Indonesian,Filipino": 0.19886363636363635, + "English,Vietnamese,Indonesian,Spanish": 0.21022727272727273, + "English,Vietnamese,Indonesian,Malay": 0.19886363636363635, + "English,Vietnamese,Filipino,Spanish": 0.23295454545454544, + "English,Vietnamese,Filipino,Malay": 0.22727272727272727, + "English,Vietnamese,Spanish,Malay": 0.2215909090909091, + "English,Chinese,Indonesian,Filipino": 0.21022727272727273, + "English,Chinese,Indonesian,Spanish": 0.19886363636363635, + "English,Chinese,Indonesian,Malay": 0.20454545454545456, + "English,Chinese,Filipino,Spanish": 0.22727272727272727, + "English,Chinese,Filipino,Malay": 0.2215909090909091, + "English,Chinese,Spanish,Malay": 0.2215909090909091, + "English,Indonesian,Filipino,Spanish": 0.2784090909090909, + "English,Indonesian,Filipino,Malay": 0.3068181818181818, + "English,Indonesian,Spanish,Malay": 0.29545454545454547, + "English,Filipino,Spanish,Malay": 0.3181818181818182, + "Vietnamese,Chinese,Indonesian,Filipino": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Filipino,Spanish": 0.22727272727272727, + "Vietnamese,Chinese,Filipino,Malay": 0.23295454545454544, + "Vietnamese,Chinese,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Indonesian,Filipino,Spanish": 0.2727272727272727, + "Vietnamese,Indonesian,Filipino,Malay": 0.3068181818181818, + "Vietnamese,Indonesian,Spanish,Malay": 0.2840909090909091, + "Vietnamese,Filipino,Spanish,Malay": 0.29545454545454547, + "Chinese,Indonesian,Filipino,Spanish": 0.2784090909090909, + "Chinese,Indonesian,Filipino,Malay": 0.29545454545454547, + "Chinese,Indonesian,Spanish,Malay": 0.30113636363636365, + "Chinese,Filipino,Spanish,Malay": 0.29545454545454547, + "Indonesian,Filipino,Spanish,Malay": 0.42045454545454547 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.14204545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.14772727272727273, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.14204545454545456, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.17045454545454544, + "English,Vietnamese,Chinese,Filipino,Malay": 0.1534090909090909, + "English,Vietnamese,Chinese,Spanish,Malay": 0.1590909090909091, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.17613636363636365, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.17613636363636365, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.17613636363636365, + "English,Vietnamese,Filipino,Spanish,Malay": 0.19886363636363635, + "English,Chinese,Indonesian,Filipino,Spanish": 0.18181818181818182, + "English,Chinese,Indonesian,Filipino,Malay": 0.1875, + "English,Chinese,Indonesian,Spanish,Malay": 0.18181818181818182, + "English,Chinese,Filipino,Spanish,Malay": 0.20454545454545456, + "English,Indonesian,Filipino,Spanish,Malay": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.1875, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.19318181818181818, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.25, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.26136363636363635 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13068181818181818, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.125, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13068181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.14772727272727273, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.1590909090909091, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17613636363636365 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182 + } + }, + "AC3_2": 0.38261606549628474, + "AC3_3": 0.31978341825292306, + "AC3_4": 0.2710394992316365, + "AC3_5": 0.23202485835428774, + "AC3_6": 0.19947422411592033, + "AC3_7": 0.17130518230114242 + }, + "prompt_3": { + "overall_acc": 0.31980519480519476, + "language_acc": { + "English": 0.3806818181818182, + "Vietnamese": 0.3181818181818182, + "Chinese": 0.32386363636363635, + "Indonesian": 0.30113636363636365, + "Filipino": 0.2784090909090909, + "Spanish": 0.3465909090909091, + "Malay": 0.2897727272727273 + }, + "consistency_score_2": 0.4761904761904763, + "consistency_score_3": 0.2865259740259741, + "consistency_score_4": 0.19480519480519481, + "consistency_score_5": 0.14420995670995668, + "consistency_score_6": 0.11282467532467533, + "consistency_score_7": 0.09090909090909091, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3409090909090909, + "English,Chinese": 0.4375, + "English,Indonesian": 0.44886363636363635, + "English,Filipino": 0.4431818181818182, + "English,Spanish": 0.4602272727272727, + "English,Malay": 0.4375, + "Vietnamese,Chinese": 0.4090909090909091, + "Vietnamese,Indonesian": 0.5568181818181818, + "Vietnamese,Filipino": 0.48295454545454547, + "Vietnamese,Spanish": 0.35795454545454547, + "Vietnamese,Malay": 0.5284090909090909, + "Chinese,Indonesian": 0.4034090909090909, + "Chinese,Filipino": 0.4772727272727273, + "Chinese,Spanish": 0.4147727272727273, + "Chinese,Malay": 0.4318181818181818, + "Indonesian,Filipino": 0.5738636363636364, + "Indonesian,Spanish": 0.4943181818181818, + "Indonesian,Malay": 0.7102272727272727, + "Filipino,Spanish": 0.4602272727272727, + "Filipino,Malay": 0.6647727272727273, + "Spanish,Malay": 0.4659090909090909 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.19318181818181818, + "English,Vietnamese,Indonesian": 0.24431818181818182, + "English,Vietnamese,Filipino": 0.23295454545454544, + "English,Vietnamese,Spanish": 0.2159090909090909, + "English,Vietnamese,Malay": 0.24431818181818182, + "English,Chinese,Indonesian": 0.23863636363636365, + "English,Chinese,Filipino": 0.25, + "English,Chinese,Spanish": 0.22727272727272727, + "English,Chinese,Malay": 0.22727272727272727, + "English,Indonesian,Filipino": 0.32954545454545453, + "English,Indonesian,Spanish": 0.30113636363636365, + "English,Indonesian,Malay": 0.36363636363636365, + "English,Filipino,Spanish": 0.26704545454545453, + "English,Filipino,Malay": 0.3181818181818182, + "English,Spanish,Malay": 0.2727272727272727, + "Vietnamese,Chinese,Indonesian": 0.25, + "Vietnamese,Chinese,Filipino": 0.26704545454545453, + "Vietnamese,Chinese,Spanish": 0.19886363636363635, + "Vietnamese,Chinese,Malay": 0.2784090909090909, + "Vietnamese,Indonesian,Filipino": 0.3409090909090909, + "Vietnamese,Indonesian,Spanish": 0.26136363636363635, + "Vietnamese,Indonesian,Malay": 0.4318181818181818, + "Vietnamese,Filipino,Spanish": 0.25, + "Vietnamese,Filipino,Malay": 0.375, + "Vietnamese,Spanish,Malay": 0.24431818181818182, + "Chinese,Indonesian,Filipino": 0.29545454545454547, + "Chinese,Indonesian,Spanish": 0.23863636363636365, + "Chinese,Indonesian,Malay": 0.3181818181818182, + "Chinese,Filipino,Spanish": 0.26136363636363635, + "Chinese,Filipino,Malay": 0.3409090909090909, + "Chinese,Spanish,Malay": 0.2159090909090909, + "Indonesian,Filipino,Spanish": 0.32386363636363635, + "Indonesian,Filipino,Malay": 0.5, + "Indonesian,Spanish,Malay": 0.375, + "Filipino,Spanish,Malay": 0.3352272727272727 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.14772727272727273, + "English,Vietnamese,Chinese,Filipino": 0.13636363636363635, + "English,Vietnamese,Chinese,Spanish": 0.125, + "English,Vietnamese,Chinese,Malay": 0.14204545454545456, + "English,Vietnamese,Indonesian,Filipino": 0.1875, + "English,Vietnamese,Indonesian,Spanish": 0.17045454545454544, + "English,Vietnamese,Indonesian,Malay": 0.2215909090909091, + "English,Vietnamese,Filipino,Spanish": 0.16477272727272727, + "English,Vietnamese,Filipino,Malay": 0.19318181818181818, + "English,Vietnamese,Spanish,Malay": 0.1590909090909091, + "English,Chinese,Indonesian,Filipino": 0.20454545454545456, + "English,Chinese,Indonesian,Spanish": 0.1534090909090909, + "English,Chinese,Indonesian,Malay": 0.19886363636363635, + "English,Chinese,Filipino,Spanish": 0.1534090909090909, + "English,Chinese,Filipino,Malay": 0.18181818181818182, + "English,Chinese,Spanish,Malay": 0.13636363636363635, + "English,Indonesian,Filipino,Spanish": 0.2215909090909091, + "English,Indonesian,Filipino,Malay": 0.2897727272727273, + "English,Indonesian,Spanish,Malay": 0.25, + "English,Filipino,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Filipino": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Spanish": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Malay": 0.2215909090909091, + "Vietnamese,Chinese,Filipino,Spanish": 0.17045454545454544, + "Vietnamese,Chinese,Filipino,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Indonesian,Filipino,Spanish": 0.21022727272727273, + "Vietnamese,Indonesian,Filipino,Malay": 0.3068181818181818, + "Vietnamese,Indonesian,Spanish,Malay": 0.2215909090909091, + "Vietnamese,Filipino,Spanish,Malay": 0.20454545454545456, + "Chinese,Indonesian,Filipino,Spanish": 0.19318181818181818, + "Chinese,Indonesian,Filipino,Malay": 0.2556818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.1875, + "Chinese,Filipino,Spanish,Malay": 0.19318181818181818, + "Indonesian,Filipino,Spanish,Malay": 0.2840909090909091 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.125, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.13636363636363635, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.10795454545454546, + "English,Vietnamese,Chinese,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Spanish,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.17613636363636365, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.1534090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.14204545454545456, + "English,Chinese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino,Malay": 0.17613636363636365, + "English,Chinese,Indonesian,Spanish,Malay": 0.13068181818181818, + "English,Chinese,Filipino,Spanish,Malay": 0.125, + "English,Indonesian,Filipino,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.1875, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091 + } + }, + "AC3_2": 0.3826357190934275, + "AC3_3": 0.3022522991904017, + "AC3_4": 0.24212380674552658, + "AC3_5": 0.19878270413345042, + "AC3_6": 0.1668027094470808, + "AC3_7": 0.14157384114411412 + }, + "prompt_4": { + "overall_acc": 0.27272727272727276, + "language_acc": { + "English": 0.2897727272727273, + "Vietnamese": 0.25, + "Chinese": 0.2556818181818182, + "Indonesian": 0.3068181818181818, + "Filipino": 0.23863636363636365, + "Spanish": 0.32954545454545453, + "Malay": 0.23863636363636365 + }, + "consistency_score_2": 0.3157467532467533, + "consistency_score_3": 0.11915584415584414, + "consistency_score_4": 0.053246753246753244, + "consistency_score_5": 0.02813852813852813, + "consistency_score_6": 0.017045454545454548, + "consistency_score_7": 0.011363636363636364, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.32386363636363635, + "English,Chinese": 0.30113636363636365, + "English,Indonesian": 0.3125, + "English,Filipino": 0.25, + "English,Spanish": 0.3068181818181818, + "English,Malay": 0.3068181818181818, + "Vietnamese,Chinese": 0.3125, + "Vietnamese,Indonesian": 0.32386363636363635, + "Vietnamese,Filipino": 0.2556818181818182, + "Vietnamese,Spanish": 0.3465909090909091, + "Vietnamese,Malay": 0.3409090909090909, + "Chinese,Indonesian": 0.3181818181818182, + "Chinese,Filipino": 0.2556818181818182, + "Chinese,Spanish": 0.35795454545454547, + "Chinese,Malay": 0.2727272727272727, + "Indonesian,Filipino": 0.3352272727272727, + "Indonesian,Spanish": 0.3977272727272727, + "Indonesian,Malay": 0.39204545454545453, + "Filipino,Spanish": 0.29545454545454547, + "Filipino,Malay": 0.2727272727272727, + "Spanish,Malay": 0.3522727272727273 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.125, + "English,Vietnamese,Indonesian": 0.10795454545454546, + "English,Vietnamese,Filipino": 0.09090909090909091, + "English,Vietnamese,Spanish": 0.13636363636363635, + "English,Vietnamese,Malay": 0.13636363636363635, + "English,Chinese,Indonesian": 0.10795454545454546, + "English,Chinese,Filipino": 0.056818181818181816, + "English,Chinese,Spanish": 0.125, + "English,Chinese,Malay": 0.10227272727272728, + "English,Indonesian,Filipino": 0.10227272727272728, + "English,Indonesian,Spanish": 0.13636363636363635, + "English,Indonesian,Malay": 0.11931818181818182, + "English,Filipino,Spanish": 0.07954545454545454, + "English,Filipino,Malay": 0.07386363636363637, + "English,Spanish,Malay": 0.125, + "Vietnamese,Chinese,Indonesian": 0.13068181818181818, + "Vietnamese,Chinese,Filipino": 0.09659090909090909, + "Vietnamese,Chinese,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Malay": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino": 0.11363636363636363, + "Vietnamese,Indonesian,Spanish": 0.17045454545454544, + "Vietnamese,Indonesian,Malay": 0.13636363636363635, + "Vietnamese,Filipino,Spanish": 0.11363636363636363, + "Vietnamese,Filipino,Malay": 0.09659090909090909, + "Vietnamese,Spanish,Malay": 0.14772727272727273, + "Chinese,Indonesian,Filipino": 0.10227272727272728, + "Chinese,Indonesian,Spanish": 0.17045454545454544, + "Chinese,Indonesian,Malay": 0.10795454545454546, + "Chinese,Filipino,Spanish": 0.08522727272727272, + "Chinese,Filipino,Malay": 0.0625, + "Chinese,Spanish,Malay": 0.13068181818181818, + "Indonesian,Filipino,Spanish": 0.16477272727272727, + "Indonesian,Filipino,Malay": 0.13068181818181818, + "Indonesian,Spanish,Malay": 0.20454545454545456, + "Filipino,Spanish,Malay": 0.11931818181818182 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino": 0.03977272727272727, + "English,Vietnamese,Chinese,Spanish": 0.07386363636363637, + "English,Vietnamese,Chinese,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino": 0.05113636363636364, + "English,Vietnamese,Indonesian,Spanish": 0.05113636363636364, + "English,Vietnamese,Indonesian,Malay": 0.05113636363636364, + "English,Vietnamese,Filipino,Spanish": 0.03409090909090909, + "English,Vietnamese,Filipino,Malay": 0.03409090909090909, + "English,Vietnamese,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Indonesian,Filipino": 0.03409090909090909, + "English,Chinese,Indonesian,Spanish": 0.05113636363636364, + "English,Chinese,Indonesian,Malay": 0.03409090909090909, + "English,Chinese,Filipino,Spanish": 0.022727272727272728, + "English,Chinese,Filipino,Malay": 0.028409090909090908, + "English,Chinese,Spanish,Malay": 0.056818181818181816, + "English,Indonesian,Filipino,Spanish": 0.05113636363636364, + "English,Indonesian,Filipino,Malay": 0.028409090909090908, + "English,Indonesian,Spanish,Malay": 0.07954545454545454, + "English,Filipino,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Indonesian,Filipino": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Spanish": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Malay": 0.05113636363636364, + "Vietnamese,Chinese,Filipino,Spanish": 0.045454545454545456, + "Vietnamese,Chinese,Filipino,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "Vietnamese,Indonesian,Filipino,Malay": 0.056818181818181816, + "Vietnamese,Indonesian,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.06818181818181818, + "Chinese,Indonesian,Filipino,Spanish": 0.0625, + "Chinese,Indonesian,Filipino,Malay": 0.03977272727272727, + "Chinese,Indonesian,Spanish,Malay": 0.07954545454545454, + "Chinese,Filipino,Spanish,Malay": 0.03977272727272727, + "Indonesian,Filipino,Spanish,Malay": 0.09090909090909091 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.022727272727272728, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.017045454545454544, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.022727272727272728, + "English,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "English,Chinese,Indonesian,Filipino,Malay": 0.017045454545454544, + "English,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "English,Chinese,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.05113636363636364, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.011363636363636364, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364 + } + }, + "AC3_2": 0.29266457675277513, + "AC3_3": 0.16585071924667455, + "AC3_4": 0.08909815281583985, + "AC3_5": 0.05101373445001622, + "AC3_6": 0.03208556148625354, + "AC3_7": 0.02181818181050182 + }, + "prompt_5": { + "overall_acc": 0.2987012987012987, + "language_acc": { + "English": 0.2897727272727273, + "Vietnamese": 0.32954545454545453, + "Chinese": 0.26704545454545453, + "Indonesian": 0.3522727272727273, + "Filipino": 0.2897727272727273, + "Spanish": 0.2784090909090909, + "Malay": 0.2840909090909091 + }, + "consistency_score_2": 0.42234848484848486, + "consistency_score_3": 0.224025974025974, + "consistency_score_4": 0.13474025974025974, + "consistency_score_5": 0.08874458874458872, + "consistency_score_6": 0.06250000000000001, + "consistency_score_7": 0.045454545454545456, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.26136363636363635, + "English,Chinese": 0.5056818181818182, + "English,Indonesian": 0.48863636363636365, + "English,Filipino": 0.3806818181818182, + "English,Spanish": 0.6306818181818182, + "English,Malay": 0.5284090909090909, + "Vietnamese,Chinese": 0.2159090909090909, + "Vietnamese,Indonesian": 0.45454545454545453, + "Vietnamese,Filipino": 0.2897727272727273, + "Vietnamese,Spanish": 0.2784090909090909, + "Vietnamese,Malay": 0.3977272727272727, + "Chinese,Indonesian": 0.42613636363636365, + "Chinese,Filipino": 0.38636363636363635, + "Chinese,Spanish": 0.48863636363636365, + "Chinese,Malay": 0.4090909090909091, + "Indonesian,Filipino": 0.39204545454545453, + "Indonesian,Spanish": 0.4602272727272727, + "Indonesian,Malay": 0.625, + "Filipino,Spanish": 0.3806818181818182, + "Filipino,Malay": 0.4090909090909091, + "Spanish,Malay": 0.4602272727272727 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.11931818181818182, + "English,Vietnamese,Indonesian": 0.20454545454545456, + "English,Vietnamese,Filipino": 0.11363636363636363, + "English,Vietnamese,Spanish": 0.1875, + "English,Vietnamese,Malay": 0.18181818181818182, + "English,Chinese,Indonesian": 0.2784090909090909, + "English,Chinese,Filipino": 0.22727272727272727, + "English,Chinese,Spanish": 0.35795454545454547, + "English,Chinese,Malay": 0.2840909090909091, + "English,Indonesian,Filipino": 0.22727272727272727, + "English,Indonesian,Spanish": 0.3465909090909091, + "English,Indonesian,Malay": 0.35795454545454547, + "English,Filipino,Spanish": 0.2556818181818182, + "English,Filipino,Malay": 0.24431818181818182, + "English,Spanish,Malay": 0.36363636363636365, + "Vietnamese,Chinese,Indonesian": 0.14772727272727273, + "Vietnamese,Chinese,Filipino": 0.10227272727272728, + "Vietnamese,Chinese,Spanish": 0.125, + "Vietnamese,Chinese,Malay": 0.11363636363636363, + "Vietnamese,Indonesian,Filipino": 0.18181818181818182, + "Vietnamese,Indonesian,Spanish": 0.1875, + "Vietnamese,Indonesian,Malay": 0.3068181818181818, + "Vietnamese,Filipino,Spanish": 0.13068181818181818, + "Vietnamese,Filipino,Malay": 0.1534090909090909, + "Vietnamese,Spanish,Malay": 0.16477272727272727, + "Chinese,Indonesian,Filipino": 0.20454545454545456, + "Chinese,Indonesian,Spanish": 0.2727272727272727, + "Chinese,Indonesian,Malay": 0.2840909090909091, + "Chinese,Filipino,Spanish": 0.23863636363636365, + "Chinese,Filipino,Malay": 0.1875, + "Chinese,Spanish,Malay": 0.25, + "Indonesian,Filipino,Spanish": 0.22727272727272727, + "Indonesian,Filipino,Malay": 0.2727272727272727, + "Indonesian,Spanish,Malay": 0.3068181818181818, + "Filipino,Spanish,Malay": 0.23295454545454544 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino": 0.07386363636363637, + "English,Vietnamese,Chinese,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Filipino": 0.09659090909090909, + "English,Vietnamese,Indonesian,Spanish": 0.14204545454545456, + "English,Vietnamese,Indonesian,Malay": 0.1590909090909091, + "English,Vietnamese,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Filipino,Malay": 0.09659090909090909, + "English,Vietnamese,Spanish,Malay": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino": 0.1534090909090909, + "English,Chinese,Indonesian,Spanish": 0.21022727272727273, + "English,Chinese,Indonesian,Malay": 0.19318181818181818, + "English,Chinese,Filipino,Spanish": 0.17045454545454544, + "English,Chinese,Filipino,Malay": 0.14772727272727273, + "English,Chinese,Spanish,Malay": 0.2159090909090909, + "English,Indonesian,Filipino,Spanish": 0.17613636363636365, + "English,Indonesian,Filipino,Malay": 0.18181818181818182, + "English,Indonesian,Spanish,Malay": 0.2556818181818182, + "English,Filipino,Spanish,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian,Filipino": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Filipino,Spanish": 0.07386363636363637, + "Vietnamese,Chinese,Filipino,Malay": 0.0625, + "Vietnamese,Chinese,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "Vietnamese,Indonesian,Filipino,Malay": 0.13068181818181818, + "Vietnamese,Indonesian,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Filipino,Spanish,Malay": 0.09090909090909091, + "Chinese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "Chinese,Indonesian,Filipino,Malay": 0.14204545454545456, + "Chinese,Indonesian,Spanish,Malay": 0.17045454545454544, + "Chinese,Filipino,Spanish,Malay": 0.1534090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.17613636363636365 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.0625, + "English,Vietnamese,Chinese,Filipino,Malay": 0.0625, + "English,Vietnamese,Chinese,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.11363636363636363, + "English,Vietnamese,Filipino,Spanish,Malay": 0.07954545454545454, + "English,Chinese,Indonesian,Filipino,Spanish": 0.125, + "English,Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Spanish,Malay": 0.14772727272727273, + "English,Chinese,Filipino,Spanish,Malay": 0.13068181818181818, + "English,Indonesian,Filipino,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + } + }, + "AC3_2": 0.34992324743569336, + "AC3_3": 0.2560296845521335, + "AC3_4": 0.18570942162163254, + "AC3_5": 0.13683523177198156, + "AC3_6": 0.10337078648823533, + "AC3_7": 0.07890222982269976 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2815533980582524 + }, + "prompt_2": { + "accuracy": 0.27184466019417475 + }, + "prompt_3": { + "accuracy": 0.2524271844660194 + }, + "prompt_4": { + "accuracy": 0.17475728155339806 + }, + "prompt_5": { + "accuracy": 0.2912621359223301 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.23809523809523808 + }, + "prompt_2": { + "accuracy": 0.23809523809523808 + }, + "prompt_3": { + "accuracy": 0.2571428571428571 + }, + "prompt_4": { + "accuracy": 0.22857142857142856 + }, + "prompt_5": { + "accuracy": 0.24761904761904763 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.308411214953271 + }, + "prompt_2": { + "accuracy": 0.37383177570093457 + }, + "prompt_3": { + "accuracy": 0.35514018691588783 + }, + "prompt_4": { + "accuracy": 0.22429906542056074 + }, + "prompt_5": { + "accuracy": 0.37383177570093457 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.33, + "category_acc": { + "brand": 0.2, + "demographics": 0.4, + "biology": 0.2, + "history": 0.2, + "literature": 0.3, + "politics": 0.7, + "culture": 0.3, + "film": 0.4, + "law": 0.3, + "geography": 0.4 + } + }, + "prompt_2": { + "accuracy": 0.33, + "category_acc": { + "brand": 0.2, + "demographics": 0.4, + "biology": 0.2, + "history": 0.2, + "literature": 0.4, + "politics": 0.7, + "culture": 0.4, + "film": 0.4, + "law": 0.2, + "geography": 0.3 + } + }, + "prompt_3": { + "accuracy": 0.34, + "category_acc": { + "brand": 0.5, + "demographics": 0.4, + "biology": 0.1, + "history": 0.2, + "literature": 0.3, + "politics": 0.6, + "culture": 0.4, + "film": 0.4, + "law": 0.3, + "geography": 0.3 + } + }, + "prompt_4": { + "accuracy": 0.39, + "category_acc": { + "brand": 0.2, + "demographics": 0.4, + "biology": 0.2, + "history": 0.26666666666666666, + "literature": 0.4, + "politics": 0.6, + "culture": 0.4, + "film": 0.7, + "law": 0.5, + "geography": 0.3 + } + }, + "prompt_5": { + "accuracy": 0.37, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.4, + "history": 0.2, + "literature": 0.4, + "politics": 0.7, + "culture": 0.3, + "film": 0.4, + "law": 0.2, + "geography": 0.5 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.06565357784684488 + }, + "prompt_2": { + "bleu_score": 0.08979853348012166 + }, + "prompt_3": { + "bleu_score": 0.0754816270521484 + }, + "prompt_4": { + "bleu_score": 0.0673394840610328 + }, + "prompt_5": { + "bleu_score": 0.04909134454861705 + } }, "indommlu": { "prompt_1": -1, @@ -3778,179 +34303,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.09081980052265458 + }, + "prompt_2": { + "bleu_score": 0.08990369723404892 + }, + "prompt_3": { + "bleu_score": 0.07147675829443079 + }, + "prompt_4": { + "bleu_score": 0.10530185202741413 + }, + "prompt_5": { + "bleu_score": 0.08233748734434669 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.0756133901331998 + }, + "prompt_2": { + "bleu_score": 0.07429706989745946 + }, + "prompt_3": { + "bleu_score": 0.06335761424422617 + }, + "prompt_4": { + "bleu_score": 0.08629075575051948 + }, + "prompt_5": { + "bleu_score": 0.07114287465662482 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.05368708519318244 + }, + "prompt_2": { + "bleu_score": 0.05671210457695124 + }, + "prompt_3": { + "bleu_score": 0.046571853073635525 + }, + "prompt_4": { + "bleu_score": 0.08039188090582502 + }, + "prompt_5": { + "bleu_score": 0.05095075778676779 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.09117383437868172 + }, + "prompt_2": { + "bleu_score": 0.08757746309355022 + }, + "prompt_3": { + "bleu_score": 0.06322565513222463 + }, + "prompt_4": { + "bleu_score": 0.09460942924966294 + }, + "prompt_5": { + "bleu_score": 0.0721033720675653 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.35822637106184363 + }, + "prompt_2": { + "accuracy": 0.338389731621937 + }, + "prompt_3": { + "accuracy": 0.3885647607934656 + }, + "prompt_4": { + "accuracy": 0.3663943990665111 + }, + "prompt_5": { + "accuracy": 0.3663943990665111 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3272792277440114, + "category_acc": { + "high_school_european_history": 0.5304878048780488, + "business_ethics": 0.43434343434343436, + "clinical_knowledge": 0.3446969696969697, + "medical_genetics": 0.37373737373737376, + "high_school_us_history": 0.4827586206896552, + "high_school_physics": 0.22666666666666666, + "high_school_world_history": 0.5042372881355932, + "virology": 0.36363636363636365, + "high_school_microeconomics": 0.2911392405063291, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.3434343434343434, + "high_school_biology": 0.31715210355987056, + "abstract_algebra": 0.25252525252525254, + "professional_accounting": 0.26334519572953735, + "philosophy": 0.3161290322580645, + "professional_medicine": 0.25830258302583026, + "nutrition": 0.3180327868852459, + "global_facts": 0.2222222222222222, + "machine_learning": 0.2702702702702703, + "security_studies": 0.30327868852459017, + "public_relations": 0.3577981651376147, + "professional_psychology": 0.36333878887070375, + "prehistory": 0.34055727554179566, + "anatomy": 0.291044776119403, + "human_sexuality": 0.2923076923076923, + "college_medicine": 0.3313953488372093, + "high_school_government_and_politics": 0.4270833333333333, + "college_chemistry": 0.31313131313131315, + "logical_fallacies": 0.35185185185185186, + "high_school_geography": 0.3756345177664975, + "elementary_mathematics": 0.21485411140583555, + "human_aging": 0.38738738738738737, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.4099264705882353, + "formal_logic": 0.272, + "high_school_statistics": 0.19069767441860466, + "international_law": 0.4083333333333333, + "high_school_mathematics": 0.21933085501858737, + "high_school_computer_science": 0.3434343434343434, + "conceptual_physics": 0.2692307692307692, + "miscellaneous": 0.40025575447570333, + "high_school_chemistry": 0.19801980198019803, + "marketing": 0.4892703862660944, + "professional_law": 0.30724070450097846, + "management": 0.3431372549019608, + "college_physics": 0.19801980198019803, + "jurisprudence": 0.3177570093457944, + "world_religions": 0.4588235294117647, + "sociology": 0.445, + "us_foreign_policy": 0.41414141414141414, + "high_school_macroeconomics": 0.2699228791773779, + "computer_security": 0.3838383838383838, + "moral_scenarios": 0.2606263982102908, + "moral_disputes": 0.3101449275362319, + "electrical_engineering": 0.22916666666666666, + "astronomy": 0.26490066225165565, + "college_biology": 0.38461538461538464 + } + }, + "prompt_2": { + "accuracy": 0.3221308544869503, + "category_acc": { + "high_school_european_history": 0.5182926829268293, + "business_ethics": 0.40404040404040403, + "clinical_knowledge": 0.3143939393939394, + "medical_genetics": 0.40404040404040403, + "high_school_us_history": 0.458128078817734, + "high_school_physics": 0.2733333333333333, + "high_school_world_history": 0.5, + "virology": 0.3333333333333333, + "high_school_microeconomics": 0.25316455696202533, + "econometrics": 0.18584070796460178, + "college_computer_science": 0.36363636363636365, + "high_school_biology": 0.28802588996763756, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.2846975088967972, + "philosophy": 0.3419354838709677, + "professional_medicine": 0.26199261992619927, + "nutrition": 0.28852459016393445, + "global_facts": 0.26262626262626265, + "machine_learning": 0.26126126126126126, + "security_studies": 0.3770491803278688, + "public_relations": 0.3211009174311927, + "professional_psychology": 0.33387888707037644, + "prehistory": 0.32507739938080493, + "anatomy": 0.30597014925373134, + "human_sexuality": 0.3923076923076923, + "college_medicine": 0.31976744186046513, + "high_school_government_and_politics": 0.4010416666666667, + "college_chemistry": 0.2828282828282828, + "logical_fallacies": 0.4012345679012346, + "high_school_geography": 0.38578680203045684, + "elementary_mathematics": 0.23342175066312998, + "human_aging": 0.38738738738738737, + "college_mathematics": 0.3333333333333333, + "high_school_psychology": 0.34375, + "formal_logic": 0.304, + "high_school_statistics": 0.17209302325581396, + "international_law": 0.4666666666666667, + "high_school_mathematics": 0.2342007434944238, + "high_school_computer_science": 0.32323232323232326, + "conceptual_physics": 0.3247863247863248, + "miscellaneous": 0.32608695652173914, + "high_school_chemistry": 0.16831683168316833, + "marketing": 0.47639484978540775, + "professional_law": 0.3176777560339204, + "management": 0.23529411764705882, + "college_physics": 0.21782178217821782, + "jurisprudence": 0.3644859813084112, + "world_religions": 0.4411764705882353, + "sociology": 0.395, + "us_foreign_policy": 0.46464646464646464, + "high_school_macroeconomics": 0.30077120822622105, + "computer_security": 0.36363636363636365, + "moral_scenarios": 0.2371364653243848, + "moral_disputes": 0.32463768115942027, + "electrical_engineering": 0.3263888888888889, + "astronomy": 0.304635761589404, + "college_biology": 0.34965034965034963 + } + }, + "prompt_3": { + "accuracy": 0.3750446907400787, + "category_acc": { + "high_school_european_history": 0.5487804878048781, + "business_ethics": 0.43434343434343436, + "clinical_knowledge": 0.4356060606060606, + "medical_genetics": 0.40404040404040403, + "high_school_us_history": 0.4876847290640394, + "high_school_physics": 0.25333333333333335, + "high_school_world_history": 0.5508474576271186, + "virology": 0.32727272727272727, + "high_school_microeconomics": 0.3037974683544304, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.40404040404040403, + "high_school_biology": 0.3786407766990291, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.3167259786476868, + "philosophy": 0.4290322580645161, + "professional_medicine": 0.2730627306273063, + "nutrition": 0.3639344262295082, + "global_facts": 0.32323232323232326, + "machine_learning": 0.2972972972972973, + "security_studies": 0.3975409836065574, + "public_relations": 0.44036697247706424, + "professional_psychology": 0.4026186579378069, + "prehistory": 0.43962848297213625, + "anatomy": 0.41044776119402987, + "human_sexuality": 0.45384615384615384, + "college_medicine": 0.31976744186046513, + "high_school_government_and_politics": 0.4947916666666667, + "college_chemistry": 0.29292929292929293, + "logical_fallacies": 0.43209876543209874, + "high_school_geography": 0.4365482233502538, + "elementary_mathematics": 0.246684350132626, + "human_aging": 0.4144144144144144, + "college_mathematics": 0.3333333333333333, + "high_school_psychology": 0.4430147058823529, + "formal_logic": 0.312, + "high_school_statistics": 0.19534883720930232, + "international_law": 0.6083333333333333, + "high_school_mathematics": 0.24907063197026022, + "high_school_computer_science": 0.3838383838383838, + "conceptual_physics": 0.32905982905982906, + "miscellaneous": 0.47570332480818417, + "high_school_chemistry": 0.19306930693069307, + "marketing": 0.6008583690987125, + "professional_law": 0.33985649054142203, + "management": 0.37254901960784315, + "college_physics": 0.19801980198019803, + "jurisprudence": 0.45794392523364486, + "world_religions": 0.5588235294117647, + "sociology": 0.485, + "us_foreign_policy": 0.5656565656565656, + "high_school_macroeconomics": 0.33676092544987146, + "computer_security": 0.494949494949495, + "moral_scenarios": 0.23825503355704697, + "moral_disputes": 0.37681159420289856, + "electrical_engineering": 0.3402777777777778, + "astronomy": 0.304635761589404, + "college_biology": 0.4125874125874126 + } + }, + "prompt_4": { + "accuracy": 0.3363603861279943, + "category_acc": { + "high_school_european_history": 0.49390243902439024, + "business_ethics": 0.5151515151515151, + "clinical_knowledge": 0.32954545454545453, + "medical_genetics": 0.36363636363636365, + "high_school_us_history": 0.458128078817734, + "high_school_physics": 0.21333333333333335, + "high_school_world_history": 0.4322033898305085, + "virology": 0.36363636363636365, + "high_school_microeconomics": 0.29957805907172996, + "econometrics": 0.23008849557522124, + "college_computer_science": 0.32323232323232326, + "high_school_biology": 0.3268608414239482, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.2918149466192171, + "philosophy": 0.35161290322580646, + "professional_medicine": 0.2177121771217712, + "nutrition": 0.3475409836065574, + "global_facts": 0.2222222222222222, + "machine_learning": 0.32432432432432434, + "security_studies": 0.30327868852459017, + "public_relations": 0.3577981651376147, + "professional_psychology": 0.35515548281505727, + "prehistory": 0.3622291021671827, + "anatomy": 0.35074626865671643, + "human_sexuality": 0.4, + "college_medicine": 0.3023255813953488, + "high_school_government_and_politics": 0.4739583333333333, + "college_chemistry": 0.29292929292929293, + "logical_fallacies": 0.345679012345679, + "high_school_geography": 0.3756345177664975, + "elementary_mathematics": 0.22281167108753316, + "human_aging": 0.40540540540540543, + "college_mathematics": 0.2222222222222222, + "high_school_psychology": 0.41360294117647056, + "formal_logic": 0.296, + "high_school_statistics": 0.19069767441860466, + "international_law": 0.4666666666666667, + "high_school_mathematics": 0.241635687732342, + "high_school_computer_science": 0.2727272727272727, + "conceptual_physics": 0.33760683760683763, + "miscellaneous": 0.4578005115089514, + "high_school_chemistry": 0.21287128712871287, + "marketing": 0.5364806866952789, + "professional_law": 0.26810176125244617, + "management": 0.39215686274509803, + "college_physics": 0.25742574257425743, + "jurisprudence": 0.4392523364485981, + "world_religions": 0.5176470588235295, + "sociology": 0.445, + "us_foreign_policy": 0.5050505050505051, + "high_school_macroeconomics": 0.30077120822622105, + "computer_security": 0.43434343434343436, + "moral_scenarios": 0.24384787472035793, + "moral_disputes": 0.32753623188405795, + "electrical_engineering": 0.3055555555555556, + "astronomy": 0.32450331125827814, + "college_biology": 0.3986013986013986 + } + }, + "prompt_5": { + "accuracy": 0.35437969252770823, + "category_acc": { + "high_school_european_history": 0.5, + "business_ethics": 0.48484848484848486, + "clinical_knowledge": 0.3371212121212121, + "medical_genetics": 0.3838383838383838, + "high_school_us_history": 0.5123152709359606, + "high_school_physics": 0.22666666666666666, + "high_school_world_history": 0.5211864406779662, + "virology": 0.3515151515151515, + "high_school_microeconomics": 0.3206751054852321, + "econometrics": 0.20353982300884957, + "college_computer_science": 0.36363636363636365, + "high_school_biology": 0.343042071197411, + "abstract_algebra": 0.25252525252525254, + "professional_accounting": 0.2846975088967972, + "philosophy": 0.33225806451612905, + "professional_medicine": 0.26199261992619927, + "nutrition": 0.3475409836065574, + "global_facts": 0.2727272727272727, + "machine_learning": 0.3153153153153153, + "security_studies": 0.3442622950819672, + "public_relations": 0.42201834862385323, + "professional_psychology": 0.3780687397708674, + "prehistory": 0.3931888544891641, + "anatomy": 0.3582089552238806, + "human_sexuality": 0.4, + "college_medicine": 0.31976744186046513, + "high_school_government_and_politics": 0.4479166666666667, + "college_chemistry": 0.31313131313131315, + "logical_fallacies": 0.3888888888888889, + "high_school_geography": 0.40609137055837563, + "elementary_mathematics": 0.2519893899204244, + "human_aging": 0.40540540540540543, + "college_mathematics": 0.3333333333333333, + "high_school_psychology": 0.4375, + "formal_logic": 0.344, + "high_school_statistics": 0.20930232558139536, + "international_law": 0.525, + "high_school_mathematics": 0.22304832713754646, + "high_school_computer_science": 0.3333333333333333, + "conceptual_physics": 0.3034188034188034, + "miscellaneous": 0.46930946291560105, + "high_school_chemistry": 0.1782178217821782, + "marketing": 0.5793991416309013, + "professional_law": 0.32159165035877363, + "management": 0.38235294117647056, + "college_physics": 0.21782178217821782, + "jurisprudence": 0.411214953271028, + "world_religions": 0.5352941176470588, + "sociology": 0.47, + "us_foreign_policy": 0.5454545454545454, + "high_school_macroeconomics": 0.2853470437017995, + "computer_security": 0.47474747474747475, + "moral_scenarios": 0.2539149888143177, + "moral_disputes": 0.34782608695652173, + "electrical_engineering": 0.2638888888888889, + "astronomy": 0.31125827814569534, + "college_biology": 0.3706293706293706 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2912332838038633 + }, + "prompt_2": { + "accuracy": 0.27191679049034173 + }, + "prompt_3": { + "accuracy": 0.2563150074294205 + }, + "prompt_4": { + "accuracy": 0.2786032689450223 + }, + "prompt_5": { + "accuracy": 0.2726597325408618 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2889165628891656, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.3333333333333333, + "college_physics": 0.25, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.3103448275862069, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.25, + "high_school_chemistry": 0.125, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.25, + "college_economics": 0.35, + "business_administration": 0.3684210526315789, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.32653061224489793, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.25, + "middle_school_politics": 0.4230769230769231, + "middle_school_geography": 0.17647058823529413, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.25, + "logic": 0.4444444444444444, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.25, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.28, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.21153846153846154, + "sports_science": 0.2916666666666667, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.125, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.3333333333333333, + "accountant": 0.3888888888888889, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.14814814814814814, + "physician": 0.24074074074074073 + } + }, + "prompt_2": { + "accuracy": 0.25965130759651306, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.2857142857142857, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.10344827586206896, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.1724137931034483, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.375, + "high_school_chemistry": 0.16666666666666666, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.19230769230769232, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.35, + "business_administration": 0.2894736842105263, + "marxism": 0.20833333333333334, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.2857142857142857, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.17647058823529413, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.3333333333333333, + "logic": 0.4074074074074074, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.21428571428571427, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.16666666666666666, + "high_school_history": 0.28, + "middle_school_history": 0.2222222222222222, + "civil_servant": 0.19230769230769232, + "sports_science": 0.20833333333333334, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.25, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.25925925925925924, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.2222222222222222, + "physician": 0.24074074074074073 + } + }, + "prompt_3": { + "accuracy": 0.27023661270236615, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.20833333333333334, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.2619047619047619, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.1724137931034483, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.375, + "high_school_chemistry": 0.16666666666666666, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.2692307692307692, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.12, + "veterinary_medicine": 0.25, + "college_economics": 0.3333333333333333, + "business_administration": 0.34210526315789475, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.3793103448275862, + "education_science": 0.29411764705882354, + "teacher_qualification": 0.32653061224489793, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.25, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.11764705882352941, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.3333333333333333, + "logic": 0.4074074074074074, + "law": 0.1724137931034483, + "chinese_language_and_literature": 0.21428571428571427, + "art_studies": 0.39473684210526316, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.25, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.28, + "middle_school_history": 0.2222222222222222, + "civil_servant": 0.19230769230769232, + "sports_science": 0.20833333333333334, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.25, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.37254901960784315, + "accountant": 0.24074074074074073, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.19444444444444445, + "tax_accountant": 0.18518518518518517, + "physician": 0.2037037037037037 + } + }, + "prompt_4": { + "accuracy": 0.2777085927770859, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.25, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.23809523809523808, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.1724137931034483, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.10344827586206896, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.25, + "high_school_chemistry": 0.16666666666666666, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.19230769230769232, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.35, + "business_administration": 0.39473684210526316, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.27586206896551724, + "education_science": 0.35294117647058826, + "teacher_qualification": 0.30612244897959184, + "high_school_politics": 0.25, + "high_school_geography": 0.25, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.3333333333333333, + "logic": 0.3333333333333333, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.25, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.25, + "high_school_history": 0.24, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.19230769230769232, + "sports_science": 0.2916666666666667, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.20833333333333334, + "clinical_medicine": 0.14814814814814814, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.3148148148148148, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.2222222222222222, + "physician": 0.2777777777777778 + } + }, + "prompt_5": { + "accuracy": 0.25840597758405975, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.16666666666666666, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.2619047619047619, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.10344827586206896, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.25, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.19230769230769232, + "middle_school_physics": 0.16666666666666666, + "middle_school_chemistry": 0.12, + "veterinary_medicine": 0.25, + "college_economics": 0.36666666666666664, + "business_administration": 0.23684210526315788, + "marxism": 0.25, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.29411764705882354, + "teacher_qualification": 0.2857142857142857, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.08333333333333333, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.17647058823529413, + "modern_chinese_history": 0.39285714285714285, + "ideological_and_moral_cultivation": 0.3333333333333333, + "logic": 0.37037037037037035, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.21428571428571427, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.14285714285714285, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.24, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.2692307692307692, + "sports_science": 0.16666666666666666, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.16666666666666666, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.24074074074074073, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.1388888888888889, + "tax_accountant": 0.16666666666666666, + "physician": 0.2777777777777778 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.32616487455197135 + }, + "prompt_2": { + "accuracy": 0.30824372759856633 + }, + "prompt_3": { + "accuracy": 0.30824372759856633 + }, + "prompt_4": { + "accuracy": 0.27956989247311825 + }, + "prompt_5": { + "accuracy": 0.3046594982078853 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2848385425660508, + "category_acc": { + "agronomy": 0.2781065088757396, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.2865853658536585, + "arts": 0.24375, + "astronomy": 0.26666666666666666, + "business_ethics": 0.27751196172248804, + "chinese_civil_service_exam": 0.2875, + "chinese_driving_rule": 0.33587786259541985, + "chinese_food_culture": 0.25, + "chinese_foreign_policy": 0.27102803738317754, + "chinese_history": 0.2848297213622291, + "chinese_literature": 0.28431372549019607, + "chinese_teacher_qualification": 0.2681564245810056, + "clinical_knowledge": 0.25738396624472576, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.2616822429906542, + "college_engineering_hydrology": 0.3113207547169811, + "college_law": 0.25, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.3113207547169811, + "college_medicine": 0.2564102564102564, + "computer_science": 0.29411764705882354, + "computer_security": 0.3216374269005848, + "conceptual_physics": 0.3673469387755102, + "construction_project_management": 0.28776978417266186, + "economics": 0.2893081761006289, + "education": 0.26993865030674846, + "electrical_engineering": 0.3081395348837209, + "elementary_chinese": 0.2777777777777778, + "elementary_commonsense": 0.26262626262626265, + "elementary_information_and_technology": 0.2857142857142857, + "elementary_mathematics": 0.29130434782608694, + "ethnology": 0.31851851851851853, + "food_science": 0.34965034965034963, + "genetics": 0.26704545454545453, + "global_facts": 0.3087248322147651, + "high_school_biology": 0.2603550295857988, + "high_school_chemistry": 0.26515151515151514, + "high_school_geography": 0.2796610169491525, + "high_school_mathematics": 0.25, + "high_school_physics": 0.3, + "high_school_politics": 0.25874125874125875, + "human_sexuality": 0.2777777777777778, + "international_law": 0.2810810810810811, + "journalism": 0.31976744186046513, + "jurisprudence": 0.31143552311435524, + "legal_and_moral_basis": 0.397196261682243, + "logical": 0.3008130081300813, + "machine_learning": 0.3114754098360656, + "management": 0.2857142857142857, + "marketing": 0.24444444444444444, + "marxist_theory": 0.32275132275132273, + "modern_chinese": 0.29310344827586204, + "nutrition": 0.2206896551724138, + "philosophy": 0.34285714285714286, + "professional_accounting": 0.2742857142857143, + "professional_law": 0.2796208530805687, + "professional_medicine": 0.2473404255319149, + "professional_psychology": 0.2543103448275862, + "public_relations": 0.3045977011494253, + "security_study": 0.2814814814814815, + "sociology": 0.3008849557522124, + "sports_science": 0.26666666666666666, + "traditional_chinese_medicine": 0.25405405405405407, + "virology": 0.31952662721893493, + "world_history": 0.2981366459627329, + "world_religions": 0.26875 + } + }, + "prompt_2": { + "accuracy": 0.2688654809186669, + "category_acc": { + "agronomy": 0.24260355029585798, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.2682926829268293, + "arts": 0.25625, + "astronomy": 0.26666666666666666, + "business_ethics": 0.24880382775119617, + "chinese_civil_service_exam": 0.25625, + "chinese_driving_rule": 0.31297709923664124, + "chinese_food_culture": 0.25, + "chinese_foreign_policy": 0.2803738317757009, + "chinese_history": 0.2755417956656347, + "chinese_literature": 0.27450980392156865, + "chinese_teacher_qualification": 0.2569832402234637, + "clinical_knowledge": 0.2489451476793249, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.308411214953271, + "college_engineering_hydrology": 0.3113207547169811, + "college_law": 0.25925925925925924, + "college_mathematics": 0.19047619047619047, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.2454212454212454, + "computer_science": 0.2696078431372549, + "computer_security": 0.3157894736842105, + "conceptual_physics": 0.2789115646258503, + "construction_project_management": 0.26618705035971224, + "economics": 0.31446540880503143, + "education": 0.3006134969325153, + "electrical_engineering": 0.26744186046511625, + "elementary_chinese": 0.2619047619047619, + "elementary_commonsense": 0.23232323232323232, + "elementary_information_and_technology": 0.31512605042016806, + "elementary_mathematics": 0.26521739130434785, + "ethnology": 0.2814814814814815, + "food_science": 0.3006993006993007, + "genetics": 0.24431818181818182, + "global_facts": 0.2953020134228188, + "high_school_biology": 0.24260355029585798, + "high_school_chemistry": 0.25757575757575757, + "high_school_geography": 0.2711864406779661, + "high_school_mathematics": 0.25, + "high_school_physics": 0.2818181818181818, + "high_school_politics": 0.25874125874125875, + "human_sexuality": 0.30158730158730157, + "international_law": 0.2810810810810811, + "journalism": 0.25, + "jurisprudence": 0.2944038929440389, + "legal_and_moral_basis": 0.32242990654205606, + "logical": 0.2764227642276423, + "machine_learning": 0.32786885245901637, + "management": 0.2523809523809524, + "marketing": 0.24444444444444444, + "marxist_theory": 0.30158730158730157, + "modern_chinese": 0.29310344827586204, + "nutrition": 0.2620689655172414, + "philosophy": 0.2761904761904762, + "professional_accounting": 0.25142857142857145, + "professional_law": 0.24644549763033174, + "professional_medicine": 0.24468085106382978, + "professional_psychology": 0.24568965517241378, + "public_relations": 0.26436781609195403, + "security_study": 0.2814814814814815, + "sociology": 0.2743362831858407, + "sports_science": 0.2545454545454545, + "traditional_chinese_medicine": 0.2648648648648649, + "virology": 0.2781065088757396, + "world_history": 0.2360248447204969, + "world_religions": 0.25 + } + }, + "prompt_3": { + "accuracy": 0.27646347781039543, + "category_acc": { + "agronomy": 0.27218934911242604, + "anatomy": 0.22972972972972974, + "ancient_chinese": 0.25, + "arts": 0.25, + "astronomy": 0.24848484848484848, + "business_ethics": 0.28708133971291866, + "chinese_civil_service_exam": 0.29375, + "chinese_driving_rule": 0.32061068702290074, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.29906542056074764, + "chinese_history": 0.26625386996904027, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.25139664804469275, + "clinical_knowledge": 0.25738396624472576, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.2803738317757009, + "college_engineering_hydrology": 0.29245283018867924, + "college_law": 0.25, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.3113207547169811, + "college_medicine": 0.27472527472527475, + "computer_science": 0.28431372549019607, + "computer_security": 0.30409356725146197, + "conceptual_physics": 0.3469387755102041, + "construction_project_management": 0.2302158273381295, + "economics": 0.3270440251572327, + "education": 0.27607361963190186, + "electrical_engineering": 0.3023255813953488, + "elementary_chinese": 0.26587301587301587, + "elementary_commonsense": 0.23737373737373738, + "elementary_information_and_technology": 0.2857142857142857, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.32592592592592595, + "food_science": 0.32167832167832167, + "genetics": 0.23295454545454544, + "global_facts": 0.3087248322147651, + "high_school_biology": 0.24260355029585798, + "high_school_chemistry": 0.30303030303030304, + "high_school_geography": 0.2711864406779661, + "high_school_mathematics": 0.23780487804878048, + "high_school_physics": 0.2818181818181818, + "high_school_politics": 0.3006993006993007, + "human_sexuality": 0.2698412698412698, + "international_law": 0.2756756756756757, + "journalism": 0.27906976744186046, + "jurisprudence": 0.3236009732360097, + "legal_and_moral_basis": 0.35046728971962615, + "logical": 0.2845528455284553, + "machine_learning": 0.3114754098360656, + "management": 0.24761904761904763, + "marketing": 0.28888888888888886, + "marxist_theory": 0.328042328042328, + "modern_chinese": 0.2672413793103448, + "nutrition": 0.2482758620689655, + "philosophy": 0.2571428571428571, + "professional_accounting": 0.2742857142857143, + "professional_law": 0.25118483412322273, + "professional_medicine": 0.2473404255319149, + "professional_psychology": 0.2629310344827586, + "public_relations": 0.3103448275862069, + "security_study": 0.26666666666666666, + "sociology": 0.2831858407079646, + "sports_science": 0.2545454545454545, + "traditional_chinese_medicine": 0.25405405405405407, + "virology": 0.3136094674556213, + "world_history": 0.2608695652173913, + "world_religions": 0.25625 + } + }, + "prompt_4": { + "accuracy": 0.28155758936280434, + "category_acc": { + "agronomy": 0.2485207100591716, + "anatomy": 0.21621621621621623, + "ancient_chinese": 0.2621951219512195, + "arts": 0.26875, + "astronomy": 0.2606060606060606, + "business_ethics": 0.2631578947368421, + "chinese_civil_service_exam": 0.28125, + "chinese_driving_rule": 0.3053435114503817, + "chinese_food_culture": 0.2647058823529412, + "chinese_foreign_policy": 0.21495327102803738, + "chinese_history": 0.25386996904024767, + "chinese_literature": 0.25980392156862747, + "chinese_teacher_qualification": 0.24581005586592178, + "clinical_knowledge": 0.24472573839662448, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.2803738317757009, + "college_engineering_hydrology": 0.3584905660377358, + "college_law": 0.26851851851851855, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.330188679245283, + "college_medicine": 0.26373626373626374, + "computer_science": 0.28921568627450983, + "computer_security": 0.30994152046783624, + "conceptual_physics": 0.35374149659863946, + "construction_project_management": 0.2589928057553957, + "economics": 0.32075471698113206, + "education": 0.27607361963190186, + "electrical_engineering": 0.29651162790697677, + "elementary_chinese": 0.2777777777777778, + "elementary_commonsense": 0.2474747474747475, + "elementary_information_and_technology": 0.29831932773109243, + "elementary_mathematics": 0.28695652173913044, + "ethnology": 0.3037037037037037, + "food_science": 0.3146853146853147, + "genetics": 0.2727272727272727, + "global_facts": 0.3087248322147651, + "high_school_biology": 0.2485207100591716, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.2796610169491525, + "high_school_mathematics": 0.23780487804878048, + "high_school_physics": 0.3181818181818182, + "high_school_politics": 0.22377622377622378, + "human_sexuality": 0.373015873015873, + "international_law": 0.2756756756756757, + "journalism": 0.27906976744186046, + "jurisprudence": 0.2798053527980535, + "legal_and_moral_basis": 0.37383177570093457, + "logical": 0.3008130081300813, + "machine_learning": 0.2786885245901639, + "management": 0.28095238095238095, + "marketing": 0.2777777777777778, + "marxist_theory": 0.3333333333333333, + "modern_chinese": 0.3017241379310345, + "nutrition": 0.2689655172413793, + "philosophy": 0.37142857142857144, + "professional_accounting": 0.2742857142857143, + "professional_law": 0.26540284360189575, + "professional_medicine": 0.2579787234042553, + "professional_psychology": 0.30603448275862066, + "public_relations": 0.26436781609195403, + "security_study": 0.2740740740740741, + "sociology": 0.3230088495575221, + "sports_science": 0.26666666666666666, + "traditional_chinese_medicine": 0.2594594594594595, + "virology": 0.3254437869822485, + "world_history": 0.2919254658385093, + "world_religions": 0.30625 + } + }, + "prompt_5": { + "accuracy": 0.2725781384907615, + "category_acc": { + "agronomy": 0.27218934911242604, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.2621951219512195, + "arts": 0.2625, + "astronomy": 0.24242424242424243, + "business_ethics": 0.291866028708134, + "chinese_civil_service_exam": 0.2625, + "chinese_driving_rule": 0.2900763358778626, + "chinese_food_culture": 0.25, + "chinese_foreign_policy": 0.29906542056074764, + "chinese_history": 0.2786377708978328, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.26256983240223464, + "clinical_knowledge": 0.25316455696202533, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.2616822429906542, + "college_engineering_hydrology": 0.3113207547169811, + "college_law": 0.2037037037037037, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.32075471698113206, + "college_medicine": 0.25274725274725274, + "computer_science": 0.28431372549019607, + "computer_security": 0.2982456140350877, + "conceptual_physics": 0.30612244897959184, + "construction_project_management": 0.2517985611510791, + "economics": 0.3018867924528302, + "education": 0.3006134969325153, + "electrical_engineering": 0.2441860465116279, + "elementary_chinese": 0.2698412698412698, + "elementary_commonsense": 0.21717171717171718, + "elementary_information_and_technology": 0.3235294117647059, + "elementary_mathematics": 0.2826086956521739, + "ethnology": 0.3037037037037037, + "food_science": 0.2867132867132867, + "genetics": 0.26704545454545453, + "global_facts": 0.31543624161073824, + "high_school_biology": 0.23668639053254437, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.3220338983050847, + "high_school_mathematics": 0.23170731707317074, + "high_school_physics": 0.3, + "high_school_politics": 0.2937062937062937, + "human_sexuality": 0.30158730158730157, + "international_law": 0.2810810810810811, + "journalism": 0.25, + "jurisprudence": 0.2773722627737226, + "legal_and_moral_basis": 0.308411214953271, + "logical": 0.3008130081300813, + "machine_learning": 0.29508196721311475, + "management": 0.2857142857142857, + "marketing": 0.26666666666666666, + "marxist_theory": 0.2962962962962963, + "modern_chinese": 0.2672413793103448, + "nutrition": 0.2620689655172414, + "philosophy": 0.2857142857142857, + "professional_accounting": 0.24571428571428572, + "professional_law": 0.24170616113744076, + "professional_medicine": 0.2553191489361702, + "professional_psychology": 0.2629310344827586, + "public_relations": 0.28735632183908044, + "security_study": 0.24444444444444444, + "sociology": 0.27876106194690264, + "sports_science": 0.2606060606060606, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.2958579881656805, + "world_history": 0.2919254658385093, + "world_religions": 0.25625 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24242424242424243 + }, + "prompt_2": { + "accuracy": 0.2727272727272727 + }, + "prompt_3": { + "accuracy": 0.2727272727272727 + }, + "prompt_4": { + "accuracy": 0.24242424242424243 + }, + "prompt_5": { + "accuracy": 0.24242424242424243 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.17727272727272728 + }, + "prompt_2": { + "accuracy": 0.17727272727272728 + }, + "prompt_3": { + "accuracy": 0.17272727272727273 + }, + "prompt_4": { + "accuracy": 0.18181818181818182 + }, + "prompt_5": { + "accuracy": 0.175 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.33796610169491526 + }, + "prompt_2": { + "accuracy": 0.3396610169491525 + }, + "prompt_3": { + "accuracy": 0.33016949152542374 + }, + "prompt_4": { + "accuracy": 0.3305084745762712 + }, + "prompt_5": { + "accuracy": 0.33864406779661016 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.42969334330590875 + }, + "prompt_2": { + "accuracy": 0.3769633507853403 + }, + "prompt_3": { + "accuracy": 0.3511593118922962 + }, + "prompt_4": { + "accuracy": 0.41660433807030667 + }, + "prompt_5": { + "accuracy": 0.43268511593118925 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5854973052425282 + }, + "prompt_2": { + "accuracy": 0.6300832925036747 + }, + "prompt_3": { + "accuracy": 0.6163645271925526 + }, + "prompt_4": { + "accuracy": 0.6148946594806467 + }, + "prompt_5": { + "accuracy": 0.6266536011758942 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.28118448681569536, + "rouge2": 0.09331387602913904, + "rougeL": 0.2138809568322139, + "avg_rouge": 0.19612643989234943 + }, + "prompt_2": { + "rouge1": 0.24552471302516946, + "rouge2": 0.09095585006587337, + "rougeL": 0.18942433566482253, + "avg_rouge": 0.17530163291862177 + }, + "prompt_3": { + "rouge1": 0.24743888885496398, + "rouge2": 0.08474727575295327, + "rougeL": 0.1887867551689575, + "avg_rouge": 0.1736576399256249 + }, + "prompt_4": { + "rouge1": 0.28671673455192875, + "rouge2": 0.09321229706184096, + "rougeL": 0.21608740008314425, + "avg_rouge": 0.19867214389897134 + }, + "prompt_5": { + "rouge1": 0.27268733909259185, + "rouge2": 0.08940982363348364, + "rougeL": 0.20594642927007592, + "avg_rouge": 0.18934786399871714 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2199240504646152, + "rouge2": 0.06060248503052913, + "rougeL": 0.16336037920498936, + "avg_rouge": 0.14796230490004456 + }, + "prompt_2": { + "rouge1": 0.22536703471066732, + "rouge2": 0.06141649842395884, + "rougeL": 0.16734155049930152, + "avg_rouge": 0.15137502787797588 + }, + "prompt_3": { + "rouge1": 0.2222788038678049, + "rouge2": 0.06076147057263186, + "rougeL": 0.16555015921222727, + "avg_rouge": 0.149530144550888 + }, + "prompt_4": { + "rouge1": 0.21505697288969472, + "rouge2": 0.060482585415315926, + "rougeL": 0.15943192555385252, + "avg_rouge": 0.14499049461962107 + }, + "prompt_5": { + "rouge1": 0.22239609469882782, + "rouge2": 0.05979346403703034, + "rougeL": 0.16366890653848695, + "avg_rouge": 0.1486194884247817 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5022935779816514 + }, + "prompt_2": { + "accuracy": 0.6077981651376146 + }, + "prompt_3": { + "accuracy": 0.5263761467889908 + }, + "prompt_4": { + "accuracy": 0.6444954128440367 + }, + "prompt_5": { + "accuracy": 0.5022935779816514 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6490891658676894 + }, + "prompt_2": { + "accuracy": 0.675934803451582 + }, + "prompt_3": { + "accuracy": 0.5915627996164909 + }, + "prompt_4": { + "accuracy": 0.6184084372003835 + }, + "prompt_5": { + "accuracy": 0.6500479386385427 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.475 + }, + "prompt_2": { + "accuracy": 0.4895 + }, + "prompt_3": { + "accuracy": 0.443 + }, + "prompt_4": { + "accuracy": 0.4965 + }, + "prompt_5": { + "accuracy": 0.4965 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3185 + }, + "prompt_2": { + "accuracy": 0.346 + }, + "prompt_3": { + "accuracy": 0.336 + }, + "prompt_4": { + "accuracy": 0.341 + }, + "prompt_5": { + "accuracy": 0.3515 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.503 + }, + "prompt_2": { + "accuracy": 0.478 + }, + "prompt_3": { + "accuracy": 0.484 + }, + "prompt_4": { + "accuracy": 0.5165 + }, + "prompt_5": { + "accuracy": 0.486 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4788732394366197 + }, + "prompt_2": { + "accuracy": 0.43661971830985913 + }, + "prompt_3": { + "accuracy": 0.49295774647887325 + }, + "prompt_4": { + "accuracy": 0.4647887323943662 + }, + "prompt_5": { + "accuracy": 0.4647887323943662 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49097472924187724 + }, + "prompt_2": { + "accuracy": 0.5234657039711191 + }, + "prompt_3": { + "accuracy": 0.5234657039711191 + }, + "prompt_4": { + "accuracy": 0.5306859205776173 + }, + "prompt_5": { + "accuracy": 0.5126353790613718 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.47058823529411764 + }, + "prompt_2": { + "accuracy": 0.6421568627450981 + }, + "prompt_3": { + "accuracy": 0.4681372549019608 + }, + "prompt_4": { + "accuracy": 0.5024509803921569 + }, + "prompt_5": { + "accuracy": 0.6397058823529411 + } } }, "five_shot": { @@ -4060,53 +35775,1733 @@ "model_link": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.4047619047619047, + "language_acc": { + "Malay": 0.36666666666666664, + "English": 0.5266666666666666, + "Vietnamese": 0.37333333333333335, + "Spanish": 0.44, + "Indonesian": 0.38, + "Filipino": 0.3933333333333333, + "Chinese": 0.35333333333333333 + }, + "consistency_score_2": 0.527936507936508, + "consistency_score_3": 0.35295238095238096, + "consistency_score_4": 0.26247619047619053, + "consistency_score_5": 0.206031746031746, + "consistency_score_6": 0.1676190476190476, + "consistency_score_7": 0.14, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.43333333333333335, + "Malay,Vietnamese": 0.54, + "Malay,Spanish": 0.46, + "Malay,Indonesian": 0.5933333333333334, + "Malay,Filipino": 0.52, + "Malay,Chinese": 0.5333333333333333, + "English,Vietnamese": 0.5066666666666667, + "English,Spanish": 0.56, + "English,Indonesian": 0.5133333333333333, + "English,Filipino": 0.5133333333333333, + "English,Chinese": 0.47333333333333333, + "Vietnamese,Spanish": 0.5133333333333333, + "Vietnamese,Indonesian": 0.62, + "Vietnamese,Filipino": 0.4666666666666667, + "Vietnamese,Chinese": 0.56, + "Spanish,Indonesian": 0.5666666666666667, + "Spanish,Filipino": 0.56, + "Spanish,Chinese": 0.5133333333333333, + "Indonesian,Filipino": 0.5266666666666666, + "Indonesian,Chinese": 0.56, + "Filipino,Chinese": 0.5533333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.32, + "Malay,English,Spanish": 0.30666666666666664, + "Malay,English,Indonesian": 0.34, + "Malay,English,Filipino": 0.31333333333333335, + "Malay,English,Chinese": 0.28, + "Malay,Vietnamese,Spanish": 0.34, + "Malay,Vietnamese,Indonesian": 0.44, + "Malay,Vietnamese,Filipino": 0.34, + "Malay,Vietnamese,Chinese": 0.3933333333333333, + "Malay,Spanish,Indonesian": 0.36666666666666664, + "Malay,Spanish,Filipino": 0.3333333333333333, + "Malay,Spanish,Chinese": 0.32, + "Malay,Indonesian,Filipino": 0.37333333333333335, + "Malay,Indonesian,Chinese": 0.3933333333333333, + "Malay,Filipino,Chinese": 0.36, + "English,Vietnamese,Spanish": 0.36, + "English,Vietnamese,Indonesian": 0.37333333333333335, + "English,Vietnamese,Filipino": 0.32, + "English,Vietnamese,Chinese": 0.32666666666666666, + "English,Spanish,Indonesian": 0.38, + "English,Spanish,Filipino": 0.36666666666666664, + "English,Spanish,Chinese": 0.3333333333333333, + "English,Indonesian,Filipino": 0.34, + "English,Indonesian,Chinese": 0.34, + "English,Filipino,Chinese": 0.32, + "Vietnamese,Spanish,Indonesian": 0.3933333333333333, + "Vietnamese,Spanish,Filipino": 0.32666666666666666, + "Vietnamese,Spanish,Chinese": 0.3466666666666667, + "Vietnamese,Indonesian,Filipino": 0.36, + "Vietnamese,Indonesian,Chinese": 0.42, + "Vietnamese,Filipino,Chinese": 0.35333333333333333, + "Spanish,Indonesian,Filipino": 0.37333333333333335, + "Spanish,Indonesian,Chinese": 0.36666666666666664, + "Spanish,Filipino,Chinese": 0.36666666666666664, + "Indonesian,Filipino,Chinese": 0.36666666666666664 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.24666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.2866666666666667, + "Malay,English,Vietnamese,Filipino": 0.23333333333333334, + "Malay,English,Vietnamese,Chinese": 0.23333333333333334, + "Malay,English,Spanish,Indonesian": 0.26, + "Malay,English,Spanish,Filipino": 0.23333333333333334, + "Malay,English,Spanish,Chinese": 0.22666666666666666, + "Malay,English,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Indonesian,Chinese": 0.24666666666666667, + "Malay,English,Filipino,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.3, + "Malay,Vietnamese,Spanish,Filipino": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Chinese": 0.26666666666666666, + "Malay,Vietnamese,Indonesian,Filipino": 0.29333333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.3333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.26666666666666666, + "Malay,Spanish,Indonesian,Filipino": 0.26, + "Malay,Spanish,Indonesian,Chinese": 0.28, + "Malay,Spanish,Filipino,Chinese": 0.24666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.2866666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.29333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.24666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.25333333333333335, + "English,Vietnamese,Indonesian,Filipino": 0.25333333333333335, + "English,Vietnamese,Indonesian,Chinese": 0.28, + "English,Vietnamese,Filipino,Chinese": 0.24666666666666667, + "English,Spanish,Indonesian,Filipino": 0.25333333333333335, + "English,Spanish,Indonesian,Chinese": 0.26666666666666666, + "English,Spanish,Filipino,Chinese": 0.26, + "English,Indonesian,Filipino,Chinese": 0.24, + "Vietnamese,Spanish,Indonesian,Filipino": 0.26666666666666666, + "Vietnamese,Spanish,Indonesian,Chinese": 0.29333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.26666666666666666, + "Vietnamese,Indonesian,Filipino,Chinese": 0.29333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.26666666666666666 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.22666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Chinese": 0.19333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.20666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.22, + "Malay,English,Vietnamese,Filipino,Chinese": 0.17333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Spanish,Indonesian,Chinese": 0.20666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.17333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.18, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.22, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.24, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.2, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.22666666666666666, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.2, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.20666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.16, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.18666666666666668, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14 + } + }, + "AC3_2": 0.45821582542333467, + "AC3_3": 0.37708587703611085, + "AC3_4": 0.31844813284516826, + "AC3_5": 0.27306702302231617, + "AC3_6": 0.2370652087376017, + "AC3_7": 0.2080419580037685 + }, + "prompt_2": { + "overall_acc": 0.40095238095238095, + "language_acc": { + "Malay": 0.3933333333333333, + "English": 0.5466666666666666, + "Vietnamese": 0.32666666666666666, + "Spanish": 0.44, + "Indonesian": 0.4, + "Filipino": 0.3333333333333333, + "Chinese": 0.36666666666666664 + }, + "consistency_score_2": 0.48984126984126986, + "consistency_score_3": 0.31638095238095243, + "consistency_score_4": 0.23066666666666663, + "consistency_score_5": 0.17650793650793647, + "consistency_score_6": 0.13714285714285715, + "consistency_score_7": 0.10666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.4866666666666667, + "Malay,Vietnamese": 0.4666666666666667, + "Malay,Spanish": 0.4666666666666667, + "Malay,Indonesian": 0.5066666666666667, + "Malay,Filipino": 0.49333333333333335, + "Malay,Chinese": 0.4866666666666667, + "English,Vietnamese": 0.4533333333333333, + "English,Spanish": 0.5333333333333333, + "English,Indonesian": 0.4866666666666667, + "English,Filipino": 0.48, + "English,Chinese": 0.43333333333333335, + "Vietnamese,Spanish": 0.4066666666666667, + "Vietnamese,Indonesian": 0.52, + "Vietnamese,Filipino": 0.48, + "Vietnamese,Chinese": 0.47333333333333333, + "Spanish,Indonesian": 0.5466666666666666, + "Spanish,Filipino": 0.46, + "Spanish,Chinese": 0.49333333333333335, + "Indonesian,Filipino": 0.5333333333333333, + "Indonesian,Chinese": 0.5533333333333333, + "Filipino,Chinese": 0.5266666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.29333333333333333, + "Malay,English,Spanish": 0.32666666666666666, + "Malay,English,Indonesian": 0.3333333333333333, + "Malay,English,Filipino": 0.32, + "Malay,English,Chinese": 0.29333333333333333, + "Malay,Vietnamese,Spanish": 0.26, + "Malay,Vietnamese,Indonesian": 0.3333333333333333, + "Malay,Vietnamese,Filipino": 0.30666666666666664, + "Malay,Vietnamese,Chinese": 0.30666666666666664, + "Malay,Spanish,Indonesian": 0.32666666666666666, + "Malay,Spanish,Filipino": 0.3, + "Malay,Spanish,Chinese": 0.3, + "Malay,Indonesian,Filipino": 0.35333333333333333, + "Malay,Indonesian,Chinese": 0.36, + "Malay,Filipino,Chinese": 0.3333333333333333, + "English,Vietnamese,Spanish": 0.2733333333333333, + "English,Vietnamese,Indonesian": 0.30666666666666664, + "English,Vietnamese,Filipino": 0.29333333333333333, + "English,Vietnamese,Chinese": 0.29333333333333333, + "English,Spanish,Indonesian": 0.31333333333333335, + "English,Spanish,Filipino": 0.30666666666666664, + "English,Spanish,Chinese": 0.3, + "English,Indonesian,Filipino": 0.3333333333333333, + "English,Indonesian,Chinese": 0.32, + "English,Filipino,Chinese": 0.31333333333333335, + "Vietnamese,Spanish,Indonesian": 0.30666666666666664, + "Vietnamese,Spanish,Filipino": 0.26666666666666666, + "Vietnamese,Spanish,Chinese": 0.2733333333333333, + "Vietnamese,Indonesian,Filipino": 0.32666666666666666, + "Vietnamese,Indonesian,Chinese": 0.34, + "Vietnamese,Filipino,Chinese": 0.30666666666666664, + "Spanish,Indonesian,Filipino": 0.35333333333333333, + "Spanish,Indonesian,Chinese": 0.38, + "Spanish,Filipino,Chinese": 0.34, + "Indonesian,Filipino,Chinese": 0.38 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.18666666666666668, + "Malay,English,Vietnamese,Indonesian": 0.24, + "Malay,English,Vietnamese,Filipino": 0.21333333333333335, + "Malay,English,Vietnamese,Chinese": 0.22, + "Malay,English,Spanish,Indonesian": 0.24, + "Malay,English,Spanish,Filipino": 0.22666666666666666, + "Malay,English,Spanish,Chinese": 0.22, + "Malay,English,Indonesian,Filipino": 0.25333333333333335, + "Malay,English,Indonesian,Chinese": 0.24666666666666667, + "Malay,English,Filipino,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Spanish,Indonesian": 0.22, + "Malay,Vietnamese,Spanish,Filipino": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.2, + "Malay,Vietnamese,Indonesian,Filipino": 0.24666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.26, + "Malay,Vietnamese,Filipino,Chinese": 0.22666666666666666, + "Malay,Spanish,Indonesian,Filipino": 0.24666666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.25333333333333335, + "Malay,Spanish,Filipino,Chinese": 0.23333333333333334, + "Malay,Indonesian,Filipino,Chinese": 0.28, + "English,Vietnamese,Spanish,Indonesian": 0.20666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.18666666666666668, + "English,Vietnamese,Spanish,Chinese": 0.2, + "English,Vietnamese,Indonesian,Filipino": 0.22, + "English,Vietnamese,Indonesian,Chinese": 0.23333333333333334, + "English,Vietnamese,Filipino,Chinese": 0.22, + "English,Spanish,Indonesian,Filipino": 0.22666666666666666, + "English,Spanish,Indonesian,Chinese": 0.24, + "English,Spanish,Filipino,Chinese": 0.24, + "English,Indonesian,Filipino,Chinese": 0.25333333333333335, + "Vietnamese,Spanish,Indonesian,Filipino": 0.22, + "Vietnamese,Spanish,Indonesian,Chinese": 0.23333333333333334, + "Vietnamese,Spanish,Filipino,Chinese": 0.21333333333333335, + "Vietnamese,Indonesian,Filipino,Chinese": 0.25333333333333335, + "Spanish,Indonesian,Filipino,Chinese": 0.2866666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.16, + "Malay,English,Vietnamese,Spanish,Filipino": 0.14, + "Malay,English,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.18, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.19333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.16666666666666666, + "Malay,English,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Malay,English,Spanish,Filipino,Chinese": 0.18, + "Malay,English,Indonesian,Filipino,Chinese": 0.2, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.18, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.16, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.20666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.16666666666666666, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.16, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.18666666666666668, + "English,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.18666666666666668 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.12, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.12, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + } + }, + "AC3_2": 0.4409618843478776, + "AC3_3": 0.3536813089055037, + "AC3_4": 0.29285484514864835, + "AC3_5": 0.24511217566895943, + "AC3_6": 0.2043792667129662, + "AC3_7": 0.16850531578919375 + }, + "prompt_3": { + "overall_acc": 0.37238095238095237, + "language_acc": { + "Malay": 0.28, + "English": 0.5, + "Vietnamese": 0.36, + "Spanish": 0.4, + "Indonesian": 0.37333333333333335, + "Filipino": 0.31333333333333335, + "Chinese": 0.38 + }, + "consistency_score_2": 0.43968253968253956, + "consistency_score_3": 0.24971428571428578, + "consistency_score_4": 0.16190476190476194, + "consistency_score_5": 0.11174603174603175, + "consistency_score_6": 0.08, + "consistency_score_7": 0.06, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.3933333333333333, + "Malay,Vietnamese": 0.42, + "Malay,Spanish": 0.43333333333333335, + "Malay,Indonesian": 0.44, + "Malay,Filipino": 0.44, + "Malay,Chinese": 0.4, + "English,Vietnamese": 0.44666666666666666, + "English,Spanish": 0.5333333333333333, + "English,Indonesian": 0.44, + "English,Filipino": 0.48, + "English,Chinese": 0.37333333333333335, + "Vietnamese,Spanish": 0.47333333333333333, + "Vietnamese,Indonesian": 0.5333333333333333, + "Vietnamese,Filipino": 0.46, + "Vietnamese,Chinese": 0.38666666666666666, + "Spanish,Indonesian": 0.5266666666666666, + "Spanish,Filipino": 0.49333333333333335, + "Spanish,Chinese": 0.37333333333333335, + "Indonesian,Filipino": 0.4533333333333333, + "Indonesian,Chinese": 0.36, + "Filipino,Chinese": 0.37333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.24, + "Malay,English,Spanish": 0.24666666666666667, + "Malay,English,Indonesian": 0.22, + "Malay,English,Filipino": 0.24, + "Malay,English,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Spanish": 0.25333333333333335, + "Malay,Vietnamese,Indonesian": 0.3, + "Malay,Vietnamese,Filipino": 0.24, + "Malay,Vietnamese,Chinese": 0.20666666666666667, + "Malay,Spanish,Indonesian": 0.2866666666666667, + "Malay,Spanish,Filipino": 0.26, + "Malay,Spanish,Chinese": 0.19333333333333333, + "Malay,Indonesian,Filipino": 0.24, + "Malay,Indonesian,Chinese": 0.20666666666666667, + "Malay,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish": 0.31333333333333335, + "English,Vietnamese,Indonesian": 0.29333333333333333, + "English,Vietnamese,Filipino": 0.28, + "English,Vietnamese,Chinese": 0.20666666666666667, + "English,Spanish,Indonesian": 0.30666666666666664, + "English,Spanish,Filipino": 0.3, + "English,Spanish,Chinese": 0.24666666666666667, + "English,Indonesian,Filipino": 0.2733333333333333, + "English,Indonesian,Chinese": 0.2, + "English,Filipino,Chinese": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian": 0.34, + "Vietnamese,Spanish,Filipino": 0.29333333333333333, + "Vietnamese,Spanish,Chinese": 0.22, + "Vietnamese,Indonesian,Filipino": 0.30666666666666664, + "Vietnamese,Indonesian,Chinese": 0.24666666666666667, + "Vietnamese,Filipino,Chinese": 0.22666666666666666, + "Spanish,Indonesian,Filipino": 0.29333333333333333, + "Spanish,Indonesian,Chinese": 0.22666666666666666, + "Spanish,Filipino,Chinese": 0.22, + "Indonesian,Filipino,Chinese": 0.18666666666666668 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.17333333333333334, + "Malay,English,Vietnamese,Indonesian": 0.16666666666666666, + "Malay,English,Vietnamese,Filipino": 0.16666666666666666, + "Malay,English,Vietnamese,Chinese": 0.13333333333333333, + "Malay,English,Spanish,Indonesian": 0.16666666666666666, + "Malay,English,Spanish,Filipino": 0.16, + "Malay,English,Spanish,Chinese": 0.13333333333333333, + "Malay,English,Indonesian,Filipino": 0.14666666666666667, + "Malay,English,Indonesian,Chinese": 0.12666666666666668, + "Malay,English,Filipino,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Indonesian": 0.18, + "Malay,Vietnamese,Spanish,Filipino": 0.16666666666666666, + "Malay,Vietnamese,Spanish,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.18, + "Malay,Vietnamese,Indonesian,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Filipino,Chinese": 0.15333333333333332, + "Malay,Spanish,Indonesian,Filipino": 0.17333333333333334, + "Malay,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Malay,Spanish,Filipino,Chinese": 0.13333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.22666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.21333333333333335, + "English,Vietnamese,Spanish,Chinese": 0.16, + "English,Vietnamese,Indonesian,Filipino": 0.20666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.15333333333333332, + "English,Vietnamese,Filipino,Chinese": 0.15333333333333332, + "English,Spanish,Indonesian,Filipino": 0.2, + "English,Spanish,Indonesian,Chinese": 0.14666666666666667, + "English,Spanish,Filipino,Chinese": 0.16666666666666666, + "English,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.22, + "Vietnamese,Spanish,Indonesian,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.12, + "Malay,English,Vietnamese,Spanish,Filipino": 0.12, + "Malay,English,Vietnamese,Spanish,Chinese": 0.1, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.12, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.1, + "Malay,English,Vietnamese,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.11333333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.1, + "Malay,English,Spanish,Filipino,Chinese": 0.1, + "Malay,English,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.16666666666666666, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.12, + "English,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.08666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.08, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.08, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + } + }, + "AC3_2": 0.4032428608163473, + "AC3_3": 0.29895372228594813, + "AC3_4": 0.22568542564318517, + "AC3_5": 0.17190569863740338, + "AC3_6": 0.13170526312878095, + "AC3_7": 0.10334801759724328 + }, + "prompt_4": { + "overall_acc": 0.3780952380952381, + "language_acc": { + "Malay": 0.3333333333333333, + "English": 0.46, + "Vietnamese": 0.35333333333333333, + "Spanish": 0.42, + "Indonesian": 0.35333333333333333, + "Filipino": 0.3466666666666667, + "Chinese": 0.38 + }, + "consistency_score_2": 0.4933333333333334, + "consistency_score_3": 0.3142857142857142, + "consistency_score_4": 0.23142857142857146, + "consistency_score_5": 0.1853968253968254, + "consistency_score_6": 0.1552380952380952, + "consistency_score_7": 0.13333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.4, + "Malay,Vietnamese": 0.4866666666666667, + "Malay,Spanish": 0.48, + "Malay,Indonesian": 0.5266666666666666, + "Malay,Filipino": 0.49333333333333335, + "Malay,Chinese": 0.46, + "English,Vietnamese": 0.5133333333333333, + "English,Spanish": 0.5666666666666667, + "English,Indonesian": 0.4666666666666667, + "English,Filipino": 0.47333333333333333, + "English,Chinese": 0.46, + "Vietnamese,Spanish": 0.49333333333333335, + "Vietnamese,Indonesian": 0.5533333333333333, + "Vietnamese,Filipino": 0.49333333333333335, + "Vietnamese,Chinese": 0.5733333333333334, + "Spanish,Indonesian": 0.52, + "Spanish,Filipino": 0.44, + "Spanish,Chinese": 0.49333333333333335, + "Indonesian,Filipino": 0.4666666666666667, + "Indonesian,Chinese": 0.52, + "Filipino,Chinese": 0.48 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.28, + "Malay,English,Spanish": 0.28, + "Malay,English,Indonesian": 0.2733333333333333, + "Malay,English,Filipino": 0.25333333333333335, + "Malay,English,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Spanish": 0.31333333333333335, + "Malay,Vietnamese,Indonesian": 0.34, + "Malay,Vietnamese,Filipino": 0.31333333333333335, + "Malay,Vietnamese,Chinese": 0.34, + "Malay,Spanish,Indonesian": 0.32666666666666666, + "Malay,Spanish,Filipino": 0.28, + "Malay,Spanish,Chinese": 0.29333333333333333, + "Malay,Indonesian,Filipino": 0.31333333333333335, + "Malay,Indonesian,Chinese": 0.34, + "Malay,Filipino,Chinese": 0.3, + "English,Vietnamese,Spanish": 0.36, + "English,Vietnamese,Indonesian": 0.32666666666666666, + "English,Vietnamese,Filipino": 0.32666666666666666, + "English,Vietnamese,Chinese": 0.35333333333333333, + "English,Spanish,Indonesian": 0.3466666666666667, + "English,Spanish,Filipino": 0.30666666666666664, + "English,Spanish,Chinese": 0.32, + "English,Indonesian,Filipino": 0.2733333333333333, + "English,Indonesian,Chinese": 0.29333333333333333, + "English,Filipino,Chinese": 0.29333333333333333, + "Vietnamese,Spanish,Indonesian": 0.3466666666666667, + "Vietnamese,Spanish,Filipino": 0.29333333333333333, + "Vietnamese,Spanish,Chinese": 0.35333333333333333, + "Vietnamese,Indonesian,Filipino": 0.3333333333333333, + "Vietnamese,Indonesian,Chinese": 0.4, + "Vietnamese,Filipino,Chinese": 0.3466666666666667, + "Spanish,Indonesian,Filipino": 0.28, + "Spanish,Indonesian,Chinese": 0.3333333333333333, + "Spanish,Filipino,Chinese": 0.29333333333333333, + "Indonesian,Filipino,Chinese": 0.32666666666666666 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.23333333333333334, + "Malay,English,Vietnamese,Indonesian": 0.22, + "Malay,English,Vietnamese,Filipino": 0.21333333333333335, + "Malay,English,Vietnamese,Chinese": 0.23333333333333334, + "Malay,English,Spanish,Indonesian": 0.22, + "Malay,English,Spanish,Filipino": 0.19333333333333333, + "Malay,English,Spanish,Chinese": 0.19333333333333333, + "Malay,English,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Indonesian,Chinese": 0.2, + "Malay,English,Filipino,Chinese": 0.18, + "Malay,Vietnamese,Spanish,Indonesian": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Filipino": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "Malay,Vietnamese,Indonesian,Chinese": 0.2733333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.24, + "Malay,Spanish,Indonesian,Filipino": 0.22, + "Malay,Spanish,Indonesian,Chinese": 0.24, + "Malay,Spanish,Filipino,Chinese": 0.20666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.24666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.26, + "English,Vietnamese,Spanish,Filipino": 0.23333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.2733333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.26, + "English,Vietnamese,Filipino,Chinese": 0.24666666666666667, + "English,Spanish,Indonesian,Filipino": 0.21333333333333335, + "English,Spanish,Indonesian,Chinese": 0.22666666666666666, + "English,Spanish,Filipino,Chinese": 0.22, + "English,Indonesian,Filipino,Chinese": 0.22, + "Vietnamese,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Vietnamese,Spanish,Indonesian,Chinese": 0.2733333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.24666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.2733333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.19333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.18, + "Malay,English,Vietnamese,Spanish,Chinese": 0.19333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.17333333333333334, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.19333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.17333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.16, + "Malay,English,Spanish,Indonesian,Chinese": 0.16666666666666666, + "Malay,English,Spanish,Filipino,Chinese": 0.14666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.16, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.20666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.18, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.18666666666666668, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.21333333333333335, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.19333333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.2, + "English,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.16666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + } + }, + "AC3_2": 0.4280947176193626, + "AC3_3": 0.3432501473271195, + "AC3_4": 0.28711607138146644, + "AC3_5": 0.2487973171926011, + "AC3_6": 0.2201054421356009, + "AC3_7": 0.19714463062563586 + }, + "prompt_5": { + "overall_acc": 0.39142857142857146, + "language_acc": { + "Malay": 0.3933333333333333, + "English": 0.5, + "Vietnamese": 0.36666666666666664, + "Spanish": 0.38, + "Indonesian": 0.3933333333333333, + "Filipino": 0.35333333333333333, + "Chinese": 0.35333333333333333 + }, + "consistency_score_2": 0.47714285714285715, + "consistency_score_3": 0.296, + "consistency_score_4": 0.20514285714285707, + "consistency_score_5": 0.1514285714285714, + "consistency_score_6": 0.11904761904761905, + "consistency_score_7": 0.1, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5, + "Malay,Vietnamese": 0.49333333333333335, + "Malay,Spanish": 0.4666666666666667, + "Malay,Indonesian": 0.56, + "Malay,Filipino": 0.44, + "Malay,Chinese": 0.49333333333333335, + "English,Vietnamese": 0.4666666666666667, + "English,Spanish": 0.4866666666666667, + "English,Indonesian": 0.5, + "English,Filipino": 0.46, + "English,Chinese": 0.4666666666666667, + "Vietnamese,Spanish": 0.4266666666666667, + "Vietnamese,Indonesian": 0.49333333333333335, + "Vietnamese,Filipino": 0.4866666666666667, + "Vietnamese,Chinese": 0.48, + "Spanish,Indonesian": 0.47333333333333333, + "Spanish,Filipino": 0.4533333333333333, + "Spanish,Chinese": 0.3933333333333333, + "Indonesian,Filipino": 0.5, + "Indonesian,Chinese": 0.5133333333333333, + "Filipino,Chinese": 0.4666666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.31333333333333335, + "Malay,English,Spanish": 0.3, + "Malay,English,Indonesian": 0.36666666666666664, + "Malay,English,Filipino": 0.28, + "Malay,English,Chinese": 0.2866666666666667, + "Malay,Vietnamese,Spanish": 0.29333333333333333, + "Malay,Vietnamese,Indonesian": 0.34, + "Malay,Vietnamese,Filipino": 0.30666666666666664, + "Malay,Vietnamese,Chinese": 0.31333333333333335, + "Malay,Spanish,Indonesian": 0.32, + "Malay,Spanish,Filipino": 0.26666666666666666, + "Malay,Spanish,Chinese": 0.23333333333333334, + "Malay,Indonesian,Filipino": 0.32, + "Malay,Indonesian,Chinese": 0.3333333333333333, + "Malay,Filipino,Chinese": 0.29333333333333333, + "English,Vietnamese,Spanish": 0.29333333333333333, + "English,Vietnamese,Indonesian": 0.32, + "English,Vietnamese,Filipino": 0.3, + "English,Vietnamese,Chinese": 0.2866666666666667, + "English,Spanish,Indonesian": 0.31333333333333335, + "English,Spanish,Filipino": 0.28, + "English,Spanish,Chinese": 0.24666666666666667, + "English,Indonesian,Filipino": 0.3, + "English,Indonesian,Chinese": 0.3, + "English,Filipino,Chinese": 0.3, + "Vietnamese,Spanish,Indonesian": 0.29333333333333333, + "Vietnamese,Spanish,Filipino": 0.2733333333333333, + "Vietnamese,Spanish,Chinese": 0.24, + "Vietnamese,Indonesian,Filipino": 0.32, + "Vietnamese,Indonesian,Chinese": 0.32666666666666666, + "Vietnamese,Filipino,Chinese": 0.32, + "Spanish,Indonesian,Filipino": 0.2866666666666667, + "Spanish,Indonesian,Chinese": 0.24666666666666667, + "Spanish,Filipino,Chinese": 0.24666666666666667, + "Indonesian,Filipino,Chinese": 0.3 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.22, + "Malay,English,Vietnamese,Indonesian": 0.24, + "Malay,English,Vietnamese,Filipino": 0.20666666666666667, + "Malay,English,Vietnamese,Chinese": 0.2, + "Malay,English,Spanish,Indonesian": 0.24, + "Malay,English,Spanish,Filipino": 0.18666666666666668, + "Malay,English,Spanish,Chinese": 0.16666666666666666, + "Malay,English,Indonesian,Filipino": 0.21333333333333335, + "Malay,English,Indonesian,Chinese": 0.22, + "Malay,English,Filipino,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Indonesian": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Filipino": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.18, + "Malay,Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "Malay,Vietnamese,Indonesian,Chinese": 0.24, + "Malay,Vietnamese,Filipino,Chinese": 0.23333333333333334, + "Malay,Spanish,Indonesian,Filipino": 0.20666666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Malay,Spanish,Filipino,Chinese": 0.16666666666666666, + "Malay,Indonesian,Filipino,Chinese": 0.21333333333333335, + "English,Vietnamese,Spanish,Indonesian": 0.22666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.18666666666666668, + "English,Vietnamese,Spanish,Chinese": 0.19333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.22, + "English,Vietnamese,Filipino,Chinese": 0.22666666666666666, + "English,Spanish,Indonesian,Filipino": 0.2, + "English,Spanish,Indonesian,Chinese": 0.18, + "English,Spanish,Filipino,Chinese": 0.17333333333333334, + "English,Indonesian,Filipino,Chinese": 0.2, + "Vietnamese,Spanish,Indonesian,Filipino": 0.2, + "Vietnamese,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Vietnamese,Spanish,Filipino,Chinese": 0.19333333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.18, + "Malay,English,Vietnamese,Spanish,Filipino": 0.14, + "Malay,English,Vietnamese,Spanish,Chinese": 0.14666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.17333333333333334, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "Malay,English,Vietnamese,Filipino,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Malay,English,Spanish,Indonesian,Chinese": 0.14, + "Malay,English,Spanish,Filipino,Chinese": 0.12, + "Malay,English,Indonesian,Filipino,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.18, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.16666666666666666, + "English,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1 + } + }, + "AC3_2": 0.4300563909279306, + "AC3_3": 0.3370906067672561, + "AC3_4": 0.2692008757074752, + "AC3_5": 0.21837593980939687, + "AC3_6": 0.18256929633950203, + "AC3_7": 0.1593023255489792 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3741883116883117, + "language_acc": { + "English": 0.4431818181818182, + "Vietnamese": 0.3806818181818182, + "Chinese": 0.38636363636363635, + "Indonesian": 0.3693181818181818, + "Filipino": 0.3181818181818182, + "Spanish": 0.38636363636363635, + "Malay": 0.3352272727272727 + }, + "consistency_score_2": 0.5524891774891776, + "consistency_score_3": 0.3780844155844157, + "consistency_score_4": 0.2839285714285714, + "consistency_score_5": 0.2245670995670996, + "consistency_score_6": 0.18344155844155846, + "consistency_score_7": 0.1534090909090909, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5113636363636364, + "English,Chinese": 0.5397727272727273, + "English,Indonesian": 0.5397727272727273, + "English,Filipino": 0.44886363636363635, + "English,Spanish": 0.6590909090909091, + "English,Malay": 0.4659090909090909, + "Vietnamese,Chinese": 0.5397727272727273, + "Vietnamese,Indonesian": 0.6022727272727273, + "Vietnamese,Filipino": 0.5284090909090909, + "Vietnamese,Spanish": 0.5568181818181818, + "Vietnamese,Malay": 0.5681818181818182, + "Chinese,Indonesian": 0.5738636363636364, + "Chinese,Filipino": 0.5, + "Chinese,Spanish": 0.5397727272727273, + "Chinese,Malay": 0.5795454545454546, + "Indonesian,Filipino": 0.5454545454545454, + "Indonesian,Spanish": 0.6022727272727273, + "Indonesian,Malay": 0.7102272727272727, + "Filipino,Spanish": 0.4943181818181818, + "Filipino,Malay": 0.5681818181818182, + "Spanish,Malay": 0.5284090909090909 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.3693181818181818, + "English,Vietnamese,Indonesian": 0.39204545454545453, + "English,Vietnamese,Filipino": 0.3125, + "English,Vietnamese,Spanish": 0.4034090909090909, + "English,Vietnamese,Malay": 0.32386363636363635, + "English,Chinese,Indonesian": 0.38636363636363635, + "English,Chinese,Filipino": 0.3181818181818182, + "English,Chinese,Spanish": 0.4147727272727273, + "English,Chinese,Malay": 0.3409090909090909, + "English,Indonesian,Filipino": 0.3352272727272727, + "English,Indonesian,Spanish": 0.44886363636363635, + "English,Indonesian,Malay": 0.38636363636363635, + "English,Filipino,Spanish": 0.35795454545454547, + "English,Filipino,Malay": 0.3068181818181818, + "English,Spanish,Malay": 0.35795454545454547, + "Vietnamese,Chinese,Indonesian": 0.4034090909090909, + "Vietnamese,Chinese,Filipino": 0.3465909090909091, + "Vietnamese,Chinese,Spanish": 0.36363636363636365, + "Vietnamese,Chinese,Malay": 0.38636363636363635, + "Vietnamese,Indonesian,Filipino": 0.3693181818181818, + "Vietnamese,Indonesian,Spanish": 0.42045454545454547, + "Vietnamese,Indonesian,Malay": 0.4659090909090909, + "Vietnamese,Filipino,Spanish": 0.3409090909090909, + "Vietnamese,Filipino,Malay": 0.375, + "Vietnamese,Spanish,Malay": 0.375, + "Chinese,Indonesian,Filipino": 0.36363636363636365, + "Chinese,Indonesian,Spanish": 0.4034090909090909, + "Chinese,Indonesian,Malay": 0.4602272727272727, + "Chinese,Filipino,Spanish": 0.32386363636363635, + "Chinese,Filipino,Malay": 0.38636363636363635, + "Chinese,Spanish,Malay": 0.3693181818181818, + "Indonesian,Filipino,Spanish": 0.375, + "Indonesian,Filipino,Malay": 0.44886363636363635, + "Indonesian,Spanish,Malay": 0.44886363636363635, + "Filipino,Spanish,Malay": 0.3522727272727273 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.3125, + "English,Vietnamese,Chinese,Filipino": 0.25, + "English,Vietnamese,Chinese,Spanish": 0.29545454545454547, + "English,Vietnamese,Chinese,Malay": 0.26136363636363635, + "English,Vietnamese,Indonesian,Filipino": 0.2556818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.32954545454545453, + "English,Vietnamese,Indonesian,Malay": 0.2840909090909091, + "English,Vietnamese,Filipino,Spanish": 0.2556818181818182, + "English,Vietnamese,Filipino,Malay": 0.23295454545454544, + "English,Vietnamese,Spanish,Malay": 0.2556818181818182, + "English,Chinese,Indonesian,Filipino": 0.26704545454545453, + "English,Chinese,Indonesian,Spanish": 0.3352272727272727, + "English,Chinese,Indonesian,Malay": 0.3125, + "English,Chinese,Filipino,Spanish": 0.26704545454545453, + "English,Chinese,Filipino,Malay": 0.24431818181818182, + "English,Chinese,Spanish,Malay": 0.2840909090909091, + "English,Indonesian,Filipino,Spanish": 0.30113636363636365, + "English,Indonesian,Filipino,Malay": 0.26704545454545453, + "English,Indonesian,Spanish,Malay": 0.32386363636363635, + "English,Filipino,Spanish,Malay": 0.25, + "Vietnamese,Chinese,Indonesian,Filipino": 0.26704545454545453, + "Vietnamese,Chinese,Indonesian,Spanish": 0.3068181818181818, + "Vietnamese,Chinese,Indonesian,Malay": 0.32954545454545453, + "Vietnamese,Chinese,Filipino,Spanish": 0.24431818181818182, + "Vietnamese,Chinese,Filipino,Malay": 0.26704545454545453, + "Vietnamese,Chinese,Spanish,Malay": 0.26704545454545453, + "Vietnamese,Indonesian,Filipino,Spanish": 0.2784090909090909, + "Vietnamese,Indonesian,Filipino,Malay": 0.3068181818181818, + "Vietnamese,Indonesian,Spanish,Malay": 0.32386363636363635, + "Vietnamese,Filipino,Spanish,Malay": 0.26136363636363635, + "Chinese,Indonesian,Filipino,Spanish": 0.2727272727272727, + "Chinese,Indonesian,Filipino,Malay": 0.3181818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.32954545454545453, + "Chinese,Filipino,Spanish,Malay": 0.26704545454545453, + "Indonesian,Filipino,Spanish,Malay": 0.3125 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.2159090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.26136363636363635, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.24431818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.20454545454545456, + "English,Vietnamese,Chinese,Filipino,Malay": 0.1875, + "English,Vietnamese,Chinese,Spanish,Malay": 0.21022727272727273, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.22727272727272727, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.20454545454545456, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.23863636363636365, + "English,Vietnamese,Filipino,Spanish,Malay": 0.1875, + "English,Chinese,Indonesian,Filipino,Spanish": 0.23863636363636365, + "English,Chinese,Indonesian,Filipino,Malay": 0.22727272727272727, + "English,Chinese,Indonesian,Spanish,Malay": 0.2727272727272727, + "English,Chinese,Filipino,Spanish,Malay": 0.21022727272727273, + "English,Indonesian,Filipino,Spanish,Malay": 0.23863636363636365, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.24431818181818182, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.19318181818181818, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.23295454545454544, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.23863636363636365 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.1875, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.17613636363636365, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.20454545454545456, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.1534090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.18181818181818182, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17613636363636365 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909 + } + }, + "AC3_2": 0.4461854203708482, + "AC3_3": 0.3761262744092495, + "AC3_4": 0.32286894772738706, + "AC3_5": 0.28068350533161174, + "AC3_6": 0.24619085422530743, + "AC3_7": 0.21760489506365058 + }, + "prompt_2": { + "overall_acc": 0.36607142857142855, + "language_acc": { + "English": 0.4090909090909091, + "Vietnamese": 0.3352272727272727, + "Chinese": 0.39204545454545453, + "Indonesian": 0.39204545454545453, + "Filipino": 0.2897727272727273, + "Spanish": 0.4034090909090909, + "Malay": 0.3409090909090909 + }, + "consistency_score_2": 0.5422077922077922, + "consistency_score_3": 0.36331168831168825, + "consistency_score_4": 0.26720779220779217, + "consistency_score_5": 0.2067099567099567, + "consistency_score_6": 0.16396103896103895, + "consistency_score_7": 0.13068181818181818, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.48295454545454547, + "English,Chinese": 0.5170454545454546, + "English,Indonesian": 0.5795454545454546, + "English,Filipino": 0.4715909090909091, + "English,Spanish": 0.6647727272727273, + "English,Malay": 0.48863636363636365, + "Vietnamese,Chinese": 0.5227272727272727, + "Vietnamese,Indonesian": 0.5909090909090909, + "Vietnamese,Filipino": 0.5056818181818182, + "Vietnamese,Spanish": 0.5340909090909091, + "Vietnamese,Malay": 0.5568181818181818, + "Chinese,Indonesian": 0.6079545454545454, + "Chinese,Filipino": 0.44886363636363635, + "Chinese,Spanish": 0.5625, + "Chinese,Malay": 0.5397727272727273, + "Indonesian,Filipino": 0.5284090909090909, + "Indonesian,Spanish": 0.5965909090909091, + "Indonesian,Malay": 0.6534090909090909, + "Filipino,Spanish": 0.48295454545454547, + "Filipino,Malay": 0.5340909090909091, + "Spanish,Malay": 0.5170454545454546 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.32954545454545453, + "English,Vietnamese,Indonesian": 0.375, + "English,Vietnamese,Filipino": 0.2897727272727273, + "English,Vietnamese,Spanish": 0.3806818181818182, + "English,Vietnamese,Malay": 0.3125, + "English,Chinese,Indonesian": 0.4034090909090909, + "English,Chinese,Filipino": 0.2840909090909091, + "English,Chinese,Spanish": 0.42045454545454547, + "English,Chinese,Malay": 0.32386363636363635, + "English,Indonesian,Filipino": 0.3522727272727273, + "English,Indonesian,Spanish": 0.4602272727272727, + "English,Indonesian,Malay": 0.4034090909090909, + "English,Filipino,Spanish": 0.3522727272727273, + "English,Filipino,Malay": 0.3125, + "English,Spanish,Malay": 0.375, + "Vietnamese,Chinese,Indonesian": 0.4090909090909091, + "Vietnamese,Chinese,Filipino": 0.30113636363636365, + "Vietnamese,Chinese,Spanish": 0.3522727272727273, + "Vietnamese,Chinese,Malay": 0.3465909090909091, + "Vietnamese,Indonesian,Filipino": 0.35795454545454547, + "Vietnamese,Indonesian,Spanish": 0.4090909090909091, + "Vietnamese,Indonesian,Malay": 0.42613636363636365, + "Vietnamese,Filipino,Spanish": 0.3068181818181818, + "Vietnamese,Filipino,Malay": 0.3465909090909091, + "Vietnamese,Spanish,Malay": 0.3522727272727273, + "Chinese,Indonesian,Filipino": 0.3522727272727273, + "Chinese,Indonesian,Spanish": 0.4147727272727273, + "Chinese,Indonesian,Malay": 0.4431818181818182, + "Chinese,Filipino,Spanish": 0.3068181818181818, + "Chinese,Filipino,Malay": 0.3352272727272727, + "Chinese,Spanish,Malay": 0.36363636363636365, + "Indonesian,Filipino,Spanish": 0.36363636363636365, + "Indonesian,Filipino,Malay": 0.4090909090909091, + "Indonesian,Spanish,Malay": 0.42045454545454547, + "Filipino,Spanish,Malay": 0.32386363636363635 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.2784090909090909, + "English,Vietnamese,Chinese,Filipino": 0.21022727272727273, + "English,Vietnamese,Chinese,Spanish": 0.2784090909090909, + "English,Vietnamese,Chinese,Malay": 0.2215909090909091, + "English,Vietnamese,Indonesian,Filipino": 0.24431818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.3125, + "English,Vietnamese,Indonesian,Malay": 0.2727272727272727, + "English,Vietnamese,Filipino,Spanish": 0.22727272727272727, + "English,Vietnamese,Filipino,Malay": 0.2159090909090909, + "English,Vietnamese,Spanish,Malay": 0.26136363636363635, + "English,Chinese,Indonesian,Filipino": 0.24431818181818182, + "English,Chinese,Indonesian,Spanish": 0.3409090909090909, + "English,Chinese,Indonesian,Malay": 0.29545454545454547, + "English,Chinese,Filipino,Spanish": 0.24431818181818182, + "English,Chinese,Filipino,Malay": 0.22727272727272727, + "English,Chinese,Spanish,Malay": 0.2784090909090909, + "English,Indonesian,Filipino,Spanish": 0.30113636363636365, + "English,Indonesian,Filipino,Malay": 0.2727272727272727, + "English,Indonesian,Spanish,Malay": 0.3352272727272727, + "English,Filipino,Spanish,Malay": 0.25, + "Vietnamese,Chinese,Indonesian,Filipino": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian,Spanish": 0.29545454545454547, + "Vietnamese,Chinese,Indonesian,Malay": 0.3068181818181818, + "Vietnamese,Chinese,Filipino,Spanish": 0.2159090909090909, + "Vietnamese,Chinese,Filipino,Malay": 0.23295454545454544, + "Vietnamese,Chinese,Spanish,Malay": 0.2556818181818182, + "Vietnamese,Indonesian,Filipino,Spanish": 0.2556818181818182, + "Vietnamese,Indonesian,Filipino,Malay": 0.2840909090909091, + "Vietnamese,Indonesian,Spanish,Malay": 0.3068181818181818, + "Vietnamese,Filipino,Spanish,Malay": 0.22727272727272727, + "Chinese,Indonesian,Filipino,Spanish": 0.26136363636363635, + "Chinese,Indonesian,Filipino,Malay": 0.29545454545454547, + "Chinese,Indonesian,Spanish,Malay": 0.32386363636363635, + "Chinese,Filipino,Spanish,Malay": 0.24431818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.2784090909090909 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.18181818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.23863636363636365, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.20454545454545456, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.17613636363636365, + "English,Vietnamese,Chinese,Filipino,Malay": 0.16477272727272727, + "English,Vietnamese,Chinese,Spanish,Malay": 0.19318181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.20454545454545456, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.19886363636363635, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.24431818181818182, + "English,Vietnamese,Filipino,Spanish,Malay": 0.17613636363636365, + "English,Chinese,Indonesian,Filipino,Spanish": 0.2215909090909091, + "English,Chinese,Indonesian,Filipino,Malay": 0.20454545454545456, + "English,Chinese,Indonesian,Spanish,Malay": 0.26136363636363635, + "English,Chinese,Filipino,Spanish,Malay": 0.19886363636363635, + "English,Indonesian,Filipino,Spanish,Malay": 0.23295454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.23863636363636365, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.20454545454545456, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.2215909090909091 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.1590909090909091, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.14772727272727273, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.1875, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.13636363636363635, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1590909090909091 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818 + } + }, + "AC3_2": 0.43706115149024366, + "AC3_3": 0.36468633742734635, + "AC3_4": 0.3089226205246979, + "AC3_5": 0.26422160734978106, + "AC3_6": 0.2264821701607267, + "AC3_7": 0.19260620911155377 + }, + "prompt_3": { + "overall_acc": 0.3660714285714285, + "language_acc": { + "English": 0.45454545454545453, + "Vietnamese": 0.36363636363636365, + "Chinese": 0.375, + "Indonesian": 0.36363636363636365, + "Filipino": 0.2727272727272727, + "Spanish": 0.39204545454545453, + "Malay": 0.3409090909090909 + }, + "consistency_score_2": 0.5162337662337663, + "consistency_score_3": 0.33165584415584415, + "consistency_score_4": 0.23425324675324677, + "consistency_score_5": 0.174512987012987, + "consistency_score_6": 0.13474025974025974, + "consistency_score_7": 0.10795454545454546, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5056818181818182, + "English,Chinese": 0.5397727272727273, + "English,Indonesian": 0.5454545454545454, + "English,Filipino": 0.42045454545454547, + "English,Spanish": 0.6022727272727273, + "English,Malay": 0.48863636363636365, + "Vietnamese,Chinese": 0.5056818181818182, + "Vietnamese,Indonesian": 0.5397727272727273, + "Vietnamese,Filipino": 0.4602272727272727, + "Vietnamese,Spanish": 0.5170454545454546, + "Vietnamese,Malay": 0.5454545454545454, + "Chinese,Indonesian": 0.5681818181818182, + "Chinese,Filipino": 0.44886363636363635, + "Chinese,Spanish": 0.5056818181818182, + "Chinese,Malay": 0.5681818181818182, + "Indonesian,Filipino": 0.5, + "Indonesian,Spanish": 0.5113636363636364, + "Indonesian,Malay": 0.6363636363636364, + "Filipino,Spanish": 0.4431818181818182, + "Filipino,Malay": 0.5056818181818182, + "Spanish,Malay": 0.48295454545454547 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.3465909090909091, + "English,Vietnamese,Indonesian": 0.36363636363636365, + "English,Vietnamese,Filipino": 0.2727272727272727, + "English,Vietnamese,Spanish": 0.36363636363636365, + "English,Vietnamese,Malay": 0.3352272727272727, + "English,Chinese,Indonesian": 0.375, + "English,Chinese,Filipino": 0.25, + "English,Chinese,Spanish": 0.3693181818181818, + "English,Chinese,Malay": 0.3409090909090909, + "English,Indonesian,Filipino": 0.30113636363636365, + "English,Indonesian,Spanish": 0.38636363636363635, + "English,Indonesian,Malay": 0.38636363636363635, + "English,Filipino,Spanish": 0.2784090909090909, + "English,Filipino,Malay": 0.2840909090909091, + "English,Spanish,Malay": 0.32386363636363635, + "Vietnamese,Chinese,Indonesian": 0.3693181818181818, + "Vietnamese,Chinese,Filipino": 0.29545454545454547, + "Vietnamese,Chinese,Spanish": 0.3181818181818182, + "Vietnamese,Chinese,Malay": 0.375, + "Vietnamese,Indonesian,Filipino": 0.3068181818181818, + "Vietnamese,Indonesian,Spanish": 0.3465909090909091, + "Vietnamese,Indonesian,Malay": 0.39204545454545453, + "Vietnamese,Filipino,Spanish": 0.2840909090909091, + "Vietnamese,Filipino,Malay": 0.3125, + "Vietnamese,Spanish,Malay": 0.3352272727272727, + "Chinese,Indonesian,Filipino": 0.32386363636363635, + "Chinese,Indonesian,Spanish": 0.3409090909090909, + "Chinese,Indonesian,Malay": 0.4318181818181818, + "Chinese,Filipino,Spanish": 0.24431818181818182, + "Chinese,Filipino,Malay": 0.32386363636363635, + "Chinese,Spanish,Malay": 0.32954545454545453, + "Indonesian,Filipino,Spanish": 0.2840909090909091, + "Indonesian,Filipino,Malay": 0.3806818181818182, + "Indonesian,Spanish,Malay": 0.3522727272727273, + "Filipino,Spanish,Malay": 0.2840909090909091 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.26704545454545453, + "English,Vietnamese,Chinese,Filipino": 0.20454545454545456, + "English,Vietnamese,Chinese,Spanish": 0.25, + "English,Vietnamese,Chinese,Malay": 0.2556818181818182, + "English,Vietnamese,Indonesian,Filipino": 0.2159090909090909, + "English,Vietnamese,Indonesian,Spanish": 0.2784090909090909, + "English,Vietnamese,Indonesian,Malay": 0.2784090909090909, + "English,Vietnamese,Filipino,Spanish": 0.21022727272727273, + "English,Vietnamese,Filipino,Malay": 0.21022727272727273, + "English,Vietnamese,Spanish,Malay": 0.23863636363636365, + "English,Chinese,Indonesian,Filipino": 0.20454545454545456, + "English,Chinese,Indonesian,Spanish": 0.2784090909090909, + "English,Chinese,Indonesian,Malay": 0.2840909090909091, + "English,Chinese,Filipino,Spanish": 0.18181818181818182, + "English,Chinese,Filipino,Malay": 0.18181818181818182, + "English,Chinese,Spanish,Malay": 0.23295454545454544, + "English,Indonesian,Filipino,Spanish": 0.2215909090909091, + "English,Indonesian,Filipino,Malay": 0.23295454545454544, + "English,Indonesian,Spanish,Malay": 0.2784090909090909, + "English,Filipino,Spanish,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian,Filipino": 0.22727272727272727, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian,Malay": 0.2897727272727273, + "Vietnamese,Chinese,Filipino,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Filipino,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Spanish,Malay": 0.23295454545454544, + "Vietnamese,Indonesian,Filipino,Spanish": 0.20454545454545456, + "Vietnamese,Indonesian,Filipino,Malay": 0.25, + "Vietnamese,Indonesian,Spanish,Malay": 0.26704545454545453, + "Vietnamese,Filipino,Spanish,Malay": 0.21022727272727273, + "Chinese,Indonesian,Filipino,Spanish": 0.19886363636363635, + "Chinese,Indonesian,Filipino,Malay": 0.26704545454545453, + "Chinese,Indonesian,Spanish,Malay": 0.2727272727272727, + "Chinese,Filipino,Spanish,Malay": 0.19318181818181818, + "Indonesian,Filipino,Spanish,Malay": 0.2159090909090909 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.16477272727272727, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.21022727272727273, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.21022727272727273, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.1590909090909091, + "English,Vietnamese,Chinese,Filipino,Malay": 0.1534090909090909, + "English,Vietnamese,Chinese,Spanish,Malay": 0.17613636363636365, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.17045454545454544, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.18181818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.2159090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.1534090909090909, + "English,Chinese,Indonesian,Filipino,Spanish": 0.1590909090909091, + "English,Chinese,Indonesian,Filipino,Malay": 0.16477272727272727, + "English,Chinese,Indonesian,Spanish,Malay": 0.2159090909090909, + "English,Chinese,Filipino,Spanish,Malay": 0.13068181818181818, + "English,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.13636363636363635, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.16477272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.11363636363636363, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546 + } + }, + "AC3_2": 0.4283742935511328, + "AC3_3": 0.34801485735888316, + "AC3_4": 0.2856901413415786, + "AC3_5": 0.2363524238087022, + "AC3_6": 0.19697846719845466, + "AC3_7": 0.16673801366345525 + }, + "prompt_4": { + "overall_acc": 0.3717532467532468, + "language_acc": { + "English": 0.4147727272727273, + "Vietnamese": 0.3522727272727273, + "Chinese": 0.4034090909090909, + "Indonesian": 0.36363636363636365, + "Filipino": 0.32954545454545453, + "Spanish": 0.39204545454545453, + "Malay": 0.3465909090909091 + }, + "consistency_score_2": 0.5257034632034632, + "consistency_score_3": 0.34642857142857125, + "consistency_score_4": 0.25129870129870135, + "consistency_score_5": 0.19101731601731603, + "consistency_score_6": 0.1477272727272727, + "consistency_score_7": 0.11363636363636363, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4431818181818182, + "English,Chinese": 0.5056818181818182, + "English,Indonesian": 0.5340909090909091, + "English,Filipino": 0.42613636363636365, + "English,Spanish": 0.6477272727272727, + "English,Malay": 0.48295454545454547, + "Vietnamese,Chinese": 0.4659090909090909, + "Vietnamese,Indonesian": 0.5738636363636364, + "Vietnamese,Filipino": 0.4943181818181818, + "Vietnamese,Spanish": 0.48863636363636365, + "Vietnamese,Malay": 0.5454545454545454, + "Chinese,Indonesian": 0.5454545454545454, + "Chinese,Filipino": 0.48863636363636365, + "Chinese,Spanish": 0.5170454545454546, + "Chinese,Malay": 0.5454545454545454, + "Indonesian,Filipino": 0.5511363636363636, + "Indonesian,Spanish": 0.5625, + "Indonesian,Malay": 0.6761363636363636, + "Filipino,Spanish": 0.44886363636363635, + "Filipino,Malay": 0.5738636363636364, + "Spanish,Malay": 0.5227272727272727 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.30113636363636365, + "English,Vietnamese,Indonesian": 0.3409090909090909, + "English,Vietnamese,Filipino": 0.24431818181818182, + "English,Vietnamese,Spanish": 0.3465909090909091, + "English,Vietnamese,Malay": 0.29545454545454547, + "English,Chinese,Indonesian": 0.36363636363636365, + "English,Chinese,Filipino": 0.2784090909090909, + "English,Chinese,Spanish": 0.3977272727272727, + "English,Chinese,Malay": 0.3181818181818182, + "English,Indonesian,Filipino": 0.32954545454545453, + "English,Indonesian,Spanish": 0.4318181818181818, + "English,Indonesian,Malay": 0.39204545454545453, + "English,Filipino,Spanish": 0.3181818181818182, + "English,Filipino,Malay": 0.30113636363636365, + "English,Spanish,Malay": 0.3693181818181818, + "Vietnamese,Chinese,Indonesian": 0.3409090909090909, + "Vietnamese,Chinese,Filipino": 0.2727272727272727, + "Vietnamese,Chinese,Spanish": 0.3068181818181818, + "Vietnamese,Chinese,Malay": 0.32954545454545453, + "Vietnamese,Indonesian,Filipino": 0.3693181818181818, + "Vietnamese,Indonesian,Spanish": 0.3806818181818182, + "Vietnamese,Indonesian,Malay": 0.4431818181818182, + "Vietnamese,Filipino,Spanish": 0.2897727272727273, + "Vietnamese,Filipino,Malay": 0.3409090909090909, + "Vietnamese,Spanish,Malay": 0.3465909090909091, + "Chinese,Indonesian,Filipino": 0.3465909090909091, + "Chinese,Indonesian,Spanish": 0.36363636363636365, + "Chinese,Indonesian,Malay": 0.42613636363636365, + "Chinese,Filipino,Spanish": 0.29545454545454547, + "Chinese,Filipino,Malay": 0.35795454545454547, + "Chinese,Spanish,Malay": 0.3465909090909091, + "Indonesian,Filipino,Spanish": 0.3522727272727273, + "Indonesian,Filipino,Malay": 0.4375, + "Indonesian,Spanish,Malay": 0.4147727272727273, + "Filipino,Spanish,Malay": 0.3352272727272727 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.24431818181818182, + "English,Vietnamese,Chinese,Filipino": 0.16477272727272727, + "English,Vietnamese,Chinese,Spanish": 0.25, + "English,Vietnamese,Chinese,Malay": 0.21022727272727273, + "English,Vietnamese,Indonesian,Filipino": 0.2159090909090909, + "English,Vietnamese,Indonesian,Spanish": 0.29545454545454547, + "English,Vietnamese,Indonesian,Malay": 0.2727272727272727, + "English,Vietnamese,Filipino,Spanish": 0.21022727272727273, + "English,Vietnamese,Filipino,Malay": 0.1875, + "English,Vietnamese,Spanish,Malay": 0.25, + "English,Chinese,Indonesian,Filipino": 0.23863636363636365, + "English,Chinese,Indonesian,Spanish": 0.3125, + "English,Chinese,Indonesian,Malay": 0.2897727272727273, + "English,Chinese,Filipino,Spanish": 0.22727272727272727, + "English,Chinese,Filipino,Malay": 0.19886363636363635, + "English,Chinese,Spanish,Malay": 0.2727272727272727, + "English,Indonesian,Filipino,Spanish": 0.2784090909090909, + "English,Indonesian,Filipino,Malay": 0.26704545454545453, + "English,Indonesian,Spanish,Malay": 0.32386363636363635, + "English,Filipino,Spanish,Malay": 0.23295454545454544, + "Vietnamese,Chinese,Indonesian,Filipino": 0.2215909090909091, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian,Malay": 0.2784090909090909, + "Vietnamese,Chinese,Filipino,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Filipino,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Spanish,Malay": 0.23295454545454544, + "Vietnamese,Indonesian,Filipino,Spanish": 0.25, + "Vietnamese,Indonesian,Filipino,Malay": 0.30113636363636365, + "Vietnamese,Indonesian,Spanish,Malay": 0.3068181818181818, + "Vietnamese,Filipino,Spanish,Malay": 0.23295454545454544, + "Chinese,Indonesian,Filipino,Spanish": 0.25, + "Chinese,Indonesian,Filipino,Malay": 0.29545454545454547, + "Chinese,Indonesian,Spanish,Malay": 0.29545454545454547, + "Chinese,Filipino,Spanish,Malay": 0.24431818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.2784090909090909 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.14772727272727273, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.2215909090909091, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.19886363636363635, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.14772727272727273, + "English,Vietnamese,Chinese,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Spanish,Malay": 0.1875, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.19318181818181818, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.18181818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.23863636363636365, + "English,Vietnamese,Filipino,Spanish,Malay": 0.16477272727272727, + "English,Chinese,Indonesian,Filipino,Spanish": 0.21022727272727273, + "English,Chinese,Indonesian,Filipino,Malay": 0.19318181818181818, + "English,Chinese,Indonesian,Spanish,Malay": 0.25, + "English,Chinese,Filipino,Spanish,Malay": 0.17613636363636365, + "English,Indonesian,Filipino,Spanish,Malay": 0.2215909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.17613636363636365, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.21022727272727273, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.21022727272727273 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.18181818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.11363636363636363, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.1590909090909091, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.14772727272727273 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363 + } + }, + "AC3_2": 0.43552400262952884, + "AC3_3": 0.35864440708000267, + "AC3_4": 0.299882243815353, + "AC3_5": 0.2523632616934187, + "AC3_6": 0.21143465905020792, + "AC3_7": 0.1740650653335523 + }, + "prompt_5": { + "overall_acc": 0.3612012987012987, + "language_acc": { + "English": 0.3977272727272727, + "Vietnamese": 0.3522727272727273, + "Chinese": 0.36363636363636365, + "Indonesian": 0.3522727272727273, + "Filipino": 0.3181818181818182, + "Spanish": 0.38636363636363635, + "Malay": 0.35795454545454547 + }, + "consistency_score_2": 0.5267857142857143, + "consistency_score_3": 0.35097402597402594, + "consistency_score_4": 0.25925324675324674, + "consistency_score_5": 0.20183982683982682, + "consistency_score_6": 0.16152597402597402, + "consistency_score_7": 0.13068181818181818, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4943181818181818, + "English,Chinese": 0.48863636363636365, + "English,Indonesian": 0.5340909090909091, + "English,Filipino": 0.44886363636363635, + "English,Spanish": 0.6420454545454546, + "English,Malay": 0.4659090909090909, + "Vietnamese,Chinese": 0.48295454545454547, + "Vietnamese,Indonesian": 0.6136363636363636, + "Vietnamese,Filipino": 0.5284090909090909, + "Vietnamese,Spanish": 0.4943181818181818, + "Vietnamese,Malay": 0.5454545454545454, + "Chinese,Indonesian": 0.5738636363636364, + "Chinese,Filipino": 0.4431818181818182, + "Chinese,Spanish": 0.5511363636363636, + "Chinese,Malay": 0.48295454545454547, + "Indonesian,Filipino": 0.5738636363636364, + "Indonesian,Spanish": 0.5795454545454546, + "Indonesian,Malay": 0.6931818181818182, + "Filipino,Spanish": 0.42045454545454547, + "Filipino,Malay": 0.5056818181818182, + "Spanish,Malay": 0.5 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.3181818181818182, + "English,Vietnamese,Indonesian": 0.3806818181818182, + "English,Vietnamese,Filipino": 0.30113636363636365, + "English,Vietnamese,Spanish": 0.3693181818181818, + "English,Vietnamese,Malay": 0.32386363636363635, + "English,Chinese,Indonesian": 0.3693181818181818, + "English,Chinese,Filipino": 0.2727272727272727, + "English,Chinese,Spanish": 0.3977272727272727, + "English,Chinese,Malay": 0.29545454545454547, + "English,Indonesian,Filipino": 0.3409090909090909, + "English,Indonesian,Spanish": 0.4318181818181818, + "English,Indonesian,Malay": 0.3977272727272727, + "English,Filipino,Spanish": 0.3181818181818182, + "English,Filipino,Malay": 0.2897727272727273, + "English,Spanish,Malay": 0.35795454545454547, + "Vietnamese,Chinese,Indonesian": 0.375, + "Vietnamese,Chinese,Filipino": 0.2840909090909091, + "Vietnamese,Chinese,Spanish": 0.3181818181818182, + "Vietnamese,Chinese,Malay": 0.3181818181818182, + "Vietnamese,Indonesian,Filipino": 0.4147727272727273, + "Vietnamese,Indonesian,Spanish": 0.3806818181818182, + "Vietnamese,Indonesian,Malay": 0.45454545454545453, + "Vietnamese,Filipino,Spanish": 0.2897727272727273, + "Vietnamese,Filipino,Malay": 0.35795454545454547, + "Vietnamese,Spanish,Malay": 0.32386363636363635, + "Chinese,Indonesian,Filipino": 0.36363636363636365, + "Chinese,Indonesian,Spanish": 0.4034090909090909, + "Chinese,Indonesian,Malay": 0.4090909090909091, + "Chinese,Filipino,Spanish": 0.2840909090909091, + "Chinese,Filipino,Malay": 0.3125, + "Chinese,Spanish,Malay": 0.3352272727272727, + "Indonesian,Filipino,Spanish": 0.3409090909090909, + "Indonesian,Filipino,Malay": 0.4375, + "Indonesian,Spanish,Malay": 0.42613636363636365, + "Filipino,Spanish,Malay": 0.2897727272727273 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.2784090909090909, + "English,Vietnamese,Chinese,Filipino": 0.20454545454545456, + "English,Vietnamese,Chinese,Spanish": 0.26136363636363635, + "English,Vietnamese,Chinese,Malay": 0.2215909090909091, + "English,Vietnamese,Indonesian,Filipino": 0.26704545454545453, + "English,Vietnamese,Indonesian,Spanish": 0.3068181818181818, + "English,Vietnamese,Indonesian,Malay": 0.30113636363636365, + "English,Vietnamese,Filipino,Spanish": 0.23295454545454544, + "English,Vietnamese,Filipino,Malay": 0.2215909090909091, + "English,Vietnamese,Spanish,Malay": 0.26704545454545453, + "English,Chinese,Indonesian,Filipino": 0.25, + "English,Chinese,Indonesian,Spanish": 0.3181818181818182, + "English,Chinese,Indonesian,Malay": 0.2784090909090909, + "English,Chinese,Filipino,Spanish": 0.23295454545454544, + "English,Chinese,Filipino,Malay": 0.20454545454545456, + "English,Chinese,Spanish,Malay": 0.2556818181818182, + "English,Indonesian,Filipino,Spanish": 0.2727272727272727, + "English,Indonesian,Filipino,Malay": 0.26704545454545453, + "English,Indonesian,Spanish,Malay": 0.32386363636363635, + "English,Filipino,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian,Filipino": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2784090909090909, + "Vietnamese,Chinese,Indonesian,Malay": 0.2727272727272727, + "Vietnamese,Chinese,Filipino,Spanish": 0.19886363636363635, + "Vietnamese,Chinese,Filipino,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Indonesian,Filipino,Spanish": 0.25, + "Vietnamese,Indonesian,Filipino,Malay": 0.3352272727272727, + "Vietnamese,Indonesian,Spanish,Malay": 0.29545454545454547, + "Vietnamese,Filipino,Spanish,Malay": 0.21022727272727273, + "Chinese,Indonesian,Filipino,Spanish": 0.26136363636363635, + "Chinese,Indonesian,Filipino,Malay": 0.2840909090909091, + "Chinese,Indonesian,Spanish,Malay": 0.3068181818181818, + "Chinese,Filipino,Spanish,Malay": 0.22727272727272727, + "Indonesian,Filipino,Spanish,Malay": 0.2727272727272727 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.19886363636363635, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.23863636363636365, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.21022727272727273, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.17613636363636365, + "English,Vietnamese,Chinese,Filipino,Malay": 0.1534090909090909, + "English,Vietnamese,Chinese,Spanish,Malay": 0.19318181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.21022727272727273, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.2159090909090909, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.25, + "English,Vietnamese,Filipino,Spanish,Malay": 0.17045454545454544, + "English,Chinese,Indonesian,Filipino,Spanish": 0.2159090909090909, + "English,Chinese,Indonesian,Filipino,Malay": 0.19886363636363635, + "English,Chinese,Indonesian,Spanish,Malay": 0.24431818181818182, + "English,Chinese,Filipino,Spanish,Malay": 0.17613636363636365, + "English,Indonesian,Filipino,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.20454545454545456, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.2159090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.17045454545454544, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.1534090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.1875, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.13068181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818 + } + }, + "AC3_2": 0.42855510572348054, + "AC3_3": 0.3560142273455917, + "AC3_4": 0.30185163471114795, + "AC3_5": 0.25896796615407164, + "AC3_6": 0.22322688952735298, + "AC3_7": 0.1919254425052359 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4077669902912621 + }, + "prompt_2": { + "accuracy": 0.44660194174757284 + }, + "prompt_3": { + "accuracy": 0.42718446601941745 + }, + "prompt_4": { + "accuracy": 0.4174757281553398 + }, + "prompt_5": { + "accuracy": 0.4368932038834951 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3142857142857143 + }, + "prompt_2": { + "accuracy": 0.3047619047619048 + }, + "prompt_3": { + "accuracy": 0.34285714285714286 + }, + "prompt_4": { + "accuracy": 0.3333333333333333 + }, + "prompt_5": { + "accuracy": 0.3619047619047619 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.38317757009345793 + }, + "prompt_2": { + "accuracy": 0.45794392523364486 + }, + "prompt_3": { + "accuracy": 0.5233644859813084 + }, + "prompt_4": { + "accuracy": 0.40186915887850466 + }, + "prompt_5": { + "accuracy": 0.4672897196261682 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.39, + "category_acc": { + "brand": 0.2, + "demographics": 0.0, + "biology": 0.5, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.4, + "culture": 0.7, + "film": 0.2, + "law": 0.5, + "geography": 0.7 + } + }, + "prompt_2": { + "accuracy": 0.36, + "category_acc": { + "brand": 0.1, + "demographics": 0.2, + "biology": 0.5, + "history": 0.26666666666666666, + "literature": 0.1, + "politics": 0.7, + "culture": 0.5, + "film": 0.3, + "law": 0.2, + "geography": 0.7 + } + }, + "prompt_3": { + "accuracy": 0.3, + "category_acc": { + "brand": 0.1, + "demographics": 0.0, + "biology": 0.5, + "history": 0.2, + "literature": 0.2, + "politics": 0.6, + "culture": 0.3, + "film": 0.3, + "law": 0.2, + "geography": 0.5 + } + }, + "prompt_4": { + "accuracy": 0.39, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.5, + "history": 0.26666666666666666, + "literature": 0.1, + "politics": 0.5, + "culture": 0.5, + "film": 0.4, + "law": 0.4, + "geography": 0.8 + } + }, + "prompt_5": { + "accuracy": 0.37, + "category_acc": { + "brand": 0.2, + "demographics": 0.0, + "biology": 0.5, + "history": 0.26666666666666666, + "literature": 0.3, + "politics": 0.6, + "culture": 0.3, + "film": 0.4, + "law": 0.4, + "geography": 0.6 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.16118029254380573 + }, + "prompt_2": { + "bleu_score": 0.17132606501358158 + }, + "prompt_3": { + "bleu_score": 0.17099122972890873 + }, + "prompt_4": { + "bleu_score": 0.13212700538241728 + }, + "prompt_5": { + "bleu_score": 0.12931178340729102 + } }, "indommlu": { "prompt_1": -1, @@ -4116,179 +37511,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.12697856561896131 + }, + "prompt_2": { + "bleu_score": 0.19373624451910515 + }, + "prompt_3": { + "bleu_score": 0.1964270980872178 + }, + "prompt_4": { + "bleu_score": 0.17192589562871752 + }, + "prompt_5": { + "bleu_score": 0.16258965500852568 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.07611358656798692 + }, + "prompt_2": { + "bleu_score": 0.1505276369962426 + }, + "prompt_3": { + "bleu_score": 0.1472413472807031 + }, + "prompt_4": { + "bleu_score": 0.12943833789303555 + }, + "prompt_5": { + "bleu_score": 0.13666592724690585 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.06335447509619128 + }, + "prompt_2": { + "bleu_score": 0.11606202497694892 + }, + "prompt_3": { + "bleu_score": 0.1132366541993831 + }, + "prompt_4": { + "bleu_score": 0.09974517382489148 + }, + "prompt_5": { + "bleu_score": 0.15550715308466337 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.09680686879468663 + }, + "prompt_2": { + "bleu_score": 0.1670411439704822 + }, + "prompt_3": { + "bleu_score": 0.16381439862453073 + }, + "prompt_4": { + "bleu_score": 0.148689576132622 + }, + "prompt_5": { + "bleu_score": 0.14101463475586926 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4340723453908985 + }, + "prompt_2": { + "accuracy": 0.4620770128354726 + }, + "prompt_3": { + "accuracy": 0.4457409568261377 + }, + "prompt_4": { + "accuracy": 0.4200700116686114 + }, + "prompt_5": { + "accuracy": 0.42707117852975496 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.44797997854844473, + "category_acc": { + "high_school_european_history": 0.6097560975609756, + "business_ethics": 0.47474747474747475, + "clinical_knowledge": 0.44696969696969696, + "medical_genetics": 0.5151515151515151, + "high_school_us_history": 0.6354679802955665, + "high_school_physics": 0.31333333333333335, + "high_school_world_history": 0.6440677966101694, + "virology": 0.3575757575757576, + "high_school_microeconomics": 0.3881856540084388, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.3333333333333333, + "high_school_biology": 0.4692556634304207, + "abstract_algebra": 0.1919191919191919, + "professional_accounting": 0.3274021352313167, + "philosophy": 0.5032258064516129, + "professional_medicine": 0.3985239852398524, + "nutrition": 0.49508196721311476, + "global_facts": 0.43434343434343436, + "machine_learning": 0.32432432432432434, + "security_studies": 0.42213114754098363, + "public_relations": 0.45871559633027525, + "professional_psychology": 0.4582651391162029, + "prehistory": 0.5139318885448917, + "anatomy": 0.47761194029850745, + "human_sexuality": 0.4076923076923077, + "college_medicine": 0.3488372093023256, + "high_school_government_and_politics": 0.6145833333333334, + "college_chemistry": 0.2727272727272727, + "logical_fallacies": 0.5123456790123457, + "high_school_geography": 0.5786802030456852, + "elementary_mathematics": 0.3925729442970822, + "human_aging": 0.5225225225225225, + "college_mathematics": 0.23232323232323232, + "high_school_psychology": 0.6102941176470589, + "formal_logic": 0.336, + "high_school_statistics": 0.2837209302325581, + "international_law": 0.6416666666666667, + "high_school_mathematics": 0.241635687732342, + "high_school_computer_science": 0.46464646464646464, + "conceptual_physics": 0.3888888888888889, + "miscellaneous": 0.690537084398977, + "high_school_chemistry": 0.3316831683168317, + "marketing": 0.6952789699570815, + "professional_law": 0.365296803652968, + "management": 0.5490196078431373, + "college_physics": 0.27722772277227725, + "jurisprudence": 0.5233644859813084, + "world_religions": 0.7058823529411765, + "sociology": 0.61, + "us_foreign_policy": 0.6262626262626263, + "high_school_macroeconomics": 0.37275064267352187, + "computer_security": 0.5151515151515151, + "moral_scenarios": 0.24272930648769575, + "moral_disputes": 0.463768115942029, + "electrical_engineering": 0.375, + "astronomy": 0.44370860927152317, + "college_biology": 0.5034965034965035 + } + }, + "prompt_2": { + "accuracy": 0.448337504469074, + "category_acc": { + "high_school_european_history": 0.6219512195121951, + "business_ethics": 0.5050505050505051, + "clinical_knowledge": 0.4772727272727273, + "medical_genetics": 0.46464646464646464, + "high_school_us_history": 0.6305418719211823, + "high_school_physics": 0.29333333333333333, + "high_school_world_history": 0.6313559322033898, + "virology": 0.42424242424242425, + "high_school_microeconomics": 0.38396624472573837, + "econometrics": 0.30973451327433627, + "college_computer_science": 0.36363636363636365, + "high_school_biology": 0.4563106796116505, + "abstract_algebra": 0.30303030303030304, + "professional_accounting": 0.34519572953736655, + "philosophy": 0.5193548387096775, + "professional_medicine": 0.4132841328413284, + "nutrition": 0.47540983606557374, + "global_facts": 0.37373737373737376, + "machine_learning": 0.2972972972972973, + "security_studies": 0.4426229508196721, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.42225859247135844, + "prehistory": 0.544891640866873, + "anatomy": 0.48507462686567165, + "human_sexuality": 0.38461538461538464, + "college_medicine": 0.3488372093023256, + "high_school_government_and_politics": 0.6822916666666666, + "college_chemistry": 0.26262626262626265, + "logical_fallacies": 0.5679012345679012, + "high_school_geography": 0.5736040609137056, + "elementary_mathematics": 0.32891246684350134, + "human_aging": 0.5315315315315315, + "college_mathematics": 0.25252525252525254, + "high_school_psychology": 0.6176470588235294, + "formal_logic": 0.328, + "high_school_statistics": 0.29767441860465116, + "international_law": 0.6, + "high_school_mathematics": 0.26765799256505574, + "high_school_computer_science": 0.43434343434343436, + "conceptual_physics": 0.38461538461538464, + "miscellaneous": 0.6611253196930946, + "high_school_chemistry": 0.30198019801980197, + "marketing": 0.7167381974248928, + "professional_law": 0.3561643835616438, + "management": 0.5980392156862745, + "college_physics": 0.22772277227722773, + "jurisprudence": 0.5420560747663551, + "world_religions": 0.7, + "sociology": 0.6, + "us_foreign_policy": 0.6464646464646465, + "high_school_macroeconomics": 0.3676092544987147, + "computer_security": 0.5656565656565656, + "moral_scenarios": 0.24608501118568232, + "moral_disputes": 0.4811594202898551, + "electrical_engineering": 0.375, + "astronomy": 0.4503311258278146, + "college_biology": 0.4755244755244755 + } + }, + "prompt_3": { + "accuracy": 0.4337504469074008, + "category_acc": { + "high_school_european_history": 0.5853658536585366, + "business_ethics": 0.4444444444444444, + "clinical_knowledge": 0.45454545454545453, + "medical_genetics": 0.4444444444444444, + "high_school_us_history": 0.6502463054187192, + "high_school_physics": 0.26666666666666666, + "high_school_world_history": 0.6271186440677966, + "virology": 0.41818181818181815, + "high_school_microeconomics": 0.3881856540084388, + "econometrics": 0.3008849557522124, + "college_computer_science": 0.3838383838383838, + "high_school_biology": 0.44336569579288027, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.3274021352313167, + "philosophy": 0.5258064516129032, + "professional_medicine": 0.42066420664206644, + "nutrition": 0.46885245901639344, + "global_facts": 0.4444444444444444, + "machine_learning": 0.2972972972972973, + "security_studies": 0.4344262295081967, + "public_relations": 0.45871559633027525, + "professional_psychology": 0.40425531914893614, + "prehistory": 0.5046439628482973, + "anatomy": 0.43283582089552236, + "human_sexuality": 0.36923076923076925, + "college_medicine": 0.3546511627906977, + "high_school_government_and_politics": 0.625, + "college_chemistry": 0.2727272727272727, + "logical_fallacies": 0.4876543209876543, + "high_school_geography": 0.5583756345177665, + "elementary_mathematics": 0.34748010610079577, + "human_aging": 0.536036036036036, + "college_mathematics": 0.25252525252525254, + "high_school_psychology": 0.5514705882352942, + "formal_logic": 0.312, + "high_school_statistics": 0.2651162790697674, + "international_law": 0.6166666666666667, + "high_school_mathematics": 0.20817843866171004, + "high_school_computer_science": 0.40404040404040403, + "conceptual_physics": 0.36752136752136755, + "miscellaneous": 0.6713554987212276, + "high_school_chemistry": 0.3118811881188119, + "marketing": 0.6437768240343348, + "professional_law": 0.3542074363992172, + "management": 0.5784313725490197, + "college_physics": 0.2079207920792079, + "jurisprudence": 0.5514018691588785, + "world_religions": 0.7, + "sociology": 0.56, + "us_foreign_policy": 0.6161616161616161, + "high_school_macroeconomics": 0.36246786632390743, + "computer_security": 0.5353535353535354, + "moral_scenarios": 0.25727069351230425, + "moral_disputes": 0.4463768115942029, + "electrical_engineering": 0.3402777777777778, + "astronomy": 0.3841059602649007, + "college_biology": 0.43356643356643354 + } + }, + "prompt_4": { + "accuracy": 0.42402574186628533, + "category_acc": { + "high_school_european_history": 0.5914634146341463, + "business_ethics": 0.46464646464646464, + "clinical_knowledge": 0.38636363636363635, + "medical_genetics": 0.43434343434343436, + "high_school_us_history": 0.6354679802955665, + "high_school_physics": 0.3, + "high_school_world_history": 0.6398305084745762, + "virology": 0.3575757575757576, + "high_school_microeconomics": 0.350210970464135, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.42424242424242425, + "high_school_biology": 0.4692556634304207, + "abstract_algebra": 0.25252525252525254, + "professional_accounting": 0.3096085409252669, + "philosophy": 0.4645161290322581, + "professional_medicine": 0.3874538745387454, + "nutrition": 0.4262295081967213, + "global_facts": 0.4444444444444444, + "machine_learning": 0.2882882882882883, + "security_studies": 0.4262295081967213, + "public_relations": 0.5045871559633027, + "professional_psychology": 0.3944353518821604, + "prehistory": 0.4613003095975232, + "anatomy": 0.44029850746268656, + "human_sexuality": 0.3769230769230769, + "college_medicine": 0.37790697674418605, + "high_school_government_and_politics": 0.6354166666666666, + "college_chemistry": 0.2828282828282828, + "logical_fallacies": 0.4382716049382716, + "high_school_geography": 0.5177664974619289, + "elementary_mathematics": 0.41379310344827586, + "human_aging": 0.45495495495495497, + "college_mathematics": 0.23232323232323232, + "high_school_psychology": 0.5477941176470589, + "formal_logic": 0.344, + "high_school_statistics": 0.29767441860465116, + "international_law": 0.5583333333333333, + "high_school_mathematics": 0.27137546468401486, + "high_school_computer_science": 0.4444444444444444, + "conceptual_physics": 0.3504273504273504, + "miscellaneous": 0.6432225063938619, + "high_school_chemistry": 0.3118811881188119, + "marketing": 0.6781115879828327, + "professional_law": 0.3378995433789954, + "management": 0.5392156862745098, + "college_physics": 0.3069306930693069, + "jurisprudence": 0.4953271028037383, + "world_religions": 0.6941176470588235, + "sociology": 0.545, + "us_foreign_policy": 0.6060606060606061, + "high_school_macroeconomics": 0.32390745501285345, + "computer_security": 0.48484848484848486, + "moral_scenarios": 0.25279642058165547, + "moral_disputes": 0.45507246376811594, + "electrical_engineering": 0.3819444444444444, + "astronomy": 0.423841059602649, + "college_biology": 0.3916083916083916 + } + }, + "prompt_5": { + "accuracy": 0.424168752234537, + "category_acc": { + "high_school_european_history": 0.5365853658536586, + "business_ethics": 0.40404040404040403, + "clinical_knowledge": 0.4128787878787879, + "medical_genetics": 0.45454545454545453, + "high_school_us_history": 0.6059113300492611, + "high_school_physics": 0.24666666666666667, + "high_school_world_history": 0.597457627118644, + "virology": 0.4121212121212121, + "high_school_microeconomics": 0.3333333333333333, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.36363636363636365, + "high_school_biology": 0.5048543689320388, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.2918149466192171, + "philosophy": 0.5, + "professional_medicine": 0.42066420664206644, + "nutrition": 0.4524590163934426, + "global_facts": 0.5151515151515151, + "machine_learning": 0.3153153153153153, + "security_studies": 0.4139344262295082, + "public_relations": 0.4036697247706422, + "professional_psychology": 0.4353518821603928, + "prehistory": 0.48297213622291024, + "anatomy": 0.43283582089552236, + "human_sexuality": 0.4153846153846154, + "college_medicine": 0.313953488372093, + "high_school_government_and_politics": 0.59375, + "college_chemistry": 0.25252525252525254, + "logical_fallacies": 0.5123456790123457, + "high_school_geography": 0.4873096446700508, + "elementary_mathematics": 0.3740053050397878, + "human_aging": 0.4369369369369369, + "college_mathematics": 0.20202020202020202, + "high_school_psychology": 0.5808823529411765, + "formal_logic": 0.272, + "high_school_statistics": 0.26976744186046514, + "international_law": 0.5916666666666667, + "high_school_mathematics": 0.2342007434944238, + "high_school_computer_science": 0.45454545454545453, + "conceptual_physics": 0.358974358974359, + "miscellaneous": 0.6419437340153452, + "high_school_chemistry": 0.297029702970297, + "marketing": 0.6094420600858369, + "professional_law": 0.3476842791911285, + "management": 0.5490196078431373, + "college_physics": 0.31683168316831684, + "jurisprudence": 0.4485981308411215, + "world_religions": 0.6705882352941176, + "sociology": 0.51, + "us_foreign_policy": 0.5252525252525253, + "high_school_macroeconomics": 0.38046272493573263, + "computer_security": 0.45454545454545453, + "moral_scenarios": 0.2516778523489933, + "moral_disputes": 0.4289855072463768, + "electrical_engineering": 0.4097222222222222, + "astronomy": 0.423841059602649, + "college_biology": 0.48951048951048953 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.33580980683506684 + }, + "prompt_2": { + "accuracy": 0.34546805349182763 + }, + "prompt_3": { + "accuracy": 0.3588410104011887 + }, + "prompt_4": { + "accuracy": 0.3209509658246657 + }, + "prompt_5": { + "accuracy": 0.3298662704309064 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.33374844333748444, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.3333333333333333, + "college_physics": 0.4166666666666667, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.4583333333333333, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.16666666666666666, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.2692307692307692, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.36666666666666664, + "business_administration": 0.39473684210526316, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.38235294117647056, + "teacher_qualification": 0.4897959183673469, + "high_school_politics": 0.375, + "high_school_geography": 0.25, + "middle_school_politics": 0.4230769230769231, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.3333333333333333, + "logic": 0.2222222222222222, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.25, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.25, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.52, + "middle_school_history": 0.4444444444444444, + "civil_servant": 0.34615384615384615, + "sports_science": 0.2916666666666667, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.29411764705882354, + "accountant": 0.35185185185185186, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.2962962962962963, + "physician": 0.35185185185185186 + } + }, + "prompt_2": { + "accuracy": 0.32627646326276466, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.25, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.3333333333333333, + "college_physics": 0.375, + "college_chemistry": 0.1724137931034483, + "advanced_mathematics": 0.4166666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.35714285714285715, + "metrology_engineer": 0.4827586206896552, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.375, + "high_school_chemistry": 0.4166666666666667, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.25, + "college_economics": 0.4166666666666667, + "business_administration": 0.3684210526315789, + "marxism": 0.5, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.38235294117647056, + "teacher_qualification": 0.42857142857142855, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.25, + "middle_school_politics": 0.46153846153846156, + "middle_school_geography": 0.17647058823529413, + "modern_chinese_history": 0.2857142857142857, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.2962962962962963, + "law": 0.13793103448275862, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.25, + "high_school_history": 0.48, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.38461538461538464, + "sports_science": 0.3333333333333333, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.2777777777777778, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.3148148148148148, + "physician": 0.3148148148148148 + } + }, + "prompt_3": { + "accuracy": 0.33623910336239105, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.30952380952380953, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.2692307692307692, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.45, + "business_administration": 0.42105263157894735, + "marxism": 0.5, + "mao_zedong_thought": 0.4482758620689655, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.4897959183673469, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.4230769230769231, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.4583333333333333, + "logic": 0.25925925925925924, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.21428571428571427, + "art_studies": 0.4473684210526316, + "professional_tour_guide": 0.4411764705882353, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.48, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.34615384615384615, + "sports_science": 0.3333333333333333, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.25, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.4117647058823529, + "accountant": 0.3148148148148148, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.2962962962962963, + "physician": 0.25925925925925924 + } + }, + "prompt_4": { + "accuracy": 0.3275217932752179, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.38095238095238093, + "college_physics": 0.375, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.42857142857142855, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.25, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.08333333333333333, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.2692307692307692, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.4, + "business_administration": 0.3684210526315789, + "marxism": 0.5416666666666666, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.3235294117647059, + "teacher_qualification": 0.46938775510204084, + "high_school_politics": 0.375, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.4230769230769231, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.375, + "logic": 0.3333333333333333, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.4411764705882353, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.4166666666666667, + "high_school_history": 0.4, + "middle_school_history": 0.4074074074074074, + "civil_servant": 0.3269230769230769, + "sports_science": 0.2916666666666667, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.25, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.3333333333333333, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.2777777777777778, + "physician": 0.2222222222222222 + } + }, + "prompt_5": { + "accuracy": 0.32814445828144456, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.375, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.35714285714285715, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.1724137931034483, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.4827586206896552, + "high_school_mathematics": 0.08695652173913043, + "high_school_physics": 0.25, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.08333333333333333, + "middle_school_mathematics": 0.08333333333333333, + "middle_school_biology": 0.23076923076923078, + "middle_school_physics": 0.4583333333333333, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.5, + "business_administration": 0.34210526315789475, + "marxism": 0.5, + "mao_zedong_thought": 0.4482758620689655, + "education_science": 0.47058823529411764, + "teacher_qualification": 0.4489795918367347, + "high_school_politics": 0.375, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.46153846153846156, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.375, + "logic": 0.3333333333333333, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.25, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.5294117647058824, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.375, + "high_school_history": 0.52, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.38461538461538464, + "sports_science": 0.375, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.29411764705882354, + "accountant": 0.2222222222222222, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.2777777777777778, + "physician": 0.3148148148148148 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.27956989247311825 + }, + "prompt_2": { + "accuracy": 0.3118279569892473 + }, + "prompt_3": { + "accuracy": 0.26881720430107525 + }, + "prompt_4": { + "accuracy": 0.36200716845878134 + }, + "prompt_5": { + "accuracy": 0.3010752688172043 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3159212571231221, + "category_acc": { + "agronomy": 0.25443786982248523, + "anatomy": 0.28378378378378377, + "ancient_chinese": 0.2682926829268293, + "arts": 0.325, + "astronomy": 0.37575757575757573, + "business_ethics": 0.31100478468899523, + "chinese_civil_service_exam": 0.31875, + "chinese_driving_rule": 0.4198473282442748, + "chinese_food_culture": 0.2647058823529412, + "chinese_foreign_policy": 0.3644859813084112, + "chinese_history": 0.38080495356037153, + "chinese_literature": 0.30392156862745096, + "chinese_teacher_qualification": 0.31843575418994413, + "clinical_knowledge": 0.2742616033755274, + "college_actuarial_science": 0.19811320754716982, + "college_education": 0.32710280373831774, + "college_engineering_hydrology": 0.3867924528301887, + "college_law": 0.17592592592592593, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.32075471698113206, + "college_medicine": 0.315018315018315, + "computer_science": 0.30392156862745096, + "computer_security": 0.34502923976608185, + "conceptual_physics": 0.3197278911564626, + "construction_project_management": 0.2589928057553957, + "economics": 0.3584905660377358, + "education": 0.3558282208588957, + "electrical_engineering": 0.3372093023255814, + "elementary_chinese": 0.2619047619047619, + "elementary_commonsense": 0.3181818181818182, + "elementary_information_and_technology": 0.41596638655462187, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.2962962962962963, + "food_science": 0.32867132867132864, + "genetics": 0.24431818181818182, + "global_facts": 0.3422818791946309, + "high_school_biology": 0.23668639053254437, + "high_school_chemistry": 0.2803030303030303, + "high_school_geography": 0.3728813559322034, + "high_school_mathematics": 0.2073170731707317, + "high_school_physics": 0.3090909090909091, + "high_school_politics": 0.3006993006993007, + "human_sexuality": 0.30158730158730157, + "international_law": 0.32972972972972975, + "journalism": 0.313953488372093, + "jurisprudence": 0.35523114355231145, + "legal_and_moral_basis": 0.5373831775700935, + "logical": 0.3089430894308943, + "machine_learning": 0.21311475409836064, + "management": 0.30952380952380953, + "marketing": 0.38333333333333336, + "marxist_theory": 0.37566137566137564, + "modern_chinese": 0.16379310344827586, + "nutrition": 0.2896551724137931, + "philosophy": 0.4, + "professional_accounting": 0.3028571428571429, + "professional_law": 0.3033175355450237, + "professional_medicine": 0.2473404255319149, + "professional_psychology": 0.3232758620689655, + "public_relations": 0.3448275862068966, + "security_study": 0.31851851851851853, + "sociology": 0.35398230088495575, + "sports_science": 0.3212121212121212, + "traditional_chinese_medicine": 0.23783783783783785, + "virology": 0.35502958579881655, + "world_history": 0.33540372670807456, + "world_religions": 0.3375 + } + }, + "prompt_2": { + "accuracy": 0.3194612329476774, + "category_acc": { + "agronomy": 0.25443786982248523, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.2804878048780488, + "arts": 0.33125, + "astronomy": 0.4121212121212121, + "business_ethics": 0.3492822966507177, + "chinese_civil_service_exam": 0.3375, + "chinese_driving_rule": 0.4122137404580153, + "chinese_food_culture": 0.3382352941176471, + "chinese_foreign_policy": 0.32710280373831774, + "chinese_history": 0.3746130030959752, + "chinese_literature": 0.3088235294117647, + "chinese_teacher_qualification": 0.3407821229050279, + "clinical_knowledge": 0.2911392405063291, + "college_actuarial_science": 0.22641509433962265, + "college_education": 0.3364485981308411, + "college_engineering_hydrology": 0.33962264150943394, + "college_law": 0.26851851851851855, + "college_mathematics": 0.3142857142857143, + "college_medical_statistics": 0.36792452830188677, + "college_medicine": 0.326007326007326, + "computer_science": 0.3137254901960784, + "computer_security": 0.27485380116959063, + "conceptual_physics": 0.3333333333333333, + "construction_project_management": 0.3237410071942446, + "economics": 0.37735849056603776, + "education": 0.36809815950920244, + "electrical_engineering": 0.313953488372093, + "elementary_chinese": 0.2619047619047619, + "elementary_commonsense": 0.3181818181818182, + "elementary_information_and_technology": 0.4327731092436975, + "elementary_mathematics": 0.23478260869565218, + "ethnology": 0.3851851851851852, + "food_science": 0.34265734265734266, + "genetics": 0.24431818181818182, + "global_facts": 0.2953020134228188, + "high_school_biology": 0.2485207100591716, + "high_school_chemistry": 0.25757575757575757, + "high_school_geography": 0.3135593220338983, + "high_school_mathematics": 0.22560975609756098, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.34265734265734266, + "human_sexuality": 0.2857142857142857, + "international_law": 0.2810810810810811, + "journalism": 0.3313953488372093, + "jurisprudence": 0.35036496350364965, + "legal_and_moral_basis": 0.5654205607476636, + "logical": 0.2601626016260163, + "machine_learning": 0.30327868852459017, + "management": 0.2857142857142857, + "marketing": 0.4166666666666667, + "marxist_theory": 0.4074074074074074, + "modern_chinese": 0.1724137931034483, + "nutrition": 0.21379310344827587, + "philosophy": 0.3619047619047619, + "professional_accounting": 0.32, + "professional_law": 0.3080568720379147, + "professional_medicine": 0.26063829787234044, + "professional_psychology": 0.3232758620689655, + "public_relations": 0.3275862068965517, + "security_study": 0.3111111111111111, + "sociology": 0.3230088495575221, + "sports_science": 0.3151515151515151, + "traditional_chinese_medicine": 0.2756756756756757, + "virology": 0.3609467455621302, + "world_history": 0.32919254658385094, + "world_religions": 0.30625 + } + }, + "prompt_3": { + "accuracy": 0.3279226385771024, + "category_acc": { + "agronomy": 0.2781065088757396, + "anatomy": 0.25, + "ancient_chinese": 0.2682926829268293, + "arts": 0.33125, + "astronomy": 0.3090909090909091, + "business_ethics": 0.33014354066985646, + "chinese_civil_service_exam": 0.33125, + "chinese_driving_rule": 0.44274809160305345, + "chinese_food_culture": 0.3235294117647059, + "chinese_foreign_policy": 0.3364485981308411, + "chinese_history": 0.42105263157894735, + "chinese_literature": 0.3284313725490196, + "chinese_teacher_qualification": 0.3743016759776536, + "clinical_knowledge": 0.3206751054852321, + "college_actuarial_science": 0.22641509433962265, + "college_education": 0.34579439252336447, + "college_engineering_hydrology": 0.33962264150943394, + "college_law": 0.24074074074074073, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.33962264150943394, + "college_medicine": 0.30036630036630035, + "computer_science": 0.29901960784313725, + "computer_security": 0.3157894736842105, + "conceptual_physics": 0.3877551020408163, + "construction_project_management": 0.2949640287769784, + "economics": 0.3270440251572327, + "education": 0.3619631901840491, + "electrical_engineering": 0.3313953488372093, + "elementary_chinese": 0.2619047619047619, + "elementary_commonsense": 0.29292929292929293, + "elementary_information_and_technology": 0.4411764705882353, + "elementary_mathematics": 0.2826086956521739, + "ethnology": 0.3333333333333333, + "food_science": 0.3146853146853147, + "genetics": 0.24431818181818182, + "global_facts": 0.3825503355704698, + "high_school_biology": 0.21893491124260356, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.3305084745762712, + "high_school_mathematics": 0.21951219512195122, + "high_school_physics": 0.35454545454545455, + "high_school_politics": 0.3356643356643357, + "human_sexuality": 0.2777777777777778, + "international_law": 0.31351351351351353, + "journalism": 0.32558139534883723, + "jurisprudence": 0.36739659367396593, + "legal_and_moral_basis": 0.6074766355140186, + "logical": 0.3008130081300813, + "machine_learning": 0.26229508196721313, + "management": 0.3476190476190476, + "marketing": 0.4388888888888889, + "marxist_theory": 0.43915343915343913, + "modern_chinese": 0.20689655172413793, + "nutrition": 0.3103448275862069, + "philosophy": 0.41904761904761906, + "professional_accounting": 0.3142857142857143, + "professional_law": 0.3033175355450237, + "professional_medicine": 0.21808510638297873, + "professional_psychology": 0.3620689655172414, + "public_relations": 0.3735632183908046, + "security_study": 0.31851851851851853, + "sociology": 0.3407079646017699, + "sports_science": 0.3393939393939394, + "traditional_chinese_medicine": 0.2864864864864865, + "virology": 0.33727810650887574, + "world_history": 0.32919254658385094, + "world_religions": 0.3625 + } + }, + "prompt_4": { + "accuracy": 0.307977896736315, + "category_acc": { + "agronomy": 0.21893491124260356, + "anatomy": 0.23648648648648649, + "ancient_chinese": 0.21951219512195122, + "arts": 0.375, + "astronomy": 0.3151515151515151, + "business_ethics": 0.3157894736842105, + "chinese_civil_service_exam": 0.29375, + "chinese_driving_rule": 0.3893129770992366, + "chinese_food_culture": 0.3235294117647059, + "chinese_foreign_policy": 0.308411214953271, + "chinese_history": 0.37770897832817335, + "chinese_literature": 0.3088235294117647, + "chinese_teacher_qualification": 0.3463687150837989, + "clinical_knowledge": 0.27848101265822783, + "college_actuarial_science": 0.16037735849056603, + "college_education": 0.35514018691588783, + "college_engineering_hydrology": 0.33962264150943394, + "college_law": 0.2037037037037037, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.32075471698113206, + "college_medicine": 0.28205128205128205, + "computer_science": 0.29411764705882354, + "computer_security": 0.32748538011695905, + "conceptual_physics": 0.3401360544217687, + "construction_project_management": 0.30935251798561153, + "economics": 0.3333333333333333, + "education": 0.294478527607362, + "electrical_engineering": 0.313953488372093, + "elementary_chinese": 0.26587301587301587, + "elementary_commonsense": 0.3181818181818182, + "elementary_information_and_technology": 0.39915966386554624, + "elementary_mathematics": 0.2608695652173913, + "ethnology": 0.3037037037037037, + "food_science": 0.3006993006993007, + "genetics": 0.2784090909090909, + "global_facts": 0.35570469798657717, + "high_school_biology": 0.23076923076923078, + "high_school_chemistry": 0.25, + "high_school_geography": 0.3305084745762712, + "high_school_mathematics": 0.21951219512195122, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.34265734265734266, + "human_sexuality": 0.30952380952380953, + "international_law": 0.2918918918918919, + "journalism": 0.32558139534883723, + "jurisprudence": 0.340632603406326, + "legal_and_moral_basis": 0.5373831775700935, + "logical": 0.2926829268292683, + "machine_learning": 0.2540983606557377, + "management": 0.28095238095238095, + "marketing": 0.3611111111111111, + "marxist_theory": 0.31216931216931215, + "modern_chinese": 0.21551724137931033, + "nutrition": 0.2896551724137931, + "philosophy": 0.3904761904761905, + "professional_accounting": 0.22857142857142856, + "professional_law": 0.3175355450236967, + "professional_medicine": 0.2632978723404255, + "professional_psychology": 0.35344827586206895, + "public_relations": 0.3390804597701149, + "security_study": 0.31851851851851853, + "sociology": 0.2831858407079646, + "sports_science": 0.2909090909090909, + "traditional_chinese_medicine": 0.2648648648648649, + "virology": 0.35502958579881655, + "world_history": 0.32298136645962733, + "world_religions": 0.375 + } + }, + "prompt_5": { + "accuracy": 0.31911586945259884, + "category_acc": { + "agronomy": 0.28402366863905326, + "anatomy": 0.20945945945945946, + "ancient_chinese": 0.2073170731707317, + "arts": 0.3375, + "astronomy": 0.3333333333333333, + "business_ethics": 0.3444976076555024, + "chinese_civil_service_exam": 0.25, + "chinese_driving_rule": 0.3816793893129771, + "chinese_food_culture": 0.3014705882352941, + "chinese_foreign_policy": 0.308411214953271, + "chinese_history": 0.38390092879256965, + "chinese_literature": 0.30392156862745096, + "chinese_teacher_qualification": 0.3687150837988827, + "clinical_knowledge": 0.2869198312236287, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.308411214953271, + "college_engineering_hydrology": 0.3113207547169811, + "college_law": 0.24074074074074073, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.3584905660377358, + "college_medicine": 0.3333333333333333, + "computer_science": 0.28921568627450983, + "computer_security": 0.3391812865497076, + "conceptual_physics": 0.32653061224489793, + "construction_project_management": 0.3237410071942446, + "economics": 0.3270440251572327, + "education": 0.3558282208588957, + "electrical_engineering": 0.38372093023255816, + "elementary_chinese": 0.25, + "elementary_commonsense": 0.36363636363636365, + "elementary_information_and_technology": 0.3865546218487395, + "elementary_mathematics": 0.22608695652173913, + "ethnology": 0.31851851851851853, + "food_science": 0.40559440559440557, + "genetics": 0.25, + "global_facts": 0.28859060402684567, + "high_school_biology": 0.24260355029585798, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.3389830508474576, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.3, + "high_school_politics": 0.36363636363636365, + "human_sexuality": 0.30158730158730157, + "international_law": 0.2918918918918919, + "journalism": 0.3313953488372093, + "jurisprudence": 0.35036496350364965, + "legal_and_moral_basis": 0.6214953271028038, + "logical": 0.2682926829268293, + "machine_learning": 0.2786885245901639, + "management": 0.3047619047619048, + "marketing": 0.40555555555555556, + "marxist_theory": 0.4074074074074074, + "modern_chinese": 0.20689655172413793, + "nutrition": 0.32413793103448274, + "philosophy": 0.3904761904761905, + "professional_accounting": 0.33714285714285713, + "professional_law": 0.2985781990521327, + "professional_medicine": 0.22340425531914893, + "professional_psychology": 0.3146551724137931, + "public_relations": 0.3505747126436782, + "security_study": 0.28888888888888886, + "sociology": 0.36283185840707965, + "sports_science": 0.2909090909090909, + "traditional_chinese_medicine": 0.2648648648648649, + "virology": 0.34911242603550297, + "world_history": 0.34782608695652173, + "world_religions": 0.36875 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.30303030303030304 + }, + "prompt_2": { + "accuracy": 0.21212121212121213 + }, + "prompt_3": { + "accuracy": 0.36363636363636365 + }, + "prompt_4": { + "accuracy": 0.3333333333333333 + }, + "prompt_5": { + "accuracy": 0.18181818181818182 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.33636363636363636 + }, + "prompt_2": { + "accuracy": 0.3522727272727273 + }, + "prompt_3": { + "accuracy": 0.36363636363636365 + }, + "prompt_4": { + "accuracy": 0.3977272727272727 + }, + "prompt_5": { + "accuracy": 0.3568181818181818 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.34067796610169493 + }, + "prompt_2": { + "accuracy": 0.3403389830508475 + }, + "prompt_3": { + "accuracy": 0.3376271186440678 + }, + "prompt_4": { + "accuracy": 0.3586440677966102 + }, + "prompt_5": { + "accuracy": 0.363728813559322 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6222887060583395 + }, + "prompt_2": { + "accuracy": 0.6099476439790575 + }, + "prompt_3": { + "accuracy": 0.6118175018698578 + }, + "prompt_4": { + "accuracy": 0.6155572176514585 + }, + "prompt_5": { + "accuracy": 0.6084517576664173 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7427731504164625 + }, + "prompt_2": { + "accuracy": 0.7486526212640863 + }, + "prompt_3": { + "accuracy": 0.7506124448799608 + }, + "prompt_4": { + "accuracy": 0.7305242528172464 + }, + "prompt_5": { + "accuracy": 0.7662910338069574 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2916103150813641, + "rouge2": 0.10525979858778076, + "rougeL": 0.21829240386382892, + "avg_rouge": 0.20505417251099126 + }, + "prompt_2": { + "rouge1": 0.3196708229238851, + "rouge2": 0.12001738828619245, + "rougeL": 0.23976427412056603, + "avg_rouge": 0.22648416177688122 + }, + "prompt_3": { + "rouge1": 0.3189803205740337, + "rouge2": 0.1178512379749291, + "rougeL": 0.24077847034893995, + "avg_rouge": 0.22587000963263426 + }, + "prompt_4": { + "rouge1": 0.3188029985347647, + "rouge2": 0.11961123119155025, + "rougeL": 0.2407341727607601, + "avg_rouge": 0.22638280082902504 + }, + "prompt_5": { + "rouge1": 0.3259632337286004, + "rouge2": 0.1156636436621222, + "rougeL": 0.24355755950429206, + "avg_rouge": 0.22839481229833822 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2144204431797328, + "rouge2": 0.05441713180207946, + "rougeL": 0.15257149078102575, + "avg_rouge": 0.14046968858761266 + }, + "prompt_2": { + "rouge1": 0.22126376283018281, + "rouge2": 0.05519386728126753, + "rougeL": 0.1573029997600269, + "avg_rouge": 0.14458687662382574 + }, + "prompt_3": { + "rouge1": 0.21960987693146633, + "rouge2": 0.054773669944745355, + "rougeL": 0.1554536415313014, + "avg_rouge": 0.14327906280250435 + }, + "prompt_4": { + "rouge1": 0.21323836243498684, + "rouge2": 0.05457124324104029, + "rougeL": 0.15066676607418653, + "avg_rouge": 0.1394921239167379 + }, + "prompt_5": { + "rouge1": 0.22277568686099017, + "rouge2": 0.05661362557889607, + "rougeL": 0.16029410846825173, + "avg_rouge": 0.14656114030271264 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6525229357798165 + }, + "prompt_2": { + "accuracy": 0.7041284403669725 + }, + "prompt_3": { + "accuracy": 0.6811926605504587 + }, + "prompt_4": { + "accuracy": 0.5745412844036697 + }, + "prompt_5": { + "accuracy": 0.768348623853211 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7545541706615532 + }, + "prompt_2": { + "accuracy": 0.7066155321188878 + }, + "prompt_3": { + "accuracy": 0.713326941514861 + }, + "prompt_4": { + "accuracy": 0.7545541706615532 + }, + "prompt_5": { + "accuracy": 0.7315436241610739 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5115 + }, + "prompt_2": { + "accuracy": 0.588 + }, + "prompt_3": { + "accuracy": 0.486 + }, + "prompt_4": { + "accuracy": 0.532 + }, + "prompt_5": { + "accuracy": 0.596 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4245 + }, + "prompt_2": { + "accuracy": 0.3835 + }, + "prompt_3": { + "accuracy": 0.4055 + }, + "prompt_4": { + "accuracy": 0.387 + }, + "prompt_5": { + "accuracy": 0.3715 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6035 + }, + "prompt_2": { + "accuracy": 0.552 + }, + "prompt_3": { + "accuracy": 0.574 + }, + "prompt_4": { + "accuracy": 0.5275 + }, + "prompt_5": { + "accuracy": 0.6 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4788732394366197 + }, + "prompt_2": { + "accuracy": 0.43661971830985913 + }, + "prompt_3": { + "accuracy": 0.4084507042253521 + }, + "prompt_4": { + "accuracy": 0.43661971830985913 + }, + "prompt_5": { + "accuracy": 0.4647887323943662 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5306859205776173 + }, + "prompt_2": { + "accuracy": 0.5379061371841155 + }, + "prompt_3": { + "accuracy": 0.555956678700361 + }, + "prompt_4": { + "accuracy": 0.5306859205776173 + }, + "prompt_5": { + "accuracy": 0.51985559566787 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5049019607843137 + }, + "prompt_2": { + "accuracy": 0.4803921568627451 + }, + "prompt_3": { + "accuracy": 0.46078431372549017 + }, + "prompt_4": { + "accuracy": 0.6127450980392157 + }, + "prompt_5": { + "accuracy": 0.4877450980392157 + } } }, "five_shot": { @@ -4398,53 +38983,1733 @@ "model_link": "https://huggingface.co/meta-llama/Llama-2-13b-hf", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.4504761904761905, + "language_acc": { + "Malay": 0.4066666666666667, + "English": 0.52, + "Vietnamese": 0.44, + "Spanish": 0.46, + "Indonesian": 0.5066666666666667, + "Filipino": 0.4266666666666667, + "Chinese": 0.3933333333333333 + }, + "consistency_score_2": 0.5488888888888889, + "consistency_score_3": 0.3754285714285714, + "consistency_score_4": 0.28209523809523823, + "consistency_score_5": 0.22412698412698415, + "consistency_score_6": 0.18571428571428572, + "consistency_score_7": 0.16, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.49333333333333335, + "Malay,Vietnamese": 0.5933333333333334, + "Malay,Spanish": 0.47333333333333333, + "Malay,Indonesian": 0.6933333333333334, + "Malay,Filipino": 0.6066666666666667, + "Malay,Chinese": 0.49333333333333335, + "English,Vietnamese": 0.56, + "English,Spanish": 0.5733333333333334, + "English,Indonesian": 0.5666666666666667, + "English,Filipino": 0.5333333333333333, + "English,Chinese": 0.47333333333333333, + "Vietnamese,Spanish": 0.5533333333333333, + "Vietnamese,Indonesian": 0.6066666666666667, + "Vietnamese,Filipino": 0.5333333333333333, + "Vietnamese,Chinese": 0.4866666666666667, + "Spanish,Indonesian": 0.5733333333333334, + "Spanish,Filipino": 0.4866666666666667, + "Spanish,Chinese": 0.5533333333333333, + "Indonesian,Filipino": 0.62, + "Indonesian,Chinese": 0.5266666666666666, + "Filipino,Chinese": 0.5266666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.37333333333333335, + "Malay,English,Spanish": 0.32666666666666666, + "Malay,English,Indonesian": 0.41333333333333333, + "Malay,English,Filipino": 0.36, + "Malay,English,Chinese": 0.3, + "Malay,Vietnamese,Spanish": 0.36, + "Malay,Vietnamese,Indonesian": 0.4866666666666667, + "Malay,Vietnamese,Filipino": 0.41333333333333333, + "Malay,Vietnamese,Chinese": 0.3333333333333333, + "Malay,Spanish,Indonesian": 0.4, + "Malay,Spanish,Filipino": 0.3466666666666667, + "Malay,Spanish,Chinese": 0.32, + "Malay,Indonesian,Filipino": 0.5, + "Malay,Indonesian,Chinese": 0.38666666666666666, + "Malay,Filipino,Chinese": 0.36666666666666664, + "English,Vietnamese,Spanish": 0.38, + "English,Vietnamese,Indonesian": 0.4266666666666667, + "English,Vietnamese,Filipino": 0.36666666666666664, + "English,Vietnamese,Chinese": 0.3333333333333333, + "English,Spanish,Indonesian": 0.4066666666666667, + "English,Spanish,Filipino": 0.35333333333333333, + "English,Spanish,Chinese": 0.36, + "English,Indonesian,Filipino": 0.4066666666666667, + "English,Indonesian,Chinese": 0.34, + "English,Filipino,Chinese": 0.34, + "Vietnamese,Spanish,Indonesian": 0.4066666666666667, + "Vietnamese,Spanish,Filipino": 0.34, + "Vietnamese,Spanish,Chinese": 0.36, + "Vietnamese,Indonesian,Filipino": 0.43333333333333335, + "Vietnamese,Indonesian,Chinese": 0.3466666666666667, + "Vietnamese,Filipino,Chinese": 0.3333333333333333, + "Spanish,Indonesian,Filipino": 0.3933333333333333, + "Spanish,Indonesian,Chinese": 0.3933333333333333, + "Spanish,Filipino,Chinese": 0.35333333333333333, + "Indonesian,Filipino,Chinese": 0.38 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.26, + "Malay,English,Vietnamese,Indonesian": 0.32666666666666666, + "Malay,English,Vietnamese,Filipino": 0.2733333333333333, + "Malay,English,Vietnamese,Chinese": 0.22, + "Malay,English,Spanish,Indonesian": 0.2866666666666667, + "Malay,English,Spanish,Filipino": 0.25333333333333335, + "Malay,English,Spanish,Chinese": 0.23333333333333334, + "Malay,English,Indonesian,Filipino": 0.32666666666666666, + "Malay,English,Indonesian,Chinese": 0.26666666666666666, + "Malay,English,Filipino,Chinese": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Indonesian": 0.31333333333333335, + "Malay,Vietnamese,Spanish,Filipino": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Chinese": 0.25333333333333335, + "Malay,Vietnamese,Indonesian,Filipino": 0.36, + "Malay,Vietnamese,Indonesian,Chinese": 0.28, + "Malay,Vietnamese,Filipino,Chinese": 0.26666666666666666, + "Malay,Spanish,Indonesian,Filipino": 0.32, + "Malay,Spanish,Indonesian,Chinese": 0.2866666666666667, + "Malay,Spanish,Filipino,Chinese": 0.26, + "Malay,Indonesian,Filipino,Chinese": 0.30666666666666664, + "English,Vietnamese,Spanish,Indonesian": 0.32, + "English,Vietnamese,Spanish,Filipino": 0.2733333333333333, + "English,Vietnamese,Spanish,Chinese": 0.26666666666666666, + "English,Vietnamese,Indonesian,Filipino": 0.32, + "English,Vietnamese,Indonesian,Chinese": 0.26666666666666666, + "English,Vietnamese,Filipino,Chinese": 0.24666666666666667, + "English,Spanish,Indonesian,Filipino": 0.31333333333333335, + "English,Spanish,Indonesian,Chinese": 0.28, + "English,Spanish,Filipino,Chinese": 0.26666666666666666, + "English,Indonesian,Filipino,Chinese": 0.2866666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.3, + "Vietnamese,Spanish,Indonesian,Chinese": 0.2866666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.26666666666666666, + "Vietnamese,Indonesian,Filipino,Chinese": 0.2733333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.29333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.23333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino": 0.20666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.18666666666666668, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.25333333333333335, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.18666666666666668, + "Malay,English,Spanish,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.21333333333333335, + "Malay,English,Spanish,Filipino,Chinese": 0.2, + "Malay,English,Indonesian,Filipino,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.24666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.23333333333333334, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.24, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.25333333333333335, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.22666666666666666, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.21333333333333335, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.22, + "English,Spanish,Indonesian,Filipino,Chinese": 0.24, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.2, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.17333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.16666666666666666, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.18, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16 + } + }, + "AC3_2": 0.4948369334532379, + "AC3_3": 0.40954269894883133, + "AC3_4": 0.3469346011462132, + "AC3_5": 0.2993281792273377, + "AC3_6": 0.26300256625463925, + "AC3_7": 0.23613104520312983 + }, + "prompt_2": { + "overall_acc": 0.4095238095238095, + "language_acc": { + "Malay": 0.35333333333333333, + "English": 0.5, + "Vietnamese": 0.35333333333333333, + "Spanish": 0.44666666666666666, + "Indonesian": 0.4266666666666667, + "Filipino": 0.36, + "Chinese": 0.4266666666666667 + }, + "consistency_score_2": 0.5092063492063492, + "consistency_score_3": 0.3348571428571428, + "consistency_score_4": 0.24647619047619054, + "consistency_score_5": 0.1933333333333333, + "consistency_score_6": 0.1580952380952381, + "consistency_score_7": 0.13333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.47333333333333333, + "Malay,Vietnamese": 0.4866666666666667, + "Malay,Spanish": 0.49333333333333335, + "Malay,Indonesian": 0.6466666666666666, + "Malay,Filipino": 0.5, + "Malay,Chinese": 0.47333333333333333, + "English,Vietnamese": 0.4866666666666667, + "English,Spanish": 0.58, + "English,Indonesian": 0.5266666666666666, + "English,Filipino": 0.4866666666666667, + "English,Chinese": 0.5466666666666666, + "Vietnamese,Spanish": 0.4866666666666667, + "Vietnamese,Indonesian": 0.5133333333333333, + "Vietnamese,Filipino": 0.4266666666666667, + "Vietnamese,Chinese": 0.4266666666666667, + "Spanish,Indonesian": 0.5266666666666666, + "Spanish,Filipino": 0.5133333333333333, + "Spanish,Chinese": 0.5533333333333333, + "Indonesian,Filipino": 0.5266666666666666, + "Indonesian,Chinese": 0.5333333333333333, + "Filipino,Chinese": 0.4866666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.3, + "Malay,English,Spanish": 0.35333333333333333, + "Malay,English,Indonesian": 0.38, + "Malay,English,Filipino": 0.3, + "Malay,English,Chinese": 0.32, + "Malay,Vietnamese,Spanish": 0.30666666666666664, + "Malay,Vietnamese,Indonesian": 0.38666666666666666, + "Malay,Vietnamese,Filipino": 0.29333333333333333, + "Malay,Vietnamese,Chinese": 0.2733333333333333, + "Malay,Spanish,Indonesian": 0.4, + "Malay,Spanish,Filipino": 0.31333333333333335, + "Malay,Spanish,Chinese": 0.32, + "Malay,Indonesian,Filipino": 0.41333333333333333, + "Malay,Indonesian,Chinese": 0.38666666666666666, + "Malay,Filipino,Chinese": 0.32, + "English,Vietnamese,Spanish": 0.3333333333333333, + "English,Vietnamese,Indonesian": 0.3466666666666667, + "English,Vietnamese,Filipino": 0.26666666666666666, + "English,Vietnamese,Chinese": 0.31333333333333335, + "English,Spanish,Indonesian": 0.37333333333333335, + "English,Spanish,Filipino": 0.36666666666666664, + "English,Spanish,Chinese": 0.38666666666666666, + "English,Indonesian,Filipino": 0.34, + "English,Indonesian,Chinese": 0.36, + "English,Filipino,Chinese": 0.3333333333333333, + "Vietnamese,Spanish,Indonesian": 0.34, + "Vietnamese,Spanish,Filipino": 0.29333333333333333, + "Vietnamese,Spanish,Chinese": 0.3, + "Vietnamese,Indonesian,Filipino": 0.34, + "Vietnamese,Indonesian,Chinese": 0.3, + "Vietnamese,Filipino,Chinese": 0.2733333333333333, + "Spanish,Indonesian,Filipino": 0.3466666666666667, + "Spanish,Indonesian,Chinese": 0.35333333333333333, + "Spanish,Filipino,Chinese": 0.34, + "Indonesian,Filipino,Chinese": 0.3466666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.24666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.26, + "Malay,English,Vietnamese,Filipino": 0.19333333333333333, + "Malay,English,Vietnamese,Chinese": 0.20666666666666667, + "Malay,English,Spanish,Indonesian": 0.3, + "Malay,English,Spanish,Filipino": 0.23333333333333334, + "Malay,English,Spanish,Chinese": 0.25333333333333335, + "Malay,English,Indonesian,Filipino": 0.26666666666666666, + "Malay,English,Indonesian,Chinese": 0.26666666666666666, + "Malay,English,Filipino,Chinese": 0.22, + "Malay,Vietnamese,Spanish,Indonesian": 0.28, + "Malay,Vietnamese,Spanish,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.2733333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Filipino,Chinese": 0.2, + "Malay,Spanish,Indonesian,Filipino": 0.2866666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.2733333333333333, + "Malay,Spanish,Filipino,Chinese": 0.22666666666666666, + "Malay,Indonesian,Filipino,Chinese": 0.28, + "English,Vietnamese,Spanish,Indonesian": 0.2733333333333333, + "English,Vietnamese,Spanish,Filipino": 0.22666666666666666, + "English,Vietnamese,Spanish,Chinese": 0.24666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.23333333333333334, + "English,Vietnamese,Filipino,Chinese": 0.21333333333333335, + "English,Spanish,Indonesian,Filipino": 0.2733333333333333, + "English,Spanish,Indonesian,Chinese": 0.2733333333333333, + "English,Spanish,Filipino,Chinese": 0.2733333333333333, + "English,Indonesian,Filipino,Chinese": 0.25333333333333335, + "Vietnamese,Spanish,Indonesian,Filipino": 0.26, + "Vietnamese,Spanish,Indonesian,Chinese": 0.24, + "Vietnamese,Spanish,Filipino,Chinese": 0.22, + "Vietnamese,Indonesian,Filipino,Chinese": 0.23333333333333334, + "Spanish,Indonesian,Filipino,Chinese": 0.25333333333333335 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.22666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino": 0.16666666666666666, + "Malay,English,Vietnamese,Spanish,Chinese": 0.17333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.18, + "Malay,English,Vietnamese,Filipino,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Indonesian,Filipino": 0.22, + "Malay,English,Spanish,Indonesian,Chinese": 0.22, + "Malay,English,Spanish,Filipino,Chinese": 0.18, + "Malay,English,Indonesian,Filipino,Chinese": 0.2, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.16, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.20666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.2, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.18666666666666668, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.18666666666666668, + "English,Spanish,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.2 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.16, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.13333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + } + }, + "AC3_2": 0.45395728431561316, + "AC3_3": 0.36844567914336745, + "AC3_4": 0.3077374038577402, + "AC3_5": 0.2626645602513221, + "AC3_6": 0.22812400123817408, + "AC3_7": 0.20116959060621728 + }, + "prompt_3": { + "overall_acc": 0.3952380952380952, + "language_acc": { + "Malay": 0.38, + "English": 0.44, + "Vietnamese": 0.3466666666666667, + "Spanish": 0.44, + "Indonesian": 0.37333333333333335, + "Filipino": 0.35333333333333333, + "Chinese": 0.43333333333333335 + }, + "consistency_score_2": 0.46190476190476193, + "consistency_score_3": 0.27485714285714286, + "consistency_score_4": 0.1895238095238096, + "consistency_score_5": 0.14412698412698416, + "consistency_score_6": 0.11714285714285715, + "consistency_score_7": 0.1, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.4533333333333333, + "Malay,Vietnamese": 0.47333333333333333, + "Malay,Spanish": 0.43333333333333335, + "Malay,Indonesian": 0.56, + "Malay,Filipino": 0.44666666666666666, + "Malay,Chinese": 0.4266666666666667, + "English,Vietnamese": 0.43333333333333335, + "English,Spanish": 0.5066666666666667, + "English,Indonesian": 0.41333333333333333, + "English,Filipino": 0.4866666666666667, + "English,Chinese": 0.42, + "Vietnamese,Spanish": 0.44666666666666666, + "Vietnamese,Indonesian": 0.47333333333333333, + "Vietnamese,Filipino": 0.4066666666666667, + "Vietnamese,Chinese": 0.4266666666666667, + "Spanish,Indonesian": 0.4666666666666667, + "Spanish,Filipino": 0.52, + "Spanish,Chinese": 0.4666666666666667, + "Indonesian,Filipino": 0.48, + "Indonesian,Chinese": 0.4666666666666667, + "Filipino,Chinese": 0.49333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.26, + "Malay,English,Spanish": 0.26666666666666666, + "Malay,English,Indonesian": 0.2866666666666667, + "Malay,English,Filipino": 0.26666666666666666, + "Malay,English,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Spanish": 0.26666666666666666, + "Malay,Vietnamese,Indonesian": 0.32, + "Malay,Vietnamese,Filipino": 0.24, + "Malay,Vietnamese,Chinese": 0.26666666666666666, + "Malay,Spanish,Indonesian": 0.32666666666666666, + "Malay,Spanish,Filipino": 0.26, + "Malay,Spanish,Chinese": 0.26, + "Malay,Indonesian,Filipino": 0.32, + "Malay,Indonesian,Chinese": 0.30666666666666664, + "Malay,Filipino,Chinese": 0.26666666666666666, + "English,Vietnamese,Spanish": 0.2733333333333333, + "English,Vietnamese,Indonesian": 0.22666666666666666, + "English,Vietnamese,Filipino": 0.24666666666666667, + "English,Vietnamese,Chinese": 0.24, + "English,Spanish,Indonesian": 0.2733333333333333, + "English,Spanish,Filipino": 0.3333333333333333, + "English,Spanish,Chinese": 0.29333333333333333, + "English,Indonesian,Filipino": 0.26666666666666666, + "English,Indonesian,Chinese": 0.23333333333333334, + "English,Filipino,Chinese": 0.2733333333333333, + "Vietnamese,Spanish,Indonesian": 0.28, + "Vietnamese,Spanish,Filipino": 0.26, + "Vietnamese,Spanish,Chinese": 0.26666666666666666, + "Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "Vietnamese,Indonesian,Chinese": 0.3, + "Vietnamese,Filipino,Chinese": 0.25333333333333335, + "Spanish,Indonesian,Filipino": 0.31333333333333335, + "Spanish,Indonesian,Chinese": 0.26, + "Spanish,Filipino,Chinese": 0.31333333333333335, + "Indonesian,Filipino,Chinese": 0.2866666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.2, + "Malay,English,Vietnamese,Indonesian": 0.18, + "Malay,English,Vietnamese,Filipino": 0.17333333333333334, + "Malay,English,Vietnamese,Chinese": 0.18, + "Malay,English,Spanish,Indonesian": 0.21333333333333335, + "Malay,English,Spanish,Filipino": 0.18666666666666668, + "Malay,English,Spanish,Chinese": 0.18, + "Malay,English,Indonesian,Filipino": 0.20666666666666667, + "Malay,English,Indonesian,Chinese": 0.16666666666666666, + "Malay,English,Filipino,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Indonesian": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Filipino": 0.16666666666666666, + "Malay,Vietnamese,Spanish,Chinese": 0.18, + "Malay,Vietnamese,Indonesian,Filipino": 0.2, + "Malay,Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.17333333333333334, + "Malay,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Malay,Spanish,Indonesian,Chinese": 0.20666666666666667, + "Malay,Spanish,Filipino,Chinese": 0.18, + "Malay,Indonesian,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.18, + "English,Vietnamese,Spanish,Filipino": 0.19333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.19333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.17333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "English,Vietnamese,Filipino,Chinese": 0.16666666666666666, + "English,Spanish,Indonesian,Filipino": 0.20666666666666667, + "English,Spanish,Indonesian,Chinese": 0.16666666666666666, + "English,Spanish,Filipino,Chinese": 0.21333333333333335, + "English,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino": 0.20666666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Vietnamese,Spanish,Filipino,Chinese": 0.18666666666666668, + "Vietnamese,Indonesian,Filipino,Chinese": 0.2, + "Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.15333333333333332, + "Malay,English,Vietnamese,Spanish,Filipino": 0.14, + "Malay,English,Vietnamese,Spanish,Chinese": 0.14666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.14666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.13333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.13333333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Spanish,Indonesian,Chinese": 0.14, + "Malay,English,Spanish,Filipino,Chinese": 0.13333333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.14, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.13333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.14, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1 + } + }, + "AC3_2": 0.42597883592913843, + "AC3_3": 0.32423454880242175, + "AC3_4": 0.256196680582836, + "AC3_5": 0.21122789311927825, + "AC3_6": 0.1807222516906981, + "AC3_7": 0.15961538458315458 + }, + "prompt_4": { + "overall_acc": 0.4428571428571428, + "language_acc": { + "Malay": 0.38, + "English": 0.5466666666666666, + "Vietnamese": 0.4066666666666667, + "Spanish": 0.4866666666666667, + "Indonesian": 0.47333333333333333, + "Filipino": 0.42, + "Chinese": 0.38666666666666666 + }, + "consistency_score_2": 0.5736507936507935, + "consistency_score_3": 0.4060952380952381, + "consistency_score_4": 0.31352380952380954, + "consistency_score_5": 0.2546031746031746, + "consistency_score_6": 0.21523809523809523, + "consistency_score_7": 0.18666666666666668, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5133333333333333, + "Malay,Vietnamese": 0.7066666666666667, + "Malay,Spanish": 0.5933333333333334, + "Malay,Indonesian": 0.7933333333333333, + "Malay,Filipino": 0.5866666666666667, + "Malay,Chinese": 0.5733333333333334, + "English,Vietnamese": 0.54, + "English,Spanish": 0.6666666666666666, + "English,Indonesian": 0.58, + "English,Filipino": 0.44666666666666666, + "English,Chinese": 0.46, + "Vietnamese,Spanish": 0.5666666666666667, + "Vietnamese,Indonesian": 0.68, + "Vietnamese,Filipino": 0.5666666666666667, + "Vietnamese,Chinese": 0.4866666666666667, + "Spanish,Indonesian": 0.6333333333333333, + "Spanish,Filipino": 0.5, + "Spanish,Chinese": 0.5333333333333333, + "Indonesian,Filipino": 0.58, + "Indonesian,Chinese": 0.5533333333333333, + "Filipino,Chinese": 0.4866666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.41333333333333333, + "Malay,English,Spanish": 0.4066666666666667, + "Malay,English,Indonesian": 0.46, + "Malay,English,Filipino": 0.3333333333333333, + "Malay,English,Chinese": 0.3466666666666667, + "Malay,Vietnamese,Spanish": 0.4533333333333333, + "Malay,Vietnamese,Indonesian": 0.6, + "Malay,Vietnamese,Filipino": 0.46, + "Malay,Vietnamese,Chinese": 0.4, + "Malay,Spanish,Indonesian": 0.54, + "Malay,Spanish,Filipino": 0.3933333333333333, + "Malay,Spanish,Chinese": 0.4066666666666667, + "Malay,Indonesian,Filipino": 0.5133333333333333, + "Malay,Indonesian,Chinese": 0.48, + "Malay,Filipino,Chinese": 0.38, + "English,Vietnamese,Spanish": 0.43333333333333335, + "English,Vietnamese,Indonesian": 0.44, + "English,Vietnamese,Filipino": 0.32666666666666666, + "English,Vietnamese,Chinese": 0.32, + "English,Spanish,Indonesian": 0.4666666666666667, + "English,Spanish,Filipino": 0.36666666666666664, + "English,Spanish,Chinese": 0.37333333333333335, + "English,Indonesian,Filipino": 0.36, + "English,Indonesian,Chinese": 0.36666666666666664, + "English,Filipino,Chinese": 0.28, + "Vietnamese,Spanish,Indonesian": 0.47333333333333333, + "Vietnamese,Spanish,Filipino": 0.37333333333333335, + "Vietnamese,Spanish,Chinese": 0.34, + "Vietnamese,Indonesian,Filipino": 0.4666666666666667, + "Vietnamese,Indonesian,Chinese": 0.38, + "Vietnamese,Filipino,Chinese": 0.3333333333333333, + "Spanish,Indonesian,Filipino": 0.41333333333333333, + "Spanish,Indonesian,Chinese": 0.41333333333333333, + "Spanish,Filipino,Chinese": 0.32666666666666666, + "Indonesian,Filipino,Chinese": 0.37333333333333335 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.3333333333333333, + "Malay,English,Vietnamese,Indonesian": 0.38, + "Malay,English,Vietnamese,Filipino": 0.28, + "Malay,English,Vietnamese,Chinese": 0.2733333333333333, + "Malay,English,Spanish,Indonesian": 0.38666666666666666, + "Malay,English,Spanish,Filipino": 0.2866666666666667, + "Malay,English,Spanish,Chinese": 0.29333333333333333, + "Malay,English,Indonesian,Filipino": 0.31333333333333335, + "Malay,English,Indonesian,Chinese": 0.32, + "Malay,English,Filipino,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Spanish,Indonesian": 0.4266666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.3333333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.29333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.4266666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.35333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.3, + "Malay,Spanish,Indonesian,Filipino": 0.38, + "Malay,Spanish,Indonesian,Chinese": 0.37333333333333335, + "Malay,Spanish,Filipino,Chinese": 0.28, + "Malay,Indonesian,Filipino,Chinese": 0.34, + "English,Vietnamese,Spanish,Indonesian": 0.37333333333333335, + "English,Vietnamese,Spanish,Filipino": 0.29333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.2866666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.29333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.28, + "English,Vietnamese,Filipino,Chinese": 0.22, + "English,Spanish,Indonesian,Filipino": 0.32, + "English,Spanish,Indonesian,Chinese": 0.31333333333333335, + "English,Spanish,Filipino,Chinese": 0.24666666666666667, + "English,Indonesian,Filipino,Chinese": 0.25333333333333335, + "Vietnamese,Spanish,Indonesian,Filipino": 0.3466666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.3, + "Vietnamese,Spanish,Filipino,Chinese": 0.25333333333333335, + "Vietnamese,Indonesian,Filipino,Chinese": 0.3, + "Spanish,Indonesian,Filipino,Chinese": 0.2866666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.32666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino": 0.25333333333333335, + "Malay,English,Vietnamese,Spanish,Chinese": 0.24, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.26, + "Malay,English,Vietnamese,Filipino,Chinese": 0.2, + "Malay,English,Spanish,Indonesian,Filipino": 0.2866666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.28, + "Malay,English,Spanish,Filipino,Chinese": 0.20666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.32666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.28, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.28, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.26666666666666666, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.2733333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.25333333333333335, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.21333333333333335, + "English,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.25333333333333335, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.23333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.18666666666666668, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.2, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.2 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.18666666666666668 + } + }, + "AC3_2": 0.4998393860488022, + "AC3_3": 0.42368024611182964, + "AC3_4": 0.36713314381590767, + "AC3_5": 0.3233240132184843, + "AC3_6": 0.2896836881924917, + "AC3_7": 0.2626323751473882 + }, + "prompt_5": { + "overall_acc": 0.4523809523809524, + "language_acc": { + "Malay": 0.4, + "English": 0.5866666666666667, + "Vietnamese": 0.4066666666666667, + "Spanish": 0.48, + "Indonesian": 0.47333333333333333, + "Filipino": 0.41333333333333333, + "Chinese": 0.4066666666666667 + }, + "consistency_score_2": 0.5599999999999999, + "consistency_score_3": 0.3841904761904763, + "consistency_score_4": 0.29085714285714287, + "consistency_score_5": 0.23333333333333325, + "consistency_score_6": 0.19428571428571426, + "consistency_score_7": 0.16666666666666666, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5066666666666667, + "Malay,Vietnamese": 0.6266666666666667, + "Malay,Spanish": 0.54, + "Malay,Indonesian": 0.6733333333333333, + "Malay,Filipino": 0.5466666666666666, + "Malay,Chinese": 0.5333333333333333, + "English,Vietnamese": 0.5533333333333333, + "English,Spanish": 0.7066666666666667, + "English,Indonesian": 0.52, + "English,Filipino": 0.49333333333333335, + "English,Chinese": 0.49333333333333335, + "Vietnamese,Spanish": 0.5666666666666667, + "Vietnamese,Indonesian": 0.6266666666666667, + "Vietnamese,Filipino": 0.5733333333333334, + "Vietnamese,Chinese": 0.5133333333333333, + "Spanish,Indonesian": 0.5866666666666667, + "Spanish,Filipino": 0.47333333333333333, + "Spanish,Chinese": 0.6066666666666667, + "Indonesian,Filipino": 0.56, + "Indonesian,Chinese": 0.5666666666666667, + "Filipino,Chinese": 0.49333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.38666666666666666, + "Malay,English,Spanish": 0.4066666666666667, + "Malay,English,Indonesian": 0.3933333333333333, + "Malay,English,Filipino": 0.32, + "Malay,English,Chinese": 0.35333333333333333, + "Malay,Vietnamese,Spanish": 0.3933333333333333, + "Malay,Vietnamese,Indonesian": 0.5, + "Malay,Vietnamese,Filipino": 0.4, + "Malay,Vietnamese,Chinese": 0.38, + "Malay,Spanish,Indonesian": 0.43333333333333335, + "Malay,Spanish,Filipino": 0.32666666666666666, + "Malay,Spanish,Chinese": 0.3933333333333333, + "Malay,Indonesian,Filipino": 0.44, + "Malay,Indonesian,Chinese": 0.43333333333333335, + "Malay,Filipino,Chinese": 0.3466666666666667, + "English,Vietnamese,Spanish": 0.4533333333333333, + "English,Vietnamese,Indonesian": 0.4, + "English,Vietnamese,Filipino": 0.3466666666666667, + "English,Vietnamese,Chinese": 0.35333333333333333, + "English,Spanish,Indonesian": 0.44, + "English,Spanish,Filipino": 0.36666666666666664, + "English,Spanish,Chinese": 0.43333333333333335, + "English,Indonesian,Filipino": 0.3333333333333333, + "English,Indonesian,Chinese": 0.36666666666666664, + "English,Filipino,Chinese": 0.3, + "Vietnamese,Spanish,Indonesian": 0.41333333333333333, + "Vietnamese,Spanish,Filipino": 0.3333333333333333, + "Vietnamese,Spanish,Chinese": 0.38, + "Vietnamese,Indonesian,Filipino": 0.42, + "Vietnamese,Indonesian,Chinese": 0.3933333333333333, + "Vietnamese,Filipino,Chinese": 0.32666666666666666, + "Spanish,Indonesian,Filipino": 0.35333333333333333, + "Spanish,Indonesian,Chinese": 0.4266666666666667, + "Spanish,Filipino,Chinese": 0.3333333333333333, + "Indonesian,Filipino,Chinese": 0.36666666666666664 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.32666666666666666, + "Malay,English,Vietnamese,Indonesian": 0.32666666666666666, + "Malay,English,Vietnamese,Filipino": 0.24666666666666667, + "Malay,English,Vietnamese,Chinese": 0.2733333333333333, + "Malay,English,Spanish,Indonesian": 0.32666666666666666, + "Malay,English,Spanish,Filipino": 0.24666666666666667, + "Malay,English,Spanish,Chinese": 0.3, + "Malay,English,Indonesian,Filipino": 0.28, + "Malay,English,Indonesian,Chinese": 0.30666666666666664, + "Malay,English,Filipino,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Spanish,Indonesian": 0.32666666666666666, + "Malay,Vietnamese,Spanish,Filipino": 0.24666666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.29333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.3466666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.3333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.26666666666666666, + "Malay,Spanish,Indonesian,Filipino": 0.29333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.3466666666666667, + "Malay,Spanish,Filipino,Chinese": 0.26, + "Malay,Indonesian,Filipino,Chinese": 0.31333333333333335, + "English,Vietnamese,Spanish,Indonesian": 0.3466666666666667, + "English,Vietnamese,Spanish,Filipino": 0.2733333333333333, + "English,Vietnamese,Spanish,Chinese": 0.32, + "English,Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "English,Vietnamese,Indonesian,Chinese": 0.3, + "English,Vietnamese,Filipino,Chinese": 0.24666666666666667, + "English,Spanish,Indonesian,Filipino": 0.28, + "English,Spanish,Indonesian,Chinese": 0.32666666666666666, + "English,Spanish,Filipino,Chinese": 0.26, + "English,Indonesian,Filipino,Chinese": 0.26, + "Vietnamese,Spanish,Indonesian,Filipino": 0.2733333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.31333333333333335, + "Vietnamese,Spanish,Filipino,Chinese": 0.25333333333333335, + "Vietnamese,Indonesian,Filipino,Chinese": 0.2866666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.28 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.2733333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.2, + "Malay,English,Vietnamese,Spanish,Chinese": 0.24666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.22666666666666666, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.25333333333333335, + "Malay,English,Vietnamese,Filipino,Chinese": 0.19333333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Malay,English,Spanish,Indonesian,Chinese": 0.26666666666666666, + "Malay,English,Spanish,Filipino,Chinese": 0.2, + "Malay,English,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.24666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.22666666666666666, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.2733333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.22, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.22, + "English,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.22666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.17333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.2 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666 + } + }, + "AC3_2": 0.5004703668367362, + "AC3_3": 0.41550654865360226, + "AC3_4": 0.35406751111879625, + "AC3_5": 0.30787037032547254, + "AC3_6": 0.2718283189144132, + "AC3_7": 0.24358974355039445 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3782467532467533, + "language_acc": { + "English": 0.4034090909090909, + "Vietnamese": 0.42613636363636365, + "Chinese": 0.3522727272727273, + "Indonesian": 0.39204545454545453, + "Filipino": 0.3409090909090909, + "Spanish": 0.35795454545454547, + "Malay": 0.375 + }, + "consistency_score_2": 0.5432900432900434, + "consistency_score_3": 0.36136363636363644, + "consistency_score_4": 0.2668831168831169, + "consistency_score_5": 0.20941558441558436, + "consistency_score_6": 0.17045454545454544, + "consistency_score_7": 0.14204545454545456, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5056818181818182, + "English,Chinese": 0.5056818181818182, + "English,Indonesian": 0.5511363636363636, + "English,Filipino": 0.48295454545454547, + "English,Spanish": 0.6534090909090909, + "English,Malay": 0.48295454545454547, + "Vietnamese,Chinese": 0.48863636363636365, + "Vietnamese,Indonesian": 0.5795454545454546, + "Vietnamese,Filipino": 0.4772727272727273, + "Vietnamese,Spanish": 0.5681818181818182, + "Vietnamese,Malay": 0.5681818181818182, + "Chinese,Indonesian": 0.5738636363636364, + "Chinese,Filipino": 0.4715909090909091, + "Chinese,Spanish": 0.5511363636363636, + "Chinese,Malay": 0.4772727272727273, + "Indonesian,Filipino": 0.6363636363636364, + "Indonesian,Spanish": 0.5909090909090909, + "Indonesian,Malay": 0.6704545454545454, + "Filipino,Spanish": 0.4659090909090909, + "Filipino,Malay": 0.6193181818181818, + "Spanish,Malay": 0.48863636363636365 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.3068181818181818, + "English,Vietnamese,Indonesian": 0.375, + "English,Vietnamese,Filipino": 0.30113636363636365, + "English,Vietnamese,Spanish": 0.4034090909090909, + "English,Vietnamese,Malay": 0.32954545454545453, + "English,Chinese,Indonesian": 0.375, + "English,Chinese,Filipino": 0.2840909090909091, + "English,Chinese,Spanish": 0.4034090909090909, + "English,Chinese,Malay": 0.2784090909090909, + "English,Indonesian,Filipino": 0.3806818181818182, + "English,Indonesian,Spanish": 0.4431818181818182, + "English,Indonesian,Malay": 0.3977272727272727, + "English,Filipino,Spanish": 0.3522727272727273, + "English,Filipino,Malay": 0.3352272727272727, + "English,Spanish,Malay": 0.35795454545454547, + "Vietnamese,Chinese,Indonesian": 0.35795454545454547, + "Vietnamese,Chinese,Filipino": 0.2840909090909091, + "Vietnamese,Chinese,Spanish": 0.3409090909090909, + "Vietnamese,Chinese,Malay": 0.30113636363636365, + "Vietnamese,Indonesian,Filipino": 0.3806818181818182, + "Vietnamese,Indonesian,Spanish": 0.42045454545454547, + "Vietnamese,Indonesian,Malay": 0.4375, + "Vietnamese,Filipino,Spanish": 0.3181818181818182, + "Vietnamese,Filipino,Malay": 0.3693181818181818, + "Vietnamese,Spanish,Malay": 0.36363636363636365, + "Chinese,Indonesian,Filipino": 0.3806818181818182, + "Chinese,Indonesian,Spanish": 0.4034090909090909, + "Chinese,Indonesian,Malay": 0.3977272727272727, + "Chinese,Filipino,Spanish": 0.30113636363636365, + "Chinese,Filipino,Malay": 0.3352272727272727, + "Chinese,Spanish,Malay": 0.3125, + "Indonesian,Filipino,Spanish": 0.38636363636363635, + "Indonesian,Filipino,Malay": 0.48863636363636365, + "Indonesian,Spanish,Malay": 0.4147727272727273, + "Filipino,Spanish,Malay": 0.32954545454545453 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.2556818181818182, + "English,Vietnamese,Chinese,Filipino": 0.19318181818181818, + "English,Vietnamese,Chinese,Spanish": 0.2556818181818182, + "English,Vietnamese,Chinese,Malay": 0.19886363636363635, + "English,Vietnamese,Indonesian,Filipino": 0.2727272727272727, + "English,Vietnamese,Indonesian,Spanish": 0.32386363636363635, + "English,Vietnamese,Indonesian,Malay": 0.2897727272727273, + "English,Vietnamese,Filipino,Spanish": 0.25, + "English,Vietnamese,Filipino,Malay": 0.23863636363636365, + "English,Vietnamese,Spanish,Malay": 0.2727272727272727, + "English,Chinese,Indonesian,Filipino": 0.26136363636363635, + "English,Chinese,Indonesian,Spanish": 0.3181818181818182, + "English,Chinese,Indonesian,Malay": 0.26704545454545453, + "English,Chinese,Filipino,Spanish": 0.24431818181818182, + "English,Chinese,Filipino,Malay": 0.20454545454545456, + "English,Chinese,Spanish,Malay": 0.24431818181818182, + "English,Indonesian,Filipino,Spanish": 0.3125, + "English,Indonesian,Filipino,Malay": 0.29545454545454547, + "English,Indonesian,Spanish,Malay": 0.32386363636363635, + "English,Filipino,Spanish,Malay": 0.25, + "Vietnamese,Chinese,Indonesian,Filipino": 0.26136363636363635, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2840909090909091, + "Vietnamese,Chinese,Indonesian,Malay": 0.2784090909090909, + "Vietnamese,Chinese,Filipino,Spanish": 0.20454545454545456, + "Vietnamese,Chinese,Filipino,Malay": 0.23295454545454544, + "Vietnamese,Chinese,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Indonesian,Filipino,Spanish": 0.2897727272727273, + "Vietnamese,Indonesian,Filipino,Malay": 0.3068181818181818, + "Vietnamese,Indonesian,Spanish,Malay": 0.3352272727272727, + "Vietnamese,Filipino,Spanish,Malay": 0.2556818181818182, + "Chinese,Indonesian,Filipino,Spanish": 0.2784090909090909, + "Chinese,Indonesian,Filipino,Malay": 0.30113636363636365, + "Chinese,Indonesian,Spanish,Malay": 0.2897727272727273, + "Chinese,Filipino,Spanish,Malay": 0.2215909090909091, + "Indonesian,Filipino,Spanish,Malay": 0.30113636363636365 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.19318181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.2215909090909091, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.19886363636363635, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.17045454545454544, + "English,Vietnamese,Chinese,Filipino,Malay": 0.1590909090909091, + "English,Vietnamese,Chinese,Spanish,Malay": 0.17613636363636365, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.23295454545454544, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.2215909090909091, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.2556818181818182, + "English,Vietnamese,Filipino,Spanish,Malay": 0.19886363636363635, + "English,Chinese,Indonesian,Filipino,Spanish": 0.23295454545454544, + "English,Chinese,Indonesian,Filipino,Malay": 0.20454545454545456, + "English,Chinese,Indonesian,Spanish,Malay": 0.23295454545454544, + "English,Chinese,Filipino,Spanish,Malay": 0.18181818181818182, + "English,Indonesian,Filipino,Spanish,Malay": 0.24431818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.23863636363636365, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.2159090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.17045454545454544, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.1590909090909091, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.17613636363636365, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.14204545454545456, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.19318181818181818, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456 + } + }, + "AC3_2": 0.44598912533004215, + "AC3_3": 0.3696124989523407, + "AC3_4": 0.31295302578826006, + "AC3_5": 0.2695791776824758, + "AC3_6": 0.2350053791933191, + "AC3_7": 0.20653098847257256 + }, + "prompt_2": { + "overall_acc": 0.36038961038961037, + "language_acc": { + "English": 0.4034090909090909, + "Vietnamese": 0.39204545454545453, + "Chinese": 0.32954545454545453, + "Indonesian": 0.36363636363636365, + "Filipino": 0.32386363636363635, + "Spanish": 0.35795454545454547, + "Malay": 0.3522727272727273 + }, + "consistency_score_2": 0.5227272727272727, + "consistency_score_3": 0.3387987012987012, + "consistency_score_4": 0.24496753246753247, + "consistency_score_5": 0.18777056277056278, + "consistency_score_6": 0.14853896103896105, + "consistency_score_7": 0.11931818181818182, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5227272727272727, + "English,Chinese": 0.5681818181818182, + "English,Indonesian": 0.5795454545454546, + "English,Filipino": 0.4943181818181818, + "English,Spanish": 0.6193181818181818, + "English,Malay": 0.5284090909090909, + "Vietnamese,Chinese": 0.4375, + "Vietnamese,Indonesian": 0.5170454545454546, + "Vietnamese,Filipino": 0.4431818181818182, + "Vietnamese,Spanish": 0.5511363636363636, + "Vietnamese,Malay": 0.5397727272727273, + "Chinese,Indonesian": 0.5227272727272727, + "Chinese,Filipino": 0.5, + "Chinese,Spanish": 0.5113636363636364, + "Chinese,Malay": 0.44886363636363635, + "Indonesian,Filipino": 0.4943181818181818, + "Indonesian,Spanish": 0.5625, + "Indonesian,Malay": 0.625, + "Filipino,Spanish": 0.4943181818181818, + "Filipino,Malay": 0.5397727272727273, + "Spanish,Malay": 0.4772727272727273 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.30113636363636365, + "English,Vietnamese,Indonesian": 0.36363636363636365, + "English,Vietnamese,Filipino": 0.3068181818181818, + "English,Vietnamese,Spanish": 0.39204545454545453, + "English,Vietnamese,Malay": 0.3465909090909091, + "English,Chinese,Indonesian": 0.38636363636363635, + "English,Chinese,Filipino": 0.32954545454545453, + "English,Chinese,Spanish": 0.3977272727272727, + "English,Chinese,Malay": 0.32954545454545453, + "English,Indonesian,Filipino": 0.32954545454545453, + "English,Indonesian,Spanish": 0.4318181818181818, + "English,Indonesian,Malay": 0.4034090909090909, + "English,Filipino,Spanish": 0.3522727272727273, + "English,Filipino,Malay": 0.32954545454545453, + "English,Spanish,Malay": 0.36363636363636365, + "Vietnamese,Chinese,Indonesian": 0.29545454545454547, + "Vietnamese,Chinese,Filipino": 0.2784090909090909, + "Vietnamese,Chinese,Spanish": 0.3068181818181818, + "Vietnamese,Chinese,Malay": 0.2784090909090909, + "Vietnamese,Indonesian,Filipino": 0.30113636363636365, + "Vietnamese,Indonesian,Spanish": 0.36363636363636365, + "Vietnamese,Indonesian,Malay": 0.3806818181818182, + "Vietnamese,Filipino,Spanish": 0.3068181818181818, + "Vietnamese,Filipino,Malay": 0.3125, + "Vietnamese,Spanish,Malay": 0.3465909090909091, + "Chinese,Indonesian,Filipino": 0.32386363636363635, + "Chinese,Indonesian,Spanish": 0.35795454545454547, + "Chinese,Indonesian,Malay": 0.3465909090909091, + "Chinese,Filipino,Spanish": 0.3125, + "Chinese,Filipino,Malay": 0.3181818181818182, + "Chinese,Spanish,Malay": 0.2840909090909091, + "Indonesian,Filipino,Spanish": 0.3409090909090909, + "Indonesian,Filipino,Malay": 0.3693181818181818, + "Indonesian,Spanish,Malay": 0.3693181818181818, + "Filipino,Spanish,Malay": 0.30113636363636365 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.22727272727272727, + "English,Vietnamese,Chinese,Filipino": 0.19318181818181818, + "English,Vietnamese,Chinese,Spanish": 0.23863636363636365, + "English,Vietnamese,Chinese,Malay": 0.19886363636363635, + "English,Vietnamese,Indonesian,Filipino": 0.23295454545454544, + "English,Vietnamese,Indonesian,Spanish": 0.29545454545454547, + "English,Vietnamese,Indonesian,Malay": 0.2784090909090909, + "English,Vietnamese,Filipino,Spanish": 0.24431818181818182, + "English,Vietnamese,Filipino,Malay": 0.23295454545454544, + "English,Vietnamese,Spanish,Malay": 0.26136363636363635, + "English,Chinese,Indonesian,Filipino": 0.24431818181818182, + "English,Chinese,Indonesian,Spanish": 0.29545454545454547, + "English,Chinese,Indonesian,Malay": 0.2784090909090909, + "English,Chinese,Filipino,Spanish": 0.25, + "English,Chinese,Filipino,Malay": 0.23863636363636365, + "English,Chinese,Spanish,Malay": 0.25, + "English,Indonesian,Filipino,Spanish": 0.2727272727272727, + "English,Indonesian,Filipino,Malay": 0.26136363636363635, + "English,Indonesian,Spanish,Malay": 0.3068181818181818, + "English,Filipino,Spanish,Malay": 0.24431818181818182, + "Vietnamese,Chinese,Indonesian,Filipino": 0.22727272727272727, + "Vietnamese,Chinese,Indonesian,Spanish": 0.23863636363636365, + "Vietnamese,Chinese,Indonesian,Malay": 0.2215909090909091, + "Vietnamese,Chinese,Filipino,Spanish": 0.21022727272727273, + "Vietnamese,Chinese,Filipino,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Spanish,Malay": 0.19886363636363635, + "Vietnamese,Indonesian,Filipino,Spanish": 0.2556818181818182, + "Vietnamese,Indonesian,Filipino,Malay": 0.24431818181818182, + "Vietnamese,Indonesian,Spanish,Malay": 0.26704545454545453, + "Vietnamese,Filipino,Spanish,Malay": 0.2215909090909091, + "Chinese,Indonesian,Filipino,Spanish": 0.24431818181818182, + "Chinese,Indonesian,Filipino,Malay": 0.2556818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.25, + "Chinese,Filipino,Spanish,Malay": 0.2159090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.26136363636363635 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.16477272727272727, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.19318181818181818, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.17045454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.16477272727272727, + "English,Vietnamese,Chinese,Filipino,Malay": 0.1590909090909091, + "English,Vietnamese,Chinese,Spanish,Malay": 0.16477272727272727, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.21022727272727273, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.1875, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.2215909090909091, + "English,Vietnamese,Filipino,Spanish,Malay": 0.18181818181818182, + "English,Chinese,Indonesian,Filipino,Spanish": 0.19886363636363635, + "English,Chinese,Indonesian,Filipino,Malay": 0.19886363636363635, + "English,Chinese,Indonesian,Spanish,Malay": 0.2215909090909091, + "English,Chinese,Filipino,Spanish,Malay": 0.1875, + "English,Indonesian,Filipino,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.1875, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.19886363636363635, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.19318181818181818 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.14772727272727273, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.13636363636363635, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.14772727272727273, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.13068181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.14772727272727273 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182 + } + }, + "AC3_2": 0.4266377004864489, + "AC3_3": 0.3492607925588783, + "AC3_4": 0.2916749380467075, + "AC3_5": 0.24690068071546134, + "AC3_6": 0.21037096870283845, + "AC3_7": 0.1792801107148192 + }, + "prompt_3": { + "overall_acc": 0.36525974025974023, + "language_acc": { + "English": 0.4034090909090909, + "Vietnamese": 0.4034090909090909, + "Chinese": 0.3465909090909091, + "Indonesian": 0.3409090909090909, + "Filipino": 0.32386363636363635, + "Spanish": 0.39204545454545453, + "Malay": 0.3465909090909091 + }, + "consistency_score_2": 0.5224567099567099, + "consistency_score_3": 0.3387987012987013, + "consistency_score_4": 0.24788961038961044, + "consistency_score_5": 0.19507575757575757, + "consistency_score_6": 0.16071428571428573, + "consistency_score_7": 0.13636363636363635, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5170454545454546, + "English,Chinese": 0.5340909090909091, + "English,Indonesian": 0.4943181818181818, + "English,Filipino": 0.5113636363636364, + "English,Spanish": 0.625, + "English,Malay": 0.5227272727272727, + "Vietnamese,Chinese": 0.4375, + "Vietnamese,Indonesian": 0.5, + "Vietnamese,Filipino": 0.4431818181818182, + "Vietnamese,Spanish": 0.5568181818181818, + "Vietnamese,Malay": 0.5227272727272727, + "Chinese,Indonesian": 0.5227272727272727, + "Chinese,Filipino": 0.5340909090909091, + "Chinese,Spanish": 0.5568181818181818, + "Chinese,Malay": 0.4772727272727273, + "Indonesian,Filipino": 0.5113636363636364, + "Indonesian,Spanish": 0.5511363636363636, + "Indonesian,Malay": 0.6193181818181818, + "Filipino,Spanish": 0.4772727272727273, + "Filipino,Malay": 0.5568181818181818, + "Spanish,Malay": 0.5 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.3125, + "English,Vietnamese,Indonesian": 0.3181818181818182, + "English,Vietnamese,Filipino": 0.2897727272727273, + "English,Vietnamese,Spanish": 0.39204545454545453, + "English,Vietnamese,Malay": 0.3352272727272727, + "English,Chinese,Indonesian": 0.32954545454545453, + "English,Chinese,Filipino": 0.3465909090909091, + "English,Chinese,Spanish": 0.4090909090909091, + "English,Chinese,Malay": 0.3125, + "English,Indonesian,Filipino": 0.32954545454545453, + "English,Indonesian,Spanish": 0.3977272727272727, + "English,Indonesian,Malay": 0.35795454545454547, + "English,Filipino,Spanish": 0.3522727272727273, + "English,Filipino,Malay": 0.3465909090909091, + "English,Spanish,Malay": 0.36363636363636365, + "Vietnamese,Chinese,Indonesian": 0.2840909090909091, + "Vietnamese,Chinese,Filipino": 0.2840909090909091, + "Vietnamese,Chinese,Spanish": 0.32954545454545453, + "Vietnamese,Chinese,Malay": 0.2840909090909091, + "Vietnamese,Indonesian,Filipino": 0.29545454545454547, + "Vietnamese,Indonesian,Spanish": 0.36363636363636365, + "Vietnamese,Indonesian,Malay": 0.3693181818181818, + "Vietnamese,Filipino,Spanish": 0.3068181818181818, + "Vietnamese,Filipino,Malay": 0.3181818181818182, + "Vietnamese,Spanish,Malay": 0.3409090909090909, + "Chinese,Indonesian,Filipino": 0.3465909090909091, + "Chinese,Indonesian,Spanish": 0.3693181818181818, + "Chinese,Indonesian,Malay": 0.3693181818181818, + "Chinese,Filipino,Spanish": 0.3409090909090909, + "Chinese,Filipino,Malay": 0.32954545454545453, + "Chinese,Spanish,Malay": 0.3125, + "Indonesian,Filipino,Spanish": 0.32954545454545453, + "Indonesian,Filipino,Malay": 0.38636363636363635, + "Indonesian,Spanish,Malay": 0.39204545454545453, + "Filipino,Spanish,Malay": 0.3125 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.21022727272727273, + "English,Vietnamese,Chinese,Filipino": 0.20454545454545456, + "English,Vietnamese,Chinese,Spanish": 0.26136363636363635, + "English,Vietnamese,Chinese,Malay": 0.19886363636363635, + "English,Vietnamese,Indonesian,Filipino": 0.23295454545454544, + "English,Vietnamese,Indonesian,Spanish": 0.2784090909090909, + "English,Vietnamese,Indonesian,Malay": 0.25, + "English,Vietnamese,Filipino,Spanish": 0.23295454545454544, + "English,Vietnamese,Filipino,Malay": 0.23863636363636365, + "English,Vietnamese,Spanish,Malay": 0.26704545454545453, + "English,Chinese,Indonesian,Filipino": 0.2556818181818182, + "English,Chinese,Indonesian,Spanish": 0.2840909090909091, + "English,Chinese,Indonesian,Malay": 0.25, + "English,Chinese,Filipino,Spanish": 0.2727272727272727, + "English,Chinese,Filipino,Malay": 0.23863636363636365, + "English,Chinese,Spanish,Malay": 0.23863636363636365, + "English,Indonesian,Filipino,Spanish": 0.26704545454545453, + "English,Indonesian,Filipino,Malay": 0.26704545454545453, + "English,Indonesian,Spanish,Malay": 0.2897727272727273, + "English,Filipino,Spanish,Malay": 0.24431818181818182, + "Vietnamese,Chinese,Indonesian,Filipino": 0.22727272727272727, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian,Malay": 0.23295454545454544, + "Vietnamese,Chinese,Filipino,Spanish": 0.2215909090909091, + "Vietnamese,Chinese,Filipino,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Indonesian,Filipino,Spanish": 0.25, + "Vietnamese,Indonesian,Filipino,Malay": 0.2556818181818182, + "Vietnamese,Indonesian,Spanish,Malay": 0.2784090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.23295454545454544, + "Chinese,Indonesian,Filipino,Spanish": 0.26136363636363635, + "Chinese,Indonesian,Filipino,Malay": 0.2556818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.2840909090909091, + "Chinese,Filipino,Spanish,Malay": 0.22727272727272727, + "Indonesian,Filipino,Spanish,Malay": 0.26704545454545453 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.17613636363636365, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.19886363636363635, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.16477272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.17613636363636365, + "English,Vietnamese,Chinese,Filipino,Malay": 0.17045454545454544, + "English,Vietnamese,Chinese,Spanish,Malay": 0.16477272727272727, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.21022727272727273, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.19886363636363635, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.2159090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.19318181818181818, + "English,Chinese,Indonesian,Filipino,Spanish": 0.2159090909090909, + "English,Chinese,Indonesian,Filipino,Malay": 0.19886363636363635, + "English,Chinese,Indonesian,Spanish,Malay": 0.2159090909090909, + "English,Chinese,Filipino,Spanish,Malay": 0.1875, + "English,Indonesian,Filipino,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.21022727272727273, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.20454545454545456 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.14772727272727273, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.1534090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.14204545454545456, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.17613636363636365, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635 + } + }, + "AC3_2": 0.4299399929058857, + "AC3_3": 0.3515319704006484, + "AC3_4": 0.2953410766670153, + "AC3_5": 0.2543237785420316, + "AC3_6": 0.22321428567184742, + "AC3_7": 0.19858781990745417 + }, + "prompt_4": { + "overall_acc": 0.35714285714285715, + "language_acc": { + "English": 0.375, + "Vietnamese": 0.4034090909090909, + "Chinese": 0.32954545454545453, + "Indonesian": 0.38636363636363635, + "Filipino": 0.30113636363636365, + "Spanish": 0.375, + "Malay": 0.32954545454545453 + }, + "consistency_score_2": 0.5100108225108226, + "consistency_score_3": 0.3211038961038961, + "consistency_score_4": 0.2271103896103896, + "consistency_score_5": 0.17559523809523814, + "consistency_score_6": 0.1452922077922078, + "consistency_score_7": 0.125, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5056818181818182, + "English,Chinese": 0.48295454545454547, + "English,Indonesian": 0.5511363636363636, + "English,Filipino": 0.4318181818181818, + "English,Spanish": 0.6022727272727273, + "English,Malay": 0.5568181818181818, + "Vietnamese,Chinese": 0.4772727272727273, + "Vietnamese,Indonesian": 0.5397727272727273, + "Vietnamese,Filipino": 0.4431818181818182, + "Vietnamese,Spanish": 0.48295454545454547, + "Vietnamese,Malay": 0.5511363636363636, + "Chinese,Indonesian": 0.5340909090909091, + "Chinese,Filipino": 0.44886363636363635, + "Chinese,Spanish": 0.4772727272727273, + "Chinese,Malay": 0.5, + "Indonesian,Filipino": 0.4943181818181818, + "Indonesian,Spanish": 0.5227272727272727, + "Indonesian,Malay": 0.6647727272727273, + "Filipino,Spanish": 0.4772727272727273, + "Filipino,Malay": 0.5, + "Spanish,Malay": 0.4659090909090909 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2840909090909091, + "English,Vietnamese,Indonesian": 0.3522727272727273, + "English,Vietnamese,Filipino": 0.2556818181818182, + "English,Vietnamese,Spanish": 0.3522727272727273, + "English,Vietnamese,Malay": 0.3522727272727273, + "English,Chinese,Indonesian": 0.3352272727272727, + "English,Chinese,Filipino": 0.26704545454545453, + "English,Chinese,Spanish": 0.32954545454545453, + "English,Chinese,Malay": 0.3125, + "English,Indonesian,Filipino": 0.30113636363636365, + "English,Indonesian,Spanish": 0.3806818181818182, + "English,Indonesian,Malay": 0.4318181818181818, + "English,Filipino,Spanish": 0.3181818181818182, + "English,Filipino,Malay": 0.29545454545454547, + "English,Spanish,Malay": 0.35795454545454547, + "Vietnamese,Chinese,Indonesian": 0.32386363636363635, + "Vietnamese,Chinese,Filipino": 0.26704545454545453, + "Vietnamese,Chinese,Spanish": 0.2727272727272727, + "Vietnamese,Chinese,Malay": 0.3125, + "Vietnamese,Indonesian,Filipino": 0.30113636363636365, + "Vietnamese,Indonesian,Spanish": 0.32954545454545453, + "Vietnamese,Indonesian,Malay": 0.4147727272727273, + "Vietnamese,Filipino,Spanish": 0.2784090909090909, + "Vietnamese,Filipino,Malay": 0.30113636363636365, + "Vietnamese,Spanish,Malay": 0.30113636363636365, + "Chinese,Indonesian,Filipino": 0.3125, + "Chinese,Indonesian,Spanish": 0.3125, + "Chinese,Indonesian,Malay": 0.3977272727272727, + "Chinese,Filipino,Spanish": 0.2727272727272727, + "Chinese,Filipino,Malay": 0.3068181818181818, + "Chinese,Spanish,Malay": 0.2784090909090909, + "Indonesian,Filipino,Spanish": 0.3068181818181818, + "Indonesian,Filipino,Malay": 0.36363636363636365, + "Indonesian,Spanish,Malay": 0.36363636363636365, + "Filipino,Spanish,Malay": 0.29545454545454547 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.23295454545454544, + "English,Vietnamese,Chinese,Filipino": 0.17045454545454544, + "English,Vietnamese,Chinese,Spanish": 0.21022727272727273, + "English,Vietnamese,Chinese,Malay": 0.2215909090909091, + "English,Vietnamese,Indonesian,Filipino": 0.20454545454545456, + "English,Vietnamese,Indonesian,Spanish": 0.2727272727272727, + "English,Vietnamese,Indonesian,Malay": 0.2897727272727273, + "English,Vietnamese,Filipino,Spanish": 0.21022727272727273, + "English,Vietnamese,Filipino,Malay": 0.19318181818181818, + "English,Vietnamese,Spanish,Malay": 0.24431818181818182, + "English,Chinese,Indonesian,Filipino": 0.21022727272727273, + "English,Chinese,Indonesian,Spanish": 0.23863636363636365, + "English,Chinese,Indonesian,Malay": 0.2840909090909091, + "English,Chinese,Filipino,Spanish": 0.19318181818181818, + "English,Chinese,Filipino,Malay": 0.19886363636363635, + "English,Chinese,Spanish,Malay": 0.2215909090909091, + "English,Indonesian,Filipino,Spanish": 0.23295454545454544, + "English,Indonesian,Filipino,Malay": 0.24431818181818182, + "English,Indonesian,Spanish,Malay": 0.29545454545454547, + "English,Filipino,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2215909090909091, + "Vietnamese,Chinese,Indonesian,Malay": 0.2727272727272727, + "Vietnamese,Chinese,Filipino,Spanish": 0.18181818181818182, + "Vietnamese,Chinese,Filipino,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Indonesian,Filipino,Spanish": 0.20454545454545456, + "Vietnamese,Indonesian,Filipino,Malay": 0.25, + "Vietnamese,Indonesian,Spanish,Malay": 0.2556818181818182, + "Vietnamese,Filipino,Spanish,Malay": 0.19886363636363635, + "Chinese,Indonesian,Filipino,Spanish": 0.21022727272727273, + "Chinese,Indonesian,Filipino,Malay": 0.24431818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.24431818181818182, + "Chinese,Filipino,Spanish,Malay": 0.19318181818181818, + "Indonesian,Filipino,Spanish,Malay": 0.23863636363636365 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.1534090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.1875, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.21022727272727273, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Filipino,Malay": 0.1534090909090909, + "English,Vietnamese,Chinese,Spanish,Malay": 0.17613636363636365, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.17613636363636365, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.17613636363636365, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.2215909090909091, + "English,Vietnamese,Filipino,Spanish,Malay": 0.1590909090909091, + "English,Chinese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "English,Chinese,Indonesian,Filipino,Malay": 0.18181818181818182, + "English,Chinese,Indonesian,Spanish,Malay": 0.21022727272727273, + "English,Chinese,Filipino,Spanish,Malay": 0.1534090909090909, + "English,Indonesian,Filipino,Spanish,Malay": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.19318181818181818, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.17613636363636365, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13068181818181818, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.14772727272727273, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.17045454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.125, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.125 + } + }, + "AC3_2": 0.4201025183380036, + "AC3_3": 0.33816590297961524, + "AC3_4": 0.2776564918351425, + "AC3_5": 0.2354349560611538, + "AC3_6": 0.20655435029353048, + "AC3_7": 0.1851851851467764 + }, + "prompt_5": { + "overall_acc": 0.3798701298701298, + "language_acc": { + "English": 0.3977272727272727, + "Vietnamese": 0.4034090909090909, + "Chinese": 0.35795454545454547, + "Indonesian": 0.38636363636363635, + "Filipino": 0.35795454545454547, + "Spanish": 0.38636363636363635, + "Malay": 0.3693181818181818 + }, + "consistency_score_2": 0.5419372294372293, + "consistency_score_3": 0.3621753246753246, + "consistency_score_4": 0.2733766233766234, + "consistency_score_5": 0.22294372294372294, + "consistency_score_6": 0.19155844155844154, + "consistency_score_7": 0.17045454545454544, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5113636363636364, + "English,Chinese": 0.4943181818181818, + "English,Indonesian": 0.5340909090909091, + "English,Filipino": 0.5, + "English,Spanish": 0.6818181818181818, + "English,Malay": 0.5170454545454546, + "Vietnamese,Chinese": 0.5113636363636364, + "Vietnamese,Indonesian": 0.5795454545454546, + "Vietnamese,Filipino": 0.48863636363636365, + "Vietnamese,Spanish": 0.5170454545454546, + "Vietnamese,Malay": 0.5511363636363636, + "Chinese,Indonesian": 0.5738636363636364, + "Chinese,Filipino": 0.48295454545454547, + "Chinese,Spanish": 0.5227272727272727, + "Chinese,Malay": 0.5170454545454546, + "Indonesian,Filipino": 0.625, + "Indonesian,Spanish": 0.5625, + "Indonesian,Malay": 0.6704545454545454, + "Filipino,Spanish": 0.4772727272727273, + "Filipino,Malay": 0.5511363636363636, + "Spanish,Malay": 0.5113636363636364 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.3181818181818182, + "English,Vietnamese,Indonesian": 0.36363636363636365, + "English,Vietnamese,Filipino": 0.32386363636363635, + "English,Vietnamese,Spanish": 0.39204545454545453, + "English,Vietnamese,Malay": 0.3352272727272727, + "English,Chinese,Indonesian": 0.3522727272727273, + "English,Chinese,Filipino": 0.30113636363636365, + "English,Chinese,Spanish": 0.38636363636363635, + "English,Chinese,Malay": 0.3068181818181818, + "English,Indonesian,Filipino": 0.3693181818181818, + "English,Indonesian,Spanish": 0.4318181818181818, + "English,Indonesian,Malay": 0.39204545454545453, + "English,Filipino,Spanish": 0.375, + "English,Filipino,Malay": 0.32954545454545453, + "English,Spanish,Malay": 0.4034090909090909, + "Vietnamese,Chinese,Indonesian": 0.3693181818181818, + "Vietnamese,Chinese,Filipino": 0.3181818181818182, + "Vietnamese,Chinese,Spanish": 0.32954545454545453, + "Vietnamese,Chinese,Malay": 0.32954545454545453, + "Vietnamese,Indonesian,Filipino": 0.4034090909090909, + "Vietnamese,Indonesian,Spanish": 0.3806818181818182, + "Vietnamese,Indonesian,Malay": 0.4318181818181818, + "Vietnamese,Filipino,Spanish": 0.3181818181818182, + "Vietnamese,Filipino,Malay": 0.3465909090909091, + "Vietnamese,Spanish,Malay": 0.3409090909090909, + "Chinese,Indonesian,Filipino": 0.38636363636363635, + "Chinese,Indonesian,Spanish": 0.375, + "Chinese,Indonesian,Malay": 0.42045454545454547, + "Chinese,Filipino,Spanish": 0.29545454545454547, + "Chinese,Filipino,Malay": 0.3465909090909091, + "Chinese,Spanish,Malay": 0.32386363636363635, + "Indonesian,Filipino,Spanish": 0.3806818181818182, + "Indonesian,Filipino,Malay": 0.45454545454545453, + "Indonesian,Spanish,Malay": 0.4090909090909091, + "Filipino,Spanish,Malay": 0.3352272727272727 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.2556818181818182, + "English,Vietnamese,Chinese,Filipino": 0.2215909090909091, + "English,Vietnamese,Chinese,Spanish": 0.26136363636363635, + "English,Vietnamese,Chinese,Malay": 0.2215909090909091, + "English,Vietnamese,Indonesian,Filipino": 0.2897727272727273, + "English,Vietnamese,Indonesian,Spanish": 0.3125, + "English,Vietnamese,Indonesian,Malay": 0.2784090909090909, + "English,Vietnamese,Filipino,Spanish": 0.26704545454545453, + "English,Vietnamese,Filipino,Malay": 0.25, + "English,Vietnamese,Spanish,Malay": 0.26704545454545453, + "English,Chinese,Indonesian,Filipino": 0.2556818181818182, + "English,Chinese,Indonesian,Spanish": 0.29545454545454547, + "English,Chinese,Indonesian,Malay": 0.26704545454545453, + "English,Chinese,Filipino,Spanish": 0.25, + "English,Chinese,Filipino,Malay": 0.2215909090909091, + "English,Chinese,Spanish,Malay": 0.26704545454545453, + "English,Indonesian,Filipino,Spanish": 0.3181818181818182, + "English,Indonesian,Filipino,Malay": 0.2897727272727273, + "English,Indonesian,Spanish,Malay": 0.32386363636363635, + "English,Filipino,Spanish,Malay": 0.2727272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.2840909090909091, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2727272727272727, + "Vietnamese,Chinese,Indonesian,Malay": 0.30113636363636365, + "Vietnamese,Chinese,Filipino,Spanish": 0.23295454545454544, + "Vietnamese,Chinese,Filipino,Malay": 0.25, + "Vietnamese,Chinese,Spanish,Malay": 0.25, + "Vietnamese,Indonesian,Filipino,Spanish": 0.2897727272727273, + "Vietnamese,Indonesian,Filipino,Malay": 0.3181818181818182, + "Vietnamese,Indonesian,Spanish,Malay": 0.3125, + "Vietnamese,Filipino,Spanish,Malay": 0.26136363636363635, + "Chinese,Indonesian,Filipino,Spanish": 0.26704545454545453, + "Chinese,Indonesian,Filipino,Malay": 0.3181818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.2897727272727273, + "Chinese,Filipino,Spanish,Malay": 0.23295454545454544, + "Indonesian,Filipino,Spanish,Malay": 0.30113636363636365 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.21022727272727273, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.2215909090909091, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.21022727272727273, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.19318181818181818, + "English,Vietnamese,Chinese,Filipino,Malay": 0.1875, + "English,Vietnamese,Chinese,Spanish,Malay": 0.19886363636363635, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.25, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.23863636363636365, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.24431818181818182, + "English,Vietnamese,Filipino,Spanish,Malay": 0.2159090909090909, + "English,Chinese,Indonesian,Filipino,Spanish": 0.23295454545454544, + "English,Chinese,Indonesian,Filipino,Malay": 0.21022727272727273, + "English,Chinese,Indonesian,Spanish,Malay": 0.23295454545454544, + "English,Chinese,Filipino,Spanish,Malay": 0.19886363636363635, + "English,Indonesian,Filipino,Spanish,Malay": 0.25, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.2215909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.24431818181818182, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.23863636363636365, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.25, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.22727272727272727 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.1875, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.1875, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.1875, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.17045454545454544, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.21022727272727273, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.20454545454545456 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + } + }, + "AC3_2": 0.44665680660248186, + "AC3_3": 0.37081175218816426, + "AC3_4": 0.31794299135356474, + "AC3_5": 0.2809811373079548, + "AC3_6": 0.254685655209267, + "AC3_7": 0.23531777952280733 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4563106796116505 + }, + "prompt_2": { + "accuracy": 0.47572815533980584 + }, + "prompt_3": { + "accuracy": 0.39805825242718446 + }, + "prompt_4": { + "accuracy": 0.3300970873786408 + }, + "prompt_5": { + "accuracy": 0.5145631067961165 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.26666666666666666 + }, + "prompt_2": { + "accuracy": 0.2761904761904762 + }, + "prompt_3": { + "accuracy": 0.23809523809523808 + }, + "prompt_4": { + "accuracy": 0.34285714285714286 + }, + "prompt_5": { + "accuracy": 0.3523809523809524 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3364485981308411 + }, + "prompt_2": { + "accuracy": 0.5607476635514018 + }, + "prompt_3": { + "accuracy": 0.42990654205607476 + }, + "prompt_4": { + "accuracy": 0.32710280373831774 + }, + "prompt_5": { + "accuracy": 0.4953271028037383 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.2, + "history": 0.13333333333333333, + "literature": 0.5, + "politics": 0.8, + "culture": 0.1, + "film": 0.2, + "law": 0.3, + "geography": 0.5 + } + }, + "prompt_2": { + "accuracy": 0.44, + "category_acc": { + "brand": 0.5, + "demographics": 0.0, + "biology": 0.4, + "history": 0.26666666666666666, + "literature": 0.5, + "politics": 0.8, + "culture": 0.7, + "film": 0.4, + "law": 0.2, + "geography": 0.5 + } + }, + "prompt_3": { + "accuracy": 0.41, + "category_acc": { + "brand": 0.5, + "demographics": 0.4, + "biology": 0.3, + "history": 0.2, + "literature": 0.5, + "politics": 0.8, + "culture": 0.5, + "film": 0.4, + "law": 0.4, + "geography": 0.2 + } + }, + "prompt_4": { + "accuracy": 0.37, + "category_acc": { + "brand": 0.5, + "demographics": 0.4, + "biology": 0.2, + "history": 0.26666666666666666, + "literature": 0.5, + "politics": 0.7, + "culture": 0.3, + "film": 0.4, + "law": 0.2, + "geography": 0.3 + } + }, + "prompt_5": { + "accuracy": 0.41, + "category_acc": { + "brand": 0.6, + "demographics": 0.4, + "biology": 0.3, + "history": 0.13333333333333333, + "literature": 0.6, + "politics": 0.9, + "culture": 0.3, + "film": 0.3, + "law": 0.2, + "geography": 0.5 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.08323701338845887 + }, + "prompt_2": { + "bleu_score": 0.07493668875939041 + }, + "prompt_3": { + "bleu_score": 0.07813957843870196 + }, + "prompt_4": { + "bleu_score": 0.16491616993592081 + }, + "prompt_5": { + "bleu_score": 0.05437702723770901 + } }, "indommlu": { "prompt_1": -1, @@ -4454,179 +40719,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.09097102690237205 + }, + "prompt_2": { + "bleu_score": 0.10896033926095879 + }, + "prompt_3": { + "bleu_score": 0.08588335274813479 + }, + "prompt_4": { + "bleu_score": 0.10370042286864295 + }, + "prompt_5": { + "bleu_score": 0.10647837784902163 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.077223233094033 + }, + "prompt_2": { + "bleu_score": 0.09446339739839617 + }, + "prompt_3": { + "bleu_score": 0.06659193945823774 + }, + "prompt_4": { + "bleu_score": 0.1000659224712472 + }, + "prompt_5": { + "bleu_score": 0.11059114150202218 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.05886617413799371 + }, + "prompt_2": { + "bleu_score": 0.08390433206816507 + }, + "prompt_3": { + "bleu_score": 0.06258910353050073 + }, + "prompt_4": { + "bleu_score": 0.11385794085227369 + }, + "prompt_5": { + "bleu_score": 0.11408035691464592 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.09042437546269276 + }, + "prompt_2": { + "bleu_score": 0.10413742493080713 + }, + "prompt_3": { + "bleu_score": 0.08098364365843856 + }, + "prompt_4": { + "bleu_score": 0.10571186351916498 + }, + "prompt_5": { + "bleu_score": 0.10727989747818337 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5099183197199533 + }, + "prompt_2": { + "accuracy": 0.41423570595099185 + }, + "prompt_3": { + "accuracy": 0.4049008168028005 + }, + "prompt_4": { + "accuracy": 0.47841306884480744 + }, + "prompt_5": { + "accuracy": 0.5169194865810969 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4820879513764748, + "category_acc": { + "high_school_european_history": 0.6280487804878049, + "business_ethics": 0.5454545454545454, + "clinical_knowledge": 0.5113636363636364, + "medical_genetics": 0.5555555555555556, + "high_school_us_history": 0.6108374384236454, + "high_school_physics": 0.3333333333333333, + "high_school_world_history": 0.6228813559322034, + "virology": 0.3939393939393939, + "high_school_microeconomics": 0.5147679324894515, + "econometrics": 0.21238938053097345, + "college_computer_science": 0.43434343434343436, + "high_school_biology": 0.5922330097087378, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.38434163701067614, + "philosophy": 0.5290322580645161, + "professional_medicine": 0.5276752767527675, + "nutrition": 0.5475409836065573, + "global_facts": 0.32323232323232326, + "machine_learning": 0.32432432432432434, + "security_studies": 0.47950819672131145, + "public_relations": 0.46788990825688076, + "professional_psychology": 0.513911620294599, + "prehistory": 0.5386996904024768, + "anatomy": 0.4925373134328358, + "human_sexuality": 0.5846153846153846, + "college_medicine": 0.4883720930232558, + "high_school_government_and_politics": 0.5989583333333334, + "college_chemistry": 0.41414141414141414, + "logical_fallacies": 0.5802469135802469, + "high_school_geography": 0.5939086294416244, + "elementary_mathematics": 0.29442970822281167, + "human_aging": 0.45045045045045046, + "college_mathematics": 0.29292929292929293, + "high_school_psychology": 0.6636029411764706, + "formal_logic": 0.376, + "high_school_statistics": 0.413953488372093, + "international_law": 0.6666666666666666, + "high_school_mathematics": 0.2379182156133829, + "high_school_computer_science": 0.47474747474747475, + "conceptual_physics": 0.36324786324786323, + "miscellaneous": 0.6099744245524297, + "high_school_chemistry": 0.3811881188118812, + "marketing": 0.6866952789699571, + "professional_law": 0.4070450097847358, + "management": 0.49019607843137253, + "college_physics": 0.19801980198019803, + "jurisprudence": 0.5420560747663551, + "world_religions": 0.6823529411764706, + "sociology": 0.64, + "us_foreign_policy": 0.6767676767676768, + "high_school_macroeconomics": 0.48586118251928023, + "computer_security": 0.6060606060606061, + "moral_scenarios": 0.29977628635346754, + "moral_disputes": 0.5043478260869565, + "electrical_engineering": 0.5208333333333334, + "astronomy": 0.5298013245033113, + "college_biology": 0.48951048951048953 + } + }, + "prompt_2": { + "accuracy": 0.40486235252055774, + "category_acc": { + "high_school_european_history": 0.5853658536585366, + "business_ethics": 0.45454545454545453, + "clinical_knowledge": 0.42045454545454547, + "medical_genetics": 0.41414141414141414, + "high_school_us_history": 0.6354679802955665, + "high_school_physics": 0.29333333333333333, + "high_school_world_history": 0.635593220338983, + "virology": 0.3151515151515151, + "high_school_microeconomics": 0.3628691983122363, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.35353535353535354, + "high_school_biology": 0.5210355987055016, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.3736654804270463, + "philosophy": 0.45483870967741935, + "professional_medicine": 0.5313653136531366, + "nutrition": 0.35737704918032787, + "global_facts": 0.24242424242424243, + "machine_learning": 0.3153153153153153, + "security_studies": 0.46311475409836067, + "public_relations": 0.3853211009174312, + "professional_psychology": 0.41898527004909986, + "prehistory": 0.4179566563467492, + "anatomy": 0.3880597014925373, + "human_sexuality": 0.47692307692307695, + "college_medicine": 0.43023255813953487, + "high_school_government_and_politics": 0.4791666666666667, + "college_chemistry": 0.32323232323232326, + "logical_fallacies": 0.41358024691358025, + "high_school_geography": 0.4619289340101523, + "elementary_mathematics": 0.27320954907161804, + "human_aging": 0.4144144144144144, + "college_mathematics": 0.3939393939393939, + "high_school_psychology": 0.4742647058823529, + "formal_logic": 0.264, + "high_school_statistics": 0.3581395348837209, + "international_law": 0.5333333333333333, + "high_school_mathematics": 0.26765799256505574, + "high_school_computer_science": 0.45454545454545453, + "conceptual_physics": 0.2863247863247863, + "miscellaneous": 0.38618925831202044, + "high_school_chemistry": 0.29207920792079206, + "marketing": 0.5879828326180258, + "professional_law": 0.40574037834311805, + "management": 0.38235294117647056, + "college_physics": 0.18811881188118812, + "jurisprudence": 0.45794392523364486, + "world_religions": 0.5, + "sociology": 0.45, + "us_foreign_policy": 0.48484848484848486, + "high_school_macroeconomics": 0.41131105398457585, + "computer_security": 0.42424242424242425, + "moral_scenarios": 0.26174496644295303, + "moral_disputes": 0.45217391304347826, + "electrical_engineering": 0.4027777777777778, + "astronomy": 0.40397350993377484, + "college_biology": 0.46153846153846156 + } + }, + "prompt_3": { + "accuracy": 0.4202359671076153, + "category_acc": { + "high_school_european_history": 0.6097560975609756, + "business_ethics": 0.4444444444444444, + "clinical_knowledge": 0.4431818181818182, + "medical_genetics": 0.5151515151515151, + "high_school_us_history": 0.6551724137931034, + "high_school_physics": 0.32666666666666666, + "high_school_world_history": 0.6610169491525424, + "virology": 0.36363636363636365, + "high_school_microeconomics": 0.3881856540084388, + "econometrics": 0.23008849557522124, + "college_computer_science": 0.37373737373737376, + "high_school_biology": 0.517799352750809, + "abstract_algebra": 0.30303030303030304, + "professional_accounting": 0.3879003558718861, + "philosophy": 0.535483870967742, + "professional_medicine": 0.5461254612546126, + "nutrition": 0.4131147540983607, + "global_facts": 0.26262626262626265, + "machine_learning": 0.35135135135135137, + "security_studies": 0.5040983606557377, + "public_relations": 0.3944954128440367, + "professional_psychology": 0.44353518821603927, + "prehistory": 0.47368421052631576, + "anatomy": 0.3656716417910448, + "human_sexuality": 0.5230769230769231, + "college_medicine": 0.46511627906976744, + "high_school_government_and_politics": 0.5416666666666666, + "college_chemistry": 0.31313131313131315, + "logical_fallacies": 0.35802469135802467, + "high_school_geography": 0.5025380710659898, + "elementary_mathematics": 0.2864721485411141, + "human_aging": 0.40540540540540543, + "college_mathematics": 0.3939393939393939, + "high_school_psychology": 0.48713235294117646, + "formal_logic": 0.224, + "high_school_statistics": 0.3302325581395349, + "international_law": 0.5583333333333333, + "high_school_mathematics": 0.2825278810408922, + "high_school_computer_science": 0.4444444444444444, + "conceptual_physics": 0.31196581196581197, + "miscellaneous": 0.42455242966751916, + "high_school_chemistry": 0.33663366336633666, + "marketing": 0.6180257510729614, + "professional_law": 0.40117416829745595, + "management": 0.45098039215686275, + "college_physics": 0.15841584158415842, + "jurisprudence": 0.5514018691588785, + "world_religions": 0.5470588235294118, + "sociology": 0.47, + "us_foreign_policy": 0.5252525252525253, + "high_school_macroeconomics": 0.40616966580976865, + "computer_security": 0.41414141414141414, + "moral_scenarios": 0.24161073825503357, + "moral_disputes": 0.4608695652173913, + "electrical_engineering": 0.3888888888888889, + "astronomy": 0.41721854304635764, + "college_biology": 0.3916083916083916 + } + }, + "prompt_4": { + "accuracy": 0.4639256346085091, + "category_acc": { + "high_school_european_history": 0.5914634146341463, + "business_ethics": 0.42424242424242425, + "clinical_knowledge": 0.5378787878787878, + "medical_genetics": 0.48484848484848486, + "high_school_us_history": 0.6305418719211823, + "high_school_physics": 0.36, + "high_school_world_history": 0.6228813559322034, + "virology": 0.3878787878787879, + "high_school_microeconomics": 0.47257383966244726, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.37373737373737376, + "high_school_biology": 0.5760517799352751, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.39501779359430605, + "philosophy": 0.5806451612903226, + "professional_medicine": 0.5756457564575646, + "nutrition": 0.5737704918032787, + "global_facts": 0.31313131313131315, + "machine_learning": 0.27927927927927926, + "security_studies": 0.5163934426229508, + "public_relations": 0.46788990825688076, + "professional_psychology": 0.4795417348608838, + "prehistory": 0.5201238390092879, + "anatomy": 0.47761194029850745, + "human_sexuality": 0.5076923076923077, + "college_medicine": 0.48255813953488375, + "high_school_government_and_politics": 0.6666666666666666, + "college_chemistry": 0.3333333333333333, + "logical_fallacies": 0.5987654320987654, + "high_school_geography": 0.5736040609137056, + "elementary_mathematics": 0.27320954907161804, + "human_aging": 0.4144144144144144, + "college_mathematics": 0.40404040404040403, + "high_school_psychology": 0.5992647058823529, + "formal_logic": 0.392, + "high_school_statistics": 0.3767441860465116, + "international_law": 0.6916666666666667, + "high_school_mathematics": 0.275092936802974, + "high_school_computer_science": 0.45454545454545453, + "conceptual_physics": 0.32051282051282054, + "miscellaneous": 0.49744245524296676, + "high_school_chemistry": 0.3613861386138614, + "marketing": 0.7253218884120172, + "professional_law": 0.39399869536855836, + "management": 0.46078431372549017, + "college_physics": 0.18811881188118812, + "jurisprudence": 0.5233644859813084, + "world_religions": 0.5764705882352941, + "sociology": 0.605, + "us_foreign_policy": 0.6767676767676768, + "high_school_macroeconomics": 0.46786632390745503, + "computer_security": 0.6262626262626263, + "moral_scenarios": 0.2505592841163311, + "moral_disputes": 0.5159420289855072, + "electrical_engineering": 0.5069444444444444, + "astronomy": 0.543046357615894, + "college_biology": 0.4755244755244755 + } + }, + "prompt_5": { + "accuracy": 0.5032534858777261, + "category_acc": { + "high_school_european_history": 0.6036585365853658, + "business_ethics": 0.494949494949495, + "clinical_knowledge": 0.553030303030303, + "medical_genetics": 0.5555555555555556, + "high_school_us_history": 0.6354679802955665, + "high_school_physics": 0.3466666666666667, + "high_school_world_history": 0.6228813559322034, + "virology": 0.43636363636363634, + "high_school_microeconomics": 0.5443037974683544, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.41414141414141414, + "high_school_biology": 0.6245954692556634, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.3914590747330961, + "philosophy": 0.603225806451613, + "professional_medicine": 0.5350553505535055, + "nutrition": 0.5737704918032787, + "global_facts": 0.35353535353535354, + "machine_learning": 0.36036036036036034, + "security_studies": 0.5491803278688525, + "public_relations": 0.5412844036697247, + "professional_psychology": 0.5057283142389526, + "prehistory": 0.5944272445820433, + "anatomy": 0.4701492537313433, + "human_sexuality": 0.6, + "college_medicine": 0.5406976744186046, + "high_school_government_and_politics": 0.703125, + "college_chemistry": 0.43434343434343436, + "logical_fallacies": 0.6111111111111112, + "high_school_geography": 0.6243654822335025, + "elementary_mathematics": 0.3050397877984085, + "human_aging": 0.49099099099099097, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.6801470588235294, + "formal_logic": 0.312, + "high_school_statistics": 0.4604651162790698, + "international_law": 0.6666666666666666, + "high_school_mathematics": 0.2862453531598513, + "high_school_computer_science": 0.47474747474747475, + "conceptual_physics": 0.358974358974359, + "miscellaneous": 0.6841432225063938, + "high_school_chemistry": 0.42574257425742573, + "marketing": 0.7467811158798283, + "professional_law": 0.41487279843444225, + "management": 0.6372549019607843, + "college_physics": 0.1485148514851485, + "jurisprudence": 0.5607476635514018, + "world_religions": 0.7529411764705882, + "sociology": 0.63, + "us_foreign_policy": 0.6868686868686869, + "high_school_macroeconomics": 0.5141388174807198, + "computer_security": 0.5555555555555556, + "moral_scenarios": 0.2595078299776286, + "moral_disputes": 0.5362318840579711, + "electrical_engineering": 0.5555555555555556, + "astronomy": 0.5629139072847682, + "college_biology": 0.4755244755244755 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.33803863298662706 + }, + "prompt_2": { + "accuracy": 0.3588410104011887 + }, + "prompt_3": { + "accuracy": 0.34992570579494797 + }, + "prompt_4": { + "accuracy": 0.3447251114413076 + }, + "prompt_5": { + "accuracy": 0.3261515601783061 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.35118306351183065, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.30952380952380953, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.391304347826087, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.3448275862068966, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.48, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.2833333333333333, + "business_administration": 0.42105263157894735, + "marxism": 0.5416666666666666, + "mao_zedong_thought": 0.3793103448275862, + "education_science": 0.3235294117647059, + "teacher_qualification": 0.5306122448979592, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.5882352941176471, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.375, + "logic": 0.5185185185185185, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.25, + "high_school_history": 0.28, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.4230769230769231, + "sports_science": 0.20833333333333334, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.39215686274509803, + "accountant": 0.3148148148148148, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.24074074074074073, + "physician": 0.2777777777777778 + } + }, + "prompt_2": { + "accuracy": 0.3549190535491905, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.3333333333333333, + "college_physics": 0.25, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.35714285714285715, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.46153846153846156, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.52, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.31666666666666665, + "business_administration": 0.4473684210526316, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.2413793103448276, + "education_science": 0.38235294117647056, + "teacher_qualification": 0.5510204081632653, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.375, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.5, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.4444444444444444, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.39473684210526316, + "professional_tour_guide": 0.4117647058823529, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.25, + "high_school_history": 0.28, + "middle_school_history": 0.4074074074074074, + "civil_servant": 0.36538461538461536, + "sports_science": 0.16666666666666666, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.3137254901960784, + "accountant": 0.3333333333333333, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.2777777777777778, + "physician": 0.3148148148148148 + } + }, + "prompt_3": { + "accuracy": 0.3580323785803238, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.4523809523809524, + "college_physics": 0.25, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.391304347826087, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.375, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.46153846153846156, + "middle_school_physics": 0.4583333333333333, + "middle_school_chemistry": 0.48, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.3, + "business_administration": 0.42105263157894735, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.27586206896551724, + "education_science": 0.38235294117647056, + "teacher_qualification": 0.4897959183673469, + "high_school_politics": 0.5, + "high_school_geography": 0.375, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.5357142857142857, + "ideological_and_moral_cultivation": 0.375, + "logic": 0.4074074074074074, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.25, + "high_school_history": 0.24, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.36538461538461536, + "sports_science": 0.16666666666666666, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.37037037037037035, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.3148148148148148, + "physician": 0.35185185185185186 + } + }, + "prompt_4": { + "accuracy": 0.3518057285180573, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.375, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.35714285714285715, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.391304347826087, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.3448275862068966, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.2833333333333333, + "business_administration": 0.39473684210526316, + "marxism": 0.5416666666666666, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.3235294117647059, + "teacher_qualification": 0.4897959183673469, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.5294117647058824, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.3333333333333333, + "logic": 0.4444444444444444, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.39285714285714285, + "art_studies": 0.39473684210526316, + "professional_tour_guide": 0.4117647058823529, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.16666666666666666, + "high_school_history": 0.24, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.40384615384615385, + "sports_science": 0.20833333333333334, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.375, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.4117647058823529, + "accountant": 0.37037037037037035, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.25925925925925924, + "physician": 0.3148148148148148 + } + }, + "prompt_5": { + "accuracy": 0.3412204234122042, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.3333333333333333, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.391304347826087, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.3448275862068966, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.375, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.2692307692307692, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.52, + "veterinary_medicine": 0.4642857142857143, + "college_economics": 0.31666666666666665, + "business_administration": 0.34210526315789475, + "marxism": 0.375, + "mao_zedong_thought": 0.3793103448275862, + "education_science": 0.3235294117647059, + "teacher_qualification": 0.5102040816326531, + "high_school_politics": 0.5, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.375, + "logic": 0.48148148148148145, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.25, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.32, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.3269230769230769, + "sports_science": 0.25, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.39215686274509803, + "accountant": 0.2777777777777778, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.24074074074074073, + "physician": 0.35185185185185186 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2939068100358423 + }, + "prompt_2": { + "accuracy": 0.3118279569892473 + }, + "prompt_3": { + "accuracy": 0.3046594982078853 + }, + "prompt_4": { + "accuracy": 0.3225806451612903 + }, + "prompt_5": { + "accuracy": 0.3333333333333333 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3350025902262131, + "category_acc": { + "agronomy": 0.33136094674556216, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.24390243902439024, + "arts": 0.33125, + "astronomy": 0.28484848484848485, + "business_ethics": 0.37799043062200954, + "chinese_civil_service_exam": 0.33125, + "chinese_driving_rule": 0.42748091603053434, + "chinese_food_culture": 0.2647058823529412, + "chinese_foreign_policy": 0.38317757009345793, + "chinese_history": 0.38699690402476783, + "chinese_literature": 0.3431372549019608, + "chinese_teacher_qualification": 0.36312849162011174, + "clinical_knowledge": 0.350210970464135, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.2336448598130841, + "college_engineering_hydrology": 0.4056603773584906, + "college_law": 0.3055555555555556, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.3018867924528302, + "college_medicine": 0.31135531135531136, + "computer_science": 0.3382352941176471, + "computer_security": 0.3567251461988304, + "conceptual_physics": 0.32653061224489793, + "construction_project_management": 0.302158273381295, + "economics": 0.37735849056603776, + "education": 0.34355828220858897, + "electrical_engineering": 0.3430232558139535, + "elementary_chinese": 0.2777777777777778, + "elementary_commonsense": 0.31313131313131315, + "elementary_information_and_technology": 0.47478991596638653, + "elementary_mathematics": 0.26521739130434785, + "ethnology": 0.3333333333333333, + "food_science": 0.3986013986013986, + "genetics": 0.30113636363636365, + "global_facts": 0.33557046979865773, + "high_school_biology": 0.23668639053254437, + "high_school_chemistry": 0.25757575757575757, + "high_school_geography": 0.3050847457627119, + "high_school_mathematics": 0.2621951219512195, + "high_school_physics": 0.2636363636363636, + "high_school_politics": 0.34265734265734266, + "human_sexuality": 0.29365079365079366, + "international_law": 0.2702702702702703, + "journalism": 0.36046511627906974, + "jurisprudence": 0.36496350364963503, + "legal_and_moral_basis": 0.5514018691588785, + "logical": 0.34959349593495936, + "machine_learning": 0.28688524590163933, + "management": 0.3761904761904762, + "marketing": 0.37777777777777777, + "marxist_theory": 0.3544973544973545, + "modern_chinese": 0.3275862068965517, + "nutrition": 0.38620689655172413, + "philosophy": 0.37142857142857144, + "professional_accounting": 0.3142857142857143, + "professional_law": 0.2938388625592417, + "professional_medicine": 0.300531914893617, + "professional_psychology": 0.2974137931034483, + "public_relations": 0.3850574712643678, + "security_study": 0.362962962962963, + "sociology": 0.3893805309734513, + "sports_science": 0.34545454545454546, + "traditional_chinese_medicine": 0.2756756756756757, + "virology": 0.34911242603550297, + "world_history": 0.39751552795031053, + "world_religions": 0.35625 + } + }, + "prompt_2": { + "accuracy": 0.34544983595233986, + "category_acc": { + "agronomy": 0.33136094674556216, + "anatomy": 0.2972972972972973, + "ancient_chinese": 0.22560975609756098, + "arts": 0.3125, + "astronomy": 0.28484848484848485, + "business_ethics": 0.37799043062200954, + "chinese_civil_service_exam": 0.34375, + "chinese_driving_rule": 0.5114503816793893, + "chinese_food_culture": 0.29411764705882354, + "chinese_foreign_policy": 0.3925233644859813, + "chinese_history": 0.38390092879256965, + "chinese_literature": 0.3088235294117647, + "chinese_teacher_qualification": 0.4301675977653631, + "clinical_knowledge": 0.3080168776371308, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.27102803738317754, + "college_engineering_hydrology": 0.4528301886792453, + "college_law": 0.3055555555555556, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.3113207547169811, + "college_medicine": 0.27106227106227104, + "computer_science": 0.38235294117647056, + "computer_security": 0.4152046783625731, + "conceptual_physics": 0.3129251700680272, + "construction_project_management": 0.35251798561151076, + "economics": 0.34591194968553457, + "education": 0.3558282208588957, + "electrical_engineering": 0.36627906976744184, + "elementary_chinese": 0.2777777777777778, + "elementary_commonsense": 0.31313131313131315, + "elementary_information_and_technology": 0.49159663865546216, + "elementary_mathematics": 0.2608695652173913, + "ethnology": 0.362962962962963, + "food_science": 0.4125874125874126, + "genetics": 0.3352272727272727, + "global_facts": 0.33557046979865773, + "high_school_biology": 0.28994082840236685, + "high_school_chemistry": 0.25, + "high_school_geography": 0.3050847457627119, + "high_school_mathematics": 0.21951219512195122, + "high_school_physics": 0.2818181818181818, + "high_school_politics": 0.3356643356643357, + "human_sexuality": 0.36507936507936506, + "international_law": 0.2864864864864865, + "journalism": 0.3546511627906977, + "jurisprudence": 0.3381995133819951, + "legal_and_moral_basis": 0.5981308411214953, + "logical": 0.34959349593495936, + "machine_learning": 0.28688524590163933, + "management": 0.38571428571428573, + "marketing": 0.37777777777777777, + "marxist_theory": 0.4497354497354497, + "modern_chinese": 0.3017241379310345, + "nutrition": 0.3724137931034483, + "philosophy": 0.47619047619047616, + "professional_accounting": 0.33714285714285713, + "professional_law": 0.2843601895734597, + "professional_medicine": 0.31117021276595747, + "professional_psychology": 0.34913793103448276, + "public_relations": 0.41379310344827586, + "security_study": 0.34814814814814815, + "sociology": 0.41150442477876104, + "sports_science": 0.40606060606060607, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.30177514792899407, + "world_history": 0.4161490683229814, + "world_religions": 0.35625 + } + }, + "prompt_3": { + "accuracy": 0.34182351925401483, + "category_acc": { + "agronomy": 0.31952662721893493, + "anatomy": 0.30405405405405406, + "ancient_chinese": 0.2073170731707317, + "arts": 0.3375, + "astronomy": 0.2606060606060606, + "business_ethics": 0.37799043062200954, + "chinese_civil_service_exam": 0.31875, + "chinese_driving_rule": 0.5114503816793893, + "chinese_food_culture": 0.27205882352941174, + "chinese_foreign_policy": 0.37383177570093457, + "chinese_history": 0.3715170278637771, + "chinese_literature": 0.3235294117647059, + "chinese_teacher_qualification": 0.4022346368715084, + "clinical_knowledge": 0.29535864978902954, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.2897196261682243, + "college_engineering_hydrology": 0.42452830188679247, + "college_law": 0.3148148148148148, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.32075471698113206, + "college_medicine": 0.26373626373626374, + "computer_science": 0.39705882352941174, + "computer_security": 0.40350877192982454, + "conceptual_physics": 0.3401360544217687, + "construction_project_management": 0.3381294964028777, + "economics": 0.3270440251572327, + "education": 0.3619631901840491, + "electrical_engineering": 0.3488372093023256, + "elementary_chinese": 0.2857142857142857, + "elementary_commonsense": 0.2828282828282828, + "elementary_information_and_technology": 0.49159663865546216, + "elementary_mathematics": 0.2565217391304348, + "ethnology": 0.362962962962963, + "food_science": 0.3916083916083916, + "genetics": 0.3465909090909091, + "global_facts": 0.348993288590604, + "high_school_biology": 0.2603550295857988, + "high_school_chemistry": 0.23484848484848486, + "high_school_geography": 0.3559322033898305, + "high_school_mathematics": 0.23170731707317074, + "high_school_physics": 0.2818181818181818, + "high_school_politics": 0.3006993006993007, + "human_sexuality": 0.35714285714285715, + "international_law": 0.2972972972972973, + "journalism": 0.36627906976744184, + "jurisprudence": 0.36009732360097324, + "legal_and_moral_basis": 0.5794392523364486, + "logical": 0.35772357723577236, + "machine_learning": 0.29508196721311475, + "management": 0.4142857142857143, + "marketing": 0.37222222222222223, + "marxist_theory": 0.43915343915343913, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.4, + "philosophy": 0.4380952380952381, + "professional_accounting": 0.2914285714285714, + "professional_law": 0.2938388625592417, + "professional_medicine": 0.31648936170212766, + "professional_psychology": 0.3232758620689655, + "public_relations": 0.40229885057471265, + "security_study": 0.34074074074074073, + "sociology": 0.41150442477876104, + "sports_science": 0.3393939393939394, + "traditional_chinese_medicine": 0.2864864864864865, + "virology": 0.3076923076923077, + "world_history": 0.39751552795031053, + "world_religions": 0.36875 + } + }, + "prompt_4": { + "accuracy": 0.3360386807114488, + "category_acc": { + "agronomy": 0.3431952662721893, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.22560975609756098, + "arts": 0.3375, + "astronomy": 0.296969696969697, + "business_ethics": 0.3397129186602871, + "chinese_civil_service_exam": 0.29375, + "chinese_driving_rule": 0.44274809160305345, + "chinese_food_culture": 0.27941176470588236, + "chinese_foreign_policy": 0.3925233644859813, + "chinese_history": 0.3622291021671827, + "chinese_literature": 0.3382352941176471, + "chinese_teacher_qualification": 0.39106145251396646, + "clinical_knowledge": 0.3080168776371308, + "college_actuarial_science": 0.22641509433962265, + "college_education": 0.2803738317757009, + "college_engineering_hydrology": 0.41509433962264153, + "college_law": 0.3333333333333333, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.33962264150943394, + "college_medicine": 0.28205128205128205, + "computer_science": 0.3431372549019608, + "computer_security": 0.3508771929824561, + "conceptual_physics": 0.36054421768707484, + "construction_project_management": 0.2949640287769784, + "economics": 0.36477987421383645, + "education": 0.3312883435582822, + "electrical_engineering": 0.3488372093023256, + "elementary_chinese": 0.24206349206349206, + "elementary_commonsense": 0.3181818181818182, + "elementary_information_and_technology": 0.4411764705882353, + "elementary_mathematics": 0.2565217391304348, + "ethnology": 0.32592592592592595, + "food_science": 0.42657342657342656, + "genetics": 0.3409090909090909, + "global_facts": 0.3422818791946309, + "high_school_biology": 0.25443786982248523, + "high_school_chemistry": 0.23484848484848486, + "high_school_geography": 0.3050847457627119, + "high_school_mathematics": 0.27439024390243905, + "high_school_physics": 0.2818181818181818, + "high_school_politics": 0.2937062937062937, + "human_sexuality": 0.3412698412698413, + "international_law": 0.2972972972972973, + "journalism": 0.36046511627906974, + "jurisprudence": 0.36253041362530414, + "legal_and_moral_basis": 0.5186915887850467, + "logical": 0.34959349593495936, + "machine_learning": 0.28688524590163933, + "management": 0.36666666666666664, + "marketing": 0.3611111111111111, + "marxist_theory": 0.38095238095238093, + "modern_chinese": 0.3275862068965517, + "nutrition": 0.3793103448275862, + "philosophy": 0.4095238095238095, + "professional_accounting": 0.3314285714285714, + "professional_law": 0.3033175355450237, + "professional_medicine": 0.3058510638297872, + "professional_psychology": 0.3275862068965517, + "public_relations": 0.41954022988505746, + "security_study": 0.3925925925925926, + "sociology": 0.4026548672566372, + "sports_science": 0.36363636363636365, + "traditional_chinese_medicine": 0.24864864864864866, + "virology": 0.35502958579881655, + "world_history": 0.391304347826087, + "world_religions": 0.3875 + } + }, + "prompt_5": { + "accuracy": 0.3321533413918149, + "category_acc": { + "agronomy": 0.28994082840236685, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.2804878048780488, + "arts": 0.3, + "astronomy": 0.2727272727272727, + "business_ethics": 0.36363636363636365, + "chinese_civil_service_exam": 0.3375, + "chinese_driving_rule": 0.3969465648854962, + "chinese_food_culture": 0.2647058823529412, + "chinese_foreign_policy": 0.35514018691588783, + "chinese_history": 0.39009287925696595, + "chinese_literature": 0.3235294117647059, + "chinese_teacher_qualification": 0.3687150837988827, + "clinical_knowledge": 0.34177215189873417, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.3177570093457944, + "college_engineering_hydrology": 0.41509433962264153, + "college_law": 0.25925925925925924, + "college_mathematics": 0.20952380952380953, + "college_medical_statistics": 0.3018867924528302, + "college_medicine": 0.27472527472527475, + "computer_science": 0.3088235294117647, + "computer_security": 0.3508771929824561, + "conceptual_physics": 0.35374149659863946, + "construction_project_management": 0.33093525179856115, + "economics": 0.3710691823899371, + "education": 0.32515337423312884, + "electrical_engineering": 0.38372093023255816, + "elementary_chinese": 0.2777777777777778, + "elementary_commonsense": 0.2676767676767677, + "elementary_information_and_technology": 0.3949579831932773, + "elementary_mathematics": 0.30869565217391304, + "ethnology": 0.32592592592592595, + "food_science": 0.42657342657342656, + "genetics": 0.2897727272727273, + "global_facts": 0.30201342281879195, + "high_school_biology": 0.2781065088757396, + "high_school_chemistry": 0.30303030303030304, + "high_school_geography": 0.2796610169491525, + "high_school_mathematics": 0.2804878048780488, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.3076923076923077, + "human_sexuality": 0.31746031746031744, + "international_law": 0.2756756756756757, + "journalism": 0.36627906976744184, + "jurisprudence": 0.35036496350364965, + "legal_and_moral_basis": 0.5514018691588785, + "logical": 0.4146341463414634, + "machine_learning": 0.27049180327868855, + "management": 0.38571428571428573, + "marketing": 0.3611111111111111, + "marxist_theory": 0.3862433862433862, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.35172413793103446, + "philosophy": 0.3333333333333333, + "professional_accounting": 0.28, + "professional_law": 0.2843601895734597, + "professional_medicine": 0.31117021276595747, + "professional_psychology": 0.33189655172413796, + "public_relations": 0.40804597701149425, + "security_study": 0.37037037037037035, + "sociology": 0.37610619469026546, + "sports_science": 0.3393939393939394, + "traditional_chinese_medicine": 0.2594594594594595, + "virology": 0.3254437869822485, + "world_history": 0.4409937888198758, + "world_religions": 0.38125 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.30303030303030304 + }, + "prompt_2": { + "accuracy": 0.30303030303030304 + }, + "prompt_3": { + "accuracy": 0.3333333333333333 + }, + "prompt_4": { + "accuracy": 0.36363636363636365 + }, + "prompt_5": { + "accuracy": 0.3333333333333333 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.22272727272727272 + }, + "prompt_2": { + "accuracy": 0.20454545454545456 + }, + "prompt_3": { + "accuracy": 0.20681818181818182 + }, + "prompt_4": { + "accuracy": 0.325 + }, + "prompt_5": { + "accuracy": 0.3340909090909091 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.32372881355932204 + }, + "prompt_2": { + "accuracy": 0.33322033898305087 + }, + "prompt_3": { + "accuracy": 0.33559322033898303 + }, + "prompt_4": { + "accuracy": 0.3264406779661017 + }, + "prompt_5": { + "accuracy": 0.3389830508474576 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6241585639491398 + }, + "prompt_2": { + "accuracy": 0.631264023934181 + }, + "prompt_3": { + "accuracy": 0.6080777860882572 + }, + "prompt_4": { + "accuracy": 0.6428571428571429 + }, + "prompt_5": { + "accuracy": 0.6488406881077038 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7829495345418912 + }, + "prompt_2": { + "accuracy": 0.7354238118569328 + }, + "prompt_3": { + "accuracy": 0.7662910338069574 + }, + "prompt_4": { + "accuracy": 0.7658010779029887 + }, + "prompt_5": { + "accuracy": 0.7790298873101421 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.28004687400650213, + "rouge2": 0.10374578920471764, + "rougeL": 0.2201696733376304, + "avg_rouge": 0.20132077884961672 + }, + "prompt_2": { + "rouge1": 0.22181461469540578, + "rouge2": 0.08365410889570252, + "rougeL": 0.1721582350888628, + "avg_rouge": 0.15920898622665705 + }, + "prompt_3": { + "rouge1": 0.24251543545930523, + "rouge2": 0.09580941179518966, + "rougeL": 0.18779666579824275, + "avg_rouge": 0.1753738376842459 + }, + "prompt_4": { + "rouge1": 0.33515651817441255, + "rouge2": 0.13651515899550423, + "rougeL": 0.26222764084869493, + "avg_rouge": 0.24463310600620392 + }, + "prompt_5": { + "rouge1": 0.31993275439290103, + "rouge2": 0.12598258695651107, + "rougeL": 0.2500379117113938, + "avg_rouge": 0.2319844176869353 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.20458991538769805, + "rouge2": 0.060535492771218474, + "rougeL": 0.14906981298379574, + "avg_rouge": 0.1380650737142374 + }, + "prompt_2": { + "rouge1": 0.20472313665880154, + "rouge2": 0.060535112183542565, + "rougeL": 0.14956947760299275, + "avg_rouge": 0.1382759088151123 + }, + "prompt_3": { + "rouge1": 0.2061219138343844, + "rouge2": 0.060381170264831086, + "rougeL": 0.15032611846680502, + "avg_rouge": 0.13894306752200683 + }, + "prompt_4": { + "rouge1": 0.21948043791934715, + "rouge2": 0.06302286795447794, + "rougeL": 0.16107601033909735, + "avg_rouge": 0.14785977207097414 + }, + "prompt_5": { + "rouge1": 0.22565456514657403, + "rouge2": 0.06019712302189149, + "rougeL": 0.16580810828250248, + "avg_rouge": 0.150553265483656 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7603211009174312 + }, + "prompt_2": { + "accuracy": 0.6594036697247706 + }, + "prompt_3": { + "accuracy": 0.6961009174311926 + }, + "prompt_4": { + "accuracy": 0.606651376146789 + }, + "prompt_5": { + "accuracy": 0.5905963302752294 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6999041227229147 + }, + "prompt_2": { + "accuracy": 0.42857142857142855 + }, + "prompt_3": { + "accuracy": 0.3326941514860978 + }, + "prompt_4": { + "accuracy": 0.700862895493768 + }, + "prompt_5": { + "accuracy": 0.5867689357622243 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.476 + }, + "prompt_2": { + "accuracy": 0.514 + }, + "prompt_3": { + "accuracy": 0.5365 + }, + "prompt_4": { + "accuracy": 0.4995 + }, + "prompt_5": { + "accuracy": 0.5105 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3605 + }, + "prompt_2": { + "accuracy": 0.4115 + }, + "prompt_3": { + "accuracy": 0.414 + }, + "prompt_4": { + "accuracy": 0.391 + }, + "prompt_5": { + "accuracy": 0.3955 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.512 + }, + "prompt_2": { + "accuracy": 0.487 + }, + "prompt_3": { + "accuracy": 0.5065 + }, + "prompt_4": { + "accuracy": 0.5245 + }, + "prompt_5": { + "accuracy": 0.4985 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5352112676056338 + }, + "prompt_2": { + "accuracy": 0.5774647887323944 + }, + "prompt_3": { + "accuracy": 0.4507042253521127 + }, + "prompt_4": { + "accuracy": 0.4225352112676056 + }, + "prompt_5": { + "accuracy": 0.4225352112676056 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.48375451263537905 + }, + "prompt_2": { + "accuracy": 0.5018050541516246 + }, + "prompt_3": { + "accuracy": 0.5018050541516246 + }, + "prompt_4": { + "accuracy": 0.5306859205776173 + }, + "prompt_5": { + "accuracy": 0.49097472924187724 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6666666666666666 + }, + "prompt_2": { + "accuracy": 0.46568627450980393 + }, + "prompt_3": { + "accuracy": 0.5882352941176471 + }, + "prompt_4": { + "accuracy": 0.6495098039215687 + }, + "prompt_5": { + "accuracy": 0.6348039215686274 + } } }, "five_shot": { @@ -4736,53 +42191,1733 @@ "model_link": "https://huggingface.co/meta-llama/Llama-2-13b-chat-hf", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.440952380952381, + "language_acc": { + "Malay": 0.38, + "English": 0.5333333333333333, + "Vietnamese": 0.42, + "Spanish": 0.52, + "Indonesian": 0.41333333333333333, + "Filipino": 0.38666666666666666, + "Chinese": 0.43333333333333335 + }, + "consistency_score_2": 0.5225396825396826, + "consistency_score_3": 0.34895238095238107, + "consistency_score_4": 0.26171428571428573, + "consistency_score_5": 0.20857142857142857, + "consistency_score_6": 0.17238095238095236, + "consistency_score_7": 0.14666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5066666666666667, + "Malay,Vietnamese": 0.5266666666666666, + "Malay,Spanish": 0.58, + "Malay,Indonesian": 0.56, + "Malay,Filipino": 0.44, + "Malay,Chinese": 0.5066666666666667, + "English,Vietnamese": 0.5266666666666666, + "English,Spanish": 0.6333333333333333, + "English,Indonesian": 0.48, + "English,Filipino": 0.4666666666666667, + "English,Chinese": 0.48, + "Vietnamese,Spanish": 0.5533333333333333, + "Vietnamese,Indonesian": 0.5866666666666667, + "Vietnamese,Filipino": 0.5, + "Vietnamese,Chinese": 0.5666666666666667, + "Spanish,Indonesian": 0.56, + "Spanish,Filipino": 0.5133333333333333, + "Spanish,Chinese": 0.5133333333333333, + "Indonesian,Filipino": 0.5, + "Indonesian,Chinese": 0.49333333333333335, + "Filipino,Chinese": 0.48 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.35333333333333333, + "Malay,English,Spanish": 0.4, + "Malay,English,Indonesian": 0.3466666666666667, + "Malay,English,Filipino": 0.2866666666666667, + "Malay,English,Chinese": 0.31333333333333335, + "Malay,Vietnamese,Spanish": 0.38666666666666666, + "Malay,Vietnamese,Indonesian": 0.4066666666666667, + "Malay,Vietnamese,Filipino": 0.30666666666666664, + "Malay,Vietnamese,Chinese": 0.36666666666666664, + "Malay,Spanish,Indonesian": 0.4066666666666667, + "Malay,Spanish,Filipino": 0.3466666666666667, + "Malay,Spanish,Chinese": 0.36666666666666664, + "Malay,Indonesian,Filipino": 0.3333333333333333, + "Malay,Indonesian,Chinese": 0.36, + "Malay,Filipino,Chinese": 0.29333333333333333, + "English,Vietnamese,Spanish": 0.4066666666666667, + "English,Vietnamese,Indonesian": 0.36, + "English,Vietnamese,Filipino": 0.3, + "English,Vietnamese,Chinese": 0.3466666666666667, + "English,Spanish,Indonesian": 0.38666666666666666, + "English,Spanish,Filipino": 0.36, + "English,Spanish,Chinese": 0.36, + "English,Indonesian,Filipino": 0.3, + "English,Indonesian,Chinese": 0.3, + "English,Filipino,Chinese": 0.2866666666666667, + "Vietnamese,Spanish,Indonesian": 0.4066666666666667, + "Vietnamese,Spanish,Filipino": 0.34, + "Vietnamese,Spanish,Chinese": 0.37333333333333335, + "Vietnamese,Indonesian,Filipino": 0.37333333333333335, + "Vietnamese,Indonesian,Chinese": 0.38, + "Vietnamese,Filipino,Chinese": 0.3466666666666667, + "Spanish,Indonesian,Filipino": 0.34, + "Spanish,Indonesian,Chinese": 0.3466666666666667, + "Spanish,Filipino,Chinese": 0.30666666666666664, + "Indonesian,Filipino,Chinese": 0.32 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.29333333333333333, + "Malay,English,Vietnamese,Indonesian": 0.2866666666666667, + "Malay,English,Vietnamese,Filipino": 0.22666666666666666, + "Malay,English,Vietnamese,Chinese": 0.25333333333333335, + "Malay,English,Spanish,Indonesian": 0.31333333333333335, + "Malay,English,Spanish,Filipino": 0.26, + "Malay,English,Spanish,Chinese": 0.2733333333333333, + "Malay,English,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Indonesian,Chinese": 0.24, + "Malay,English,Filipino,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.32666666666666666, + "Malay,Vietnamese,Spanish,Filipino": 0.2733333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.2866666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.2733333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.2866666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.24666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.28, + "Malay,Spanish,Indonesian,Chinese": 0.2866666666666667, + "Malay,Spanish,Filipino,Chinese": 0.24666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish,Indonesian": 0.29333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.25333333333333335, + "English,Vietnamese,Spanish,Chinese": 0.28, + "English,Vietnamese,Indonesian,Filipino": 0.24666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.26, + "English,Vietnamese,Filipino,Chinese": 0.22666666666666666, + "English,Spanish,Indonesian,Filipino": 0.25333333333333335, + "English,Spanish,Indonesian,Chinese": 0.24, + "English,Spanish,Filipino,Chinese": 0.22666666666666666, + "English,Indonesian,Filipino,Chinese": 0.2, + "Vietnamese,Spanish,Indonesian,Filipino": 0.29333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.28, + "Vietnamese,Spanish,Filipino,Chinese": 0.26, + "Vietnamese,Indonesian,Filipino,Chinese": 0.28, + "Spanish,Indonesian,Filipino,Chinese": 0.24 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.25333333333333335, + "Malay,English,Vietnamese,Spanish,Filipino": 0.20666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.22, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.2, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.18, + "Malay,English,Spanish,Indonesian,Filipino": 0.22, + "Malay,English,Spanish,Indonesian,Chinese": 0.22, + "Malay,English,Spanish,Filipino,Chinese": 0.18666666666666668, + "Malay,English,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.24666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.24, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.22, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.21333333333333335, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.19333333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.18666666666666668, + "English,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.16666666666666666, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.16, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667 + } + }, + "AC3_2": 0.47829167642325415, + "AC3_3": 0.38959477299372725, + "AC3_4": 0.3284730665992666, + "AC3_5": 0.28319229153578834, + "AC3_6": 0.2478645370787828, + "AC3_7": 0.2201188546356902 + }, + "prompt_2": { + "overall_acc": 0.4238095238095238, + "language_acc": { + "Malay": 0.4, + "English": 0.5, + "Vietnamese": 0.37333333333333335, + "Spanish": 0.44666666666666666, + "Indonesian": 0.4266666666666667, + "Filipino": 0.42, + "Chinese": 0.4 + }, + "consistency_score_2": 0.4920634920634921, + "consistency_score_3": 0.3112380952380952, + "consistency_score_4": 0.22038095238095243, + "consistency_score_5": 0.1663492063492064, + "consistency_score_6": 0.13142857142857142, + "consistency_score_7": 0.10666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.4666666666666667, + "Malay,Vietnamese": 0.4866666666666667, + "Malay,Spanish": 0.4666666666666667, + "Malay,Indonesian": 0.54, + "Malay,Filipino": 0.43333333333333335, + "Malay,Chinese": 0.4533333333333333, + "English,Vietnamese": 0.49333333333333335, + "English,Spanish": 0.6266666666666667, + "English,Indonesian": 0.4866666666666667, + "English,Filipino": 0.46, + "English,Chinese": 0.4533333333333333, + "Vietnamese,Spanish": 0.5533333333333333, + "Vietnamese,Indonesian": 0.5733333333333334, + "Vietnamese,Filipino": 0.5, + "Vietnamese,Chinese": 0.46, + "Spanish,Indonesian": 0.5, + "Spanish,Filipino": 0.48, + "Spanish,Chinese": 0.48, + "Indonesian,Filipino": 0.5, + "Indonesian,Chinese": 0.48, + "Filipino,Chinese": 0.44 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.29333333333333333, + "Malay,English,Spanish": 0.3333333333333333, + "Malay,English,Indonesian": 0.32666666666666666, + "Malay,English,Filipino": 0.26, + "Malay,English,Chinese": 0.26, + "Malay,Vietnamese,Spanish": 0.3, + "Malay,Vietnamese,Indonesian": 0.37333333333333335, + "Malay,Vietnamese,Filipino": 0.2866666666666667, + "Malay,Vietnamese,Chinese": 0.2866666666666667, + "Malay,Spanish,Indonesian": 0.3333333333333333, + "Malay,Spanish,Filipino": 0.2866666666666667, + "Malay,Spanish,Chinese": 0.2866666666666667, + "Malay,Indonesian,Filipino": 0.32666666666666666, + "Malay,Indonesian,Chinese": 0.32666666666666666, + "Malay,Filipino,Chinese": 0.26, + "English,Vietnamese,Spanish": 0.38666666666666666, + "English,Vietnamese,Indonesian": 0.35333333333333333, + "English,Vietnamese,Filipino": 0.32, + "English,Vietnamese,Chinese": 0.2866666666666667, + "English,Spanish,Indonesian": 0.36666666666666664, + "English,Spanish,Filipino": 0.3466666666666667, + "English,Spanish,Chinese": 0.3333333333333333, + "English,Indonesian,Filipino": 0.2866666666666667, + "English,Indonesian,Chinese": 0.26666666666666666, + "English,Filipino,Chinese": 0.26666666666666666, + "Vietnamese,Spanish,Indonesian": 0.37333333333333335, + "Vietnamese,Spanish,Filipino": 0.3466666666666667, + "Vietnamese,Spanish,Chinese": 0.32, + "Vietnamese,Indonesian,Filipino": 0.34, + "Vietnamese,Indonesian,Chinese": 0.32, + "Vietnamese,Filipino,Chinese": 0.28, + "Spanish,Indonesian,Filipino": 0.31333333333333335, + "Spanish,Indonesian,Chinese": 0.2866666666666667, + "Spanish,Filipino,Chinese": 0.2733333333333333, + "Indonesian,Filipino,Chinese": 0.2866666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.24, + "Malay,English,Vietnamese,Indonesian": 0.26, + "Malay,English,Vietnamese,Filipino": 0.20666666666666667, + "Malay,English,Vietnamese,Chinese": 0.19333333333333333, + "Malay,English,Spanish,Indonesian": 0.26, + "Malay,English,Spanish,Filipino": 0.22, + "Malay,English,Spanish,Chinese": 0.20666666666666667, + "Malay,English,Indonesian,Filipino": 0.20666666666666667, + "Malay,English,Indonesian,Chinese": 0.18666666666666668, + "Malay,English,Filipino,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Spanish,Indonesian": 0.26, + "Malay,Vietnamese,Spanish,Filipino": 0.22, + "Malay,Vietnamese,Spanish,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.24666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Filipino,Chinese": 0.19333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,Spanish,Indonesian,Chinese": 0.22, + "Malay,Spanish,Filipino,Chinese": 0.19333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.21333333333333335, + "English,Vietnamese,Spanish,Indonesian": 0.28, + "English,Vietnamese,Spanish,Filipino": 0.2733333333333333, + "English,Vietnamese,Spanish,Chinese": 0.24, + "English,Vietnamese,Indonesian,Filipino": 0.22666666666666666, + "English,Vietnamese,Indonesian,Chinese": 0.21333333333333335, + "English,Vietnamese,Filipino,Chinese": 0.20666666666666667, + "English,Spanish,Indonesian,Filipino": 0.24, + "English,Spanish,Indonesian,Chinese": 0.19333333333333333, + "English,Spanish,Filipino,Chinese": 0.21333333333333335, + "English,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino": 0.24666666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.24, + "Vietnamese,Spanish,Filipino,Chinese": 0.22, + "Vietnamese,Indonesian,Filipino,Chinese": 0.2, + "Spanish,Indonesian,Filipino,Chinese": 0.18666666666666668 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.21333333333333335, + "Malay,English,Vietnamese,Spanish,Filipino": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Chinese": 0.16666666666666666, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.17333333333333334, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.16, + "Malay,English,Vietnamese,Filipino,Chinese": 0.14666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Filipino,Chinese": 0.14666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.18, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.16, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.16, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.19333333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.17333333333333334, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.18, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.14, + "English,Spanish,Indonesian,Filipino,Chinese": 0.14, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.14, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.13333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + } + }, + "AC3_2": 0.45539324910435874, + "AC3_3": 0.3589037376577091, + "AC3_4": 0.2899749372983446, + "AC3_5": 0.2389200542641857, + "AC3_6": 0.20063709871417096, + "AC3_7": 0.17043686412107267 + }, + "prompt_3": { + "overall_acc": 0.41809523809523813, + "language_acc": { + "Malay": 0.3933333333333333, + "English": 0.49333333333333335, + "Vietnamese": 0.38666666666666666, + "Spanish": 0.5, + "Indonesian": 0.38666666666666666, + "Filipino": 0.3466666666666667, + "Chinese": 0.42 + }, + "consistency_score_2": 0.4707936507936508, + "consistency_score_3": 0.2914285714285714, + "consistency_score_4": 0.20380952380952383, + "consistency_score_5": 0.1507936507936508, + "consistency_score_6": 0.11619047619047618, + "consistency_score_7": 0.09333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.4266666666666667, + "Malay,Vietnamese": 0.49333333333333335, + "Malay,Spanish": 0.46, + "Malay,Indonesian": 0.52, + "Malay,Filipino": 0.44666666666666666, + "Malay,Chinese": 0.36666666666666664, + "English,Vietnamese": 0.44, + "English,Spanish": 0.49333333333333335, + "English,Indonesian": 0.48, + "English,Filipino": 0.44666666666666666, + "English,Chinese": 0.44, + "Vietnamese,Spanish": 0.5266666666666666, + "Vietnamese,Indonesian": 0.5733333333333334, + "Vietnamese,Filipino": 0.5066666666666667, + "Vietnamese,Chinese": 0.44666666666666666, + "Spanish,Indonesian": 0.5066666666666667, + "Spanish,Filipino": 0.5266666666666666, + "Spanish,Chinese": 0.46, + "Indonesian,Filipino": 0.44, + "Indonesian,Chinese": 0.3933333333333333, + "Filipino,Chinese": 0.49333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.2733333333333333, + "Malay,English,Spanish": 0.2733333333333333, + "Malay,English,Indonesian": 0.2866666666666667, + "Malay,English,Filipino": 0.24666666666666667, + "Malay,English,Chinese": 0.22, + "Malay,Vietnamese,Spanish": 0.34, + "Malay,Vietnamese,Indonesian": 0.37333333333333335, + "Malay,Vietnamese,Filipino": 0.32, + "Malay,Vietnamese,Chinese": 0.24666666666666667, + "Malay,Spanish,Indonesian": 0.32666666666666666, + "Malay,Spanish,Filipino": 0.30666666666666664, + "Malay,Spanish,Chinese": 0.25333333333333335, + "Malay,Indonesian,Filipino": 0.29333333333333333, + "Malay,Indonesian,Chinese": 0.25333333333333335, + "Malay,Filipino,Chinese": 0.24, + "English,Vietnamese,Spanish": 0.3, + "English,Vietnamese,Indonesian": 0.32666666666666666, + "English,Vietnamese,Filipino": 0.2733333333333333, + "English,Vietnamese,Chinese": 0.26666666666666666, + "English,Spanish,Indonesian": 0.32, + "English,Spanish,Filipino": 0.3, + "English,Spanish,Chinese": 0.26, + "English,Indonesian,Filipino": 0.26666666666666666, + "English,Indonesian,Chinese": 0.25333333333333335, + "English,Filipino,Chinese": 0.25333333333333335, + "Vietnamese,Spanish,Indonesian": 0.37333333333333335, + "Vietnamese,Spanish,Filipino": 0.35333333333333333, + "Vietnamese,Spanish,Chinese": 0.3, + "Vietnamese,Indonesian,Filipino": 0.3333333333333333, + "Vietnamese,Indonesian,Chinese": 0.3, + "Vietnamese,Filipino,Chinese": 0.30666666666666664, + "Spanish,Indonesian,Filipino": 0.31333333333333335, + "Spanish,Indonesian,Chinese": 0.26666666666666666, + "Spanish,Filipino,Chinese": 0.32, + "Indonesian,Filipino,Chinese": 0.26 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.20666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.23333333333333334, + "Malay,English,Vietnamese,Filipino": 0.18, + "Malay,English,Vietnamese,Chinese": 0.16, + "Malay,English,Spanish,Indonesian": 0.22, + "Malay,English,Spanish,Filipino": 0.18666666666666668, + "Malay,English,Spanish,Chinese": 0.15333333333333332, + "Malay,English,Indonesian,Filipino": 0.18, + "Malay,English,Indonesian,Chinese": 0.16, + "Malay,English,Filipino,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Indonesian": 0.2866666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.26, + "Malay,Vietnamese,Spanish,Chinese": 0.2, + "Malay,Vietnamese,Indonesian,Filipino": 0.24666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.18666666666666668, + "Malay,Spanish,Indonesian,Filipino": 0.24, + "Malay,Spanish,Indonesian,Chinese": 0.2, + "Malay,Spanish,Filipino,Chinese": 0.20666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.18, + "English,Vietnamese,Spanish,Indonesian": 0.24666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.21333333333333335, + "English,Vietnamese,Spanish,Chinese": 0.18666666666666668, + "English,Vietnamese,Indonesian,Filipino": 0.20666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.17333333333333334, + "English,Spanish,Indonesian,Filipino": 0.2, + "English,Spanish,Indonesian,Chinese": 0.18, + "English,Spanish,Filipino,Chinese": 0.18666666666666668, + "English,Indonesian,Filipino,Chinese": 0.16, + "Vietnamese,Spanish,Indonesian,Filipino": 0.26, + "Vietnamese,Spanish,Indonesian,Chinese": 0.22, + "Vietnamese,Spanish,Filipino,Chinese": 0.24, + "Vietnamese,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.19333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.14, + "Malay,English,Vietnamese,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Malay,English,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.12, + "Malay,English,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.16666666666666666, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.14, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.13333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.14, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.12, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334 + } + }, + "AC3_2": 0.4428823128753458, + "AC3_3": 0.34345541701774884, + "AC3_4": 0.2740348573904874, + "AC3_5": 0.22164647104947408, + "AC3_6": 0.18184534416425013, + "AC3_7": 0.1526008689956119 + }, + "prompt_4": { + "overall_acc": 0.44095238095238093, + "language_acc": { + "Malay": 0.38666666666666666, + "English": 0.5066666666666667, + "Vietnamese": 0.44666666666666666, + "Spanish": 0.5133333333333333, + "Indonesian": 0.4, + "Filipino": 0.4066666666666667, + "Chinese": 0.4266666666666667 + }, + "consistency_score_2": 0.5244444444444445, + "consistency_score_3": 0.35504761904761906, + "consistency_score_4": 0.2651428571428572, + "consistency_score_5": 0.2066666666666667, + "consistency_score_6": 0.16476190476190475, + "consistency_score_7": 0.13333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.52, + "Malay,Vietnamese": 0.5266666666666666, + "Malay,Spanish": 0.5266666666666666, + "Malay,Indonesian": 0.5733333333333334, + "Malay,Filipino": 0.5133333333333333, + "Malay,Chinese": 0.4666666666666667, + "English,Vietnamese": 0.5533333333333333, + "English,Spanish": 0.6266666666666667, + "English,Indonesian": 0.4666666666666667, + "English,Filipino": 0.5666666666666667, + "English,Chinese": 0.4866666666666667, + "Vietnamese,Spanish": 0.5466666666666666, + "Vietnamese,Indonesian": 0.56, + "Vietnamese,Filipino": 0.49333333333333335, + "Vietnamese,Chinese": 0.4866666666666667, + "Spanish,Indonesian": 0.5733333333333334, + "Spanish,Filipino": 0.5466666666666666, + "Spanish,Chinese": 0.5133333333333333, + "Indonesian,Filipino": 0.5, + "Indonesian,Chinese": 0.47333333333333333, + "Filipino,Chinese": 0.49333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.36, + "Malay,English,Spanish": 0.38666666666666666, + "Malay,English,Indonesian": 0.35333333333333333, + "Malay,English,Filipino": 0.3466666666666667, + "Malay,English,Chinese": 0.32666666666666666, + "Malay,Vietnamese,Spanish": 0.3466666666666667, + "Malay,Vietnamese,Indonesian": 0.38, + "Malay,Vietnamese,Filipino": 0.32666666666666666, + "Malay,Vietnamese,Chinese": 0.30666666666666664, + "Malay,Spanish,Indonesian": 0.3933333333333333, + "Malay,Spanish,Filipino": 0.36, + "Malay,Spanish,Chinese": 0.32666666666666666, + "Malay,Indonesian,Filipino": 0.36, + "Malay,Indonesian,Chinese": 0.3466666666666667, + "Malay,Filipino,Chinese": 0.31333333333333335, + "English,Vietnamese,Spanish": 0.42, + "English,Vietnamese,Indonesian": 0.37333333333333335, + "English,Vietnamese,Filipino": 0.36666666666666664, + "English,Vietnamese,Chinese": 0.3466666666666667, + "English,Spanish,Indonesian": 0.38666666666666666, + "English,Spanish,Filipino": 0.4066666666666667, + "English,Spanish,Chinese": 0.3933333333333333, + "English,Indonesian,Filipino": 0.3466666666666667, + "English,Indonesian,Chinese": 0.3333333333333333, + "English,Filipino,Chinese": 0.34, + "Vietnamese,Spanish,Indonesian": 0.4, + "Vietnamese,Spanish,Filipino": 0.34, + "Vietnamese,Spanish,Chinese": 0.3333333333333333, + "Vietnamese,Indonesian,Filipino": 0.36, + "Vietnamese,Indonesian,Chinese": 0.35333333333333333, + "Vietnamese,Filipino,Chinese": 0.30666666666666664, + "Spanish,Indonesian,Filipino": 0.37333333333333335, + "Spanish,Indonesian,Chinese": 0.34, + "Spanish,Filipino,Chinese": 0.3466666666666667, + "Indonesian,Filipino,Chinese": 0.32666666666666666 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.28, + "Malay,English,Vietnamese,Indonesian": 0.28, + "Malay,English,Vietnamese,Filipino": 0.25333333333333335, + "Malay,English,Vietnamese,Chinese": 0.24666666666666667, + "Malay,English,Spanish,Indonesian": 0.3, + "Malay,English,Spanish,Filipino": 0.28, + "Malay,English,Spanish,Chinese": 0.26666666666666666, + "Malay,English,Indonesian,Filipino": 0.26666666666666666, + "Malay,English,Indonesian,Chinese": 0.25333333333333335, + "Malay,English,Filipino,Chinese": 0.24, + "Malay,Vietnamese,Spanish,Indonesian": 0.2866666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.23333333333333334, + "Malay,Vietnamese,Spanish,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "Malay,Vietnamese,Indonesian,Chinese": 0.26, + "Malay,Vietnamese,Filipino,Chinese": 0.22, + "Malay,Spanish,Indonesian,Filipino": 0.2866666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.25333333333333335, + "Malay,Spanish,Filipino,Chinese": 0.24666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.24666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.32, + "English,Vietnamese,Spanish,Filipino": 0.2866666666666667, + "English,Vietnamese,Spanish,Chinese": 0.2733333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.28, + "English,Vietnamese,Indonesian,Chinese": 0.28, + "English,Vietnamese,Filipino,Chinese": 0.25333333333333335, + "English,Spanish,Indonesian,Filipino": 0.3, + "English,Spanish,Indonesian,Chinese": 0.2733333333333333, + "English,Spanish,Filipino,Chinese": 0.2866666666666667, + "English,Indonesian,Filipino,Chinese": 0.24666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.28, + "Vietnamese,Spanish,Indonesian,Chinese": 0.2733333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.22666666666666666, + "Vietnamese,Indonesian,Filipino,Chinese": 0.25333333333333335, + "Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.24, + "Malay,English,Vietnamese,Spanish,Filipino": 0.2, + "Malay,English,Vietnamese,Spanish,Chinese": 0.2, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.21333333333333335, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.18666666666666668, + "Malay,English,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.21333333333333335, + "Malay,English,Spanish,Filipino,Chinese": 0.20666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.2, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.24, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.20666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.18, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.17333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.16, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + } + }, + "AC3_2": 0.47908801924227357, + "AC3_3": 0.39336455515289775, + "AC3_4": 0.3311603529532154, + "AC3_5": 0.2814313725055633, + "AC3_6": 0.23988918834015005, + "AC3_7": 0.20475400770342866 + }, + "prompt_5": { + "overall_acc": 0.4142857142857143, + "language_acc": { + "Malay": 0.36, + "English": 0.4666666666666667, + "Vietnamese": 0.38, + "Spanish": 0.52, + "Indonesian": 0.3933333333333333, + "Filipino": 0.35333333333333333, + "Chinese": 0.4266666666666667 + }, + "consistency_score_2": 0.4673015873015873, + "consistency_score_3": 0.28171428571428575, + "consistency_score_4": 0.19047619047619055, + "consistency_score_5": 0.1346031746031746, + "consistency_score_6": 0.09619047619047619, + "consistency_score_7": 0.06666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.3933333333333333, + "Malay,Vietnamese": 0.49333333333333335, + "Malay,Spanish": 0.44666666666666666, + "Malay,Indonesian": 0.5266666666666666, + "Malay,Filipino": 0.44666666666666666, + "Malay,Chinese": 0.46, + "English,Vietnamese": 0.38, + "English,Spanish": 0.5666666666666667, + "English,Indonesian": 0.4866666666666667, + "English,Filipino": 0.4066666666666667, + "English,Chinese": 0.4266666666666667, + "Vietnamese,Spanish": 0.47333333333333333, + "Vietnamese,Indonesian": 0.5466666666666666, + "Vietnamese,Filipino": 0.4866666666666667, + "Vietnamese,Chinese": 0.6, + "Spanish,Indonesian": 0.49333333333333335, + "Spanish,Filipino": 0.44666666666666666, + "Spanish,Chinese": 0.49333333333333335, + "Indonesian,Filipino": 0.36666666666666664, + "Indonesian,Chinese": 0.4666666666666667, + "Filipino,Chinese": 0.4066666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.24, + "Malay,English,Spanish": 0.2866666666666667, + "Malay,English,Indonesian": 0.28, + "Malay,English,Filipino": 0.22666666666666666, + "Malay,English,Chinese": 0.24, + "Malay,Vietnamese,Spanish": 0.2866666666666667, + "Malay,Vietnamese,Indonesian": 0.36, + "Malay,Vietnamese,Filipino": 0.2866666666666667, + "Malay,Vietnamese,Chinese": 0.3466666666666667, + "Malay,Spanish,Indonesian": 0.32666666666666666, + "Malay,Spanish,Filipino": 0.24, + "Malay,Spanish,Chinese": 0.2733333333333333, + "Malay,Indonesian,Filipino": 0.2733333333333333, + "Malay,Indonesian,Chinese": 0.30666666666666664, + "Malay,Filipino,Chinese": 0.25333333333333335, + "English,Vietnamese,Spanish": 0.28, + "English,Vietnamese,Indonesian": 0.2733333333333333, + "English,Vietnamese,Filipino": 0.24666666666666667, + "English,Vietnamese,Chinese": 0.29333333333333333, + "English,Spanish,Indonesian": 0.3333333333333333, + "English,Spanish,Filipino": 0.28, + "English,Spanish,Chinese": 0.30666666666666664, + "English,Indonesian,Filipino": 0.20666666666666667, + "English,Indonesian,Chinese": 0.26, + "English,Filipino,Chinese": 0.22, + "Vietnamese,Spanish,Indonesian": 0.32, + "Vietnamese,Spanish,Filipino": 0.29333333333333333, + "Vietnamese,Spanish,Chinese": 0.3466666666666667, + "Vietnamese,Indonesian,Filipino": 0.2866666666666667, + "Vietnamese,Indonesian,Chinese": 0.36666666666666664, + "Vietnamese,Filipino,Chinese": 0.31333333333333335, + "Spanish,Indonesian,Filipino": 0.24, + "Spanish,Indonesian,Chinese": 0.29333333333333333, + "Spanish,Filipino,Chinese": 0.25333333333333335, + "Indonesian,Filipino,Chinese": 0.22 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.19333333333333333, + "Malay,English,Vietnamese,Indonesian": 0.2, + "Malay,English,Vietnamese,Filipino": 0.16, + "Malay,English,Vietnamese,Chinese": 0.2, + "Malay,English,Spanish,Indonesian": 0.23333333333333334, + "Malay,English,Spanish,Filipino": 0.16666666666666666, + "Malay,English,Spanish,Chinese": 0.17333333333333334, + "Malay,English,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Indonesian,Chinese": 0.17333333333333334, + "Malay,English,Filipino,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Filipino": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.22, + "Malay,Vietnamese,Indonesian,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Indonesian,Chinese": 0.26, + "Malay,Vietnamese,Filipino,Chinese": 0.2, + "Malay,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Malay,Spanish,Indonesian,Chinese": 0.20666666666666667, + "Malay,Spanish,Filipino,Chinese": 0.16666666666666666, + "Malay,Indonesian,Filipino,Chinese": 0.16666666666666666, + "English,Vietnamese,Spanish,Indonesian": 0.22, + "English,Vietnamese,Spanish,Filipino": 0.18666666666666668, + "English,Vietnamese,Spanish,Chinese": 0.21333333333333335, + "English,Vietnamese,Indonesian,Filipino": 0.16666666666666666, + "English,Vietnamese,Indonesian,Chinese": 0.21333333333333335, + "English,Vietnamese,Filipino,Chinese": 0.17333333333333334, + "English,Spanish,Indonesian,Filipino": 0.16666666666666666, + "English,Spanish,Indonesian,Chinese": 0.20666666666666667, + "English,Spanish,Filipino,Chinese": 0.16, + "English,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Vietnamese,Spanish,Indonesian,Filipino": 0.20666666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.22666666666666666, + "Vietnamese,Spanish,Filipino,Chinese": 0.2, + "Vietnamese,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.16666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.16, + "Malay,English,Vietnamese,Filipino,Chinese": 0.12, + "Malay,English,Spanish,Indonesian,Filipino": 0.13333333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.14, + "Malay,English,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.14, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.14, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.12, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.14, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.16666666666666666, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.12, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.1, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.10666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.08666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.08, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667 + } + }, + "AC3_2": 0.4391995472519968, + "AC3_3": 0.335374149611678, + "AC3_4": 0.26096737903446326, + "AC3_5": 0.20318929187407814, + "AC3_6": 0.15613006393529966, + "AC3_7": 0.11485148512463483 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3887987012987013, + "language_acc": { + "English": 0.4772727272727273, + "Vietnamese": 0.375, + "Chinese": 0.4431818181818182, + "Indonesian": 0.35795454545454547, + "Filipino": 0.29545454545454547, + "Spanish": 0.3977272727272727, + "Malay": 0.375 + }, + "consistency_score_2": 0.5481601731601732, + "consistency_score_3": 0.37418831168831174, + "consistency_score_4": 0.2792207792207792, + "consistency_score_5": 0.22023809523809523, + "consistency_score_6": 0.1810064935064935, + "consistency_score_7": 0.1534090909090909, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5340909090909091, + "English,Chinese": 0.5056818181818182, + "English,Indonesian": 0.5, + "English,Filipino": 0.36363636363636365, + "English,Spanish": 0.6477272727272727, + "English,Malay": 0.4943181818181818, + "Vietnamese,Chinese": 0.5965909090909091, + "Vietnamese,Indonesian": 0.6193181818181818, + "Vietnamese,Filipino": 0.5454545454545454, + "Vietnamese,Spanish": 0.5795454545454546, + "Vietnamese,Malay": 0.6306818181818182, + "Chinese,Indonesian": 0.5284090909090909, + "Chinese,Filipino": 0.4772727272727273, + "Chinese,Spanish": 0.6420454545454546, + "Chinese,Malay": 0.5340909090909091, + "Indonesian,Filipino": 0.5397727272727273, + "Indonesian,Spanish": 0.5738636363636364, + "Indonesian,Malay": 0.6420454545454546, + "Filipino,Spanish": 0.4375, + "Filipino,Malay": 0.5397727272727273, + "Spanish,Malay": 0.5795454545454546 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.3806818181818182, + "English,Vietnamese,Indonesian": 0.3806818181818182, + "English,Vietnamese,Filipino": 0.2897727272727273, + "English,Vietnamese,Spanish": 0.4375, + "English,Vietnamese,Malay": 0.375, + "English,Chinese,Indonesian": 0.3181818181818182, + "English,Chinese,Filipino": 0.25, + "English,Chinese,Spanish": 0.4318181818181818, + "English,Chinese,Malay": 0.3409090909090909, + "English,Indonesian,Filipino": 0.2727272727272727, + "English,Indonesian,Spanish": 0.4034090909090909, + "English,Indonesian,Malay": 0.3806818181818182, + "English,Filipino,Spanish": 0.3068181818181818, + "English,Filipino,Malay": 0.2840909090909091, + "English,Spanish,Malay": 0.4147727272727273, + "Vietnamese,Chinese,Indonesian": 0.4147727272727273, + "Vietnamese,Chinese,Filipino": 0.3522727272727273, + "Vietnamese,Chinese,Spanish": 0.44886363636363635, + "Vietnamese,Chinese,Malay": 0.4147727272727273, + "Vietnamese,Indonesian,Filipino": 0.3977272727272727, + "Vietnamese,Indonesian,Spanish": 0.42613636363636365, + "Vietnamese,Indonesian,Malay": 0.4772727272727273, + "Vietnamese,Filipino,Spanish": 0.3125, + "Vietnamese,Filipino,Malay": 0.4090909090909091, + "Vietnamese,Spanish,Malay": 0.4318181818181818, + "Chinese,Indonesian,Filipino": 0.3352272727272727, + "Chinese,Indonesian,Spanish": 0.42613636363636365, + "Chinese,Indonesian,Malay": 0.38636363636363635, + "Chinese,Filipino,Spanish": 0.3409090909090909, + "Chinese,Filipino,Malay": 0.32954545454545453, + "Chinese,Spanish,Malay": 0.42613636363636365, + "Indonesian,Filipino,Spanish": 0.32954545454545453, + "Indonesian,Filipino,Malay": 0.4090909090909091, + "Indonesian,Spanish,Malay": 0.4375, + "Filipino,Spanish,Malay": 0.32386363636363635 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.2784090909090909, + "English,Vietnamese,Chinese,Filipino": 0.2159090909090909, + "English,Vietnamese,Chinese,Spanish": 0.3409090909090909, + "English,Vietnamese,Chinese,Malay": 0.2784090909090909, + "English,Vietnamese,Indonesian,Filipino": 0.24431818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.32386363636363635, + "English,Vietnamese,Indonesian,Malay": 0.3181818181818182, + "English,Vietnamese,Filipino,Spanish": 0.2556818181818182, + "English,Vietnamese,Filipino,Malay": 0.25, + "English,Vietnamese,Spanish,Malay": 0.3352272727272727, + "English,Chinese,Indonesian,Filipino": 0.1875, + "English,Chinese,Indonesian,Spanish": 0.2840909090909091, + "English,Chinese,Indonesian,Malay": 0.26136363636363635, + "English,Chinese,Filipino,Spanish": 0.23295454545454544, + "English,Chinese,Filipino,Malay": 0.20454545454545456, + "English,Chinese,Spanish,Malay": 0.3068181818181818, + "English,Indonesian,Filipino,Spanish": 0.24431818181818182, + "English,Indonesian,Filipino,Malay": 0.24431818181818182, + "English,Indonesian,Spanish,Malay": 0.3352272727272727, + "English,Filipino,Spanish,Malay": 0.24431818181818182, + "Vietnamese,Chinese,Indonesian,Filipino": 0.2784090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.32954545454545453, + "Vietnamese,Chinese,Indonesian,Malay": 0.32386363636363635, + "Vietnamese,Chinese,Filipino,Spanish": 0.2556818181818182, + "Vietnamese,Chinese,Filipino,Malay": 0.2784090909090909, + "Vietnamese,Chinese,Spanish,Malay": 0.3352272727272727, + "Vietnamese,Indonesian,Filipino,Spanish": 0.26704545454545453, + "Vietnamese,Indonesian,Filipino,Malay": 0.3409090909090909, + "Vietnamese,Indonesian,Spanish,Malay": 0.3522727272727273, + "Vietnamese,Filipino,Spanish,Malay": 0.26704545454545453, + "Chinese,Indonesian,Filipino,Spanish": 0.2556818181818182, + "Chinese,Indonesian,Filipino,Malay": 0.26136363636363635, + "Chinese,Indonesian,Spanish,Malay": 0.3181818181818182, + "Chinese,Filipino,Spanish,Malay": 0.2556818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.26704545454545453 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.17613636363636365, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.25, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.23295454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.20454545454545456, + "English,Vietnamese,Chinese,Filipino,Malay": 0.1875, + "English,Vietnamese,Chinese,Spanish,Malay": 0.26136363636363635, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.2159090909090909, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.22727272727272727, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.2840909090909091, + "English,Vietnamese,Filipino,Spanish,Malay": 0.2215909090909091, + "English,Chinese,Indonesian,Filipino,Spanish": 0.17613636363636365, + "English,Chinese,Indonesian,Filipino,Malay": 0.17045454545454544, + "English,Chinese,Indonesian,Spanish,Malay": 0.23863636363636365, + "English,Chinese,Filipino,Spanish,Malay": 0.1875, + "English,Indonesian,Filipino,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.23863636363636365, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.26704545454545453, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.23863636363636365, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.20454545454545456 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.16477272727272727, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.2159090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.17613636363636365, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.19886363636363635, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1875 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909 + } + }, + "AC3_2": 0.4549270394150149, + "AC3_3": 0.38135361973448917, + "AC3_4": 0.32502248657772015, + "AC3_5": 0.2811924858331217, + "AC3_6": 0.24701455947120884, + "AC3_7": 0.22000884590389821 + }, + "prompt_2": { + "overall_acc": 0.38555194805194803, + "language_acc": { + "English": 0.4943181818181818, + "Vietnamese": 0.375, + "Chinese": 0.4090909090909091, + "Indonesian": 0.3522727272727273, + "Filipino": 0.32386363636363635, + "Spanish": 0.375, + "Malay": 0.3693181818181818 + }, + "consistency_score_2": 0.5606060606060607, + "consistency_score_3": 0.3891233766233766, + "consistency_score_4": 0.29301948051948046, + "consistency_score_5": 0.2297077922077922, + "consistency_score_6": 0.18506493506493507, + "consistency_score_7": 0.1534090909090909, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5340909090909091, + "English,Chinese": 0.5625, + "English,Indonesian": 0.48295454545454547, + "English,Filipino": 0.4034090909090909, + "English,Spanish": 0.6534090909090909, + "English,Malay": 0.5340909090909091, + "Vietnamese,Chinese": 0.5681818181818182, + "Vietnamese,Indonesian": 0.6420454545454546, + "Vietnamese,Filipino": 0.5511363636363636, + "Vietnamese,Spanish": 0.5795454545454546, + "Vietnamese,Malay": 0.6136363636363636, + "Chinese,Indonesian": 0.5340909090909091, + "Chinese,Filipino": 0.4602272727272727, + "Chinese,Spanish": 0.6022727272727273, + "Chinese,Malay": 0.5170454545454546, + "Indonesian,Filipino": 0.6079545454545454, + "Indonesian,Spanish": 0.6136363636363636, + "Indonesian,Malay": 0.6420454545454546, + "Filipino,Spanish": 0.48863636363636365, + "Filipino,Malay": 0.5909090909090909, + "Spanish,Malay": 0.5909090909090909 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.38636363636363635, + "English,Vietnamese,Indonesian": 0.38636363636363635, + "English,Vietnamese,Filipino": 0.29545454545454547, + "English,Vietnamese,Spanish": 0.4318181818181818, + "English,Vietnamese,Malay": 0.3977272727272727, + "English,Chinese,Indonesian": 0.3352272727272727, + "English,Chinese,Filipino": 0.2840909090909091, + "English,Chinese,Spanish": 0.4431818181818182, + "English,Chinese,Malay": 0.35795454545454547, + "English,Indonesian,Filipino": 0.3068181818181818, + "English,Indonesian,Spanish": 0.4147727272727273, + "English,Indonesian,Malay": 0.3693181818181818, + "English,Filipino,Spanish": 0.32954545454545453, + "English,Filipino,Malay": 0.3125, + "English,Spanish,Malay": 0.4431818181818182, + "Vietnamese,Chinese,Indonesian": 0.4090909090909091, + "Vietnamese,Chinese,Filipino": 0.3409090909090909, + "Vietnamese,Chinese,Spanish": 0.42613636363636365, + "Vietnamese,Chinese,Malay": 0.4034090909090909, + "Vietnamese,Indonesian,Filipino": 0.4318181818181818, + "Vietnamese,Indonesian,Spanish": 0.45454545454545453, + "Vietnamese,Indonesian,Malay": 0.48863636363636365, + "Vietnamese,Filipino,Spanish": 0.35795454545454547, + "Vietnamese,Filipino,Malay": 0.42045454545454547, + "Vietnamese,Spanish,Malay": 0.4431818181818182, + "Chinese,Indonesian,Filipino": 0.36363636363636365, + "Chinese,Indonesian,Spanish": 0.42045454545454547, + "Chinese,Indonesian,Malay": 0.39204545454545453, + "Chinese,Filipino,Spanish": 0.3409090909090909, + "Chinese,Filipino,Malay": 0.3409090909090909, + "Chinese,Spanish,Malay": 0.4090909090909091, + "Indonesian,Filipino,Spanish": 0.4090909090909091, + "Indonesian,Filipino,Malay": 0.44886363636363635, + "Indonesian,Spanish,Malay": 0.45454545454545453, + "Filipino,Spanish,Malay": 0.3693181818181818 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.2840909090909091, + "English,Vietnamese,Chinese,Filipino": 0.2215909090909091, + "English,Vietnamese,Chinese,Spanish": 0.32954545454545453, + "English,Vietnamese,Chinese,Malay": 0.2897727272727273, + "English,Vietnamese,Indonesian,Filipino": 0.25, + "English,Vietnamese,Indonesian,Spanish": 0.32954545454545453, + "English,Vietnamese,Indonesian,Malay": 0.3125, + "English,Vietnamese,Filipino,Spanish": 0.2556818181818182, + "English,Vietnamese,Filipino,Malay": 0.2556818181818182, + "English,Vietnamese,Spanish,Malay": 0.3522727272727273, + "English,Chinese,Indonesian,Filipino": 0.22727272727272727, + "English,Chinese,Indonesian,Spanish": 0.29545454545454547, + "English,Chinese,Indonesian,Malay": 0.26136363636363635, + "English,Chinese,Filipino,Spanish": 0.24431818181818182, + "English,Chinese,Filipino,Malay": 0.22727272727272727, + "English,Chinese,Spanish,Malay": 0.3125, + "English,Indonesian,Filipino,Spanish": 0.26704545454545453, + "English,Indonesian,Filipino,Malay": 0.2556818181818182, + "English,Indonesian,Spanish,Malay": 0.32954545454545453, + "English,Filipino,Spanish,Malay": 0.2727272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.29545454545454547, + "Vietnamese,Chinese,Indonesian,Spanish": 0.3352272727272727, + "Vietnamese,Chinese,Indonesian,Malay": 0.32386363636363635, + "Vietnamese,Chinese,Filipino,Spanish": 0.2727272727272727, + "Vietnamese,Chinese,Filipino,Malay": 0.2897727272727273, + "Vietnamese,Chinese,Spanish,Malay": 0.32954545454545453, + "Vietnamese,Indonesian,Filipino,Spanish": 0.3181818181818182, + "Vietnamese,Indonesian,Filipino,Malay": 0.36363636363636365, + "Vietnamese,Indonesian,Spanish,Malay": 0.36363636363636365, + "Vietnamese,Filipino,Spanish,Malay": 0.3068181818181818, + "Chinese,Indonesian,Filipino,Spanish": 0.2897727272727273, + "Chinese,Indonesian,Filipino,Malay": 0.2897727272727273, + "Chinese,Indonesian,Spanish,Malay": 0.3181818181818182, + "Chinese,Filipino,Spanish,Malay": 0.26704545454545453, + "Indonesian,Filipino,Spanish,Malay": 0.3181818181818182 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.19318181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.25, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.22727272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.19886363636363635, + "English,Vietnamese,Chinese,Filipino,Malay": 0.19318181818181818, + "English,Vietnamese,Chinese,Spanish,Malay": 0.26136363636363635, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.2159090909090909, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.2215909090909091, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.2784090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.23295454545454544, + "English,Chinese,Indonesian,Filipino,Spanish": 0.19886363636363635, + "English,Chinese,Indonesian,Filipino,Malay": 0.1875, + "English,Chinese,Indonesian,Spanish,Malay": 0.23295454545454544, + "English,Chinese,Filipino,Spanish,Malay": 0.19886363636363635, + "English,Indonesian,Filipino,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.24431818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.26704545454545453, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.23295454545454544, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.2727272727272727, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.23295454545454544 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.17045454545454544, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.17045454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.20454545454545456, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.17613636363636365, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.19886363636363635, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.21022727272727273 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909 + } + }, + "AC3_2": 0.45688512226434314, + "AC3_3": 0.387329429740663, + "AC3_4": 0.3329766823594303, + "AC3_5": 0.2878923516648209, + "AC3_6": 0.25008775004392253, + "AC3_7": 0.21948589809728267 + }, + "prompt_3": { + "overall_acc": 0.3693181818181818, + "language_acc": { + "English": 0.4715909090909091, + "Vietnamese": 0.3465909090909091, + "Chinese": 0.39204545454545453, + "Indonesian": 0.3522727272727273, + "Filipino": 0.30113636363636365, + "Spanish": 0.375, + "Malay": 0.3465909090909091 + }, + "consistency_score_2": 0.5311147186147185, + "consistency_score_3": 0.3542207792207792, + "consistency_score_4": 0.25860389610389617, + "consistency_score_5": 0.19724025974025977, + "consistency_score_6": 0.15503246753246752, + "consistency_score_7": 0.125, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5113636363636364, + "English,Chinese": 0.5170454545454546, + "English,Indonesian": 0.4943181818181818, + "English,Filipino": 0.39204545454545453, + "English,Spanish": 0.6079545454545454, + "English,Malay": 0.5227272727272727, + "Vietnamese,Chinese": 0.5909090909090909, + "Vietnamese,Indonesian": 0.6136363636363636, + "Vietnamese,Filipino": 0.5170454545454546, + "Vietnamese,Spanish": 0.5511363636363636, + "Vietnamese,Malay": 0.5852272727272727, + "Chinese,Indonesian": 0.5625, + "Chinese,Filipino": 0.42613636363636365, + "Chinese,Spanish": 0.5965909090909091, + "Chinese,Malay": 0.5340909090909091, + "Indonesian,Filipino": 0.5113636363636364, + "Indonesian,Spanish": 0.5852272727272727, + "Indonesian,Malay": 0.5625, + "Filipino,Spanish": 0.44886363636363635, + "Filipino,Malay": 0.48863636363636365, + "Spanish,Malay": 0.5340909090909091 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.375, + "English,Vietnamese,Indonesian": 0.3806818181818182, + "English,Vietnamese,Filipino": 0.2784090909090909, + "English,Vietnamese,Spanish": 0.3806818181818182, + "English,Vietnamese,Malay": 0.375, + "English,Chinese,Indonesian": 0.36363636363636365, + "English,Chinese,Filipino": 0.24431818181818182, + "English,Chinese,Spanish": 0.3977272727272727, + "English,Chinese,Malay": 0.35795454545454547, + "English,Indonesian,Filipino": 0.2784090909090909, + "English,Indonesian,Spanish": 0.38636363636363635, + "English,Indonesian,Malay": 0.3465909090909091, + "English,Filipino,Spanish": 0.29545454545454547, + "English,Filipino,Malay": 0.2840909090909091, + "English,Spanish,Malay": 0.3806818181818182, + "Vietnamese,Chinese,Indonesian": 0.42045454545454547, + "Vietnamese,Chinese,Filipino": 0.3068181818181818, + "Vietnamese,Chinese,Spanish": 0.4090909090909091, + "Vietnamese,Chinese,Malay": 0.4090909090909091, + "Vietnamese,Indonesian,Filipino": 0.3693181818181818, + "Vietnamese,Indonesian,Spanish": 0.42613636363636365, + "Vietnamese,Indonesian,Malay": 0.4375, + "Vietnamese,Filipino,Spanish": 0.3181818181818182, + "Vietnamese,Filipino,Malay": 0.3522727272727273, + "Vietnamese,Spanish,Malay": 0.38636363636363635, + "Chinese,Indonesian,Filipino": 0.3068181818181818, + "Chinese,Indonesian,Spanish": 0.4375, + "Chinese,Indonesian,Malay": 0.3806818181818182, + "Chinese,Filipino,Spanish": 0.2897727272727273, + "Chinese,Filipino,Malay": 0.2897727272727273, + "Chinese,Spanish,Malay": 0.38636363636363635, + "Indonesian,Filipino,Spanish": 0.32954545454545453, + "Indonesian,Filipino,Malay": 0.32954545454545453, + "Indonesian,Spanish,Malay": 0.38636363636363635, + "Filipino,Spanish,Malay": 0.30113636363636365 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.30113636363636365, + "English,Vietnamese,Chinese,Filipino": 0.19318181818181818, + "English,Vietnamese,Chinese,Spanish": 0.2840909090909091, + "English,Vietnamese,Chinese,Malay": 0.29545454545454547, + "English,Vietnamese,Indonesian,Filipino": 0.22727272727272727, + "English,Vietnamese,Indonesian,Spanish": 0.3068181818181818, + "English,Vietnamese,Indonesian,Malay": 0.2840909090909091, + "English,Vietnamese,Filipino,Spanish": 0.22727272727272727, + "English,Vietnamese,Filipino,Malay": 0.22727272727272727, + "English,Vietnamese,Spanish,Malay": 0.2897727272727273, + "English,Chinese,Indonesian,Filipino": 0.1875, + "English,Chinese,Indonesian,Spanish": 0.30113636363636365, + "English,Chinese,Indonesian,Malay": 0.2727272727272727, + "English,Chinese,Filipino,Spanish": 0.19318181818181818, + "English,Chinese,Filipino,Malay": 0.19886363636363635, + "English,Chinese,Spanish,Malay": 0.2840909090909091, + "English,Indonesian,Filipino,Spanish": 0.22727272727272727, + "English,Indonesian,Filipino,Malay": 0.21022727272727273, + "English,Indonesian,Spanish,Malay": 0.2840909090909091, + "English,Filipino,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.25, + "Vietnamese,Chinese,Indonesian,Spanish": 0.3352272727272727, + "Vietnamese,Chinese,Indonesian,Malay": 0.32386363636363635, + "Vietnamese,Chinese,Filipino,Spanish": 0.2159090909090909, + "Vietnamese,Chinese,Filipino,Malay": 0.25, + "Vietnamese,Chinese,Spanish,Malay": 0.3068181818181818, + "Vietnamese,Indonesian,Filipino,Spanish": 0.2727272727272727, + "Vietnamese,Indonesian,Filipino,Malay": 0.29545454545454547, + "Vietnamese,Indonesian,Spanish,Malay": 0.3181818181818182, + "Vietnamese,Filipino,Spanish,Malay": 0.25, + "Chinese,Indonesian,Filipino,Spanish": 0.23295454545454544, + "Chinese,Indonesian,Filipino,Malay": 0.2159090909090909, + "Chinese,Indonesian,Spanish,Malay": 0.3125, + "Chinese,Filipino,Spanish,Malay": 0.2159090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.23295454545454544 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.16477272727272727, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.24431818181818182, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.23863636363636365, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.1534090909090909, + "English,Vietnamese,Chinese,Filipino,Malay": 0.17045454545454544, + "English,Vietnamese,Chinese,Spanish,Malay": 0.22727272727272727, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.19318181818181818, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.1875, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.23863636363636365, + "English,Vietnamese,Filipino,Spanish,Malay": 0.19318181818181818, + "English,Chinese,Indonesian,Filipino,Spanish": 0.1590909090909091, + "English,Chinese,Indonesian,Filipino,Malay": 0.14772727272727273, + "English,Chinese,Indonesian,Spanish,Malay": 0.23295454545454544, + "English,Chinese,Filipino,Spanish,Malay": 0.16477272727272727, + "English,Indonesian,Filipino,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.1875, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.2215909090909091, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.17613636363636365 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.14204545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.19886363636363635, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.14204545454545456, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.125 + } + }, + "AC3_2": 0.43568004256525067, + "AC3_3": 0.3616119688651094, + "AC3_4": 0.3042005499186504, + "AC3_5": 0.25714704345552414, + "AC3_6": 0.21838938920687287, + "AC3_7": 0.1867816091576166 + }, + "prompt_4": { + "overall_acc": 0.38717532467532473, + "language_acc": { + "English": 0.4772727272727273, + "Vietnamese": 0.3806818181818182, + "Chinese": 0.3977272727272727, + "Indonesian": 0.3806818181818182, + "Filipino": 0.32954545454545453, + "Spanish": 0.3806818181818182, + "Malay": 0.36363636363636365 + }, + "consistency_score_2": 0.5787337662337663, + "consistency_score_3": 0.41168831168831177, + "consistency_score_4": 0.3178571428571429, + "consistency_score_5": 0.2554112554112554, + "consistency_score_6": 0.21022727272727273, + "consistency_score_7": 0.17613636363636365, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5511363636363636, + "English,Chinese": 0.5, + "English,Indonesian": 0.5511363636363636, + "English,Filipino": 0.48863636363636365, + "English,Spanish": 0.6590909090909091, + "English,Malay": 0.48863636363636365, + "Vietnamese,Chinese": 0.5795454545454546, + "Vietnamese,Indonesian": 0.6704545454545454, + "Vietnamese,Filipino": 0.6079545454545454, + "Vietnamese,Spanish": 0.6534090909090909, + "Vietnamese,Malay": 0.5965909090909091, + "Chinese,Indonesian": 0.5795454545454546, + "Chinese,Filipino": 0.5, + "Chinese,Spanish": 0.6136363636363636, + "Chinese,Malay": 0.5340909090909091, + "Indonesian,Filipino": 0.6136363636363636, + "Indonesian,Spanish": 0.6363636363636364, + "Indonesian,Malay": 0.6420454545454546, + "Filipino,Spanish": 0.5625, + "Filipino,Malay": 0.5568181818181818, + "Spanish,Malay": 0.5681818181818182 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.35795454545454547, + "English,Vietnamese,Indonesian": 0.42045454545454547, + "English,Vietnamese,Filipino": 0.38636363636363635, + "English,Vietnamese,Spanish": 0.4659090909090909, + "English,Vietnamese,Malay": 0.35795454545454547, + "English,Chinese,Indonesian": 0.35795454545454547, + "English,Chinese,Filipino": 0.30113636363636365, + "English,Chinese,Spanish": 0.4147727272727273, + "English,Chinese,Malay": 0.3125, + "English,Indonesian,Filipino": 0.3806818181818182, + "English,Indonesian,Spanish": 0.45454545454545453, + "English,Indonesian,Malay": 0.3693181818181818, + "English,Filipino,Spanish": 0.42045454545454547, + "English,Filipino,Malay": 0.32386363636363635, + "English,Spanish,Malay": 0.4034090909090909, + "Vietnamese,Chinese,Indonesian": 0.4602272727272727, + "Vietnamese,Chinese,Filipino": 0.38636363636363635, + "Vietnamese,Chinese,Spanish": 0.45454545454545453, + "Vietnamese,Chinese,Malay": 0.3977272727272727, + "Vietnamese,Indonesian,Filipino": 0.48863636363636365, + "Vietnamese,Indonesian,Spanish": 0.5113636363636364, + "Vietnamese,Indonesian,Malay": 0.48863636363636365, + "Vietnamese,Filipino,Spanish": 0.44886363636363635, + "Vietnamese,Filipino,Malay": 0.4147727272727273, + "Vietnamese,Spanish,Malay": 0.4602272727272727, + "Chinese,Indonesian,Filipino": 0.4034090909090909, + "Chinese,Indonesian,Spanish": 0.45454545454545453, + "Chinese,Indonesian,Malay": 0.42045454545454547, + "Chinese,Filipino,Spanish": 0.38636363636363635, + "Chinese,Filipino,Malay": 0.3352272727272727, + "Chinese,Spanish,Malay": 0.4147727272727273, + "Indonesian,Filipino,Spanish": 0.4659090909090909, + "Indonesian,Filipino,Malay": 0.44886363636363635, + "Indonesian,Spanish,Malay": 0.4602272727272727, + "Filipino,Spanish,Malay": 0.3806818181818182 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.30113636363636365, + "English,Vietnamese,Chinese,Filipino": 0.26704545454545453, + "English,Vietnamese,Chinese,Spanish": 0.3125, + "English,Vietnamese,Chinese,Malay": 0.25, + "English,Vietnamese,Indonesian,Filipino": 0.32954545454545453, + "English,Vietnamese,Indonesian,Spanish": 0.375, + "English,Vietnamese,Indonesian,Malay": 0.3125, + "English,Vietnamese,Filipino,Spanish": 0.3522727272727273, + "English,Vietnamese,Filipino,Malay": 0.2840909090909091, + "English,Vietnamese,Spanish,Malay": 0.3181818181818182, + "English,Chinese,Indonesian,Filipino": 0.26704545454545453, + "English,Chinese,Indonesian,Spanish": 0.3181818181818182, + "English,Chinese,Indonesian,Malay": 0.26136363636363635, + "English,Chinese,Filipino,Spanish": 0.2897727272727273, + "English,Chinese,Filipino,Malay": 0.21022727272727273, + "English,Chinese,Spanish,Malay": 0.2840909090909091, + "English,Indonesian,Filipino,Spanish": 0.3522727272727273, + "English,Indonesian,Filipino,Malay": 0.2784090909090909, + "English,Indonesian,Spanish,Malay": 0.32954545454545453, + "English,Filipino,Spanish,Malay": 0.2897727272727273, + "Vietnamese,Chinese,Indonesian,Filipino": 0.3465909090909091, + "Vietnamese,Chinese,Indonesian,Spanish": 0.38636363636363635, + "Vietnamese,Chinese,Indonesian,Malay": 0.3465909090909091, + "Vietnamese,Chinese,Filipino,Spanish": 0.32954545454545453, + "Vietnamese,Chinese,Filipino,Malay": 0.2840909090909091, + "Vietnamese,Chinese,Spanish,Malay": 0.3465909090909091, + "Vietnamese,Indonesian,Filipino,Spanish": 0.3977272727272727, + "Vietnamese,Indonesian,Filipino,Malay": 0.375, + "Vietnamese,Indonesian,Spanish,Malay": 0.3977272727272727, + "Vietnamese,Filipino,Spanish,Malay": 0.3352272727272727, + "Chinese,Indonesian,Filipino,Spanish": 0.3465909090909091, + "Chinese,Indonesian,Filipino,Malay": 0.29545454545454547, + "Chinese,Indonesian,Spanish,Malay": 0.3409090909090909, + "Chinese,Filipino,Spanish,Malay": 0.26704545454545453, + "Indonesian,Filipino,Spanish,Malay": 0.3465909090909091 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.24431818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.2727272727272727, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.22727272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.2556818181818182, + "English,Vietnamese,Chinese,Filipino,Malay": 0.19886363636363635, + "English,Vietnamese,Chinese,Spanish,Malay": 0.23295454545454544, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.3068181818181818, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.2556818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.2840909090909091, + "English,Vietnamese,Filipino,Spanish,Malay": 0.2556818181818182, + "English,Chinese,Indonesian,Filipino,Spanish": 0.2556818181818182, + "English,Chinese,Indonesian,Filipino,Malay": 0.19318181818181818, + "English,Chinese,Indonesian,Spanish,Malay": 0.23863636363636365, + "English,Chinese,Filipino,Spanish,Malay": 0.19886363636363635, + "English,Indonesian,Filipino,Spanish,Malay": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.3068181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.26704545454545453, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.30113636363636365, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.25, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.3125, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.25 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.23295454545454544, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.1875, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.21022727272727273, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.1875, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.23295454545454544, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.23863636363636365 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17613636363636365 + } + }, + "AC3_2": 0.46395967473096905, + "AC3_3": 0.39905572983284465, + "AC3_4": 0.34910858190502436, + "AC3_5": 0.3077840054203183, + "AC3_6": 0.27249567683185694, + "AC3_7": 0.24212405025830147 + }, + "prompt_5": { + "overall_acc": 0.3823051948051948, + "language_acc": { + "English": 0.5056818181818182, + "Vietnamese": 0.3693181818181818, + "Chinese": 0.38636363636363635, + "Indonesian": 0.32954545454545453, + "Filipino": 0.3181818181818182, + "Spanish": 0.4034090909090909, + "Malay": 0.36363636363636365 + }, + "consistency_score_2": 0.5589826839826839, + "consistency_score_3": 0.3892857142857144, + "consistency_score_4": 0.29318181818181827, + "consistency_score_5": 0.22943722943722944, + "consistency_score_6": 0.18344155844155846, + "consistency_score_7": 0.14772727272727273, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5454545454545454, + "English,Chinese": 0.5454545454545454, + "English,Indonesian": 0.5454545454545454, + "English,Filipino": 0.4431818181818182, + "English,Spanish": 0.6875, + "English,Malay": 0.4943181818181818, + "Vietnamese,Chinese": 0.5795454545454546, + "Vietnamese,Indonesian": 0.6704545454545454, + "Vietnamese,Filipino": 0.4943181818181818, + "Vietnamese,Spanish": 0.5852272727272727, + "Vietnamese,Malay": 0.6363636363636364, + "Chinese,Indonesian": 0.5227272727272727, + "Chinese,Filipino": 0.45454545454545453, + "Chinese,Spanish": 0.5738636363636364, + "Chinese,Malay": 0.5170454545454546, + "Indonesian,Filipino": 0.5625, + "Indonesian,Spanish": 0.5909090909090909, + "Indonesian,Malay": 0.6477272727272727, + "Filipino,Spanish": 0.4715909090909091, + "Filipino,Malay": 0.5454545454545454, + "Spanish,Malay": 0.625 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.38636363636363635, + "English,Vietnamese,Indonesian": 0.42613636363636365, + "English,Vietnamese,Filipino": 0.3125, + "English,Vietnamese,Spanish": 0.4375, + "English,Vietnamese,Malay": 0.375, + "English,Chinese,Indonesian": 0.3522727272727273, + "English,Chinese,Filipino": 0.29545454545454547, + "English,Chinese,Spanish": 0.4431818181818182, + "English,Chinese,Malay": 0.3352272727272727, + "English,Indonesian,Filipino": 0.3522727272727273, + "English,Indonesian,Spanish": 0.45454545454545453, + "English,Indonesian,Malay": 0.3806818181818182, + "English,Filipino,Spanish": 0.3693181818181818, + "English,Filipino,Malay": 0.3125, + "English,Spanish,Malay": 0.44886363636363635, + "Vietnamese,Chinese,Indonesian": 0.42045454545454547, + "Vietnamese,Chinese,Filipino": 0.32954545454545453, + "Vietnamese,Chinese,Spanish": 0.4090909090909091, + "Vietnamese,Chinese,Malay": 0.3977272727272727, + "Vietnamese,Indonesian,Filipino": 0.42045454545454547, + "Vietnamese,Indonesian,Spanish": 0.4659090909090909, + "Vietnamese,Indonesian,Malay": 0.5056818181818182, + "Vietnamese,Filipino,Spanish": 0.3352272727272727, + "Vietnamese,Filipino,Malay": 0.3806818181818182, + "Vietnamese,Spanish,Malay": 0.4715909090909091, + "Chinese,Indonesian,Filipino": 0.3352272727272727, + "Chinese,Indonesian,Spanish": 0.3977272727272727, + "Chinese,Indonesian,Malay": 0.38636363636363635, + "Chinese,Filipino,Spanish": 0.3181818181818182, + "Chinese,Filipino,Malay": 0.30113636363636365, + "Chinese,Spanish,Malay": 0.42613636363636365, + "Indonesian,Filipino,Spanish": 0.38636363636363635, + "Indonesian,Filipino,Malay": 0.42613636363636365, + "Indonesian,Spanish,Malay": 0.4659090909090909, + "Filipino,Spanish,Malay": 0.36363636363636365 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.30113636363636365, + "English,Vietnamese,Chinese,Filipino": 0.23295454545454544, + "English,Vietnamese,Chinese,Spanish": 0.3125, + "English,Vietnamese,Chinese,Malay": 0.26704545454545453, + "English,Vietnamese,Indonesian,Filipino": 0.2840909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.3693181818181818, + "English,Vietnamese,Indonesian,Malay": 0.3181818181818182, + "English,Vietnamese,Filipino,Spanish": 0.2727272727272727, + "English,Vietnamese,Filipino,Malay": 0.25, + "English,Vietnamese,Spanish,Malay": 0.3465909090909091, + "English,Chinese,Indonesian,Filipino": 0.25, + "English,Chinese,Indonesian,Spanish": 0.3068181818181818, + "English,Chinese,Indonesian,Malay": 0.26136363636363635, + "English,Chinese,Filipino,Spanish": 0.2556818181818182, + "English,Chinese,Filipino,Malay": 0.2215909090909091, + "English,Chinese,Spanish,Malay": 0.3181818181818182, + "English,Indonesian,Filipino,Spanish": 0.3125, + "English,Indonesian,Filipino,Malay": 0.2727272727272727, + "English,Indonesian,Spanish,Malay": 0.3522727272727273, + "English,Filipino,Spanish,Malay": 0.2840909090909091, + "Vietnamese,Chinese,Indonesian,Filipino": 0.2784090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.32386363636363635, + "Vietnamese,Chinese,Indonesian,Malay": 0.32954545454545453, + "Vietnamese,Chinese,Filipino,Spanish": 0.24431818181818182, + "Vietnamese,Chinese,Filipino,Malay": 0.24431818181818182, + "Vietnamese,Chinese,Spanish,Malay": 0.3409090909090909, + "Vietnamese,Indonesian,Filipino,Spanish": 0.30113636363636365, + "Vietnamese,Indonesian,Filipino,Malay": 0.3409090909090909, + "Vietnamese,Indonesian,Spanish,Malay": 0.39204545454545453, + "Vietnamese,Filipino,Spanish,Malay": 0.2784090909090909, + "Chinese,Indonesian,Filipino,Spanish": 0.26136363636363635, + "Chinese,Indonesian,Filipino,Malay": 0.25, + "Chinese,Indonesian,Spanish,Malay": 0.32386363636363635, + "Chinese,Filipino,Spanish,Malay": 0.25, + "Indonesian,Filipino,Spanish,Malay": 0.3125 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.21022727272727273, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.2556818181818182, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.22727272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.19886363636363635, + "English,Vietnamese,Chinese,Filipino,Malay": 0.18181818181818182, + "English,Vietnamese,Chinese,Spanish,Malay": 0.25, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.25, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.22727272727272727, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.30113636363636365, + "English,Vietnamese,Filipino,Spanish,Malay": 0.22727272727272727, + "English,Chinese,Indonesian,Filipino,Spanish": 0.2159090909090909, + "English,Chinese,Indonesian,Filipino,Malay": 0.19318181818181818, + "English,Chinese,Indonesian,Spanish,Malay": 0.24431818181818182, + "English,Chinese,Filipino,Spanish,Malay": 0.20454545454545456, + "English,Indonesian,Filipino,Spanish,Malay": 0.25, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.2784090909090909, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.2556818181818182, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.2159090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.17613636363636365, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.16477272727272727, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.21022727272727273, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.16477272727272727, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.21022727272727273, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.18181818181818182 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.14772727272727273 + } + }, + "AC3_2": 0.4540629677399377, + "AC3_3": 0.3857638784027107, + "AC3_4": 0.33186406234845844, + "AC3_5": 0.2867711677503847, + "AC3_6": 0.24792245053286088, + "AC3_7": 0.21310733672716092 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5533980582524272 + }, + "prompt_2": { + "accuracy": 0.47572815533980584 + }, + "prompt_3": { + "accuracy": 0.5145631067961165 + }, + "prompt_4": { + "accuracy": 0.5728155339805825 + }, + "prompt_5": { + "accuracy": 0.5339805825242718 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3047619047619048 + }, + "prompt_2": { + "accuracy": 0.2761904761904762 + }, + "prompt_3": { + "accuracy": 0.34285714285714286 + }, + "prompt_4": { + "accuracy": 0.3333333333333333 + }, + "prompt_5": { + "accuracy": 0.34285714285714286 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5233644859813084 + }, + "prompt_2": { + "accuracy": 0.5607476635514018 + }, + "prompt_3": { + "accuracy": 0.5700934579439252 + }, + "prompt_4": { + "accuracy": 0.5981308411214953 + }, + "prompt_5": { + "accuracy": 0.5607476635514018 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.42, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.3, + "history": 0.4, + "literature": 0.4, + "politics": 0.7, + "culture": 0.5, + "film": 0.3, + "law": 0.4, + "geography": 0.7 + } + }, + "prompt_2": { + "accuracy": 0.48, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.3, + "history": 0.4, + "literature": 0.4, + "politics": 0.8, + "culture": 0.6, + "film": 0.6, + "law": 0.3, + "geography": 0.7 + } + }, + "prompt_3": { + "accuracy": 0.49, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.3, + "history": 0.4666666666666667, + "literature": 0.5, + "politics": 0.6, + "culture": 0.6, + "film": 0.6, + "law": 0.3, + "geography": 0.7 + } + }, + "prompt_4": { + "accuracy": 0.5, + "category_acc": { + "brand": 0.3, + "demographics": 0.6, + "biology": 0.4, + "history": 0.4666666666666667, + "literature": 0.4, + "politics": 0.5, + "culture": 0.7, + "film": 0.6, + "law": 0.4, + "geography": 0.7 + } + }, + "prompt_5": { + "accuracy": 0.42, + "category_acc": { + "brand": 0.3, + "demographics": 0.6, + "biology": 0.4, + "history": 0.4666666666666667, + "literature": 0.3, + "politics": 0.4, + "culture": 0.6, + "film": 0.5, + "law": 0.2, + "geography": 0.5 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.14384188614480142 + }, + "prompt_2": { + "bleu_score": 0.13023407956897848 + }, + "prompt_3": { + "bleu_score": 0.14053647424531324 + }, + "prompt_4": { + "bleu_score": 0.13223343121920528 + }, + "prompt_5": { + "bleu_score": 0.11666892339340351 + } }, "indommlu": { "prompt_1": -1, @@ -4792,179 +43927,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.10449146263796878 + }, + "prompt_2": { + "bleu_score": 0.20252699539737112 + }, + "prompt_3": { + "bleu_score": 0.1964003972077753 + }, + "prompt_4": { + "bleu_score": 0.19338662100946438 + }, + "prompt_5": { + "bleu_score": 0.14488407475413814 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.07782170351710256 + }, + "prompt_2": { + "bleu_score": 0.15503112161301977 + }, + "prompt_3": { + "bleu_score": 0.15305084363626856 + }, + "prompt_4": { + "bleu_score": 0.14795894961550218 + }, + "prompt_5": { + "bleu_score": 0.08886146444097517 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.07663924015837147 + }, + "prompt_2": { + "bleu_score": 0.12822346010314756 + }, + "prompt_3": { + "bleu_score": 0.12480938760963785 + }, + "prompt_4": { + "bleu_score": 0.12140739421448338 + }, + "prompt_5": { + "bleu_score": 0.12238904544905589 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.09376537955104575 + }, + "prompt_2": { + "bleu_score": 0.19125055780210543 + }, + "prompt_3": { + "bleu_score": 0.18261803411364355 + }, + "prompt_4": { + "bleu_score": 0.17388523951413465 + }, + "prompt_5": { + "bleu_score": 0.1252001309706885 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5099183197199533 + }, + "prompt_2": { + "accuracy": 0.5192532088681447 + }, + "prompt_3": { + "accuracy": 0.5040840140023337 + }, + "prompt_4": { + "accuracy": 0.48891481913652274 + }, + "prompt_5": { + "accuracy": 0.485414235705951 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49567393636038615, + "category_acc": { + "high_school_european_history": 0.6280487804878049, + "business_ethics": 0.5555555555555556, + "clinical_knowledge": 0.5606060606060606, + "medical_genetics": 0.5454545454545454, + "high_school_us_history": 0.6650246305418719, + "high_school_physics": 0.34, + "high_school_world_history": 0.690677966101695, + "virology": 0.46060606060606063, + "high_school_microeconomics": 0.5021097046413502, + "econometrics": 0.3185840707964602, + "college_computer_science": 0.43434343434343436, + "high_school_biology": 0.5857605177993528, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.38434163701067614, + "philosophy": 0.5612903225806452, + "professional_medicine": 0.4833948339483395, + "nutrition": 0.5311475409836065, + "global_facts": 0.37373737373737376, + "machine_learning": 0.26126126126126126, + "security_studies": 0.5327868852459017, + "public_relations": 0.6146788990825688, + "professional_psychology": 0.5171849427168577, + "prehistory": 0.56656346749226, + "anatomy": 0.4552238805970149, + "human_sexuality": 0.4461538461538462, + "college_medicine": 0.4476744186046512, + "high_school_government_and_politics": 0.7291666666666666, + "college_chemistry": 0.2727272727272727, + "logical_fallacies": 0.5925925925925926, + "high_school_geography": 0.6345177664974619, + "elementary_mathematics": 0.4509283819628647, + "human_aging": 0.5540540540540541, + "college_mathematics": 0.23232323232323232, + "high_school_psychology": 0.6507352941176471, + "formal_logic": 0.288, + "high_school_statistics": 0.3674418604651163, + "international_law": 0.675, + "high_school_mathematics": 0.2379182156133829, + "high_school_computer_science": 0.5151515151515151, + "conceptual_physics": 0.4188034188034188, + "miscellaneous": 0.7161125319693095, + "high_school_chemistry": 0.297029702970297, + "marketing": 0.7124463519313304, + "professional_law": 0.38486627527723416, + "management": 0.6568627450980392, + "college_physics": 0.22772277227722773, + "jurisprudence": 0.6822429906542056, + "world_religions": 0.7, + "sociology": 0.665, + "us_foreign_policy": 0.696969696969697, + "high_school_macroeconomics": 0.45758354755784064, + "computer_security": 0.6161616161616161, + "moral_scenarios": 0.24608501118568232, + "moral_disputes": 0.5797101449275363, + "electrical_engineering": 0.4583333333333333, + "astronomy": 0.5562913907284768, + "college_biology": 0.4965034965034965 + } + }, + "prompt_2": { + "accuracy": 0.4948158741508759, + "category_acc": { + "high_school_european_history": 0.6280487804878049, + "business_ethics": 0.5353535353535354, + "clinical_knowledge": 0.5189393939393939, + "medical_genetics": 0.6060606060606061, + "high_school_us_history": 0.6798029556650246, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.6610169491525424, + "virology": 0.4, + "high_school_microeconomics": 0.5021097046413502, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.41414141414141414, + "high_school_biology": 0.5922330097087378, + "abstract_algebra": 0.31313131313131315, + "professional_accounting": 0.3665480427046263, + "philosophy": 0.603225806451613, + "professional_medicine": 0.5055350553505535, + "nutrition": 0.5409836065573771, + "global_facts": 0.3333333333333333, + "machine_learning": 0.2702702702702703, + "security_studies": 0.5040983606557377, + "public_relations": 0.5963302752293578, + "professional_psychology": 0.513911620294599, + "prehistory": 0.544891640866873, + "anatomy": 0.4552238805970149, + "human_sexuality": 0.4153846153846154, + "college_medicine": 0.4127906976744186, + "high_school_government_and_politics": 0.6979166666666666, + "college_chemistry": 0.31313131313131315, + "logical_fallacies": 0.6419753086419753, + "high_school_geography": 0.5888324873096447, + "elementary_mathematics": 0.4005305039787798, + "human_aging": 0.545045045045045, + "college_mathematics": 0.26262626262626265, + "high_school_psychology": 0.6691176470588235, + "formal_logic": 0.304, + "high_school_statistics": 0.4046511627906977, + "international_law": 0.6833333333333333, + "high_school_mathematics": 0.2788104089219331, + "high_school_computer_science": 0.5151515151515151, + "conceptual_physics": 0.37606837606837606, + "miscellaneous": 0.7327365728900256, + "high_school_chemistry": 0.40594059405940597, + "marketing": 0.759656652360515, + "professional_law": 0.3913894324853229, + "management": 0.6470588235294118, + "college_physics": 0.24752475247524752, + "jurisprudence": 0.6822429906542056, + "world_religions": 0.7058823529411765, + "sociology": 0.61, + "us_foreign_policy": 0.7272727272727273, + "high_school_macroeconomics": 0.442159383033419, + "computer_security": 0.5858585858585859, + "moral_scenarios": 0.2550335570469799, + "moral_disputes": 0.5507246376811594, + "electrical_engineering": 0.4444444444444444, + "astronomy": 0.5231788079470199, + "college_biology": 0.5104895104895105 + } + }, + "prompt_3": { + "accuracy": 0.49667500893814803, + "category_acc": { + "high_school_european_history": 0.6280487804878049, + "business_ethics": 0.5050505050505051, + "clinical_knowledge": 0.5189393939393939, + "medical_genetics": 0.5454545454545454, + "high_school_us_history": 0.6945812807881774, + "high_school_physics": 0.34, + "high_school_world_history": 0.6991525423728814, + "virology": 0.46060606060606063, + "high_school_microeconomics": 0.48945147679324896, + "econometrics": 0.2743362831858407, + "college_computer_science": 0.47474747474747475, + "high_school_biology": 0.56957928802589, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.4128113879003559, + "philosophy": 0.603225806451613, + "professional_medicine": 0.4907749077490775, + "nutrition": 0.5278688524590164, + "global_facts": 0.3434343434343434, + "machine_learning": 0.3153153153153153, + "security_studies": 0.5245901639344263, + "public_relations": 0.5871559633027523, + "professional_psychology": 0.5237315875613748, + "prehistory": 0.5789473684210527, + "anatomy": 0.4552238805970149, + "human_sexuality": 0.46923076923076923, + "college_medicine": 0.42441860465116277, + "high_school_government_and_politics": 0.71875, + "college_chemistry": 0.32323232323232326, + "logical_fallacies": 0.6296296296296297, + "high_school_geography": 0.6446700507614214, + "elementary_mathematics": 0.4005305039787798, + "human_aging": 0.5315315315315315, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.6580882352941176, + "formal_logic": 0.328, + "high_school_statistics": 0.34418604651162793, + "international_law": 0.6583333333333333, + "high_school_mathematics": 0.2527881040892193, + "high_school_computer_science": 0.47474747474747475, + "conceptual_physics": 0.405982905982906, + "miscellaneous": 0.7071611253196931, + "high_school_chemistry": 0.3712871287128713, + "marketing": 0.7381974248927039, + "professional_law": 0.39204174820613175, + "management": 0.6862745098039216, + "college_physics": 0.26732673267326734, + "jurisprudence": 0.7009345794392523, + "world_religions": 0.7352941176470589, + "sociology": 0.64, + "us_foreign_policy": 0.7070707070707071, + "high_school_macroeconomics": 0.4473007712082262, + "computer_security": 0.5858585858585859, + "moral_scenarios": 0.24161073825503357, + "moral_disputes": 0.5652173913043478, + "electrical_engineering": 0.4375, + "astronomy": 0.4966887417218543, + "college_biology": 0.5034965034965035 + } + }, + "prompt_4": { + "accuracy": 0.47808366106542727, + "category_acc": { + "high_school_european_history": 0.6097560975609756, + "business_ethics": 0.5454545454545454, + "clinical_knowledge": 0.5416666666666666, + "medical_genetics": 0.45454545454545453, + "high_school_us_history": 0.6650246305418719, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.652542372881356, + "virology": 0.45454545454545453, + "high_school_microeconomics": 0.4472573839662447, + "econometrics": 0.2743362831858407, + "college_computer_science": 0.42424242424242425, + "high_school_biology": 0.540453074433657, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.37722419928825623, + "philosophy": 0.5451612903225806, + "professional_medicine": 0.46494464944649444, + "nutrition": 0.5344262295081967, + "global_facts": 0.3939393939393939, + "machine_learning": 0.2882882882882883, + "security_studies": 0.5245901639344263, + "public_relations": 0.5504587155963303, + "professional_psychology": 0.46481178396072015, + "prehistory": 0.5325077399380805, + "anatomy": 0.4253731343283582, + "human_sexuality": 0.4076923076923077, + "college_medicine": 0.43023255813953487, + "high_school_government_and_politics": 0.671875, + "college_chemistry": 0.2828282828282828, + "logical_fallacies": 0.5987654320987654, + "high_school_geography": 0.6091370558375635, + "elementary_mathematics": 0.44562334217506633, + "human_aging": 0.5765765765765766, + "college_mathematics": 0.1919191919191919, + "high_school_psychology": 0.6323529411764706, + "formal_logic": 0.28, + "high_school_statistics": 0.35348837209302325, + "international_law": 0.6583333333333333, + "high_school_mathematics": 0.2788104089219331, + "high_school_computer_science": 0.46464646464646464, + "conceptual_physics": 0.3717948717948718, + "miscellaneous": 0.6867007672634271, + "high_school_chemistry": 0.3564356435643564, + "marketing": 0.7124463519313304, + "professional_law": 0.36725375081539463, + "management": 0.5980392156862745, + "college_physics": 0.2376237623762376, + "jurisprudence": 0.6355140186915887, + "world_religions": 0.6882352941176471, + "sociology": 0.615, + "us_foreign_policy": 0.7373737373737373, + "high_school_macroeconomics": 0.41131105398457585, + "computer_security": 0.5555555555555556, + "moral_scenarios": 0.28187919463087246, + "moral_disputes": 0.5391304347826087, + "electrical_engineering": 0.4583333333333333, + "astronomy": 0.4900662251655629, + "college_biology": 0.5034965034965035 + } + }, + "prompt_5": { + "accuracy": 0.4636396138720057, + "category_acc": { + "high_school_european_history": 0.6036585365853658, + "business_ethics": 0.4444444444444444, + "clinical_knowledge": 0.5075757575757576, + "medical_genetics": 0.5252525252525253, + "high_school_us_history": 0.6748768472906403, + "high_school_physics": 0.30666666666666664, + "high_school_world_history": 0.6567796610169492, + "virology": 0.42424242424242425, + "high_school_microeconomics": 0.4430379746835443, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.3939393939393939, + "high_school_biology": 0.540453074433657, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.33451957295373663, + "philosophy": 0.5451612903225806, + "professional_medicine": 0.47601476014760147, + "nutrition": 0.4918032786885246, + "global_facts": 0.35353535353535354, + "machine_learning": 0.26126126126126126, + "security_studies": 0.5409836065573771, + "public_relations": 0.5412844036697247, + "professional_psychology": 0.4664484451718494, + "prehistory": 0.5356037151702786, + "anatomy": 0.43283582089552236, + "human_sexuality": 0.47692307692307695, + "college_medicine": 0.4069767441860465, + "high_school_government_and_politics": 0.6354166666666666, + "college_chemistry": 0.24242424242424243, + "logical_fallacies": 0.5555555555555556, + "high_school_geography": 0.5634517766497462, + "elementary_mathematics": 0.4403183023872679, + "human_aging": 0.5, + "college_mathematics": 0.26262626262626265, + "high_school_psychology": 0.5790441176470589, + "formal_logic": 0.296, + "high_school_statistics": 0.32558139534883723, + "international_law": 0.5333333333333333, + "high_school_mathematics": 0.23048327137546468, + "high_school_computer_science": 0.37373737373737376, + "conceptual_physics": 0.36324786324786323, + "miscellaneous": 0.6649616368286445, + "high_school_chemistry": 0.33663366336633666, + "marketing": 0.6995708154506438, + "professional_law": 0.37116764514024786, + "management": 0.6078431372549019, + "college_physics": 0.2871287128712871, + "jurisprudence": 0.5887850467289719, + "world_religions": 0.6882352941176471, + "sociology": 0.565, + "us_foreign_policy": 0.7474747474747475, + "high_school_macroeconomics": 0.39845758354755784, + "computer_security": 0.47474747474747475, + "moral_scenarios": 0.2505592841163311, + "moral_disputes": 0.5362318840579711, + "electrical_engineering": 0.5138888888888888, + "astronomy": 0.5629139072847682, + "college_biology": 0.48951048951048953 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.35066864784546803 + }, + "prompt_2": { + "accuracy": 0.3588410104011887 + }, + "prompt_3": { + "accuracy": 0.3447251114413076 + }, + "prompt_4": { + "accuracy": 0.3447251114413076 + }, + "prompt_5": { + "accuracy": 0.36181277860326894 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.33623910336239105, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.25, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.3333333333333333, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.5172413793103449, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.5, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.52, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.3, + "business_administration": 0.3684210526315789, + "marxism": 0.375, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.38235294117647056, + "teacher_qualification": 0.5510204081632653, + "high_school_politics": 0.5416666666666666, + "high_school_geography": 0.25, + "middle_school_politics": 0.4230769230769231, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.4444444444444444, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.25, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.52, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.38461538461538464, + "sports_science": 0.3333333333333333, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.3137254901960784, + "accountant": 0.2777777777777778, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.19444444444444445, + "tax_accountant": 0.2962962962962963, + "physician": 0.2962962962962963 + } + }, + "prompt_2": { + "accuracy": 0.32938978829389787, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.375, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.40476190476190477, + "college_physics": 0.25, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.125, + "high_school_chemistry": 0.16666666666666666, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.5769230769230769, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.52, + "veterinary_medicine": 0.25, + "college_economics": 0.36666666666666664, + "business_administration": 0.5, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.3235294117647059, + "teacher_qualification": 0.4489795918367347, + "high_school_politics": 0.5416666666666666, + "high_school_geography": 0.25, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.5357142857142857, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.37037037037037035, + "law": 0.1724137931034483, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.4117647058823529, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.375, + "high_school_history": 0.52, + "middle_school_history": 0.18518518518518517, + "civil_servant": 0.34615384615384615, + "sports_science": 0.2916666666666667, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.375, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.39215686274509803, + "accountant": 0.3333333333333333, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.19444444444444445, + "tax_accountant": 0.3333333333333333, + "physician": 0.18518518518518517 + } + }, + "prompt_3": { + "accuracy": 0.3368617683686177, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.25, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.3333333333333333, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.09523809523809523, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.5769230769230769, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.4166666666666667, + "business_administration": 0.5, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.3793103448275862, + "education_science": 0.3235294117647059, + "teacher_qualification": 0.46938775510204084, + "high_school_politics": 0.5416666666666666, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.4230769230769231, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.39285714285714285, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.4074074074074074, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.47368421052631576, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.08333333333333333, + "high_school_history": 0.44, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.34615384615384615, + "sports_science": 0.3333333333333333, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.25925925925925924, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.3148148148148148, + "physician": 0.25925925925925924 + } + }, + "prompt_4": { + "accuracy": 0.32689912826899126, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.30952380952380953, + "college_physics": 0.25, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.3448275862068966, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.125, + "high_school_chemistry": 0.25, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.4230769230769231, + "middle_school_physics": 0.5416666666666666, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.35, + "business_administration": 0.4473684210526316, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.3877551020408163, + "high_school_politics": 0.5, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.5714285714285714, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.37037037037037035, + "law": 0.13793103448275862, + "chinese_language_and_literature": 0.21428571428571427, + "art_studies": 0.2631578947368421, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.44, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.4230769230769231, + "sports_science": 0.375, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.45098039215686275, + "accountant": 0.25925925925925924, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.25925925925925924, + "physician": 0.3148148148148148 + } + }, + "prompt_5": { + "accuracy": 0.34184308841843086, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.20833333333333334, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.38095238095238093, + "college_physics": 0.25, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.25, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.5384615384615384, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.4, + "business_administration": 0.39473684210526316, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.27586206896551724, + "education_science": 0.29411764705882354, + "teacher_qualification": 0.5102040816326531, + "high_school_politics": 0.5833333333333334, + "high_school_geography": 0.375, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.5357142857142857, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.4444444444444444, + "law": 0.1724137931034483, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.23684210526315788, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.48, + "middle_school_history": 0.2222222222222222, + "civil_servant": 0.4230769230769231, + "sports_science": 0.4583333333333333, + "plant_protection": 0.5185185185185185, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.4444444444444444, + "urban_and_rural_planner": 0.3333333333333333, + "accountant": 0.25925925925925924, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.3148148148148148, + "physician": 0.24074074074074073 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.35842293906810035 + }, + "prompt_2": { + "accuracy": 0.34767025089605735 + }, + "prompt_3": { + "accuracy": 0.3727598566308244 + }, + "prompt_4": { + "accuracy": 0.3333333333333333 + }, + "prompt_5": { + "accuracy": 0.36200716845878134 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.341305474011397, + "category_acc": { + "agronomy": 0.3609467455621302, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.2682926829268293, + "arts": 0.31875, + "astronomy": 0.296969696969697, + "business_ethics": 0.35406698564593303, + "chinese_civil_service_exam": 0.3125, + "chinese_driving_rule": 0.44274809160305345, + "chinese_food_culture": 0.3235294117647059, + "chinese_foreign_policy": 0.4205607476635514, + "chinese_history": 0.3931888544891641, + "chinese_literature": 0.28431372549019607, + "chinese_teacher_qualification": 0.41899441340782123, + "clinical_knowledge": 0.21940928270042195, + "college_actuarial_science": 0.18867924528301888, + "college_education": 0.3364485981308411, + "college_engineering_hydrology": 0.44339622641509435, + "college_law": 0.23148148148148148, + "college_mathematics": 0.29523809523809524, + "college_medical_statistics": 0.3490566037735849, + "college_medicine": 0.32234432234432236, + "computer_science": 0.3480392156862745, + "computer_security": 0.28654970760233917, + "conceptual_physics": 0.36054421768707484, + "construction_project_management": 0.33093525179856115, + "economics": 0.3836477987421384, + "education": 0.3374233128834356, + "electrical_engineering": 0.38953488372093026, + "elementary_chinese": 0.30952380952380953, + "elementary_commonsense": 0.29797979797979796, + "elementary_information_and_technology": 0.47478991596638653, + "elementary_mathematics": 0.2608695652173913, + "ethnology": 0.3111111111111111, + "food_science": 0.4125874125874126, + "genetics": 0.30113636363636365, + "global_facts": 0.3691275167785235, + "high_school_biology": 0.28994082840236685, + "high_school_chemistry": 0.3333333333333333, + "high_school_geography": 0.3050847457627119, + "high_school_mathematics": 0.2865853658536585, + "high_school_physics": 0.34545454545454546, + "high_school_politics": 0.4195804195804196, + "human_sexuality": 0.2619047619047619, + "international_law": 0.33513513513513515, + "journalism": 0.37790697674418605, + "jurisprudence": 0.34306569343065696, + "legal_and_moral_basis": 0.5700934579439252, + "logical": 0.37398373983739835, + "machine_learning": 0.2540983606557377, + "management": 0.3619047619047619, + "marketing": 0.4388888888888889, + "marxist_theory": 0.37037037037037035, + "modern_chinese": 0.25, + "nutrition": 0.30344827586206896, + "philosophy": 0.4, + "professional_accounting": 0.36, + "professional_law": 0.3033175355450237, + "professional_medicine": 0.2765957446808511, + "professional_psychology": 0.375, + "public_relations": 0.39080459770114945, + "security_study": 0.28888888888888886, + "sociology": 0.36283185840707965, + "sports_science": 0.3393939393939394, + "traditional_chinese_medicine": 0.2918918918918919, + "virology": 0.33727810650887574, + "world_history": 0.422360248447205, + "world_religions": 0.39375 + } + }, + "prompt_2": { + "accuracy": 0.3435503367294077, + "category_acc": { + "agronomy": 0.3076923076923077, + "anatomy": 0.2972972972972973, + "ancient_chinese": 0.2682926829268293, + "arts": 0.38125, + "astronomy": 0.2545454545454545, + "business_ethics": 0.3875598086124402, + "chinese_civil_service_exam": 0.30625, + "chinese_driving_rule": 0.40458015267175573, + "chinese_food_culture": 0.29411764705882354, + "chinese_foreign_policy": 0.411214953271028, + "chinese_history": 0.43034055727554177, + "chinese_literature": 0.31862745098039214, + "chinese_teacher_qualification": 0.4022346368715084, + "clinical_knowledge": 0.2616033755274262, + "college_actuarial_science": 0.20754716981132076, + "college_education": 0.35514018691588783, + "college_engineering_hydrology": 0.41509433962264153, + "college_law": 0.2222222222222222, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.32075471698113206, + "college_medicine": 0.31868131868131866, + "computer_science": 0.37745098039215685, + "computer_security": 0.38596491228070173, + "conceptual_physics": 0.36054421768707484, + "construction_project_management": 0.33093525179856115, + "economics": 0.3522012578616352, + "education": 0.3558282208588957, + "electrical_engineering": 0.4186046511627907, + "elementary_chinese": 0.32936507936507936, + "elementary_commonsense": 0.32323232323232326, + "elementary_information_and_technology": 0.4369747899159664, + "elementary_mathematics": 0.36086956521739133, + "ethnology": 0.34074074074074073, + "food_science": 0.40559440559440557, + "genetics": 0.2840909090909091, + "global_facts": 0.31543624161073824, + "high_school_biology": 0.28994082840236685, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.2796610169491525, + "high_school_mathematics": 0.23170731707317074, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.43356643356643354, + "human_sexuality": 0.2857142857142857, + "international_law": 0.3081081081081081, + "journalism": 0.36046511627906974, + "jurisprudence": 0.35279805352798055, + "legal_and_moral_basis": 0.5607476635514018, + "logical": 0.37398373983739835, + "machine_learning": 0.26229508196721313, + "management": 0.34285714285714286, + "marketing": 0.4388888888888889, + "marxist_theory": 0.42328042328042326, + "modern_chinese": 0.2413793103448276, + "nutrition": 0.35172413793103446, + "philosophy": 0.38095238095238093, + "professional_accounting": 0.32571428571428573, + "professional_law": 0.2985781990521327, + "professional_medicine": 0.2898936170212766, + "professional_psychology": 0.33620689655172414, + "public_relations": 0.3735632183908046, + "security_study": 0.32592592592592595, + "sociology": 0.3893805309734513, + "sports_science": 0.3939393939393939, + "traditional_chinese_medicine": 0.2864864864864865, + "virology": 0.28994082840236685, + "world_history": 0.422360248447205, + "world_religions": 0.375 + } + }, + "prompt_3": { + "accuracy": 0.3393196339146952, + "category_acc": { + "agronomy": 0.3136094674556213, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.18902439024390244, + "arts": 0.33125, + "astronomy": 0.2606060606060606, + "business_ethics": 0.36363636363636365, + "chinese_civil_service_exam": 0.3125, + "chinese_driving_rule": 0.3893129770992366, + "chinese_food_culture": 0.3382352941176471, + "chinese_foreign_policy": 0.3925233644859813, + "chinese_history": 0.4148606811145511, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.39106145251396646, + "clinical_knowledge": 0.270042194092827, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.35514018691588783, + "college_engineering_hydrology": 0.44339622641509435, + "college_law": 0.25925925925925924, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.3018867924528302, + "college_medicine": 0.30036630036630035, + "computer_science": 0.37254901960784315, + "computer_security": 0.2982456140350877, + "conceptual_physics": 0.32653061224489793, + "construction_project_management": 0.3669064748201439, + "economics": 0.37735849056603776, + "education": 0.3374233128834356, + "electrical_engineering": 0.4011627906976744, + "elementary_chinese": 0.28174603174603174, + "elementary_commonsense": 0.3333333333333333, + "elementary_information_and_technology": 0.5, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.3333333333333333, + "food_science": 0.35664335664335667, + "genetics": 0.32386363636363635, + "global_facts": 0.3288590604026846, + "high_school_biology": 0.23668639053254437, + "high_school_chemistry": 0.30303030303030304, + "high_school_geography": 0.3305084745762712, + "high_school_mathematics": 0.2073170731707317, + "high_school_physics": 0.3, + "high_school_politics": 0.42657342657342656, + "human_sexuality": 0.2777777777777778, + "international_law": 0.35135135135135137, + "journalism": 0.3546511627906977, + "jurisprudence": 0.35279805352798055, + "legal_and_moral_basis": 0.5934579439252337, + "logical": 0.42276422764227645, + "machine_learning": 0.29508196721311475, + "management": 0.3476190476190476, + "marketing": 0.3888888888888889, + "marxist_theory": 0.37037037037037035, + "modern_chinese": 0.2413793103448276, + "nutrition": 0.3448275862068966, + "philosophy": 0.3904761904761905, + "professional_accounting": 0.32, + "professional_law": 0.3127962085308057, + "professional_medicine": 0.26595744680851063, + "professional_psychology": 0.3706896551724138, + "public_relations": 0.3735632183908046, + "security_study": 0.32592592592592595, + "sociology": 0.42035398230088494, + "sports_science": 0.3515151515151515, + "traditional_chinese_medicine": 0.2648648648648649, + "virology": 0.3431952662721893, + "world_history": 0.42857142857142855, + "world_religions": 0.36875 + } + }, + "prompt_4": { + "accuracy": 0.319806596442756, + "category_acc": { + "agronomy": 0.3431952662721893, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.22560975609756098, + "arts": 0.3125, + "astronomy": 0.26666666666666666, + "business_ethics": 0.2966507177033493, + "chinese_civil_service_exam": 0.3, + "chinese_driving_rule": 0.3511450381679389, + "chinese_food_culture": 0.3014705882352941, + "chinese_foreign_policy": 0.3644859813084112, + "chinese_history": 0.39009287925696595, + "chinese_literature": 0.29901960784313725, + "chinese_teacher_qualification": 0.40782122905027934, + "clinical_knowledge": 0.24472573839662448, + "college_actuarial_science": 0.36792452830188677, + "college_education": 0.35514018691588783, + "college_engineering_hydrology": 0.3584905660377358, + "college_law": 0.21296296296296297, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.3584905660377358, + "college_medicine": 0.3076923076923077, + "computer_science": 0.3382352941176471, + "computer_security": 0.2982456140350877, + "conceptual_physics": 0.35374149659863946, + "construction_project_management": 0.30935251798561153, + "economics": 0.3018867924528302, + "education": 0.3374233128834356, + "electrical_engineering": 0.3953488372093023, + "elementary_chinese": 0.29365079365079366, + "elementary_commonsense": 0.30303030303030304, + "elementary_information_and_technology": 0.4369747899159664, + "elementary_mathematics": 0.24782608695652175, + "ethnology": 0.26666666666666666, + "food_science": 0.3916083916083916, + "genetics": 0.2727272727272727, + "global_facts": 0.3221476510067114, + "high_school_biology": 0.25443786982248523, + "high_school_chemistry": 0.23484848484848486, + "high_school_geography": 0.2796610169491525, + "high_school_mathematics": 0.23780487804878048, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.43356643356643354, + "human_sexuality": 0.2777777777777778, + "international_law": 0.31351351351351353, + "journalism": 0.38953488372093026, + "jurisprudence": 0.34306569343065696, + "legal_and_moral_basis": 0.49065420560747663, + "logical": 0.35772357723577236, + "machine_learning": 0.26229508196721313, + "management": 0.28095238095238095, + "marketing": 0.3388888888888889, + "marxist_theory": 0.36507936507936506, + "modern_chinese": 0.23275862068965517, + "nutrition": 0.30344827586206896, + "philosophy": 0.34285714285714286, + "professional_accounting": 0.29714285714285715, + "professional_law": 0.2985781990521327, + "professional_medicine": 0.23404255319148937, + "professional_psychology": 0.3620689655172414, + "public_relations": 0.3850574712643678, + "security_study": 0.2740740740740741, + "sociology": 0.3407079646017699, + "sports_science": 0.3696969696969697, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.28402366863905326, + "world_history": 0.4161490683229814, + "world_religions": 0.375 + } + }, + "prompt_5": { + "accuracy": 0.33388015886720773, + "category_acc": { + "agronomy": 0.3076923076923077, + "anatomy": 0.23648648648648649, + "ancient_chinese": 0.21951219512195122, + "arts": 0.34375, + "astronomy": 0.3090909090909091, + "business_ethics": 0.3397129186602871, + "chinese_civil_service_exam": 0.30625, + "chinese_driving_rule": 0.3893129770992366, + "chinese_food_culture": 0.3235294117647059, + "chinese_foreign_policy": 0.35514018691588783, + "chinese_history": 0.43653250773993807, + "chinese_literature": 0.3088235294117647, + "chinese_teacher_qualification": 0.3854748603351955, + "clinical_knowledge": 0.25738396624472576, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.3644859813084112, + "college_engineering_hydrology": 0.4056603773584906, + "college_law": 0.25, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.24528301886792453, + "college_medicine": 0.304029304029304, + "computer_science": 0.3431372549019608, + "computer_security": 0.3333333333333333, + "conceptual_physics": 0.38095238095238093, + "construction_project_management": 0.3381294964028777, + "economics": 0.389937106918239, + "education": 0.34355828220858897, + "electrical_engineering": 0.4476744186046512, + "elementary_chinese": 0.2857142857142857, + "elementary_commonsense": 0.25252525252525254, + "elementary_information_and_technology": 0.4579831932773109, + "elementary_mathematics": 0.26521739130434785, + "ethnology": 0.3333333333333333, + "food_science": 0.3986013986013986, + "genetics": 0.23863636363636365, + "global_facts": 0.3288590604026846, + "high_school_biology": 0.27218934911242604, + "high_school_chemistry": 0.25757575757575757, + "high_school_geography": 0.3389830508474576, + "high_school_mathematics": 0.24390243902439024, + "high_school_physics": 0.2909090909090909, + "high_school_politics": 0.4125874125874126, + "human_sexuality": 0.30158730158730157, + "international_law": 0.2918918918918919, + "journalism": 0.36627906976744184, + "jurisprudence": 0.36253041362530414, + "legal_and_moral_basis": 0.5046728971962616, + "logical": 0.36585365853658536, + "machine_learning": 0.2459016393442623, + "management": 0.3476190476190476, + "marketing": 0.37222222222222223, + "marxist_theory": 0.4021164021164021, + "modern_chinese": 0.29310344827586204, + "nutrition": 0.35172413793103446, + "philosophy": 0.3619047619047619, + "professional_accounting": 0.3314285714285714, + "professional_law": 0.32701421800947866, + "professional_medicine": 0.23670212765957446, + "professional_psychology": 0.3879310344827586, + "public_relations": 0.367816091954023, + "security_study": 0.37037037037037035, + "sociology": 0.35398230088495575, + "sports_science": 0.3151515151515151, + "traditional_chinese_medicine": 0.3027027027027027, + "virology": 0.31952662721893493, + "world_history": 0.40993788819875776, + "world_religions": 0.3875 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24242424242424243 + }, + "prompt_2": { + "accuracy": 0.18181818181818182 + }, + "prompt_3": { + "accuracy": 0.21212121212121213 + }, + "prompt_4": { + "accuracy": 0.12121212121212122 + }, + "prompt_5": { + "accuracy": 0.21212121212121213 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4772727272727273 + }, + "prompt_2": { + "accuracy": 0.4636363636363636 + }, + "prompt_3": { + "accuracy": 0.4636363636363636 + }, + "prompt_4": { + "accuracy": 0.42954545454545456 + }, + "prompt_5": { + "accuracy": 0.42727272727272725 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.34067796610169493 + }, + "prompt_2": { + "accuracy": 0.3213559322033898 + }, + "prompt_3": { + "accuracy": 0.3271186440677966 + }, + "prompt_4": { + "accuracy": 0.34576271186440677 + }, + "prompt_5": { + "accuracy": 0.3206779661016949 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6634255796559462 + }, + "prompt_2": { + "accuracy": 0.6615557217651459 + }, + "prompt_3": { + "accuracy": 0.6664173522812267 + }, + "prompt_4": { + "accuracy": 0.6555721765145849 + }, + "prompt_5": { + "accuracy": 0.6709050112191474 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7907888290053895 + }, + "prompt_2": { + "accuracy": 0.8049975502204801 + }, + "prompt_3": { + "accuracy": 0.8152866242038217 + }, + "prompt_4": { + "accuracy": 0.7824595786379226 + }, + "prompt_5": { + "accuracy": 0.8098971092601666 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.3142225472032497, + "rouge2": 0.11931945357896737, + "rougeL": 0.23495074378683636, + "avg_rouge": 0.22283091485635112 + }, + "prompt_2": { + "rouge1": 0.334181002201965, + "rouge2": 0.12872693812074404, + "rougeL": 0.2501810919937621, + "avg_rouge": 0.23769634410549037 + }, + "prompt_3": { + "rouge1": 0.3070077616497732, + "rouge2": 0.11362981897323761, + "rougeL": 0.22562971634990703, + "avg_rouge": 0.21542243232430594 + }, + "prompt_4": { + "rouge1": 0.32606206971249524, + "rouge2": 0.12283124057473786, + "rougeL": 0.24322694186413493, + "avg_rouge": 0.23070675071712268 + }, + "prompt_5": { + "rouge1": 0.3273332783872484, + "rouge2": 0.11649839900976232, + "rougeL": 0.24184305444647933, + "avg_rouge": 0.22855824394783 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.22075129247117517, + "rouge2": 0.05586087557181527, + "rougeL": 0.1549829996836681, + "avg_rouge": 0.14386505590888618 + }, + "prompt_2": { + "rouge1": 0.22162805995543938, + "rouge2": 0.055115751915289515, + "rougeL": 0.15640378057283055, + "avg_rouge": 0.14438253081451982 + }, + "prompt_3": { + "rouge1": 0.21848103730782187, + "rouge2": 0.05553319185414756, + "rougeL": 0.15435600938320915, + "avg_rouge": 0.14279007951505954 + }, + "prompt_4": { + "rouge1": 0.22359275034455478, + "rouge2": 0.056910176834291434, + "rougeL": 0.157249203505163, + "avg_rouge": 0.14591737689466974 + }, + "prompt_5": { + "rouge1": 0.21889840322599738, + "rouge2": 0.05198089795347924, + "rougeL": 0.15326555739784622, + "avg_rouge": 0.1413816195257743 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8509174311926605 + }, + "prompt_2": { + "accuracy": 0.823394495412844 + }, + "prompt_3": { + "accuracy": 0.8142201834862385 + }, + "prompt_4": { + "accuracy": 0.7993119266055045 + }, + "prompt_5": { + "accuracy": 0.893348623853211 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5877277085330777 + }, + "prompt_2": { + "accuracy": 0.5627996164908916 + }, + "prompt_3": { + "accuracy": 0.6836049856184084 + }, + "prompt_4": { + "accuracy": 0.6989453499520614 + }, + "prompt_5": { + "accuracy": 0.5100671140939598 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.661 + }, + "prompt_2": { + "accuracy": 0.604 + }, + "prompt_3": { + "accuracy": 0.5465 + }, + "prompt_4": { + "accuracy": 0.6185 + }, + "prompt_5": { + "accuracy": 0.603 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5175 + }, + "prompt_2": { + "accuracy": 0.5135 + }, + "prompt_3": { + "accuracy": 0.486 + }, + "prompt_4": { + "accuracy": 0.4925 + }, + "prompt_5": { + "accuracy": 0.4915 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.77 + }, + "prompt_2": { + "accuracy": 0.7245 + }, + "prompt_3": { + "accuracy": 0.753 + }, + "prompt_4": { + "accuracy": 0.615 + }, + "prompt_5": { + "accuracy": 0.8205 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6056338028169014 + }, + "prompt_2": { + "accuracy": 0.6197183098591549 + }, + "prompt_3": { + "accuracy": 0.5070422535211268 + }, + "prompt_4": { + "accuracy": 0.4507042253521127 + }, + "prompt_5": { + "accuracy": 0.5211267605633803 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6931407942238267 + }, + "prompt_2": { + "accuracy": 0.5956678700361011 + }, + "prompt_3": { + "accuracy": 0.6173285198555957 + }, + "prompt_4": { + "accuracy": 0.592057761732852 + }, + "prompt_5": { + "accuracy": 0.4981949458483754 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5392156862745098 + }, + "prompt_2": { + "accuracy": 0.46568627450980393 + }, + "prompt_3": { + "accuracy": 0.5612745098039216 + }, + "prompt_4": { + "accuracy": 0.4681372549019608 + }, + "prompt_5": { + "accuracy": 0.4632352941176471 + } } }, "five_shot": { @@ -5074,53 +45399,1733 @@ "model_link": "https://huggingface.co/THUDM/chatglm-6b", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3819047619047619, + "language_acc": { + "Malay": 0.32666666666666666, + "English": 0.5066666666666667, + "Vietnamese": 0.4, + "Spanish": 0.37333333333333335, + "Indonesian": 0.2866666666666667, + "Filipino": 0.3333333333333333, + "Chinese": 0.44666666666666666 + }, + "consistency_score_2": 0.4441269841269842, + "consistency_score_3": 0.24914285714285717, + "consistency_score_4": 0.15638095238095234, + "consistency_score_5": 0.10603174603174607, + "consistency_score_6": 0.07714285714285715, + "consistency_score_7": 0.06, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.43333333333333335, + "Malay,Vietnamese": 0.5, + "Malay,Spanish": 0.42, + "Malay,Indonesian": 0.64, + "Malay,Filipino": 0.44666666666666666, + "Malay,Chinese": 0.36, + "English,Vietnamese": 0.38666666666666666, + "English,Spanish": 0.4866666666666667, + "English,Indonesian": 0.4266666666666667, + "English,Filipino": 0.41333333333333333, + "English,Chinese": 0.5266666666666666, + "Vietnamese,Spanish": 0.3933333333333333, + "Vietnamese,Indonesian": 0.5133333333333333, + "Vietnamese,Filipino": 0.3933333333333333, + "Vietnamese,Chinese": 0.38666666666666666, + "Spanish,Indonesian": 0.4533333333333333, + "Spanish,Filipino": 0.49333333333333335, + "Spanish,Chinese": 0.4533333333333333, + "Indonesian,Filipino": 0.46, + "Indonesian,Chinese": 0.4, + "Filipino,Chinese": 0.34 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.25333333333333335, + "Malay,English,Spanish": 0.25333333333333335, + "Malay,English,Indonesian": 0.32, + "Malay,English,Filipino": 0.22666666666666666, + "Malay,English,Chinese": 0.25333333333333335, + "Malay,Vietnamese,Spanish": 0.24666666666666667, + "Malay,Vietnamese,Indonesian": 0.38, + "Malay,Vietnamese,Filipino": 0.24666666666666667, + "Malay,Vietnamese,Chinese": 0.20666666666666667, + "Malay,Spanish,Indonesian": 0.3333333333333333, + "Malay,Spanish,Filipino": 0.26, + "Malay,Spanish,Chinese": 0.22, + "Malay,Indonesian,Filipino": 0.36, + "Malay,Indonesian,Chinese": 0.26, + "Malay,Filipino,Chinese": 0.17333333333333334, + "English,Vietnamese,Spanish": 0.22666666666666666, + "English,Vietnamese,Indonesian": 0.24666666666666667, + "English,Vietnamese,Filipino": 0.18666666666666668, + "English,Vietnamese,Chinese": 0.22666666666666666, + "English,Spanish,Indonesian": 0.25333333333333335, + "English,Spanish,Filipino": 0.26666666666666666, + "English,Spanish,Chinese": 0.29333333333333333, + "English,Indonesian,Filipino": 0.24666666666666667, + "English,Indonesian,Chinese": 0.25333333333333335, + "English,Filipino,Chinese": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian": 0.24666666666666667, + "Vietnamese,Spanish,Filipino": 0.22666666666666666, + "Vietnamese,Spanish,Chinese": 0.2, + "Vietnamese,Indonesian,Filipino": 0.26, + "Vietnamese,Indonesian,Chinese": 0.24666666666666667, + "Vietnamese,Filipino,Chinese": 0.17333333333333334, + "Spanish,Indonesian,Filipino": 0.2866666666666667, + "Spanish,Indonesian,Chinese": 0.24, + "Spanish,Filipino,Chinese": 0.22, + "Indonesian,Filipino,Chinese": 0.2 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian": 0.18, + "Malay,English,Vietnamese,Filipino": 0.14, + "Malay,English,Vietnamese,Chinese": 0.14666666666666667, + "Malay,English,Spanish,Indonesian": 0.19333333333333333, + "Malay,English,Spanish,Filipino": 0.16, + "Malay,English,Spanish,Chinese": 0.16666666666666666, + "Malay,English,Indonesian,Filipino": 0.2, + "Malay,English,Indonesian,Chinese": 0.18, + "Malay,English,Filipino,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Indonesian": 0.2, + "Malay,Vietnamese,Spanish,Filipino": 0.16666666666666666, + "Malay,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Indonesian,Filipino": 0.20666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Filipino,Chinese": 0.11333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Malay,Spanish,Indonesian,Chinese": 0.17333333333333334, + "Malay,Spanish,Filipino,Chinese": 0.12, + "Malay,Indonesian,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.15333333333333332, + "English,Vietnamese,Spanish,Filipino": 0.14666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "English,Vietnamese,Indonesian,Filipino": 0.14666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "English,Vietnamese,Filipino,Chinese": 0.11333333333333333, + "English,Spanish,Indonesian,Filipino": 0.17333333333333334, + "English,Spanish,Indonesian,Chinese": 0.15333333333333332, + "English,Spanish,Filipino,Chinese": 0.15333333333333332, + "English,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Vietnamese,Spanish,Indonesian,Chinese": 0.14, + "Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Spanish,Indonesian,Filipino,Chinese": 0.14 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.12, + "Malay,English,Vietnamese,Spanish,Filipino": 0.11333333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.14, + "Malay,English,Spanish,Indonesian,Chinese": 0.12, + "Malay,English,Spanish,Filipino,Chinese": 0.1, + "Malay,English,Indonesian,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.14, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.08, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Spanish,Indonesian,Filipino,Chinese": 0.1, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.09333333333333334, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + } + }, + "AC3_2": 0.4106723765104389, + "AC3_3": 0.30155836311033474, + "AC3_4": 0.2218993697562356, + "AC3_5": 0.1659807292778771, + "AC3_6": 0.1283580319815225, + "AC3_7": 0.10370689652825597 + }, + "prompt_2": { + "overall_acc": 0.36952380952380953, + "language_acc": { + "Malay": 0.3, + "English": 0.46, + "Vietnamese": 0.3466666666666667, + "Spanish": 0.41333333333333333, + "Indonesian": 0.28, + "Filipino": 0.32666666666666666, + "Chinese": 0.46 + }, + "consistency_score_2": 0.43587301587301575, + "consistency_score_3": 0.2340952380952381, + "consistency_score_4": 0.13980952380952383, + "consistency_score_5": 0.08888888888888886, + "consistency_score_6": 0.060952380952380945, + "consistency_score_7": 0.04666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.32, + "Malay,Vietnamese": 0.5, + "Malay,Spanish": 0.4666666666666667, + "Malay,Indonesian": 0.54, + "Malay,Filipino": 0.44, + "Malay,Chinese": 0.34, + "English,Vietnamese": 0.29333333333333333, + "English,Spanish": 0.52, + "English,Indonesian": 0.3933333333333333, + "English,Filipino": 0.41333333333333333, + "English,Chinese": 0.52, + "Vietnamese,Spanish": 0.4533333333333333, + "Vietnamese,Indonesian": 0.5, + "Vietnamese,Filipino": 0.42, + "Vietnamese,Chinese": 0.32666666666666666, + "Spanish,Indonesian": 0.4533333333333333, + "Spanish,Filipino": 0.5066666666666667, + "Spanish,Chinese": 0.47333333333333333, + "Indonesian,Filipino": 0.54, + "Indonesian,Chinese": 0.37333333333333335, + "Filipino,Chinese": 0.36 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.17333333333333334, + "Malay,English,Spanish": 0.22666666666666666, + "Malay,English,Indonesian": 0.21333333333333335, + "Malay,English,Filipino": 0.19333333333333333, + "Malay,English,Chinese": 0.2, + "Malay,Vietnamese,Spanish": 0.2866666666666667, + "Malay,Vietnamese,Indonesian": 0.32, + "Malay,Vietnamese,Filipino": 0.25333333333333335, + "Malay,Vietnamese,Chinese": 0.18, + "Malay,Spanish,Indonesian": 0.3, + "Malay,Spanish,Filipino": 0.2733333333333333, + "Malay,Spanish,Chinese": 0.21333333333333335, + "Malay,Indonesian,Filipino": 0.32, + "Malay,Indonesian,Chinese": 0.22, + "Malay,Filipino,Chinese": 0.18666666666666668, + "English,Vietnamese,Spanish": 0.2, + "English,Vietnamese,Indonesian": 0.18, + "English,Vietnamese,Filipino": 0.15333333333333332, + "English,Vietnamese,Chinese": 0.17333333333333334, + "English,Spanish,Indonesian": 0.24666666666666667, + "English,Spanish,Filipino": 0.2733333333333333, + "English,Spanish,Chinese": 0.32, + "English,Indonesian,Filipino": 0.26, + "English,Indonesian,Chinese": 0.23333333333333334, + "English,Filipino,Chinese": 0.23333333333333334, + "Vietnamese,Spanish,Indonesian": 0.26, + "Vietnamese,Spanish,Filipino": 0.26666666666666666, + "Vietnamese,Spanish,Chinese": 0.2, + "Vietnamese,Indonesian,Filipino": 0.30666666666666664, + "Vietnamese,Indonesian,Chinese": 0.18, + "Vietnamese,Filipino,Chinese": 0.15333333333333332, + "Spanish,Indonesian,Filipino": 0.3333333333333333, + "Spanish,Indonesian,Chinese": 0.21333333333333335, + "Spanish,Filipino,Chinese": 0.22666666666666666, + "Indonesian,Filipino,Chinese": 0.22 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.12666666666666668, + "Malay,English,Vietnamese,Indonesian": 0.10666666666666667, + "Malay,English,Vietnamese,Filipino": 0.1, + "Malay,English,Vietnamese,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Indonesian": 0.14, + "Malay,English,Spanish,Filipino": 0.13333333333333333, + "Malay,English,Spanish,Chinese": 0.14666666666666667, + "Malay,English,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Indonesian,Chinese": 0.14, + "Malay,English,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian": 0.2, + "Malay,Vietnamese,Spanish,Filipino": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Chinese": 0.12, + "Malay,Vietnamese,Indonesian,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.09333333333333334, + "Malay,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Malay,Spanish,Indonesian,Chinese": 0.14, + "Malay,Spanish,Filipino,Chinese": 0.13333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.10666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.12, + "English,Vietnamese,Spanish,Chinese": 0.12, + "English,Vietnamese,Indonesian,Filipino": 0.11333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.12, + "English,Vietnamese,Filipino,Chinese": 0.08666666666666667, + "English,Spanish,Indonesian,Filipino": 0.2, + "English,Spanish,Indonesian,Chinese": 0.15333333333333332, + "English,Spanish,Filipino,Chinese": 0.16, + "English,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino": 0.2, + "Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.16 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.07333333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.08, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.08, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.06666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.06, + "Malay,English,Spanish,Indonesian,Filipino": 0.12, + "Malay,English,Spanish,Indonesian,Chinese": 0.1, + "Malay,English,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.08, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.08666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.07333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.06666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.06, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.04666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.04666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667 + } + }, + "AC3_2": 0.39996546347118317, + "AC3_3": 0.2866170790972073, + "AC3_4": 0.20286497841228432, + "AC3_5": 0.1433056324710471, + "AC3_6": 0.10464391063730089, + "AC3_7": 0.08286803964446725 + }, + "prompt_3": { + "overall_acc": 0.3685714285714286, + "language_acc": { + "Malay": 0.3333333333333333, + "English": 0.47333333333333333, + "Vietnamese": 0.35333333333333333, + "Spanish": 0.35333333333333333, + "Indonesian": 0.30666666666666664, + "Filipino": 0.32666666666666666, + "Chinese": 0.43333333333333335 + }, + "consistency_score_2": 0.39968253968253975, + "consistency_score_3": 0.19942857142857143, + "consistency_score_4": 0.11238095238095236, + "consistency_score_5": 0.06888888888888889, + "consistency_score_6": 0.045714285714285714, + "consistency_score_7": 0.03333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.3466666666666667, + "Malay,Vietnamese": 0.4866666666666667, + "Malay,Spanish": 0.38666666666666666, + "Malay,Indonesian": 0.56, + "Malay,Filipino": 0.44666666666666666, + "Malay,Chinese": 0.2866666666666667, + "English,Vietnamese": 0.35333333333333333, + "English,Spanish": 0.4266666666666667, + "English,Indonesian": 0.4, + "English,Filipino": 0.41333333333333333, + "English,Chinese": 0.47333333333333333, + "Vietnamese,Spanish": 0.3466666666666667, + "Vietnamese,Indonesian": 0.4533333333333333, + "Vietnamese,Filipino": 0.38666666666666666, + "Vietnamese,Chinese": 0.34, + "Spanish,Indonesian": 0.4066666666666667, + "Spanish,Filipino": 0.36, + "Spanish,Chinese": 0.37333333333333335, + "Indonesian,Filipino": 0.46, + "Indonesian,Chinese": 0.34, + "Filipino,Chinese": 0.3466666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.20666666666666667, + "Malay,English,Spanish": 0.16666666666666666, + "Malay,English,Indonesian": 0.24, + "Malay,English,Filipino": 0.20666666666666667, + "Malay,English,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Spanish": 0.20666666666666667, + "Malay,Vietnamese,Indonesian": 0.3, + "Malay,Vietnamese,Filipino": 0.24666666666666667, + "Malay,Vietnamese,Chinese": 0.18, + "Malay,Spanish,Indonesian": 0.26, + "Malay,Spanish,Filipino": 0.20666666666666667, + "Malay,Spanish,Chinese": 0.14666666666666667, + "Malay,Indonesian,Filipino": 0.30666666666666664, + "Malay,Indonesian,Chinese": 0.19333333333333333, + "Malay,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish": 0.16, + "English,Vietnamese,Indonesian": 0.20666666666666667, + "English,Vietnamese,Filipino": 0.18666666666666668, + "English,Vietnamese,Chinese": 0.18666666666666668, + "English,Spanish,Indonesian": 0.19333333333333333, + "English,Spanish,Filipino": 0.19333333333333333, + "English,Spanish,Chinese": 0.23333333333333334, + "English,Indonesian,Filipino": 0.22, + "English,Indonesian,Chinese": 0.20666666666666667, + "English,Filipino,Chinese": 0.19333333333333333, + "Vietnamese,Spanish,Indonesian": 0.18, + "Vietnamese,Spanish,Filipino": 0.16666666666666666, + "Vietnamese,Spanish,Chinese": 0.14666666666666667, + "Vietnamese,Indonesian,Filipino": 0.24, + "Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "Vietnamese,Filipino,Chinese": 0.14666666666666667, + "Spanish,Indonesian,Filipino": 0.22, + "Spanish,Indonesian,Chinese": 0.16666666666666666, + "Spanish,Filipino,Chinese": 0.16666666666666666, + "Indonesian,Filipino,Chinese": 0.18 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.1, + "Malay,English,Vietnamese,Indonesian": 0.14666666666666667, + "Malay,English,Vietnamese,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Chinese": 0.12, + "Malay,English,Spanish,Indonesian": 0.12, + "Malay,English,Spanish,Filipino": 0.1, + "Malay,English,Spanish,Chinese": 0.09333333333333334, + "Malay,English,Indonesian,Filipino": 0.14666666666666667, + "Malay,English,Indonesian,Chinese": 0.14, + "Malay,English,Filipino,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Indonesian": 0.14, + "Malay,Vietnamese,Spanish,Filipino": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.18666666666666668, + "Malay,Vietnamese,Indonesian,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.08666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Malay,Spanish,Indonesian,Chinese": 0.1, + "Malay,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.08666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.1, + "English,Vietnamese,Spanish,Chinese": 0.08, + "English,Vietnamese,Indonesian,Filipino": 0.10666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.10666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.08666666666666667, + "English,Spanish,Indonesian,Filipino": 0.12666666666666668, + "English,Spanish,Indonesian,Chinese": 0.11333333333333333, + "English,Spanish,Filipino,Chinese": 0.10666666666666667, + "English,Indonesian,Filipino,Chinese": 0.12, + "Vietnamese,Spanish,Indonesian,Filipino": 0.11333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.08, + "Vietnamese,Spanish,Filipino,Chinese": 0.08666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.07333333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.06, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.09333333333333334, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.08, + "Malay,English,Spanish,Indonesian,Chinese": 0.07333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.08, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.1, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.06, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.05333333333333334, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.04666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.08, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.05333333333333334, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.04666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.04, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 + } + }, + "AC3_2": 0.38349704835622134, + "AC3_3": 0.2588157516072076, + "AC3_4": 0.17224328143519127, + "AC3_5": 0.11608127718681742, + "AC3_6": 0.08133990145819874, + "AC3_7": 0.06113744074308192 + }, + "prompt_4": { + "overall_acc": 0.3685714285714285, + "language_acc": { + "Malay": 0.31333333333333335, + "English": 0.49333333333333335, + "Vietnamese": 0.3466666666666667, + "Spanish": 0.37333333333333335, + "Indonesian": 0.26, + "Filipino": 0.34, + "Chinese": 0.4533333333333333 + }, + "consistency_score_2": 0.42920634920634915, + "consistency_score_3": 0.23504761904761903, + "consistency_score_4": 0.14819047619047618, + "consistency_score_5": 0.10380952380952382, + "consistency_score_6": 0.07904761904761905, + "consistency_score_7": 0.06666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.3933333333333333, + "Malay,Vietnamese": 0.46, + "Malay,Spanish": 0.4, + "Malay,Indonesian": 0.5666666666666667, + "Malay,Filipino": 0.5, + "Malay,Chinese": 0.36, + "English,Vietnamese": 0.31333333333333335, + "English,Spanish": 0.52, + "English,Indonesian": 0.43333333333333335, + "English,Filipino": 0.42, + "English,Chinese": 0.5066666666666667, + "Vietnamese,Spanish": 0.38666666666666666, + "Vietnamese,Indonesian": 0.4666666666666667, + "Vietnamese,Filipino": 0.4266666666666667, + "Vietnamese,Chinese": 0.36666666666666664, + "Spanish,Indonesian": 0.48, + "Spanish,Filipino": 0.44, + "Spanish,Chinese": 0.4066666666666667, + "Indonesian,Filipino": 0.5133333333333333, + "Indonesian,Chinese": 0.34, + "Filipino,Chinese": 0.31333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.2, + "Malay,English,Spanish": 0.24, + "Malay,English,Indonesian": 0.3, + "Malay,English,Filipino": 0.22, + "Malay,English,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Spanish": 0.22666666666666666, + "Malay,Vietnamese,Indonesian": 0.29333333333333333, + "Malay,Vietnamese,Filipino": 0.2866666666666667, + "Malay,Vietnamese,Chinese": 0.21333333333333335, + "Malay,Spanish,Indonesian": 0.29333333333333333, + "Malay,Spanish,Filipino": 0.26666666666666666, + "Malay,Spanish,Chinese": 0.18666666666666668, + "Malay,Indonesian,Filipino": 0.36, + "Malay,Indonesian,Chinese": 0.21333333333333335, + "Malay,Filipino,Chinese": 0.18666666666666668, + "English,Vietnamese,Spanish": 0.18, + "English,Vietnamese,Indonesian": 0.20666666666666667, + "English,Vietnamese,Filipino": 0.18, + "English,Vietnamese,Chinese": 0.18, + "English,Spanish,Indonesian": 0.2733333333333333, + "English,Spanish,Filipino": 0.26666666666666666, + "English,Spanish,Chinese": 0.30666666666666664, + "English,Indonesian,Filipino": 0.28, + "English,Indonesian,Chinese": 0.20666666666666667, + "English,Filipino,Chinese": 0.22, + "Vietnamese,Spanish,Indonesian": 0.24666666666666667, + "Vietnamese,Spanish,Filipino": 0.24666666666666667, + "Vietnamese,Spanish,Chinese": 0.18666666666666668, + "Vietnamese,Indonesian,Filipino": 0.2733333333333333, + "Vietnamese,Indonesian,Chinese": 0.18666666666666668, + "Vietnamese,Filipino,Chinese": 0.16666666666666666, + "Spanish,Indonesian,Filipino": 0.30666666666666664, + "Spanish,Indonesian,Chinese": 0.20666666666666667, + "Spanish,Filipino,Chinese": 0.18666666666666668, + "Indonesian,Filipino,Chinese": 0.2 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian": 0.14666666666666667, + "Malay,English,Vietnamese,Filipino": 0.13333333333333333, + "Malay,English,Vietnamese,Chinese": 0.14, + "Malay,English,Spanish,Indonesian": 0.18666666666666668, + "Malay,English,Spanish,Filipino": 0.15333333333333332, + "Malay,English,Spanish,Chinese": 0.14666666666666667, + "Malay,English,Indonesian,Filipino": 0.19333333333333333, + "Malay,English,Indonesian,Chinese": 0.16, + "Malay,English,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Filipino": 0.18, + "Malay,Vietnamese,Spanish,Chinese": 0.12, + "Malay,Vietnamese,Indonesian,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.12666666666666668, + "Malay,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Malay,Spanish,Indonesian,Chinese": 0.14, + "Malay,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,Indonesian,Filipino,Chinese": 0.14, + "English,Vietnamese,Spanish,Indonesian": 0.12666666666666668, + "English,Vietnamese,Spanish,Filipino": 0.12, + "English,Vietnamese,Spanish,Chinese": 0.10666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.15333333333333332, + "English,Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "English,Vietnamese,Filipino,Chinese": 0.10666666666666667, + "English,Spanish,Indonesian,Filipino": 0.20666666666666667, + "English,Spanish,Indonesian,Chinese": 0.13333333333333333, + "English,Spanish,Filipino,Chinese": 0.14666666666666667, + "English,Indonesian,Filipino,Chinese": 0.16, + "Vietnamese,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Vietnamese,Spanish,Filipino,Chinese": 0.12, + "Vietnamese,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.09333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino": 0.08666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.08, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.1, + "Malay,English,Vietnamese,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.14, + "Malay,English,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.1, + "Malay,English,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.07333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.08, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.08, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.08, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667 + } + }, + "AC3_2": 0.39658461708405035, + "AC3_3": 0.2870414280730795, + "AC3_4": 0.2113885524313173, + "AC3_5": 0.1619930875233108, + "AC3_6": 0.13017629176423112, + "AC3_7": 0.11291028443795278 + }, + "prompt_5": { + "overall_acc": 0.3542857142857142, + "language_acc": { + "Malay": 0.3, + "English": 0.5133333333333333, + "Vietnamese": 0.3333333333333333, + "Spanish": 0.30666666666666664, + "Indonesian": 0.2866666666666667, + "Filipino": 0.3, + "Chinese": 0.44 + }, + "consistency_score_2": 0.4384126984126984, + "consistency_score_3": 0.2424761904761905, + "consistency_score_4": 0.1521904761904762, + "consistency_score_5": 0.10222222222222224, + "consistency_score_6": 0.07047619047619048, + "consistency_score_7": 0.04666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.37333333333333335, + "Malay,Vietnamese": 0.5133333333333333, + "Malay,Spanish": 0.38, + "Malay,Indonesian": 0.5866666666666667, + "Malay,Filipino": 0.52, + "Malay,Chinese": 0.38, + "English,Vietnamese": 0.36666666666666664, + "English,Spanish": 0.48, + "English,Indonesian": 0.44666666666666666, + "English,Filipino": 0.35333333333333333, + "English,Chinese": 0.5733333333333334, + "Vietnamese,Spanish": 0.3933333333333333, + "Vietnamese,Indonesian": 0.48, + "Vietnamese,Filipino": 0.4266666666666667, + "Vietnamese,Chinese": 0.3933333333333333, + "Spanish,Indonesian": 0.46, + "Spanish,Filipino": 0.42, + "Spanish,Chinese": 0.4066666666666667, + "Indonesian,Filipino": 0.5066666666666667, + "Indonesian,Chinese": 0.4, + "Filipino,Chinese": 0.3466666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.22666666666666666, + "Malay,English,Spanish": 0.18666666666666668, + "Malay,English,Indonesian": 0.2866666666666667, + "Malay,English,Filipino": 0.22, + "Malay,English,Chinese": 0.2733333333333333, + "Malay,Vietnamese,Spanish": 0.24, + "Malay,Vietnamese,Indonesian": 0.35333333333333333, + "Malay,Vietnamese,Filipino": 0.3, + "Malay,Vietnamese,Chinese": 0.22666666666666666, + "Malay,Spanish,Indonesian": 0.28, + "Malay,Spanish,Filipino": 0.24, + "Malay,Spanish,Chinese": 0.18, + "Malay,Indonesian,Filipino": 0.38, + "Malay,Indonesian,Chinese": 0.26666666666666666, + "Malay,Filipino,Chinese": 0.22, + "English,Vietnamese,Spanish": 0.21333333333333335, + "English,Vietnamese,Indonesian": 0.24666666666666667, + "English,Vietnamese,Filipino": 0.16666666666666666, + "English,Vietnamese,Chinese": 0.26, + "English,Spanish,Indonesian": 0.24666666666666667, + "English,Spanish,Filipino": 0.2, + "English,Spanish,Chinese": 0.29333333333333333, + "English,Indonesian,Filipino": 0.24, + "English,Indonesian,Chinese": 0.2866666666666667, + "English,Filipino,Chinese": 0.24, + "Vietnamese,Spanish,Indonesian": 0.24, + "Vietnamese,Spanish,Filipino": 0.20666666666666667, + "Vietnamese,Spanish,Chinese": 0.19333333333333333, + "Vietnamese,Indonesian,Filipino": 0.2866666666666667, + "Vietnamese,Indonesian,Chinese": 0.22666666666666666, + "Vietnamese,Filipino,Chinese": 0.17333333333333334, + "Spanish,Indonesian,Filipino": 0.26666666666666666, + "Spanish,Indonesian,Chinese": 0.22, + "Spanish,Filipino,Chinese": 0.18, + "Indonesian,Filipino,Chinese": 0.22 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.12666666666666668, + "Malay,English,Vietnamese,Indonesian": 0.18, + "Malay,English,Vietnamese,Filipino": 0.13333333333333333, + "Malay,English,Vietnamese,Chinese": 0.17333333333333334, + "Malay,English,Spanish,Indonesian": 0.15333333333333332, + "Malay,English,Spanish,Filipino": 0.12, + "Malay,English,Spanish,Chinese": 0.14666666666666667, + "Malay,English,Indonesian,Filipino": 0.19333333333333333, + "Malay,English,Indonesian,Chinese": 0.20666666666666667, + "Malay,English,Filipino,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Indonesian": 0.18, + "Malay,Vietnamese,Spanish,Filipino": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.12, + "Malay,Vietnamese,Indonesian,Filipino": 0.24, + "Malay,Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Filipino,Chinese": 0.12666666666666668, + "Malay,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Malay,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Malay,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.18, + "English,Vietnamese,Spanish,Indonesian": 0.13333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.11333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "English,Vietnamese,Indonesian,Filipino": 0.14, + "English,Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "English,Vietnamese,Filipino,Chinese": 0.12666666666666668, + "English,Spanish,Indonesian,Filipino": 0.14666666666666667, + "English,Spanish,Indonesian,Chinese": 0.16666666666666666, + "English,Spanish,Filipino,Chinese": 0.14, + "English,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.12 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.1, + "Malay,English,Vietnamese,Spanish,Filipino": 0.08, + "Malay,English,Vietnamese,Spanish,Chinese": 0.1, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.12, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.13333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.1, + "Malay,English,Spanish,Indonesian,Filipino": 0.10666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.12, + "Malay,English,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.08666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.08666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.1, + "English,Spanish,Indonesian,Filipino,Chinese": 0.1, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.08, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.06, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.08, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667 + } + }, + "AC3_2": 0.39188511923656344, + "AC3_3": 0.2879066161571266, + "AC3_4": 0.21291785307137628, + "AC3_5": 0.15866481220446496, + "AC3_6": 0.11756566300883704, + "AC3_7": 0.08247030876802997 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.32711038961038963, + "language_acc": { + "English": 0.3352272727272727, + "Vietnamese": 0.3068181818181818, + "Chinese": 0.44886363636363635, + "Indonesian": 0.2897727272727273, + "Filipino": 0.24431818181818182, + "Spanish": 0.3522727272727273, + "Malay": 0.3125 + }, + "consistency_score_2": 0.4537337662337662, + "consistency_score_3": 0.25762987012987015, + "consistency_score_4": 0.1660714285714285, + "consistency_score_5": 0.11634199134199134, + "consistency_score_6": 0.08522727272727272, + "consistency_score_7": 0.0625, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4431818181818182, + "English,Chinese": 0.4943181818181818, + "English,Indonesian": 0.4090909090909091, + "English,Filipino": 0.32954545454545453, + "English,Spanish": 0.44886363636363635, + "English,Malay": 0.35795454545454547, + "Vietnamese,Chinese": 0.48863636363636365, + "Vietnamese,Indonesian": 0.48295454545454547, + "Vietnamese,Filipino": 0.42045454545454547, + "Vietnamese,Spanish": 0.48295454545454547, + "Vietnamese,Malay": 0.48863636363636365, + "Chinese,Indonesian": 0.42045454545454547, + "Chinese,Filipino": 0.35795454545454547, + "Chinese,Spanish": 0.4715909090909091, + "Chinese,Malay": 0.42613636363636365, + "Indonesian,Filipino": 0.5, + "Indonesian,Spanish": 0.48295454545454547, + "Indonesian,Malay": 0.6306818181818182, + "Filipino,Spanish": 0.4090909090909091, + "Filipino,Malay": 0.48295454545454547, + "Spanish,Malay": 0.5 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2784090909090909, + "English,Vietnamese,Indonesian": 0.26704545454545453, + "English,Vietnamese,Filipino": 0.2159090909090909, + "English,Vietnamese,Spanish": 0.2727272727272727, + "English,Vietnamese,Malay": 0.25, + "English,Chinese,Indonesian": 0.23295454545454544, + "English,Chinese,Filipino": 0.1875, + "English,Chinese,Spanish": 0.26704545454545453, + "English,Chinese,Malay": 0.20454545454545456, + "English,Indonesian,Filipino": 0.2215909090909091, + "English,Indonesian,Spanish": 0.24431818181818182, + "English,Indonesian,Malay": 0.26704545454545453, + "English,Filipino,Spanish": 0.1875, + "English,Filipino,Malay": 0.18181818181818182, + "English,Spanish,Malay": 0.23863636363636365, + "Vietnamese,Chinese,Indonesian": 0.26136363636363635, + "Vietnamese,Chinese,Filipino": 0.2215909090909091, + "Vietnamese,Chinese,Spanish": 0.29545454545454547, + "Vietnamese,Chinese,Malay": 0.2727272727272727, + "Vietnamese,Indonesian,Filipino": 0.2727272727272727, + "Vietnamese,Indonesian,Spanish": 0.2840909090909091, + "Vietnamese,Indonesian,Malay": 0.3465909090909091, + "Vietnamese,Filipino,Spanish": 0.23863636363636365, + "Vietnamese,Filipino,Malay": 0.26704545454545453, + "Vietnamese,Spanish,Malay": 0.3125, + "Chinese,Indonesian,Filipino": 0.2215909090909091, + "Chinese,Indonesian,Spanish": 0.24431818181818182, + "Chinese,Indonesian,Malay": 0.2897727272727273, + "Chinese,Filipino,Spanish": 0.21022727272727273, + "Chinese,Filipino,Malay": 0.23295454545454544, + "Chinese,Spanish,Malay": 0.26136363636363635, + "Indonesian,Filipino,Spanish": 0.2784090909090909, + "Indonesian,Filipino,Malay": 0.36363636363636365, + "Indonesian,Spanish,Malay": 0.3522727272727273, + "Filipino,Spanish,Malay": 0.2727272727272727 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.17613636363636365, + "English,Vietnamese,Chinese,Filipino": 0.14204545454545456, + "English,Vietnamese,Chinese,Spanish": 0.1875, + "English,Vietnamese,Chinese,Malay": 0.16477272727272727, + "English,Vietnamese,Indonesian,Filipino": 0.16477272727272727, + "English,Vietnamese,Indonesian,Spanish": 0.16477272727272727, + "English,Vietnamese,Indonesian,Malay": 0.19886363636363635, + "English,Vietnamese,Filipino,Spanish": 0.14772727272727273, + "English,Vietnamese,Filipino,Malay": 0.14204545454545456, + "English,Vietnamese,Spanish,Malay": 0.18181818181818182, + "English,Chinese,Indonesian,Filipino": 0.13068181818181818, + "English,Chinese,Indonesian,Spanish": 0.14772727272727273, + "English,Chinese,Indonesian,Malay": 0.1534090909090909, + "English,Chinese,Filipino,Spanish": 0.13636363636363635, + "English,Chinese,Filipino,Malay": 0.11931818181818182, + "English,Chinese,Spanish,Malay": 0.14772727272727273, + "English,Indonesian,Filipino,Spanish": 0.1534090909090909, + "English,Indonesian,Filipino,Malay": 0.1534090909090909, + "English,Indonesian,Spanish,Malay": 0.18181818181818182, + "English,Filipino,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Filipino": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Spanish": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Filipino,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Spanish,Malay": 0.1875, + "Vietnamese,Indonesian,Filipino,Spanish": 0.17045454545454544, + "Vietnamese,Indonesian,Filipino,Malay": 0.20454545454545456, + "Vietnamese,Indonesian,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Filipino,Spanish,Malay": 0.1875, + "Chinese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "Chinese,Indonesian,Filipino,Malay": 0.17613636363636365, + "Chinese,Indonesian,Spanish,Malay": 0.18181818181818182, + "Chinese,Filipino,Spanish,Malay": 0.1534090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.21022727272727273 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.11931818181818182, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.13636363636363635, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.11363636363636363, + "English,Vietnamese,Chinese,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Spanish,Malay": 0.125, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.14204545454545456, + "English,Vietnamese,Filipino,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Filipino,Spanish,Malay": 0.09659090909090909, + "English,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625 + } + }, + "AC3_2": 0.38015531760663246, + "AC3_3": 0.2882421922392614, + "AC3_4": 0.22029883377457304, + "AC3_5": 0.17163815436332983, + "AC3_6": 0.13522279882189442, + "AC3_7": 0.10494791663973004 + }, + "prompt_2": { + "overall_acc": 0.3141233766233767, + "language_acc": { + "English": 0.3125, + "Vietnamese": 0.2897727272727273, + "Chinese": 0.42613636363636365, + "Indonesian": 0.25, + "Filipino": 0.30113636363636365, + "Spanish": 0.36363636363636365, + "Malay": 0.2556818181818182 + }, + "consistency_score_2": 0.45319264069264065, + "consistency_score_3": 0.2573051948051948, + "consistency_score_4": 0.16168831168831171, + "consistency_score_5": 0.10633116883116885, + "consistency_score_6": 0.0706168831168831, + "consistency_score_7": 0.045454545454545456, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.39204545454545453, + "English,Chinese": 0.4659090909090909, + "English,Indonesian": 0.4147727272727273, + "English,Filipino": 0.375, + "English,Spanish": 0.4090909090909091, + "English,Malay": 0.42613636363636365, + "Vietnamese,Chinese": 0.4090909090909091, + "Vietnamese,Indonesian": 0.48863636363636365, + "Vietnamese,Filipino": 0.42613636363636365, + "Vietnamese,Spanish": 0.4431818181818182, + "Vietnamese,Malay": 0.5170454545454546, + "Chinese,Indonesian": 0.42045454545454547, + "Chinese,Filipino": 0.3409090909090909, + "Chinese,Spanish": 0.4602272727272727, + "Chinese,Malay": 0.4375, + "Indonesian,Filipino": 0.4602272727272727, + "Indonesian,Spanish": 0.4943181818181818, + "Indonesian,Malay": 0.6022727272727273, + "Filipino,Spanish": 0.4659090909090909, + "Filipino,Malay": 0.5340909090909091, + "Spanish,Malay": 0.5340909090909091 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2215909090909091, + "English,Vietnamese,Indonesian": 0.23863636363636365, + "English,Vietnamese,Filipino": 0.21022727272727273, + "English,Vietnamese,Spanish": 0.22727272727272727, + "English,Vietnamese,Malay": 0.25, + "English,Chinese,Indonesian": 0.22727272727272727, + "English,Chinese,Filipino": 0.18181818181818182, + "English,Chinese,Spanish": 0.2215909090909091, + "English,Chinese,Malay": 0.23295454545454544, + "English,Indonesian,Filipino": 0.23295454545454544, + "English,Indonesian,Spanish": 0.2215909090909091, + "English,Indonesian,Malay": 0.29545454545454547, + "English,Filipino,Spanish": 0.2215909090909091, + "English,Filipino,Malay": 0.25, + "English,Spanish,Malay": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian": 0.23863636363636365, + "Vietnamese,Chinese,Filipino": 0.18181818181818182, + "Vietnamese,Chinese,Spanish": 0.2556818181818182, + "Vietnamese,Chinese,Malay": 0.2556818181818182, + "Vietnamese,Indonesian,Filipino": 0.26704545454545453, + "Vietnamese,Indonesian,Spanish": 0.2840909090909091, + "Vietnamese,Indonesian,Malay": 0.3352272727272727, + "Vietnamese,Filipino,Spanish": 0.26704545454545453, + "Vietnamese,Filipino,Malay": 0.29545454545454547, + "Vietnamese,Spanish,Malay": 0.3181818181818182, + "Chinese,Indonesian,Filipino": 0.20454545454545456, + "Chinese,Indonesian,Spanish": 0.2556818181818182, + "Chinese,Indonesian,Malay": 0.2840909090909091, + "Chinese,Filipino,Spanish": 0.22727272727272727, + "Chinese,Filipino,Malay": 0.22727272727272727, + "Chinese,Spanish,Malay": 0.2897727272727273, + "Indonesian,Filipino,Spanish": 0.2840909090909091, + "Indonesian,Filipino,Malay": 0.36363636363636365, + "Indonesian,Spanish,Malay": 0.3465909090909091, + "Filipino,Spanish,Malay": 0.3352272727272727 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.13636363636363635, + "English,Vietnamese,Chinese,Filipino": 0.11931818181818182, + "English,Vietnamese,Chinese,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Malay": 0.14204545454545456, + "English,Vietnamese,Indonesian,Filipino": 0.1534090909090909, + "English,Vietnamese,Indonesian,Spanish": 0.1590909090909091, + "English,Vietnamese,Indonesian,Malay": 0.17613636363636365, + "English,Vietnamese,Filipino,Spanish": 0.14772727272727273, + "English,Vietnamese,Filipino,Malay": 0.16477272727272727, + "English,Vietnamese,Spanish,Malay": 0.16477272727272727, + "English,Chinese,Indonesian,Filipino": 0.11363636363636363, + "English,Chinese,Indonesian,Spanish": 0.11363636363636363, + "English,Chinese,Indonesian,Malay": 0.1534090909090909, + "English,Chinese,Filipino,Spanish": 0.11931818181818182, + "English,Chinese,Filipino,Malay": 0.125, + "English,Chinese,Spanish,Malay": 0.14772727272727273, + "English,Indonesian,Filipino,Spanish": 0.14204545454545456, + "English,Indonesian,Filipino,Malay": 0.19318181818181818, + "English,Indonesian,Spanish,Malay": 0.18181818181818182, + "English,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino": 0.125, + "Vietnamese,Chinese,Indonesian,Spanish": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Filipino,Spanish": 0.14772727272727273, + "Vietnamese,Chinese,Filipino,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Spanish,Malay": 0.1875, + "Vietnamese,Indonesian,Filipino,Spanish": 0.17613636363636365, + "Vietnamese,Indonesian,Filipino,Malay": 0.21022727272727273, + "Vietnamese,Indonesian,Spanish,Malay": 0.2215909090909091, + "Vietnamese,Filipino,Spanish,Malay": 0.2159090909090909, + "Chinese,Indonesian,Filipino,Spanish": 0.14772727272727273, + "Chinese,Indonesian,Filipino,Malay": 0.17045454545454544, + "Chinese,Indonesian,Spanish,Malay": 0.19318181818181818, + "Chinese,Filipino,Spanish,Malay": 0.1875, + "Indonesian,Filipino,Spanish,Malay": 0.23295454545454544 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.09659090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.125, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.125, + "English,Vietnamese,Filipino,Spanish,Malay": 0.11931818181818182, + "English,Chinese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "English,Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Spanish,Malay": 0.09659090909090909, + "English,Chinese,Filipino,Spanish,Malay": 0.09659090909090909, + "English,Indonesian,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + } + }, + "AC3_2": 0.37105546951733764, + "AC3_3": 0.2828895181027966, + "AC3_4": 0.2134881494719405, + "AC3_5": 0.15888093562886207, + "AC3_6": 0.11531111290772403, + "AC3_7": 0.07941719677660042 + }, + "prompt_3": { + "overall_acc": 0.32305194805194803, + "language_acc": { + "English": 0.3522727272727273, + "Vietnamese": 0.30113636363636365, + "Chinese": 0.4431818181818182, + "Indonesian": 0.2727272727272727, + "Filipino": 0.2556818181818182, + "Spanish": 0.35795454545454547, + "Malay": 0.2784090909090909 + }, + "consistency_score_2": 0.42857142857142855, + "consistency_score_3": 0.22483766233766236, + "consistency_score_4": 0.12873376623376626, + "consistency_score_5": 0.07656926406926406, + "consistency_score_6": 0.04464285714285715, + "consistency_score_7": 0.022727272727272728, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4147727272727273, + "English,Chinese": 0.5113636363636364, + "English,Indonesian": 0.3806818181818182, + "English,Filipino": 0.3181818181818182, + "English,Spanish": 0.42045454545454547, + "English,Malay": 0.3352272727272727, + "Vietnamese,Chinese": 0.4431818181818182, + "Vietnamese,Indonesian": 0.5397727272727273, + "Vietnamese,Filipino": 0.38636363636363635, + "Vietnamese,Spanish": 0.4318181818181818, + "Vietnamese,Malay": 0.4659090909090909, + "Chinese,Indonesian": 0.4090909090909091, + "Chinese,Filipino": 0.2784090909090909, + "Chinese,Spanish": 0.45454545454545453, + "Chinese,Malay": 0.4034090909090909, + "Indonesian,Filipino": 0.42613636363636365, + "Indonesian,Spanish": 0.4772727272727273, + "Indonesian,Malay": 0.5795454545454546, + "Filipino,Spanish": 0.36363636363636365, + "Filipino,Malay": 0.5113636363636364, + "Spanish,Malay": 0.44886363636363635 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2784090909090909, + "English,Vietnamese,Indonesian": 0.23863636363636365, + "English,Vietnamese,Filipino": 0.18181818181818182, + "English,Vietnamese,Spanish": 0.2215909090909091, + "English,Vietnamese,Malay": 0.2215909090909091, + "English,Chinese,Indonesian": 0.23863636363636365, + "English,Chinese,Filipino": 0.1534090909090909, + "English,Chinese,Spanish": 0.26136363636363635, + "English,Chinese,Malay": 0.2215909090909091, + "English,Indonesian,Filipino": 0.18181818181818182, + "English,Indonesian,Spanish": 0.21022727272727273, + "English,Indonesian,Malay": 0.23863636363636365, + "English,Filipino,Spanish": 0.14772727272727273, + "English,Filipino,Malay": 0.17613636363636365, + "English,Spanish,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian": 0.26136363636363635, + "Vietnamese,Chinese,Filipino": 0.1590909090909091, + "Vietnamese,Chinese,Spanish": 0.23295454545454544, + "Vietnamese,Chinese,Malay": 0.23295454545454544, + "Vietnamese,Indonesian,Filipino": 0.25, + "Vietnamese,Indonesian,Spanish": 0.2840909090909091, + "Vietnamese,Indonesian,Malay": 0.3181818181818182, + "Vietnamese,Filipino,Spanish": 0.17613636363636365, + "Vietnamese,Filipino,Malay": 0.2556818181818182, + "Vietnamese,Spanish,Malay": 0.22727272727272727, + "Chinese,Indonesian,Filipino": 0.17613636363636365, + "Chinese,Indonesian,Spanish": 0.24431818181818182, + "Chinese,Indonesian,Malay": 0.2556818181818182, + "Chinese,Filipino,Spanish": 0.1590909090909091, + "Chinese,Filipino,Malay": 0.19886363636363635, + "Chinese,Spanish,Malay": 0.2215909090909091, + "Indonesian,Filipino,Spanish": 0.20454545454545456, + "Indonesian,Filipino,Malay": 0.3181818181818182, + "Indonesian,Spanish,Malay": 0.30113636363636365, + "Filipino,Spanish,Malay": 0.23295454545454544 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.17613636363636365, + "English,Vietnamese,Chinese,Filipino": 0.10795454545454546, + "English,Vietnamese,Chinese,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Malay": 0.16477272727272727, + "English,Vietnamese,Indonesian,Filipino": 0.10795454545454546, + "English,Vietnamese,Indonesian,Spanish": 0.13636363636363635, + "English,Vietnamese,Indonesian,Malay": 0.1534090909090909, + "English,Vietnamese,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Spanish,Malay": 0.11931818181818182, + "English,Chinese,Indonesian,Filipino": 0.10795454545454546, + "English,Chinese,Indonesian,Spanish": 0.125, + "English,Chinese,Indonesian,Malay": 0.1590909090909091, + "English,Chinese,Filipino,Spanish": 0.07386363636363637, + "English,Chinese,Filipino,Malay": 0.10795454545454546, + "English,Chinese,Spanish,Malay": 0.10795454545454546, + "English,Indonesian,Filipino,Spanish": 0.08522727272727272, + "English,Indonesian,Filipino,Malay": 0.14204545454545456, + "English,Indonesian,Spanish,Malay": 0.13636363636363635, + "English,Filipino,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Filipino": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Spanish": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Filipino,Spanish": 0.09090909090909091, + "Vietnamese,Chinese,Filipino,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Spanish,Malay": 0.125, + "Vietnamese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "Vietnamese,Indonesian,Filipino,Malay": 0.1875, + "Vietnamese,Indonesian,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Filipino,Spanish,Malay": 0.125, + "Chinese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "Chinese,Indonesian,Filipino,Malay": 0.1534090909090909, + "Chinese,Indonesian,Spanish,Malay": 0.14772727272727273, + "Chinese,Filipino,Spanish,Malay": 0.10795454545454546, + "Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino,Malay": 0.09659090909090909, + "English,Chinese,Indonesian,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0625, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + } + }, + "AC3_2": 0.3684048132802048, + "AC3_3": 0.26514189509350183, + "AC3_4": 0.18410362540315553, + "AC3_5": 0.1237964810776218, + "AC3_6": 0.07844528537759345, + "AC3_7": 0.04246692273633138 + }, + "prompt_4": { + "overall_acc": 0.3344155844155844, + "language_acc": { + "English": 0.3352272727272727, + "Vietnamese": 0.30113636363636365, + "Chinese": 0.4375, + "Indonesian": 0.2784090909090909, + "Filipino": 0.30113636363636365, + "Spanish": 0.3693181818181818, + "Malay": 0.3181818181818182 + }, + "consistency_score_2": 0.4545454545454544, + "consistency_score_3": 0.2625, + "consistency_score_4": 0.1730519480519481, + "consistency_score_5": 0.12175324675324677, + "consistency_score_6": 0.08766233766233765, + "consistency_score_7": 0.0625, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4147727272727273, + "English,Chinese": 0.48863636363636365, + "English,Indonesian": 0.39204545454545453, + "English,Filipino": 0.35795454545454547, + "English,Spanish": 0.4431818181818182, + "English,Malay": 0.4090909090909091, + "Vietnamese,Chinese": 0.45454545454545453, + "Vietnamese,Indonesian": 0.5113636363636364, + "Vietnamese,Filipino": 0.4659090909090909, + "Vietnamese,Spanish": 0.4375, + "Vietnamese,Malay": 0.5397727272727273, + "Chinese,Indonesian": 0.3977272727272727, + "Chinese,Filipino": 0.36363636363636365, + "Chinese,Spanish": 0.4659090909090909, + "Chinese,Malay": 0.4318181818181818, + "Indonesian,Filipino": 0.44886363636363635, + "Indonesian,Spanish": 0.45454545454545453, + "Indonesian,Malay": 0.5965909090909091, + "Filipino,Spanish": 0.4772727272727273, + "Filipino,Malay": 0.5056818181818182, + "Spanish,Malay": 0.48863636363636365 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2897727272727273, + "English,Vietnamese,Indonesian": 0.26136363636363635, + "English,Vietnamese,Filipino": 0.2215909090909091, + "English,Vietnamese,Spanish": 0.23863636363636365, + "English,Vietnamese,Malay": 0.2840909090909091, + "English,Chinese,Indonesian": 0.2215909090909091, + "English,Chinese,Filipino": 0.19886363636363635, + "English,Chinese,Spanish": 0.2727272727272727, + "English,Chinese,Malay": 0.25, + "English,Indonesian,Filipino": 0.19318181818181818, + "English,Indonesian,Spanish": 0.2159090909090909, + "English,Indonesian,Malay": 0.2556818181818182, + "English,Filipino,Spanish": 0.23863636363636365, + "English,Filipino,Malay": 0.2215909090909091, + "English,Spanish,Malay": 0.23863636363636365, + "Vietnamese,Chinese,Indonesian": 0.2556818181818182, + "Vietnamese,Chinese,Filipino": 0.23863636363636365, + "Vietnamese,Chinese,Spanish": 0.26136363636363635, + "Vietnamese,Chinese,Malay": 0.30113636363636365, + "Vietnamese,Indonesian,Filipino": 0.2727272727272727, + "Vietnamese,Indonesian,Spanish": 0.2840909090909091, + "Vietnamese,Indonesian,Malay": 0.375, + "Vietnamese,Filipino,Spanish": 0.2897727272727273, + "Vietnamese,Filipino,Malay": 0.3181818181818182, + "Vietnamese,Spanish,Malay": 0.30113636363636365, + "Chinese,Indonesian,Filipino": 0.21022727272727273, + "Chinese,Indonesian,Spanish": 0.23863636363636365, + "Chinese,Indonesian,Malay": 0.2727272727272727, + "Chinese,Filipino,Spanish": 0.25, + "Chinese,Filipino,Malay": 0.22727272727272727, + "Chinese,Spanish,Malay": 0.2727272727272727, + "Indonesian,Filipino,Spanish": 0.26704545454545453, + "Indonesian,Filipino,Malay": 0.32954545454545453, + "Indonesian,Spanish,Malay": 0.3125, + "Filipino,Spanish,Malay": 0.3068181818181818 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.17613636363636365, + "English,Vietnamese,Chinese,Filipino": 0.14772727272727273, + "English,Vietnamese,Chinese,Spanish": 0.17613636363636365, + "English,Vietnamese,Chinese,Malay": 0.19886363636363635, + "English,Vietnamese,Indonesian,Filipino": 0.1534090909090909, + "English,Vietnamese,Indonesian,Spanish": 0.16477272727272727, + "English,Vietnamese,Indonesian,Malay": 0.20454545454545456, + "English,Vietnamese,Filipino,Spanish": 0.18181818181818182, + "English,Vietnamese,Filipino,Malay": 0.16477272727272727, + "English,Vietnamese,Spanish,Malay": 0.17613636363636365, + "English,Chinese,Indonesian,Filipino": 0.125, + "English,Chinese,Indonesian,Spanish": 0.125, + "English,Chinese,Indonesian,Malay": 0.1590909090909091, + "English,Chinese,Filipino,Spanish": 0.1590909090909091, + "English,Chinese,Filipino,Malay": 0.125, + "English,Chinese,Spanish,Malay": 0.1590909090909091, + "English,Indonesian,Filipino,Spanish": 0.14772727272727273, + "English,Indonesian,Filipino,Malay": 0.14772727272727273, + "English,Indonesian,Spanish,Malay": 0.1534090909090909, + "English,Filipino,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Spanish": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Malay": 0.19886363636363635, + "Vietnamese,Chinese,Filipino,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Filipino,Malay": 0.17613636363636365, + "Vietnamese,Chinese,Spanish,Malay": 0.19318181818181818, + "Vietnamese,Indonesian,Filipino,Spanish": 0.19886363636363635, + "Vietnamese,Indonesian,Filipino,Malay": 0.2215909090909091, + "Vietnamese,Indonesian,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Filipino,Spanish,Malay": 0.23295454545454544, + "Chinese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "Chinese,Indonesian,Filipino,Malay": 0.1590909090909091, + "Chinese,Indonesian,Spanish,Malay": 0.17613636363636365, + "Chinese,Filipino,Spanish,Malay": 0.17045454545454544, + "Indonesian,Filipino,Spanish,Malay": 0.2215909090909091 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.09659090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.11363636363636363, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.13636363636363635, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.13068181818181818, + "English,Vietnamese,Chinese,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Spanish,Malay": 0.125, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.13068181818181818, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.13068181818181818, + "English,Vietnamese,Filipino,Spanish,Malay": 0.14204545454545456, + "English,Chinese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "English,Chinese,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Chinese,Indonesian,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Filipino,Spanish,Malay": 0.10227272727272728, + "English,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.17613636363636365, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.125 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625 + } + }, + "AC3_2": 0.38533482973043337, + "AC3_3": 0.2941256458577151, + "AC3_4": 0.2280787030817433, + "AC3_5": 0.17851365712221076, + "AC3_6": 0.13891108887817763, + "AC3_7": 0.10531697338859908 + }, + "prompt_5": { + "overall_acc": 0.3125, + "language_acc": { + "English": 0.32954545454545453, + "Vietnamese": 0.2556818181818182, + "Chinese": 0.42045454545454547, + "Indonesian": 0.2784090909090909, + "Filipino": 0.25, + "Spanish": 0.3465909090909091, + "Malay": 0.3068181818181818 + }, + "consistency_score_2": 0.4331709956709957, + "consistency_score_3": 0.23376623376623376, + "consistency_score_4": 0.1418831168831169, + "consistency_score_5": 0.09226190476190475, + "consistency_score_6": 0.06168831168831168, + "consistency_score_7": 0.03977272727272727, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.32954545454545453, + "English,Chinese": 0.5056818181818182, + "English,Indonesian": 0.38636363636363635, + "English,Filipino": 0.29545454545454547, + "English,Spanish": 0.36363636363636365, + "English,Malay": 0.35795454545454547, + "Vietnamese,Chinese": 0.42045454545454547, + "Vietnamese,Indonesian": 0.48863636363636365, + "Vietnamese,Filipino": 0.4034090909090909, + "Vietnamese,Spanish": 0.45454545454545453, + "Vietnamese,Malay": 0.45454545454545453, + "Chinese,Indonesian": 0.44886363636363635, + "Chinese,Filipino": 0.32954545454545453, + "Chinese,Spanish": 0.4602272727272727, + "Chinese,Malay": 0.4659090909090909, + "Indonesian,Filipino": 0.4715909090909091, + "Indonesian,Spanish": 0.4772727272727273, + "Indonesian,Malay": 0.5852272727272727, + "Filipino,Spanish": 0.42613636363636365, + "Filipino,Malay": 0.45454545454545453, + "Spanish,Malay": 0.5170454545454546 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2159090909090909, + "English,Vietnamese,Indonesian": 0.19886363636363635, + "English,Vietnamese,Filipino": 0.1534090909090909, + "English,Vietnamese,Spanish": 0.17045454545454544, + "English,Vietnamese,Malay": 0.17613636363636365, + "English,Chinese,Indonesian": 0.2215909090909091, + "English,Chinese,Filipino": 0.17613636363636365, + "English,Chinese,Spanish": 0.2215909090909091, + "English,Chinese,Malay": 0.22727272727272727, + "English,Indonesian,Filipino": 0.17613636363636365, + "English,Indonesian,Spanish": 0.19886363636363635, + "English,Indonesian,Malay": 0.23295454545454544, + "English,Filipino,Spanish": 0.16477272727272727, + "English,Filipino,Malay": 0.1590909090909091, + "English,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian": 0.23863636363636365, + "Vietnamese,Chinese,Filipino": 0.1875, + "Vietnamese,Chinese,Spanish": 0.2556818181818182, + "Vietnamese,Chinese,Malay": 0.23295454545454544, + "Vietnamese,Indonesian,Filipino": 0.25, + "Vietnamese,Indonesian,Spanish": 0.2727272727272727, + "Vietnamese,Indonesian,Malay": 0.3068181818181818, + "Vietnamese,Filipino,Spanish": 0.23295454545454544, + "Vietnamese,Filipino,Malay": 0.23863636363636365, + "Vietnamese,Spanish,Malay": 0.2784090909090909, + "Chinese,Indonesian,Filipino": 0.2215909090909091, + "Chinese,Indonesian,Spanish": 0.2784090909090909, + "Chinese,Indonesian,Malay": 0.3068181818181818, + "Chinese,Filipino,Spanish": 0.23863636363636365, + "Chinese,Filipino,Malay": 0.22727272727272727, + "Chinese,Spanish,Malay": 0.2897727272727273, + "Indonesian,Filipino,Spanish": 0.26704545454545453, + "Indonesian,Filipino,Malay": 0.3181818181818182, + "Indonesian,Spanish,Malay": 0.3465909090909091, + "Filipino,Spanish,Malay": 0.2897727272727273 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino": 0.10227272727272728, + "English,Vietnamese,Chinese,Spanish": 0.11931818181818182, + "English,Vietnamese,Chinese,Malay": 0.11363636363636363, + "English,Vietnamese,Indonesian,Filipino": 0.10227272727272728, + "English,Vietnamese,Indonesian,Spanish": 0.11363636363636363, + "English,Vietnamese,Indonesian,Malay": 0.13068181818181818, + "English,Vietnamese,Filipino,Spanish": 0.10795454545454546, + "English,Vietnamese,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Spanish,Malay": 0.125, + "English,Chinese,Indonesian,Filipino": 0.10795454545454546, + "English,Chinese,Indonesian,Spanish": 0.11931818181818182, + "English,Chinese,Indonesian,Malay": 0.14204545454545456, + "English,Chinese,Filipino,Spanish": 0.11363636363636363, + "English,Chinese,Filipino,Malay": 0.10795454545454546, + "English,Chinese,Spanish,Malay": 0.13636363636363635, + "English,Indonesian,Filipino,Spanish": 0.125, + "English,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Indonesian,Spanish,Malay": 0.14204545454545456, + "English,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Chinese,Indonesian,Filipino": 0.125, + "Vietnamese,Chinese,Indonesian,Spanish": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Filipino,Spanish": 0.1534090909090909, + "Vietnamese,Chinese,Filipino,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "Vietnamese,Indonesian,Filipino,Malay": 0.18181818181818182, + "Vietnamese,Indonesian,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Filipino,Spanish,Malay": 0.16477272727272727, + "Chinese,Indonesian,Filipino,Spanish": 0.1875, + "Chinese,Indonesian,Filipino,Malay": 0.1875, + "Chinese,Indonesian,Spanish,Malay": 0.21022727272727273, + "Chinese,Filipino,Spanish,Malay": 0.18181818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.23295454545454544 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Chinese,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.09090909090909091, + "English,Vietnamese,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "English,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Chinese,Indonesian,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + } + }, + "AC3_2": 0.36307148035769554, + "AC3_3": 0.26745913813826006, + "AC3_4": 0.19515898530896109, + "AC3_5": 0.14246323525892082, + "AC3_6": 0.10303687632821226, + "AC3_7": 0.07056451610900104 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4854368932038835 + }, + "prompt_2": { + "accuracy": 0.4563106796116505 + }, + "prompt_3": { + "accuracy": 0.4563106796116505 + }, + "prompt_4": { + "accuracy": 0.44660194174757284 + }, + "prompt_5": { + "accuracy": 0.44660194174757284 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3333333333333333 + }, + "prompt_2": { + "accuracy": 0.22857142857142856 + }, + "prompt_3": { + "accuracy": 0.2571428571428571 + }, + "prompt_4": { + "accuracy": 0.2571428571428571 + }, + "prompt_5": { + "accuracy": 0.37142857142857144 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3925233644859813 + }, + "prompt_2": { + "accuracy": 0.37383177570093457 + }, + "prompt_3": { + "accuracy": 0.3925233644859813 + }, + "prompt_4": { + "accuracy": 0.42990654205607476 + }, + "prompt_5": { + "accuracy": 0.411214953271028 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.23, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.2, + "history": 0.06666666666666667, + "literature": 0.3, + "politics": 0.4, + "culture": 0.1, + "film": 0.1, + "law": 0.5, + "geography": 0.3 + } + }, + "prompt_2": { + "accuracy": 0.27, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.2, + "history": 0.13333333333333333, + "literature": 0.2, + "politics": 0.5, + "culture": 0.1, + "film": 0.1, + "law": 0.5, + "geography": 0.3 + } + }, + "prompt_3": { + "accuracy": 0.26, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.0, + "history": 0.2, + "literature": 0.3, + "politics": 0.4, + "culture": 0.2, + "film": 0.1, + "law": 0.5, + "geography": 0.4 + } + }, + "prompt_4": { + "accuracy": 0.26, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.2, + "history": 0.13333333333333333, + "literature": 0.3, + "politics": 0.4, + "culture": 0.1, + "film": 0.1, + "law": 0.4, + "geography": 0.3 + } + }, + "prompt_5": { + "accuracy": 0.18, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.1, + "history": 0.13333333333333333, + "literature": 0.2, + "politics": 0.2, + "culture": 0.1, + "film": 0.0, + "law": 0.4, + "geography": 0.3 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.21065954355641694 + }, + "prompt_2": { + "bleu_score": 0.2142232484085738 + }, + "prompt_3": { + "bleu_score": 0.2162326225895175 + }, + "prompt_4": { + "bleu_score": 0.20594889002993072 + }, + "prompt_5": { + "bleu_score": 0.2074219065480958 + } }, "indommlu": { "prompt_1": -1, @@ -5130,179 +47135,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.10521697133397957 + }, + "prompt_2": { + "bleu_score": 0.11192980706890322 + }, + "prompt_3": { + "bleu_score": 0.11223812373265543 + }, + "prompt_4": { + "bleu_score": 0.11024817922293007 + }, + "prompt_5": { + "bleu_score": 0.1036724909096609 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.07298456978785127 + }, + "prompt_2": { + "bleu_score": 0.07738918313389209 + }, + "prompt_3": { + "bleu_score": 0.07700843808300069 + }, + "prompt_4": { + "bleu_score": 0.07589367507620545 + }, + "prompt_5": { + "bleu_score": 0.06807895407698869 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.16698011387064993 + }, + "prompt_2": { + "bleu_score": 0.16997315405620067 + }, + "prompt_3": { + "bleu_score": 0.17133327004115503 + }, + "prompt_4": { + "bleu_score": 0.16564083778213695 + }, + "prompt_5": { + "bleu_score": 0.15944602449292478 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.09475865107965513 + }, + "prompt_2": { + "bleu_score": 0.10045729015323383 + }, + "prompt_3": { + "bleu_score": 0.10067727687695545 + }, + "prompt_4": { + "bleu_score": 0.09837575885202184 + }, + "prompt_5": { + "bleu_score": 0.09323326096929711 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4282380396732789 + }, + "prompt_2": { + "accuracy": 0.42123687281213534 + }, + "prompt_3": { + "accuracy": 0.41656942823803966 + }, + "prompt_4": { + "accuracy": 0.40606767794632437 + }, + "prompt_5": { + "accuracy": 0.4235705950991832 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3954951734000715, + "category_acc": { + "high_school_european_history": 0.5304878048780488, + "business_ethics": 0.5353535353535354, + "clinical_knowledge": 0.4053030303030303, + "medical_genetics": 0.46464646464646464, + "high_school_us_history": 0.4482758620689655, + "high_school_physics": 0.25333333333333335, + "high_school_world_history": 0.597457627118644, + "virology": 0.3878787878787879, + "high_school_microeconomics": 0.3924050632911392, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.3939393939393939, + "high_school_biology": 0.44660194174757284, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.2775800711743772, + "philosophy": 0.3903225806451613, + "professional_medicine": 0.3763837638376384, + "nutrition": 0.46229508196721314, + "global_facts": 0.35353535353535354, + "machine_learning": 0.2972972972972973, + "security_studies": 0.48360655737704916, + "public_relations": 0.5045871559633027, + "professional_psychology": 0.39279869067103107, + "prehistory": 0.37770897832817335, + "anatomy": 0.31343283582089554, + "human_sexuality": 0.46923076923076923, + "college_medicine": 0.3953488372093023, + "high_school_government_and_politics": 0.5833333333333334, + "college_chemistry": 0.3434343434343434, + "logical_fallacies": 0.47530864197530864, + "high_school_geography": 0.4467005076142132, + "elementary_mathematics": 0.3156498673740053, + "human_aging": 0.45045045045045046, + "college_mathematics": 0.31313131313131315, + "high_school_psychology": 0.5533088235294118, + "formal_logic": 0.312, + "high_school_statistics": 0.2930232558139535, + "international_law": 0.5583333333333333, + "high_school_mathematics": 0.241635687732342, + "high_school_computer_science": 0.32323232323232326, + "conceptual_physics": 0.36324786324786323, + "miscellaneous": 0.5140664961636828, + "high_school_chemistry": 0.3564356435643564, + "marketing": 0.4206008583690987, + "professional_law": 0.3359425962165688, + "management": 0.49019607843137253, + "college_physics": 0.16831683168316833, + "jurisprudence": 0.4205607476635514, + "world_religions": 0.4823529411764706, + "sociology": 0.575, + "us_foreign_policy": 0.5555555555555556, + "high_school_macroeconomics": 0.40102827763496146, + "computer_security": 0.5353535353535354, + "moral_scenarios": 0.22371364653243847, + "moral_disputes": 0.3652173913043478, + "electrical_engineering": 0.3055555555555556, + "astronomy": 0.423841059602649, + "college_biology": 0.3916083916083916 + } + }, + "prompt_2": { + "accuracy": 0.40035752592062923, + "category_acc": { + "high_school_european_history": 0.5426829268292683, + "business_ethics": 0.5656565656565656, + "clinical_knowledge": 0.3977272727272727, + "medical_genetics": 0.43434343434343436, + "high_school_us_history": 0.4187192118226601, + "high_school_physics": 0.22666666666666666, + "high_school_world_history": 0.6101694915254238, + "virology": 0.3939393939393939, + "high_school_microeconomics": 0.379746835443038, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.3838383838383838, + "high_school_biology": 0.45307443365695793, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.30604982206405695, + "philosophy": 0.3870967741935484, + "professional_medicine": 0.3837638376383764, + "nutrition": 0.42295081967213116, + "global_facts": 0.29292929292929293, + "machine_learning": 0.35135135135135137, + "security_studies": 0.48770491803278687, + "public_relations": 0.4954128440366973, + "professional_psychology": 0.37479541734860883, + "prehistory": 0.38390092879256965, + "anatomy": 0.27611940298507465, + "human_sexuality": 0.46923076923076923, + "college_medicine": 0.36627906976744184, + "high_school_government_and_politics": 0.53125, + "college_chemistry": 0.29292929292929293, + "logical_fallacies": 0.4691358024691358, + "high_school_geography": 0.4619289340101523, + "elementary_mathematics": 0.3448275862068966, + "human_aging": 0.42792792792792794, + "college_mathematics": 0.3434343434343434, + "high_school_psychology": 0.5367647058823529, + "formal_logic": 0.328, + "high_school_statistics": 0.3302325581395349, + "international_law": 0.6, + "high_school_mathematics": 0.2342007434944238, + "high_school_computer_science": 0.37373737373737376, + "conceptual_physics": 0.36324786324786323, + "miscellaneous": 0.5191815856777494, + "high_school_chemistry": 0.31683168316831684, + "marketing": 0.4978540772532189, + "professional_law": 0.3548597521200261, + "management": 0.5588235294117647, + "college_physics": 0.1782178217821782, + "jurisprudence": 0.4485981308411215, + "world_religions": 0.5176470588235295, + "sociology": 0.59, + "us_foreign_policy": 0.5858585858585859, + "high_school_macroeconomics": 0.37789203084832906, + "computer_security": 0.5252525252525253, + "moral_scenarios": 0.2371364653243848, + "moral_disputes": 0.4318840579710145, + "electrical_engineering": 0.3541666666666667, + "astronomy": 0.41721854304635764, + "college_biology": 0.36363636363636365 + } + }, + "prompt_3": { + "accuracy": 0.4012155881301394, + "category_acc": { + "high_school_european_history": 0.5487804878048781, + "business_ethics": 0.5353535353535354, + "clinical_knowledge": 0.4015151515151515, + "medical_genetics": 0.47474747474747475, + "high_school_us_history": 0.45320197044334976, + "high_school_physics": 0.26, + "high_school_world_history": 0.5550847457627118, + "virology": 0.38181818181818183, + "high_school_microeconomics": 0.3670886075949367, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.37373737373737376, + "high_school_biology": 0.4627831715210356, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.3096085409252669, + "philosophy": 0.4129032258064516, + "professional_medicine": 0.36162361623616235, + "nutrition": 0.43934426229508194, + "global_facts": 0.36363636363636365, + "machine_learning": 0.36036036036036034, + "security_studies": 0.4344262295081967, + "public_relations": 0.5229357798165137, + "professional_psychology": 0.3895253682487725, + "prehistory": 0.3993808049535604, + "anatomy": 0.29850746268656714, + "human_sexuality": 0.43846153846153846, + "college_medicine": 0.3953488372093023, + "high_school_government_and_politics": 0.578125, + "college_chemistry": 0.29292929292929293, + "logical_fallacies": 0.5, + "high_school_geography": 0.4619289340101523, + "elementary_mathematics": 0.3183023872679045, + "human_aging": 0.481981981981982, + "college_mathematics": 0.3838383838383838, + "high_school_psychology": 0.5367647058823529, + "formal_logic": 0.28, + "high_school_statistics": 0.31627906976744186, + "international_law": 0.6, + "high_school_mathematics": 0.24907063197026022, + "high_school_computer_science": 0.3333333333333333, + "conceptual_physics": 0.36752136752136755, + "miscellaneous": 0.5179028132992327, + "high_school_chemistry": 0.3564356435643564, + "marketing": 0.45493562231759654, + "professional_law": 0.3542074363992172, + "management": 0.5196078431372549, + "college_physics": 0.19801980198019803, + "jurisprudence": 0.4205607476635514, + "world_religions": 0.48823529411764705, + "sociology": 0.615, + "us_foreign_policy": 0.5757575757575758, + "high_school_macroeconomics": 0.3341902313624679, + "computer_security": 0.5151515151515151, + "moral_scenarios": 0.24272930648769575, + "moral_disputes": 0.41739130434782606, + "electrical_engineering": 0.3680555555555556, + "astronomy": 0.46357615894039733, + "college_biology": 0.3776223776223776 + } + }, + "prompt_4": { + "accuracy": 0.4010010725777619, + "category_acc": { + "high_school_european_history": 0.5365853658536586, + "business_ethics": 0.5757575757575758, + "clinical_knowledge": 0.42045454545454547, + "medical_genetics": 0.3939393939393939, + "high_school_us_history": 0.43349753694581283, + "high_school_physics": 0.30666666666666664, + "high_school_world_history": 0.5635593220338984, + "virology": 0.3696969696969697, + "high_school_microeconomics": 0.37130801687763715, + "econometrics": 0.3185840707964602, + "college_computer_science": 0.3333333333333333, + "high_school_biology": 0.4886731391585761, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.31316725978647686, + "philosophy": 0.4, + "professional_medicine": 0.39114391143911437, + "nutrition": 0.4524590163934426, + "global_facts": 0.31313131313131315, + "machine_learning": 0.3153153153153153, + "security_studies": 0.430327868852459, + "public_relations": 0.5596330275229358, + "professional_psychology": 0.3993453355155483, + "prehistory": 0.38080495356037153, + "anatomy": 0.291044776119403, + "human_sexuality": 0.5, + "college_medicine": 0.37790697674418605, + "high_school_government_and_politics": 0.5572916666666666, + "college_chemistry": 0.2727272727272727, + "logical_fallacies": 0.5, + "high_school_geography": 0.48223350253807107, + "elementary_mathematics": 0.30238726790450926, + "human_aging": 0.44594594594594594, + "college_mathematics": 0.3333333333333333, + "high_school_psychology": 0.5514705882352942, + "formal_logic": 0.304, + "high_school_statistics": 0.30697674418604654, + "international_law": 0.6083333333333333, + "high_school_mathematics": 0.27137546468401486, + "high_school_computer_science": 0.36363636363636365, + "conceptual_physics": 0.3717948717948718, + "miscellaneous": 0.510230179028133, + "high_school_chemistry": 0.37623762376237624, + "marketing": 0.5064377682403434, + "professional_law": 0.3385518590998043, + "management": 0.5098039215686274, + "college_physics": 0.18811881188118812, + "jurisprudence": 0.411214953271028, + "world_religions": 0.49411764705882355, + "sociology": 0.59, + "us_foreign_policy": 0.6060606060606061, + "high_school_macroeconomics": 0.38303341902313626, + "computer_security": 0.46464646464646464, + "moral_scenarios": 0.2203579418344519, + "moral_disputes": 0.42318840579710143, + "electrical_engineering": 0.3611111111111111, + "astronomy": 0.3973509933774834, + "college_biology": 0.3706293706293706 + } + }, + "prompt_5": { + "accuracy": 0.4000715051841259, + "category_acc": { + "high_school_european_history": 0.5304878048780488, + "business_ethics": 0.5353535353535354, + "clinical_knowledge": 0.4318181818181818, + "medical_genetics": 0.4444444444444444, + "high_school_us_history": 0.4482758620689655, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.5423728813559322, + "virology": 0.3696969696969697, + "high_school_microeconomics": 0.3670886075949367, + "econometrics": 0.23008849557522124, + "college_computer_science": 0.3434343434343434, + "high_school_biology": 0.48220064724919093, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.3309608540925267, + "philosophy": 0.41935483870967744, + "professional_medicine": 0.3763837638376384, + "nutrition": 0.4065573770491803, + "global_facts": 0.36363636363636365, + "machine_learning": 0.2972972972972973, + "security_studies": 0.45081967213114754, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.3878887070376432, + "prehistory": 0.38390092879256965, + "anatomy": 0.2835820895522388, + "human_sexuality": 0.46153846153846156, + "college_medicine": 0.4011627906976744, + "high_school_government_and_politics": 0.5729166666666666, + "college_chemistry": 0.30303030303030304, + "logical_fallacies": 0.4691358024691358, + "high_school_geography": 0.47715736040609136, + "elementary_mathematics": 0.35278514588859416, + "human_aging": 0.4594594594594595, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.5349264705882353, + "formal_logic": 0.304, + "high_school_statistics": 0.3116279069767442, + "international_law": 0.5666666666666667, + "high_school_mathematics": 0.26022304832713755, + "high_school_computer_science": 0.40404040404040403, + "conceptual_physics": 0.358974358974359, + "miscellaneous": 0.5076726342710998, + "high_school_chemistry": 0.3415841584158416, + "marketing": 0.49356223175965663, + "professional_law": 0.33985649054142203, + "management": 0.5196078431372549, + "college_physics": 0.25742574257425743, + "jurisprudence": 0.42990654205607476, + "world_religions": 0.5117647058823529, + "sociology": 0.595, + "us_foreign_policy": 0.5252525252525253, + "high_school_macroeconomics": 0.35989717223650386, + "computer_security": 0.48484848484848486, + "moral_scenarios": 0.2371364653243848, + "moral_disputes": 0.4260869565217391, + "electrical_engineering": 0.3541666666666667, + "astronomy": 0.4304635761589404, + "college_biology": 0.40559440559440557 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.38558692421991086 + }, + "prompt_2": { + "accuracy": 0.3707280832095097 + }, + "prompt_3": { + "accuracy": 0.37964338781575035 + }, + "prompt_4": { + "accuracy": 0.36701337295690933 + }, + "prompt_5": { + "accuracy": 0.3521545319465082 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3686176836861768, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.35714285714285715, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.25, + "high_school_chemistry": 0.4583333333333333, + "high_school_biology": 0.4583333333333333, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.6538461538461539, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.43333333333333335, + "business_administration": 0.3157894736842105, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.4827586206896552, + "education_science": 0.47058823529411764, + "teacher_qualification": 0.6530612244897959, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.375, + "middle_school_politics": 0.5384615384615384, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.625, + "logic": 0.3333333333333333, + "law": 0.13793103448275862, + "chinese_language_and_literature": 0.39285714285714285, + "art_studies": 0.4473684210526316, + "professional_tour_guide": 0.5294117647058824, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.44, + "middle_school_history": 0.4444444444444444, + "civil_servant": 0.40384615384615385, + "sports_science": 0.4583333333333333, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.4117647058823529, + "accountant": 0.2962962962962963, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.24074074074074073, + "physician": 0.37037037037037035 + } + }, + "prompt_2": { + "accuracy": 0.36488169364881695, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.375, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.2857142857142857, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.1724137931034483, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.13043478260869565, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.19047619047619047, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.6538461538461539, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.48, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.43333333333333335, + "business_administration": 0.2894736842105263, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.5862068965517241, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.6122448979591837, + "high_school_politics": 0.25, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.46153846153846156, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.39285714285714285, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.4074074074074074, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.5588235294117647, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.56, + "middle_school_history": 0.5555555555555556, + "civil_servant": 0.40384615384615385, + "sports_science": 0.4583333333333333, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.5, + "clinical_medicine": 0.4444444444444444, + "urban_and_rural_planner": 0.49019607843137253, + "accountant": 0.2777777777777778, + "fire_engineer": 0.19444444444444445, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.3148148148148148, + "physician": 0.2962962962962963 + } + }, + "prompt_3": { + "accuracy": 0.3823163138231631, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.125, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.38095238095238093, + "college_physics": 0.4166666666666667, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.13043478260869565, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.375, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.5769230769230769, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.4166666666666667, + "business_administration": 0.34210526315789475, + "marxism": 0.5, + "mao_zedong_thought": 0.5517241379310345, + "education_science": 0.5294117647058824, + "teacher_qualification": 0.6122448979591837, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.4230769230769231, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.75, + "logic": 0.5185185185185185, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.39473684210526316, + "professional_tour_guide": 0.5, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.56, + "middle_school_history": 0.4074074074074074, + "civil_servant": 0.4423076923076923, + "sports_science": 0.375, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.4444444444444444, + "urban_and_rural_planner": 0.47058823529411764, + "accountant": 0.37037037037037035, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.3611111111111111, + "tax_accountant": 0.24074074074074073, + "physician": 0.37037037037037035 + } + }, + "prompt_4": { + "accuracy": 0.3561643835616438, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.20833333333333334, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.23809523809523808, + "college_physics": 0.4166666666666667, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.35714285714285715, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.4583333333333333, + "high_school_chemistry": 0.25, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.5, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.25, + "college_economics": 0.3, + "business_administration": 0.34210526315789475, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.6551724137931034, + "education_science": 0.4411764705882353, + "teacher_qualification": 0.5918367346938775, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.5, + "middle_school_politics": 0.5384615384615384, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.3333333333333333, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.39285714285714285, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.47058823529411764, + "legal_professional": 0.25, + "high_school_chinese": 0.25, + "high_school_history": 0.56, + "middle_school_history": 0.5185185185185185, + "civil_servant": 0.28846153846153844, + "sports_science": 0.5, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.48148148148148145, + "urban_and_rural_planner": 0.4117647058823529, + "accountant": 0.24074074074074073, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.2777777777777778, + "physician": 0.37037037037037035 + } + }, + "prompt_5": { + "accuracy": 0.3686176836861768, + "category_acc": { + "computer_network": 0.125, + "operating_system": 0.25, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.3333333333333333, + "college_physics": 0.125, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.38095238095238093, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.08695652173913043, + "high_school_physics": 0.375, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.4230769230769231, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.36666666666666664, + "business_administration": 0.2631578947368421, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.4411764705882353, + "teacher_qualification": 0.5510204081632653, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.5769230769230769, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.2962962962962963, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.6052631578947368, + "professional_tour_guide": 0.4411764705882353, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.44, + "middle_school_history": 0.5185185185185185, + "civil_servant": 0.34615384615384615, + "sports_science": 0.25, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.5490196078431373, + "accountant": 0.42592592592592593, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.25925925925925924, + "physician": 0.3333333333333333 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.40860215053763443 + }, + "prompt_2": { + "accuracy": 0.3978494623655914 + }, + "prompt_3": { + "accuracy": 0.4229390681003584 + }, + "prompt_4": { + "accuracy": 0.3727598566308244 + }, + "prompt_5": { + "accuracy": 0.4050179211469534 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.39172854429286824, + "category_acc": { + "agronomy": 0.3609467455621302, + "anatomy": 0.31756756756756754, + "ancient_chinese": 0.2865853658536585, + "arts": 0.4625, + "astronomy": 0.24242424242424243, + "business_ethics": 0.4258373205741627, + "chinese_civil_service_exam": 0.325, + "chinese_driving_rule": 0.4198473282442748, + "chinese_food_culture": 0.35294117647058826, + "chinese_foreign_policy": 0.4672897196261682, + "chinese_history": 0.5541795665634675, + "chinese_literature": 0.3088235294117647, + "chinese_teacher_qualification": 0.5083798882681564, + "clinical_knowledge": 0.3080168776371308, + "college_actuarial_science": 0.19811320754716982, + "college_education": 0.48598130841121495, + "college_engineering_hydrology": 0.3584905660377358, + "college_law": 0.37037037037037035, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.3113207547169811, + "college_medicine": 0.3516483516483517, + "computer_science": 0.39215686274509803, + "computer_security": 0.38011695906432746, + "conceptual_physics": 0.3129251700680272, + "construction_project_management": 0.34532374100719426, + "economics": 0.44654088050314467, + "education": 0.4785276073619632, + "electrical_engineering": 0.36627906976744184, + "elementary_chinese": 0.3253968253968254, + "elementary_commonsense": 0.3888888888888889, + "elementary_information_and_technology": 0.5084033613445378, + "elementary_mathematics": 0.28695652173913044, + "ethnology": 0.45185185185185184, + "food_science": 0.3916083916083916, + "genetics": 0.3522727272727273, + "global_facts": 0.436241610738255, + "high_school_biology": 0.2958579881656805, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.3728813559322034, + "high_school_mathematics": 0.27439024390243905, + "high_school_physics": 0.32727272727272727, + "high_school_politics": 0.4825174825174825, + "human_sexuality": 0.49206349206349204, + "international_law": 0.2918918918918919, + "journalism": 0.45930232558139533, + "jurisprudence": 0.38686131386861317, + "legal_and_moral_basis": 0.677570093457944, + "logical": 0.34959349593495936, + "machine_learning": 0.2786885245901639, + "management": 0.45714285714285713, + "marketing": 0.43333333333333335, + "marxist_theory": 0.48148148148148145, + "modern_chinese": 0.31896551724137934, + "nutrition": 0.4206896551724138, + "philosophy": 0.38095238095238093, + "professional_accounting": 0.38857142857142857, + "professional_law": 0.32701421800947866, + "professional_medicine": 0.30319148936170215, + "professional_psychology": 0.4698275862068966, + "public_relations": 0.46551724137931033, + "security_study": 0.4444444444444444, + "sociology": 0.4424778761061947, + "sports_science": 0.43636363636363634, + "traditional_chinese_medicine": 0.40540540540540543, + "virology": 0.3609467455621302, + "world_history": 0.4968944099378882, + "world_religions": 0.48125 + } + }, + "prompt_2": { + "accuracy": 0.39380072526333965, + "category_acc": { + "agronomy": 0.3431952662721893, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.3231707317073171, + "arts": 0.525, + "astronomy": 0.2787878787878788, + "business_ethics": 0.40669856459330145, + "chinese_civil_service_exam": 0.35625, + "chinese_driving_rule": 0.4732824427480916, + "chinese_food_culture": 0.3382352941176471, + "chinese_foreign_policy": 0.5420560747663551, + "chinese_history": 0.5572755417956656, + "chinese_literature": 0.3088235294117647, + "chinese_teacher_qualification": 0.49162011173184356, + "clinical_knowledge": 0.26582278481012656, + "college_actuarial_science": 0.19811320754716982, + "college_education": 0.5327102803738317, + "college_engineering_hydrology": 0.3867924528301887, + "college_law": 0.37037037037037035, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.33962264150943394, + "college_medicine": 0.32967032967032966, + "computer_science": 0.37745098039215685, + "computer_security": 0.42105263157894735, + "conceptual_physics": 0.3741496598639456, + "construction_project_management": 0.3669064748201439, + "economics": 0.4025157232704403, + "education": 0.4723926380368098, + "electrical_engineering": 0.38372093023255816, + "elementary_chinese": 0.35714285714285715, + "elementary_commonsense": 0.41414141414141414, + "elementary_information_and_technology": 0.5168067226890757, + "elementary_mathematics": 0.32608695652173914, + "ethnology": 0.362962962962963, + "food_science": 0.38461538461538464, + "genetics": 0.3181818181818182, + "global_facts": 0.40939597315436244, + "high_school_biology": 0.31952662721893493, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.3983050847457627, + "high_school_mathematics": 0.2621951219512195, + "high_school_physics": 0.2818181818181818, + "high_school_politics": 0.5104895104895105, + "human_sexuality": 0.48412698412698413, + "international_law": 0.31891891891891894, + "journalism": 0.45930232558139533, + "jurisprudence": 0.41362530413625304, + "legal_and_moral_basis": 0.6542056074766355, + "logical": 0.36585365853658536, + "machine_learning": 0.3442622950819672, + "management": 0.44761904761904764, + "marketing": 0.4666666666666667, + "marxist_theory": 0.43915343915343913, + "modern_chinese": 0.29310344827586204, + "nutrition": 0.36551724137931035, + "philosophy": 0.4095238095238095, + "professional_accounting": 0.34285714285714286, + "professional_law": 0.3127962085308057, + "professional_medicine": 0.27925531914893614, + "professional_psychology": 0.45689655172413796, + "public_relations": 0.4942528735632184, + "security_study": 0.5185185185185185, + "sociology": 0.43805309734513276, + "sports_science": 0.4, + "traditional_chinese_medicine": 0.33513513513513515, + "virology": 0.3727810650887574, + "world_history": 0.4968944099378882, + "world_religions": 0.48125 + } + }, + "prompt_3": { + "accuracy": 0.3936280435158004, + "category_acc": { + "agronomy": 0.3668639053254438, + "anatomy": 0.23648648648648649, + "ancient_chinese": 0.29878048780487804, + "arts": 0.50625, + "astronomy": 0.2606060606060606, + "business_ethics": 0.42105263157894735, + "chinese_civil_service_exam": 0.30625, + "chinese_driving_rule": 0.4351145038167939, + "chinese_food_culture": 0.3602941176470588, + "chinese_foreign_policy": 0.5046728971962616, + "chinese_history": 0.4953560371517028, + "chinese_literature": 0.31862745098039214, + "chinese_teacher_qualification": 0.547486033519553, + "clinical_knowledge": 0.29535864978902954, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.5046728971962616, + "college_engineering_hydrology": 0.39622641509433965, + "college_law": 0.39814814814814814, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.3018867924528302, + "college_medicine": 0.336996336996337, + "computer_science": 0.37254901960784315, + "computer_security": 0.4152046783625731, + "conceptual_physics": 0.30612244897959184, + "construction_project_management": 0.2805755395683453, + "economics": 0.44654088050314467, + "education": 0.4723926380368098, + "electrical_engineering": 0.4069767441860465, + "elementary_chinese": 0.34523809523809523, + "elementary_commonsense": 0.40404040404040403, + "elementary_information_and_technology": 0.542016806722689, + "elementary_mathematics": 0.28695652173913044, + "ethnology": 0.4666666666666667, + "food_science": 0.4125874125874126, + "genetics": 0.3465909090909091, + "global_facts": 0.40939597315436244, + "high_school_biology": 0.2603550295857988, + "high_school_chemistry": 0.30303030303030304, + "high_school_geography": 0.3813559322033898, + "high_school_mathematics": 0.23780487804878048, + "high_school_physics": 0.3181818181818182, + "high_school_politics": 0.4825174825174825, + "human_sexuality": 0.4365079365079365, + "international_law": 0.3081081081081081, + "journalism": 0.4186046511627907, + "jurisprudence": 0.3746958637469586, + "legal_and_moral_basis": 0.7149532710280374, + "logical": 0.3902439024390244, + "machine_learning": 0.29508196721311475, + "management": 0.4666666666666667, + "marketing": 0.4888888888888889, + "marxist_theory": 0.47619047619047616, + "modern_chinese": 0.29310344827586204, + "nutrition": 0.33793103448275863, + "philosophy": 0.37142857142857144, + "professional_accounting": 0.3942857142857143, + "professional_law": 0.3033175355450237, + "professional_medicine": 0.2712765957446808, + "professional_psychology": 0.5, + "public_relations": 0.5229885057471264, + "security_study": 0.4666666666666667, + "sociology": 0.4823008849557522, + "sports_science": 0.41818181818181815, + "traditional_chinese_medicine": 0.43783783783783786, + "virology": 0.38461538461538464, + "world_history": 0.4720496894409938, + "world_religions": 0.4625 + } + }, + "prompt_4": { + "accuracy": 0.3717838024520808, + "category_acc": { + "agronomy": 0.3727810650887574, + "anatomy": 0.32432432432432434, + "ancient_chinese": 0.31097560975609756, + "arts": 0.51875, + "astronomy": 0.3090909090909091, + "business_ethics": 0.4354066985645933, + "chinese_civil_service_exam": 0.35, + "chinese_driving_rule": 0.3511450381679389, + "chinese_food_culture": 0.40441176470588236, + "chinese_foreign_policy": 0.4672897196261682, + "chinese_history": 0.43653250773993807, + "chinese_literature": 0.2647058823529412, + "chinese_teacher_qualification": 0.45251396648044695, + "clinical_knowledge": 0.29957805907172996, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.4392523364485981, + "college_engineering_hydrology": 0.3584905660377358, + "college_law": 0.3148148148148148, + "college_mathematics": 0.3333333333333333, + "college_medical_statistics": 0.24528301886792453, + "college_medicine": 0.2967032967032967, + "computer_science": 0.3431372549019608, + "computer_security": 0.38596491228070173, + "conceptual_physics": 0.3741496598639456, + "construction_project_management": 0.302158273381295, + "economics": 0.41509433962264153, + "education": 0.49079754601226994, + "electrical_engineering": 0.436046511627907, + "elementary_chinese": 0.3412698412698413, + "elementary_commonsense": 0.3434343434343434, + "elementary_information_and_technology": 0.49159663865546216, + "elementary_mathematics": 0.24347826086956523, + "ethnology": 0.45185185185185184, + "food_science": 0.43356643356643354, + "genetics": 0.3068181818181818, + "global_facts": 0.33557046979865773, + "high_school_biology": 0.28402366863905326, + "high_school_chemistry": 0.21212121212121213, + "high_school_geography": 0.3474576271186441, + "high_school_mathematics": 0.2682926829268293, + "high_school_physics": 0.3, + "high_school_politics": 0.4125874125874126, + "human_sexuality": 0.42857142857142855, + "international_law": 0.2918918918918919, + "journalism": 0.38953488372093026, + "jurisprudence": 0.34549878345498786, + "legal_and_moral_basis": 0.6588785046728972, + "logical": 0.4065040650406504, + "machine_learning": 0.319672131147541, + "management": 0.44761904761904764, + "marketing": 0.40555555555555556, + "marxist_theory": 0.48677248677248675, + "modern_chinese": 0.29310344827586204, + "nutrition": 0.3931034482758621, + "philosophy": 0.2857142857142857, + "professional_accounting": 0.4228571428571429, + "professional_law": 0.3127962085308057, + "professional_medicine": 0.2765957446808511, + "professional_psychology": 0.39655172413793105, + "public_relations": 0.4367816091954023, + "security_study": 0.45925925925925926, + "sociology": 0.42035398230088494, + "sports_science": 0.3878787878787879, + "traditional_chinese_medicine": 0.2972972972972973, + "virology": 0.34911242603550297, + "world_history": 0.40993788819875776, + "world_religions": 0.48125 + } + }, + "prompt_5": { + "accuracy": 0.36763944051113795, + "category_acc": { + "agronomy": 0.3431952662721893, + "anatomy": 0.28378378378378377, + "ancient_chinese": 0.34146341463414637, + "arts": 0.45, + "astronomy": 0.296969696969697, + "business_ethics": 0.430622009569378, + "chinese_civil_service_exam": 0.38125, + "chinese_driving_rule": 0.42748091603053434, + "chinese_food_culture": 0.39705882352941174, + "chinese_foreign_policy": 0.37383177570093457, + "chinese_history": 0.4582043343653251, + "chinese_literature": 0.28431372549019607, + "chinese_teacher_qualification": 0.45251396648044695, + "clinical_knowledge": 0.29957805907172996, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.5327102803738317, + "college_engineering_hydrology": 0.3490566037735849, + "college_law": 0.3425925925925926, + "college_mathematics": 0.2761904761904762, + "college_medical_statistics": 0.33962264150943394, + "college_medicine": 0.31868131868131866, + "computer_science": 0.4117647058823529, + "computer_security": 0.43859649122807015, + "conceptual_physics": 0.32653061224489793, + "construction_project_management": 0.2949640287769784, + "economics": 0.33962264150943394, + "education": 0.4662576687116564, + "electrical_engineering": 0.37209302325581395, + "elementary_chinese": 0.3333333333333333, + "elementary_commonsense": 0.3787878787878788, + "elementary_information_and_technology": 0.48739495798319327, + "elementary_mathematics": 0.23043478260869565, + "ethnology": 0.37777777777777777, + "food_science": 0.36363636363636365, + "genetics": 0.3352272727272727, + "global_facts": 0.3691275167785235, + "high_school_biology": 0.3076923076923077, + "high_school_chemistry": 0.25, + "high_school_geography": 0.3559322033898305, + "high_school_mathematics": 0.29878048780487804, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.32867132867132864, + "human_sexuality": 0.38095238095238093, + "international_law": 0.33513513513513515, + "journalism": 0.3546511627906977, + "jurisprudence": 0.3771289537712895, + "legal_and_moral_basis": 0.5093457943925234, + "logical": 0.35772357723577236, + "machine_learning": 0.27049180327868855, + "management": 0.43333333333333335, + "marketing": 0.36666666666666664, + "marxist_theory": 0.4021164021164021, + "modern_chinese": 0.31896551724137934, + "nutrition": 0.41379310344827586, + "philosophy": 0.3904761904761905, + "professional_accounting": 0.4228571428571429, + "professional_law": 0.27014218009478674, + "professional_medicine": 0.2526595744680851, + "professional_psychology": 0.4353448275862069, + "public_relations": 0.5057471264367817, + "security_study": 0.37777777777777777, + "sociology": 0.4336283185840708, + "sports_science": 0.44242424242424244, + "traditional_chinese_medicine": 0.32432432432432434, + "virology": 0.33727810650887574, + "world_history": 0.43478260869565216, + "world_religions": 0.4125 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.18181818181818182 + }, + "prompt_2": { + "accuracy": 0.18181818181818182 + }, + "prompt_3": { + "accuracy": 0.3333333333333333 + }, + "prompt_4": { + "accuracy": 0.21212121212121213 + }, + "prompt_5": { + "accuracy": 0.24242424242424243 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.15681818181818183 + }, + "prompt_2": { + "accuracy": 0.17727272727272728 + }, + "prompt_3": { + "accuracy": 0.1409090909090909 + }, + "prompt_4": { + "accuracy": 0.12727272727272726 + }, + "prompt_5": { + "accuracy": 0.1590909090909091 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3247457627118644 + }, + "prompt_2": { + "accuracy": 0.3396610169491525 + }, + "prompt_3": { + "accuracy": 0.3298305084745763 + }, + "prompt_4": { + "accuracy": 0.336271186440678 + }, + "prompt_5": { + "accuracy": 0.3342372881355932 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7086761406133134 + }, + "prompt_2": { + "accuracy": 0.6297681376215407 + }, + "prompt_3": { + "accuracy": 0.6054599850411369 + }, + "prompt_4": { + "accuracy": 0.6679132385938669 + }, + "prompt_5": { + "accuracy": 0.6839940164547494 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5644292013718766 + }, + "prompt_2": { + "accuracy": 0.48064674179323863 + }, + "prompt_3": { + "accuracy": 0.5227829495345419 + }, + "prompt_4": { + "accuracy": 0.6060754532092112 + }, + "prompt_5": { + "accuracy": 0.5948064674179324 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.3843072810264833, + "rouge2": 0.15987370565036593, + "rougeL": 0.2925118402964389, + "avg_rouge": 0.278897608991096 + }, + "prompt_2": { + "rouge1": 0.4023878592108643, + "rouge2": 0.16620787536635565, + "rougeL": 0.30608185298992013, + "avg_rouge": 0.2915591958557133 + }, + "prompt_3": { + "rouge1": 0.37635396305069785, + "rouge2": 0.14925613615632702, + "rougeL": 0.28467211272664994, + "avg_rouge": 0.27009407064455826 + }, + "prompt_4": { + "rouge1": 0.35988573525651046, + "rouge2": 0.14231135209887094, + "rougeL": 0.27263007954479473, + "avg_rouge": 0.2582757223000587 + }, + "prompt_5": { + "rouge1": 0.38756186657067154, + "rouge2": 0.15299409733325245, + "rougeL": 0.29422223121733637, + "avg_rouge": 0.27825939837375346 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.19069741450448596, + "rouge2": 0.05719677875541152, + "rougeL": 0.14596979803825583, + "avg_rouge": 0.13128799709938443 + }, + "prompt_2": { + "rouge1": 0.18998113764359664, + "rouge2": 0.05864645725407613, + "rougeL": 0.14477960464242498, + "avg_rouge": 0.13113573318003258 + }, + "prompt_3": { + "rouge1": 0.19097851481129813, + "rouge2": 0.05932823019881826, + "rougeL": 0.14504508101280333, + "avg_rouge": 0.1317839420076399 + }, + "prompt_4": { + "rouge1": 0.18996472611471077, + "rouge2": 0.0586011995554055, + "rougeL": 0.144608770995444, + "avg_rouge": 0.13105823222185342 + }, + "prompt_5": { + "rouge1": 0.17900100087415544, + "rouge2": 0.0555183423640804, + "rougeL": 0.1383728888373575, + "avg_rouge": 0.12429741069186444 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8841743119266054 + }, + "prompt_2": { + "accuracy": 0.8727064220183486 + }, + "prompt_3": { + "accuracy": 0.8738532110091743 + }, + "prompt_4": { + "accuracy": 0.8784403669724771 + }, + "prompt_5": { + "accuracy": 0.6594036697247706 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6845637583892618 + }, + "prompt_2": { + "accuracy": 0.6768935762224353 + }, + "prompt_3": { + "accuracy": 0.7066155321188878 + }, + "prompt_4": { + "accuracy": 0.713326941514861 + }, + "prompt_5": { + "accuracy": 0.5896452540747843 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.644 + }, + "prompt_2": { + "accuracy": 0.629 + }, + "prompt_3": { + "accuracy": 0.6435 + }, + "prompt_4": { + "accuracy": 0.6455 + }, + "prompt_5": { + "accuracy": 0.6215 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4445 + }, + "prompt_2": { + "accuracy": 0.4795 + }, + "prompt_3": { + "accuracy": 0.4385 + }, + "prompt_4": { + "accuracy": 0.44 + }, + "prompt_5": { + "accuracy": 0.439 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5545 + }, + "prompt_2": { + "accuracy": 0.563 + }, + "prompt_3": { + "accuracy": 0.5725 + }, + "prompt_4": { + "accuracy": 0.534 + }, + "prompt_5": { + "accuracy": 0.5175 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5774647887323944 + }, + "prompt_2": { + "accuracy": 0.5774647887323944 + }, + "prompt_3": { + "accuracy": 0.4507042253521127 + }, + "prompt_4": { + "accuracy": 0.43661971830985913 + }, + "prompt_5": { + "accuracy": 0.5070422535211268 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.516245487364621 + }, + "prompt_2": { + "accuracy": 0.5595667870036101 + }, + "prompt_3": { + "accuracy": 0.5848375451263538 + }, + "prompt_4": { + "accuracy": 0.5306859205776173 + }, + "prompt_5": { + "accuracy": 0.5379061371841155 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6078431372549019 + }, + "prompt_2": { + "accuracy": 0.4877450980392157 + }, + "prompt_3": { + "accuracy": 0.6642156862745098 + }, + "prompt_4": { + "accuracy": 0.46078431372549017 + }, + "prompt_5": { + "accuracy": 0.47549019607843135 + } } }, "five_shot": { @@ -5412,50 +48607,722 @@ "model_link": "https://huggingface.co/THUDM/chatglm2-6b", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, + "prompt_1": { + "overall_acc": 0.3904761904761905, + "language_acc": { + "Malay": 0.31333333333333335, + "English": 0.5266666666666666, + "Vietnamese": 0.36, + "Spanish": 0.38, + "Indonesian": 0.30666666666666664, + "Filipino": 0.3333333333333333, + "Chinese": 0.5133333333333333 + }, + "consistency_score_2": 0.47809523809523813, + "consistency_score_3": 0.2868571428571428, + "consistency_score_4": 0.19428571428571426, + "consistency_score_5": 0.14222222222222222, + "consistency_score_6": 0.10952380952380954, + "consistency_score_7": 0.08666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.4066666666666667, + "Malay,Vietnamese": 0.56, + "Malay,Spanish": 0.4666666666666667, + "Malay,Indonesian": 0.6533333333333333, + "Malay,Filipino": 0.5066666666666667, + "Malay,Chinese": 0.4, + "English,Vietnamese": 0.48, + "English,Spanish": 0.5333333333333333, + "English,Indonesian": 0.44, + "English,Filipino": 0.4, + "English,Chinese": 0.5933333333333334, + "Vietnamese,Spanish": 0.41333333333333333, + "Vietnamese,Indonesian": 0.56, + "Vietnamese,Filipino": 0.5066666666666667, + "Vietnamese,Chinese": 0.4, + "Spanish,Indonesian": 0.47333333333333333, + "Spanish,Filipino": 0.44, + "Spanish,Chinese": 0.5, + "Indonesian,Filipino": 0.5466666666666666, + "Indonesian,Chinese": 0.37333333333333335, + "Filipino,Chinese": 0.38666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.29333333333333333, + "Malay,English,Spanish": 0.28, + "Malay,English,Indonesian": 0.3, + "Malay,English,Filipino": 0.25333333333333335, + "Malay,English,Chinese": 0.26, + "Malay,Vietnamese,Spanish": 0.2866666666666667, + "Malay,Vietnamese,Indonesian": 0.43333333333333335, + "Malay,Vietnamese,Filipino": 0.3466666666666667, + "Malay,Vietnamese,Chinese": 0.23333333333333334, + "Malay,Spanish,Indonesian": 0.34, + "Malay,Spanish,Filipino": 0.30666666666666664, + "Malay,Spanish,Chinese": 0.28, + "Malay,Indonesian,Filipino": 0.3933333333333333, + "Malay,Indonesian,Chinese": 0.26666666666666666, + "Malay,Filipino,Chinese": 0.24666666666666667, + "English,Vietnamese,Spanish": 0.29333333333333333, + "English,Vietnamese,Indonesian": 0.31333333333333335, + "English,Vietnamese,Filipino": 0.26666666666666666, + "English,Vietnamese,Chinese": 0.2866666666666667, + "English,Spanish,Indonesian": 0.30666666666666664, + "English,Spanish,Filipino": 0.26, + "English,Spanish,Chinese": 0.34, + "English,Indonesian,Filipino": 0.26666666666666666, + "English,Indonesian,Chinese": 0.25333333333333335, + "English,Filipino,Chinese": 0.26, + "Vietnamese,Spanish,Indonesian": 0.3, + "Vietnamese,Spanish,Filipino": 0.2733333333333333, + "Vietnamese,Spanish,Chinese": 0.22666666666666666, + "Vietnamese,Indonesian,Filipino": 0.35333333333333333, + "Vietnamese,Indonesian,Chinese": 0.24, + "Vietnamese,Filipino,Chinese": 0.23333333333333334, + "Spanish,Indonesian,Filipino": 0.31333333333333335, + "Spanish,Indonesian,Chinese": 0.25333333333333335, + "Spanish,Filipino,Chinese": 0.24, + "Indonesian,Filipino,Chinese": 0.24 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.20666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.24666666666666667, + "Malay,English,Vietnamese,Filipino": 0.20666666666666667, + "Malay,English,Vietnamese,Chinese": 0.16666666666666666, + "Malay,English,Spanish,Indonesian": 0.21333333333333335, + "Malay,English,Spanish,Filipino": 0.18666666666666668, + "Malay,English,Spanish,Chinese": 0.18666666666666668, + "Malay,English,Indonesian,Filipino": 0.20666666666666667, + "Malay,English,Indonesian,Chinese": 0.18666666666666668, + "Malay,English,Filipino,Chinese": 0.16, + "Malay,Vietnamese,Spanish,Indonesian": 0.24, + "Malay,Vietnamese,Spanish,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Indonesian,Filipino": 0.28, + "Malay,Vietnamese,Indonesian,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Filipino,Chinese": 0.17333333333333334, + "Malay,Spanish,Indonesian,Filipino": 0.25333333333333335, + "Malay,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Malay,Spanish,Filipino,Chinese": 0.18, + "Malay,Indonesian,Filipino,Chinese": 0.18, + "English,Vietnamese,Spanish,Indonesian": 0.22, + "English,Vietnamese,Spanish,Filipino": 0.19333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.18, + "English,Vietnamese,Indonesian,Filipino": 0.21333333333333335, + "English,Vietnamese,Indonesian,Chinese": 0.18, + "English,Vietnamese,Filipino,Chinese": 0.15333333333333332, + "English,Spanish,Indonesian,Filipino": 0.21333333333333335, + "English,Spanish,Indonesian,Chinese": 0.18, + "English,Spanish,Filipino,Chinese": 0.16, + "English,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino": 0.24, + "Vietnamese,Spanish,Indonesian,Chinese": 0.16, + "Vietnamese,Spanish,Filipino,Chinese": 0.15333333333333332, + "Vietnamese,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Spanish,Indonesian,Filipino,Chinese": 0.18 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.18, + "Malay,English,Vietnamese,Spanish,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Spanish,Chinese": 0.12, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.17333333333333334, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.12, + "Malay,English,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.12, + "Malay,English,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.14, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.14, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.17333333333333334, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.13333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.14, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + } + }, + "AC3_2": 0.4298663324484203, + "AC3_3": 0.3307408002082808, + "AC3_4": 0.2594695206629339, + "AC3_5": 0.20850218510190016, + "AC3_6": 0.17106575960297507, + "AC3_7": 0.141849634035474 + }, + "prompt_2": { + "overall_acc": 0.4123809523809524, + "language_acc": { + "Malay": 0.32, + "English": 0.5666666666666667, + "Vietnamese": 0.35333333333333333, + "Spanish": 0.41333333333333333, + "Indonesian": 0.36666666666666664, + "Filipino": 0.36666666666666664, + "Chinese": 0.5 + }, + "consistency_score_2": 0.4657142857142857, + "consistency_score_3": 0.27599999999999997, + "consistency_score_4": 0.18666666666666662, + "consistency_score_5": 0.13841269841269843, + "consistency_score_6": 0.11047619047619046, + "consistency_score_7": 0.09333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.4066666666666667, + "Malay,Vietnamese": 0.4866666666666667, + "Malay,Spanish": 0.43333333333333335, + "Malay,Indonesian": 0.62, + "Malay,Filipino": 0.49333333333333335, + "Malay,Chinese": 0.4, + "English,Vietnamese": 0.3933333333333333, + "English,Spanish": 0.49333333333333335, + "English,Indonesian": 0.43333333333333335, + "English,Filipino": 0.4533333333333333, + "English,Chinese": 0.6533333333333333, + "Vietnamese,Spanish": 0.4266666666666667, + "Vietnamese,Indonesian": 0.49333333333333335, + "Vietnamese,Filipino": 0.5333333333333333, + "Vietnamese,Chinese": 0.37333333333333335, + "Spanish,Indonesian": 0.4866666666666667, + "Spanish,Filipino": 0.44666666666666666, + "Spanish,Chinese": 0.5, + "Indonesian,Filipino": 0.5, + "Indonesian,Chinese": 0.36666666666666664, + "Filipino,Chinese": 0.38666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.25333333333333335, + "Malay,English,Spanish": 0.22666666666666666, + "Malay,English,Indonesian": 0.28, + "Malay,English,Filipino": 0.2733333333333333, + "Malay,English,Chinese": 0.2733333333333333, + "Malay,Vietnamese,Spanish": 0.24666666666666667, + "Malay,Vietnamese,Indonesian": 0.36666666666666664, + "Malay,Vietnamese,Filipino": 0.34, + "Malay,Vietnamese,Chinese": 0.22666666666666666, + "Malay,Spanish,Indonesian": 0.32666666666666666, + "Malay,Spanish,Filipino": 0.2733333333333333, + "Malay,Spanish,Chinese": 0.24666666666666667, + "Malay,Indonesian,Filipino": 0.37333333333333335, + "Malay,Indonesian,Chinese": 0.25333333333333335, + "Malay,Filipino,Chinese": 0.24, + "English,Vietnamese,Spanish": 0.22666666666666666, + "English,Vietnamese,Indonesian": 0.26666666666666666, + "English,Vietnamese,Filipino": 0.28, + "English,Vietnamese,Chinese": 0.2733333333333333, + "English,Spanish,Indonesian": 0.28, + "English,Spanish,Filipino": 0.26666666666666666, + "English,Spanish,Chinese": 0.37333333333333335, + "English,Indonesian,Filipino": 0.3, + "English,Indonesian,Chinese": 0.2733333333333333, + "English,Filipino,Chinese": 0.3, + "Vietnamese,Spanish,Indonesian": 0.2733333333333333, + "Vietnamese,Spanish,Filipino": 0.2733333333333333, + "Vietnamese,Spanish,Chinese": 0.22666666666666666, + "Vietnamese,Indonesian,Filipino": 0.35333333333333333, + "Vietnamese,Indonesian,Chinese": 0.21333333333333335, + "Vietnamese,Filipino,Chinese": 0.24666666666666667, + "Spanish,Indonesian,Filipino": 0.3, + "Spanish,Indonesian,Chinese": 0.26, + "Spanish,Filipino,Chinese": 0.24, + "Indonesian,Filipino,Chinese": 0.23333333333333334 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.14, + "Malay,English,Vietnamese,Indonesian": 0.20666666666666667, + "Malay,English,Vietnamese,Filipino": 0.21333333333333335, + "Malay,English,Vietnamese,Chinese": 0.17333333333333334, + "Malay,English,Spanish,Indonesian": 0.18, + "Malay,English,Spanish,Filipino": 0.16666666666666666, + "Malay,English,Spanish,Chinese": 0.17333333333333334, + "Malay,English,Indonesian,Filipino": 0.20666666666666667, + "Malay,English,Indonesian,Chinese": 0.18, + "Malay,English,Filipino,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Indonesian": 0.2, + "Malay,Vietnamese,Spanish,Filipino": 0.2, + "Malay,Vietnamese,Spanish,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.28, + "Malay,Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Filipino,Chinese": 0.18666666666666668, + "Malay,Spanish,Indonesian,Filipino": 0.20666666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Malay,Spanish,Filipino,Chinese": 0.16666666666666666, + "Malay,Indonesian,Filipino,Chinese": 0.18, + "English,Vietnamese,Spanish,Indonesian": 0.17333333333333334, + "English,Vietnamese,Spanish,Filipino": 0.16666666666666666, + "English,Vietnamese,Spanish,Chinese": 0.18, + "English,Vietnamese,Indonesian,Filipino": 0.22, + "English,Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "English,Vietnamese,Filipino,Chinese": 0.18, + "English,Spanish,Indonesian,Filipino": 0.21333333333333335, + "English,Spanish,Indonesian,Chinese": 0.19333333333333333, + "English,Spanish,Filipino,Chinese": 0.19333333333333333, + "English,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Vietnamese,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Vietnamese,Spanish,Filipino,Chinese": 0.15333333333333332, + "Vietnamese,Indonesian,Filipino,Chinese": 0.18, + "Spanish,Indonesian,Filipino,Chinese": 0.18 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.17333333333333334, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.14, + "Malay,English,Vietnamese,Filipino,Chinese": 0.14666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.14, + "Malay,English,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,English,Indonesian,Filipino,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.12, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.14, + "English,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.12, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.12, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334 + } + }, + "AC3_2": 0.4374279516080711, + "AC3_3": 0.33068068617221713, + "AC3_4": 0.25700052989880484, + "AC3_5": 0.20726005211003087, + "AC3_6": 0.17426663193822028, + "AC3_7": 0.15221594472821842 + }, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, + "prompt_1": { + "overall_acc": 0.33441558441558433, + "language_acc": { + "English": 0.4602272727272727, + "Vietnamese": 0.2784090909090909, + "Chinese": 0.4431818181818182, + "Indonesian": 0.2897727272727273, + "Filipino": 0.26704545454545453, + "Spanish": 0.375, + "Malay": 0.22727272727272727 + }, + "consistency_score_2": 0.42261904761904756, + "consistency_score_3": 0.22467532467532464, + "consistency_score_4": 0.1353896103896104, + "consistency_score_5": 0.08739177489177488, + "consistency_score_6": 0.05844155844155845, + "consistency_score_7": 0.03977272727272727, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3693181818181818, + "English,Chinese": 0.6022727272727273, + "English,Indonesian": 0.4034090909090909, + "English,Filipino": 0.32954545454545453, + "English,Spanish": 0.6022727272727273, + "English,Malay": 0.35795454545454547, + "Vietnamese,Chinese": 0.3409090909090909, + "Vietnamese,Indonesian": 0.4943181818181818, + "Vietnamese,Filipino": 0.4034090909090909, + "Vietnamese,Spanish": 0.38636363636363635, + "Vietnamese,Malay": 0.45454545454545453, + "Chinese,Indonesian": 0.36363636363636365, + "Chinese,Filipino": 0.2897727272727273, + "Chinese,Spanish": 0.5113636363636364, + "Chinese,Malay": 0.30113636363636365, + "Indonesian,Filipino": 0.44886363636363635, + "Indonesian,Spanish": 0.39204545454545453, + "Indonesian,Malay": 0.5681818181818182, + "Filipino,Spanish": 0.4090909090909091, + "Filipino,Malay": 0.4772727272727273, + "Spanish,Malay": 0.3693181818181818 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2159090909090909, + "English,Vietnamese,Indonesian": 0.2215909090909091, + "English,Vietnamese,Filipino": 0.17045454545454544, + "English,Vietnamese,Spanish": 0.2556818181818182, + "English,Vietnamese,Malay": 0.19318181818181818, + "English,Chinese,Indonesian": 0.25, + "English,Chinese,Filipino": 0.18181818181818182, + "English,Chinese,Spanish": 0.4090909090909091, + "English,Chinese,Malay": 0.19318181818181818, + "English,Indonesian,Filipino": 0.19886363636363635, + "English,Indonesian,Spanish": 0.2897727272727273, + "English,Indonesian,Malay": 0.25, + "English,Filipino,Spanish": 0.25, + "English,Filipino,Malay": 0.20454545454545456, + "English,Spanish,Malay": 0.24431818181818182, + "Vietnamese,Chinese,Indonesian": 0.1875, + "Vietnamese,Chinese,Filipino": 0.14772727272727273, + "Vietnamese,Chinese,Spanish": 0.20454545454545456, + "Vietnamese,Chinese,Malay": 0.16477272727272727, + "Vietnamese,Indonesian,Filipino": 0.25, + "Vietnamese,Indonesian,Spanish": 0.2215909090909091, + "Vietnamese,Indonesian,Malay": 0.3068181818181818, + "Vietnamese,Filipino,Spanish": 0.20454545454545456, + "Vietnamese,Filipino,Malay": 0.26136363636363635, + "Vietnamese,Spanish,Malay": 0.19886363636363635, + "Chinese,Indonesian,Filipino": 0.17613636363636365, + "Chinese,Indonesian,Spanish": 0.23295454545454544, + "Chinese,Indonesian,Malay": 0.2159090909090909, + "Chinese,Filipino,Spanish": 0.2159090909090909, + "Chinese,Filipino,Malay": 0.1590909090909091, + "Chinese,Spanish,Malay": 0.17613636363636365, + "Indonesian,Filipino,Spanish": 0.2215909090909091, + "Indonesian,Filipino,Malay": 0.3068181818181818, + "Indonesian,Spanish,Malay": 0.24431818181818182, + "Filipino,Spanish,Malay": 0.23863636363636365 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.13636363636363635, + "English,Vietnamese,Chinese,Filipino": 0.09090909090909091, + "English,Vietnamese,Chinese,Spanish": 0.16477272727272727, + "English,Vietnamese,Chinese,Malay": 0.10227272727272728, + "English,Vietnamese,Indonesian,Filipino": 0.11363636363636363, + "English,Vietnamese,Indonesian,Spanish": 0.16477272727272727, + "English,Vietnamese,Indonesian,Malay": 0.14204545454545456, + "English,Vietnamese,Filipino,Spanish": 0.13636363636363635, + "English,Vietnamese,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Spanish,Malay": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino": 0.11363636363636363, + "English,Chinese,Indonesian,Spanish": 0.19318181818181818, + "English,Chinese,Indonesian,Malay": 0.14772727272727273, + "English,Chinese,Filipino,Spanish": 0.1590909090909091, + "English,Chinese,Filipino,Malay": 0.10227272727272728, + "English,Chinese,Spanish,Malay": 0.14772727272727273, + "English,Indonesian,Filipino,Spanish": 0.1590909090909091, + "English,Indonesian,Filipino,Malay": 0.1534090909090909, + "English,Indonesian,Spanish,Malay": 0.17613636363636365, + "English,Filipino,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Filipino": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Spanish": 0.125, + "Vietnamese,Chinese,Indonesian,Malay": 0.125, + "Vietnamese,Chinese,Filipino,Spanish": 0.11363636363636363, + "Vietnamese,Chinese,Filipino,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "Vietnamese,Indonesian,Filipino,Malay": 0.18181818181818182, + "Vietnamese,Indonesian,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Filipino,Spanish,Malay": 0.14772727272727273, + "Chinese,Indonesian,Filipino,Spanish": 0.13068181818181818, + "Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.13068181818181818, + "Chinese,Filipino,Spanish,Malay": 0.11363636363636363, + "Indonesian,Filipino,Spanish,Malay": 0.16477272727272727 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.06818181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Filipino,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.10227272727272728, + "English,Vietnamese,Filipino,Spanish,Malay": 0.09659090909090909, + "English,Chinese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Chinese,Indonesian,Spanish,Malay": 0.10795454545454546, + "English,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + } + }, + "AC3_2": 0.3733789440944548, + "AC3_3": 0.26877535931962016, + "AC3_4": 0.19274540244867439, + "AC3_5": 0.138571178628128, + "AC3_6": 0.09949554575121138, + "AC3_7": 0.07109051467238377 + }, + "prompt_2": { + "overall_acc": 0.3400974025974026, + "language_acc": { + "English": 0.45454545454545453, + "Vietnamese": 0.32954545454545453, + "Chinese": 0.4431818181818182, + "Indonesian": 0.26136363636363635, + "Filipino": 0.2840909090909091, + "Spanish": 0.38636363636363635, + "Malay": 0.2215909090909091 + }, + "consistency_score_2": 0.4331709956709957, + "consistency_score_3": 0.2342532467532467, + "consistency_score_4": 0.14415584415584412, + "consistency_score_5": 0.09632034632034633, + "consistency_score_6": 0.06655844155844157, + "consistency_score_7": 0.045454545454545456, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.42045454545454547, + "English,Chinese": 0.5681818181818182, + "English,Indonesian": 0.375, + "English,Filipino": 0.3409090909090909, + "English,Spanish": 0.6193181818181818, + "English,Malay": 0.3465909090909091, + "Vietnamese,Chinese": 0.3693181818181818, + "Vietnamese,Indonesian": 0.4715909090909091, + "Vietnamese,Filipino": 0.4375, + "Vietnamese,Spanish": 0.4034090909090909, + "Vietnamese,Malay": 0.4772727272727273, + "Chinese,Indonesian": 0.30113636363636365, + "Chinese,Filipino": 0.2897727272727273, + "Chinese,Spanish": 0.5397727272727273, + "Chinese,Malay": 0.26704545454545453, + "Indonesian,Filipino": 0.45454545454545453, + "Indonesian,Spanish": 0.4659090909090909, + "Indonesian,Malay": 0.5965909090909091, + "Filipino,Spanish": 0.44886363636363635, + "Filipino,Malay": 0.5056818181818182, + "Spanish,Malay": 0.3977272727272727 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2556818181818182, + "English,Vietnamese,Indonesian": 0.23863636363636365, + "English,Vietnamese,Filipino": 0.21022727272727273, + "English,Vietnamese,Spanish": 0.30113636363636365, + "English,Vietnamese,Malay": 0.2159090909090909, + "English,Chinese,Indonesian": 0.1875, + "English,Chinese,Filipino": 0.1875, + "English,Chinese,Spanish": 0.3977272727272727, + "English,Chinese,Malay": 0.19318181818181818, + "English,Indonesian,Filipino": 0.18181818181818182, + "English,Indonesian,Spanish": 0.29545454545454547, + "English,Indonesian,Malay": 0.23863636363636365, + "English,Filipino,Spanish": 0.2727272727272727, + "English,Filipino,Malay": 0.1875, + "English,Spanish,Malay": 0.2727272727272727, + "Vietnamese,Chinese,Indonesian": 0.18181818181818182, + "Vietnamese,Chinese,Filipino": 0.1590909090909091, + "Vietnamese,Chinese,Spanish": 0.2215909090909091, + "Vietnamese,Chinese,Malay": 0.1590909090909091, + "Vietnamese,Indonesian,Filipino": 0.2556818181818182, + "Vietnamese,Indonesian,Spanish": 0.26136363636363635, + "Vietnamese,Indonesian,Malay": 0.32954545454545453, + "Vietnamese,Filipino,Spanish": 0.22727272727272727, + "Vietnamese,Filipino,Malay": 0.2840909090909091, + "Vietnamese,Spanish,Malay": 0.2215909090909091, + "Chinese,Indonesian,Filipino": 0.1534090909090909, + "Chinese,Indonesian,Spanish": 0.20454545454545456, + "Chinese,Indonesian,Malay": 0.17613636363636365, + "Chinese,Filipino,Spanish": 0.2159090909090909, + "Chinese,Filipino,Malay": 0.14204545454545456, + "Chinese,Spanish,Malay": 0.18181818181818182, + "Indonesian,Filipino,Spanish": 0.2840909090909091, + "Indonesian,Filipino,Malay": 0.32954545454545453, + "Indonesian,Spanish,Malay": 0.30113636363636365, + "Filipino,Spanish,Malay": 0.2727272727272727 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.13636363636363635, + "English,Vietnamese,Chinese,Filipino": 0.11931818181818182, + "English,Vietnamese,Chinese,Spanish": 0.1875, + "English,Vietnamese,Chinese,Malay": 0.11363636363636363, + "English,Vietnamese,Indonesian,Filipino": 0.14204545454545456, + "English,Vietnamese,Indonesian,Spanish": 0.19318181818181818, + "English,Vietnamese,Indonesian,Malay": 0.17045454545454544, + "English,Vietnamese,Filipino,Spanish": 0.17045454545454544, + "English,Vietnamese,Filipino,Malay": 0.13068181818181818, + "English,Vietnamese,Spanish,Malay": 0.17045454545454544, + "English,Chinese,Indonesian,Filipino": 0.09659090909090909, + "English,Chinese,Indonesian,Spanish": 0.1534090909090909, + "English,Chinese,Indonesian,Malay": 0.13068181818181818, + "English,Chinese,Filipino,Spanish": 0.1590909090909091, + "English,Chinese,Filipino,Malay": 0.10795454545454546, + "English,Chinese,Spanish,Malay": 0.1534090909090909, + "English,Indonesian,Filipino,Spanish": 0.1590909090909091, + "English,Indonesian,Filipino,Malay": 0.13068181818181818, + "English,Indonesian,Spanish,Malay": 0.19886363636363635, + "English,Filipino,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Filipino": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Spanish": 0.125, + "Vietnamese,Chinese,Indonesian,Malay": 0.125, + "Vietnamese,Chinese,Filipino,Spanish": 0.125, + "Vietnamese,Chinese,Filipino,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Indonesian,Filipino,Malay": 0.20454545454545456, + "Vietnamese,Indonesian,Spanish,Malay": 0.1875, + "Vietnamese,Filipino,Spanish,Malay": 0.1534090909090909, + "Chinese,Indonesian,Filipino,Spanish": 0.125, + "Chinese,Indonesian,Filipino,Malay": 0.10227272727272728, + "Chinese,Indonesian,Spanish,Malay": 0.13068181818181818, + "Chinese,Filipino,Spanish,Malay": 0.11363636363636363, + "Indonesian,Filipino,Spanish,Malay": 0.21022727272727273 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.10795454545454546, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.14204545454545456, + "English,Vietnamese,Filipino,Spanish,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "English,Chinese,Indonesian,Filipino,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Spanish,Malay": 0.10795454545454546, + "English,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + } + }, + "AC3_2": 0.38103284918686264, + "AC3_3": 0.2774225844333554, + "AC3_4": 0.20248507773659272, + "AC3_5": 0.15012358995279912, + "AC3_6": 0.11132928944562527, + "AC3_7": 0.08019138753900949 + }, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, + "prompt_1": { + "accuracy": 0.39805825242718446 + }, + "prompt_2": { + "accuracy": 0.42718446601941745 + }, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, + "prompt_1": { + "accuracy": 0.42857142857142855 + }, + "prompt_2": { + "accuracy": 0.47619047619047616 + }, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, + "prompt_1": { + "accuracy": 0.42990654205607476 + }, + "prompt_2": { + "accuracy": 0.42990654205607476 + }, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, + "prompt_1": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.3, + "history": 0.13333333333333333, + "literature": 0.2, + "politics": 0.4, + "culture": 0.5, + "film": 0.2, + "law": 0.4, + "geography": 0.5 + } + }, + "prompt_2": { + "accuracy": 0.29, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.4, + "history": 0.13333333333333333, + "literature": 0.2, + "politics": 0.5, + "culture": 0.3, + "film": 0.1, + "law": 0.4, + "geography": 0.5 + } + }, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, + "prompt_1": { + "bleu_score": 0.2058508751184727 + }, + "prompt_2": { + "bleu_score": 0.19313230912604182 + }, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 @@ -5468,175 +49335,425 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, + "prompt_1": { + "bleu_score": 0.11554135044576604 + }, + "prompt_2": { + "bleu_score": 0.12398841143203776 + }, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, + "prompt_1": { + "bleu_score": 0.039669140570556184 + }, + "prompt_2": { + "bleu_score": 0.04511474401482585 + }, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zho2eng": { - "prompt_1": -1, + "prompt_1": { + "bleu_score": 0.15159041341037452 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_zsm2eng": { - "prompt_1": -1, + "prompt_1": { + "bleu_score": 0.06807883561591403 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.46091015169194866 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mmlu_full": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.44190203789774757, + "category_acc": { + "high_school_european_history": 0.5914634146341463, + "business_ethics": 0.5454545454545454, + "clinical_knowledge": 0.42424242424242425, + "medical_genetics": 0.494949494949495, + "high_school_us_history": 0.6206896551724138, + "high_school_physics": 0.31333333333333335, + "high_school_world_history": 0.6483050847457628, + "virology": 0.3878787878787879, + "high_school_microeconomics": 0.510548523206751, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.32323232323232326, + "high_school_biology": 0.540453074433657, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.302491103202847, + "philosophy": 0.5193548387096775, + "professional_medicine": 0.36531365313653136, + "nutrition": 0.5081967213114754, + "global_facts": 0.25252525252525254, + "machine_learning": 0.38738738738738737, + "security_studies": 0.5532786885245902, + "public_relations": 0.5779816513761468, + "professional_psychology": 0.4288052373158756, + "prehistory": 0.4953560371517028, + "anatomy": 0.41044776119402987, + "human_sexuality": 0.4846153846153846, + "college_medicine": 0.37790697674418605, + "high_school_government_and_politics": 0.5572916666666666, + "college_chemistry": 0.37373737373737376, + "logical_fallacies": 0.43209876543209874, + "high_school_geography": 0.5634517766497462, + "elementary_mathematics": 0.3395225464190981, + "human_aging": 0.5, + "college_mathematics": 0.25252525252525254, + "high_school_psychology": 0.5827205882352942, + "formal_logic": 0.328, + "high_school_statistics": 0.31627906976744186, + "international_law": 0.6083333333333333, + "high_school_mathematics": 0.25650557620817843, + "high_school_computer_science": 0.48484848484848486, + "conceptual_physics": 0.36752136752136755, + "miscellaneous": 0.5971867007672634, + "high_school_chemistry": 0.3910891089108911, + "marketing": 0.7124463519313304, + "professional_law": 0.345075016307893, + "management": 0.6666666666666666, + "college_physics": 0.27722772277227725, + "jurisprudence": 0.5607476635514018, + "world_religions": 0.47058823529411764, + "sociology": 0.63, + "us_foreign_policy": 0.696969696969697, + "high_school_macroeconomics": 0.42159383033419023, + "computer_security": 0.5757575757575758, + "moral_scenarios": 0.203579418344519, + "moral_disputes": 0.47246376811594204, + "electrical_engineering": 0.4791666666666667, + "astronomy": 0.5099337748344371, + "college_biology": 0.42657342657342656 + } + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.46953937592867756 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c_eval_full": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.4726027397260274, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.375, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.38095238095238093, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.4827586206896552, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.13043478260869565, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.5862068965517241, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.5, + "high_school_chemistry": 0.5416666666666666, + "high_school_biology": 0.6666666666666666, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.7692307692307693, + "middle_school_physics": 0.7083333333333334, + "middle_school_chemistry": 0.72, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.4666666666666667, + "business_administration": 0.42105263157894735, + "marxism": 0.6666666666666666, + "mao_zedong_thought": 0.7586206896551724, + "education_science": 0.5588235294117647, + "teacher_qualification": 0.6122448979591837, + "high_school_politics": 0.6666666666666666, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.5769230769230769, + "middle_school_geography": 0.6470588235294118, + "modern_chinese_history": 0.6071428571428571, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.25925925925925924, + "law": 0.41379310344827586, + "chinese_language_and_literature": 0.5, + "art_studies": 0.7368421052631579, + "professional_tour_guide": 0.5588235294117647, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.25, + "high_school_history": 0.56, + "middle_school_history": 0.5925925925925926, + "civil_servant": 0.36538461538461536, + "sports_science": 0.375, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.5833333333333334, + "clinical_medicine": 0.5925925925925926, + "urban_and_rural_planner": 0.49019607843137253, + "accountant": 0.4444444444444444, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.37037037037037035, + "physician": 0.5 + } + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.5663082437275986 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cmmlu_full": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.47824209981005006, + "category_acc": { + "agronomy": 0.42011834319526625, + "anatomy": 0.43243243243243246, + "ancient_chinese": 0.2804878048780488, + "arts": 0.675, + "astronomy": 0.2727272727272727, + "business_ethics": 0.44019138755980863, + "chinese_civil_service_exam": 0.48125, + "chinese_driving_rule": 0.6183206106870229, + "chinese_food_culture": 0.41911764705882354, + "chinese_foreign_policy": 0.514018691588785, + "chinese_history": 0.48297213622291024, + "chinese_literature": 0.38235294117647056, + "chinese_teacher_qualification": 0.5754189944134078, + "clinical_knowledge": 0.4177215189873418, + "college_actuarial_science": 0.330188679245283, + "college_education": 0.616822429906542, + "college_engineering_hydrology": 0.37735849056603776, + "college_law": 0.4074074074074074, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.37735849056603776, + "college_medicine": 0.46153846153846156, + "computer_science": 0.44607843137254904, + "computer_security": 0.5730994152046783, + "conceptual_physics": 0.6054421768707483, + "construction_project_management": 0.3381294964028777, + "economics": 0.4528301886792453, + "education": 0.5644171779141104, + "electrical_engineering": 0.436046511627907, + "elementary_chinese": 0.4166666666666667, + "elementary_commonsense": 0.5, + "elementary_information_and_technology": 0.6428571428571429, + "elementary_mathematics": 0.40869565217391307, + "ethnology": 0.45925925925925926, + "food_science": 0.44755244755244755, + "genetics": 0.4034090909090909, + "global_facts": 0.5637583892617449, + "high_school_biology": 0.5502958579881657, + "high_school_chemistry": 0.42424242424242425, + "high_school_geography": 0.4067796610169492, + "high_school_mathematics": 0.32926829268292684, + "high_school_physics": 0.42727272727272725, + "high_school_politics": 0.5454545454545454, + "human_sexuality": 0.5, + "international_law": 0.3891891891891892, + "journalism": 0.47093023255813954, + "jurisprudence": 0.4768856447688564, + "legal_and_moral_basis": 0.8364485981308412, + "logical": 0.4146341463414634, + "machine_learning": 0.39344262295081966, + "management": 0.6285714285714286, + "marketing": 0.5277777777777778, + "marxist_theory": 0.5767195767195767, + "modern_chinese": 0.3793103448275862, + "nutrition": 0.4827586206896552, + "philosophy": 0.580952380952381, + "professional_accounting": 0.5542857142857143, + "professional_law": 0.3886255924170616, + "professional_medicine": 0.3484042553191489, + "professional_psychology": 0.5775862068965517, + "public_relations": 0.5, + "security_study": 0.6222222222222222, + "sociology": 0.5265486725663717, + "sports_science": 0.4666666666666667, + "traditional_chinese_medicine": 0.44324324324324327, + "virology": 0.47337278106508873, + "world_history": 0.5652173913043478, + "world_religions": 0.55625 + } + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "zbench": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.42424242424242425 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ind_emotion": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.21363636363636362 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "ocnli": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.34508474576271186 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "c3": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.737097980553478 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dream": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.7687408133268006 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "samsum": { - "prompt_1": -1, + "prompt_1": { + "rouge1": 0.37124840060817865, + "rouge2": 0.15019293414024326, + "rougeL": 0.2818319815775708, + "avg_rouge": 0.2677577721086642 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "dialogsum": { - "prompt_1": -1, + "prompt_1": { + "rouge1": 0.19256837023842613, + "rouge2": 0.0594379521412535, + "rougeL": 0.14637886246068085, + "avg_rouge": 0.13279506161345347 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "sst2": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.8818807339449541 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "cola": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.6874400767018217 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qqp": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.6575 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mnli": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.586 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "qnli": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.6465 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5492957746478874 + }, + "prompt_2": { + "accuracy": 0.4788732394366197 + }, + "prompt_3": { + "accuracy": 0.5070422535211268 + }, + "prompt_4": { + "accuracy": 0.4647887323943662 + }, + "prompt_5": { + "accuracy": 0.4647887323943662 + } }, "rte": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.6209386281588448 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "mrpc": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.47549019607843135 + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, @@ -5785,10 +49902,38 @@ "prompt_5": -1 }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, + "prompt_1": { + "accuracy": 0.37, + "category_acc": { + "brand": 0.4, + "demographics": 0.6, + "biology": 0.3, + "history": 0.4, + "literature": 0.1, + "politics": 0.6, + "culture": 0.1, + "film": 0.3, + "law": 0.3, + "geography": 0.7 + } + }, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": { + "accuracy": 0.37, + "category_acc": { + "brand": 0.5, + "demographics": 0.2, + "biology": 0.3, + "history": 0.5333333333333333, + "literature": 0.1, + "politics": 0.4, + "culture": 0.2, + "film": 0.3, + "law": 0.4, + "geography": 0.6 + } + }, "prompt_5": -1 }, "sing2eng": { @@ -5884,9 +50029,15 @@ }, "ind_emotion": { "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, + "prompt_2": { + "accuracy": 0.3795454545454545 + }, + "prompt_3": { + "accuracy": 0.3568181818181818 + }, + "prompt_4": { + "accuracy": 0.4113636363636364 + }, "prompt_5": -1 }, "ocnli": { @@ -5960,10 +50111,18 @@ "prompt_5": -1 }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, + "prompt_1": { + "accuracy": 0.39436619718309857 + }, + "prompt_2": { + "accuracy": 0.5070422535211268 + }, + "prompt_3": { + "accuracy": 0.49295774647887325 + }, + "prompt_4": { + "accuracy": 0.43661971830985913 + }, "prompt_5": -1 }, "rte": { @@ -6088,53 +50247,1733 @@ "model_link": "https://huggingface.co/baichuan-inc/Baichuan-7B", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.26666666666666666, + "language_acc": { + "Malay": 0.3, + "English": 0.2733333333333333, + "Vietnamese": 0.31333333333333335, + "Spanish": 0.22, + "Indonesian": 0.26, + "Filipino": 0.23333333333333334, + "Chinese": 0.26666666666666666 + }, + "consistency_score_2": 0.38603174603174606, + "consistency_score_3": 0.19123809523809526, + "consistency_score_4": 0.10895238095238094, + "consistency_score_5": 0.06857142857142857, + "consistency_score_6": 0.04666666666666667, + "consistency_score_7": 0.03333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.44, + "Malay,Vietnamese": 0.3, + "Malay,Spanish": 0.37333333333333335, + "Malay,Indonesian": 0.5533333333333333, + "Malay,Filipino": 0.4666666666666667, + "Malay,Chinese": 0.31333333333333335, + "English,Vietnamese": 0.26666666666666666, + "English,Spanish": 0.3933333333333333, + "English,Indonesian": 0.5066666666666667, + "English,Filipino": 0.44, + "English,Chinese": 0.4, + "Vietnamese,Spanish": 0.32666666666666666, + "Vietnamese,Indonesian": 0.34, + "Vietnamese,Filipino": 0.3466666666666667, + "Vietnamese,Chinese": 0.3, + "Spanish,Indonesian": 0.46, + "Spanish,Filipino": 0.4, + "Spanish,Chinese": 0.38666666666666666, + "Indonesian,Filipino": 0.4266666666666667, + "Indonesian,Chinese": 0.32666666666666666, + "Filipino,Chinese": 0.34 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.12, + "Malay,English,Spanish": 0.23333333333333334, + "Malay,English,Indonesian": 0.30666666666666664, + "Malay,English,Filipino": 0.28, + "Malay,English,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Spanish": 0.12666666666666668, + "Malay,Vietnamese,Indonesian": 0.18, + "Malay,Vietnamese,Filipino": 0.15333333333333332, + "Malay,Vietnamese,Chinese": 0.1, + "Malay,Spanish,Indonesian": 0.28, + "Malay,Spanish,Filipino": 0.22666666666666666, + "Malay,Spanish,Chinese": 0.16666666666666666, + "Malay,Indonesian,Filipino": 0.31333333333333335, + "Malay,Indonesian,Chinese": 0.19333333333333333, + "Malay,Filipino,Chinese": 0.19333333333333333, + "English,Vietnamese,Spanish": 0.12666666666666668, + "English,Vietnamese,Indonesian": 0.16, + "English,Vietnamese,Filipino": 0.14, + "English,Vietnamese,Chinese": 0.12, + "English,Spanish,Indonesian": 0.28, + "English,Spanish,Filipino": 0.22, + "English,Spanish,Chinese": 0.22666666666666666, + "English,Indonesian,Filipino": 0.2733333333333333, + "English,Indonesian,Chinese": 0.22, + "English,Filipino,Chinese": 0.20666666666666667, + "Vietnamese,Spanish,Indonesian": 0.17333333333333334, + "Vietnamese,Spanish,Filipino": 0.16666666666666666, + "Vietnamese,Spanish,Chinese": 0.12666666666666668, + "Vietnamese,Indonesian,Filipino": 0.16666666666666666, + "Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "Vietnamese,Filipino,Chinese": 0.13333333333333333, + "Spanish,Indonesian,Filipino": 0.25333333333333335, + "Spanish,Indonesian,Chinese": 0.18666666666666668, + "Spanish,Filipino,Chinese": 0.18, + "Indonesian,Filipino,Chinese": 0.16 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.08, + "Malay,English,Vietnamese,Indonesian": 0.09333333333333334, + "Malay,English,Vietnamese,Filipino": 0.08, + "Malay,English,Vietnamese,Chinese": 0.06, + "Malay,English,Spanish,Indonesian": 0.18666666666666668, + "Malay,English,Spanish,Filipino": 0.15333333333333332, + "Malay,English,Spanish,Chinese": 0.13333333333333333, + "Malay,English,Indonesian,Filipino": 0.22666666666666666, + "Malay,English,Indonesian,Chinese": 0.14, + "Malay,English,Filipino,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Filipino": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.1, + "Malay,Vietnamese,Indonesian,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.06, + "Malay,Spanish,Indonesian,Filipino": 0.18, + "Malay,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,Indonesian,Filipino,Chinese": 0.14, + "English,Vietnamese,Spanish,Indonesian": 0.09333333333333334, + "English,Vietnamese,Spanish,Filipino": 0.08666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.07333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.08666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.06, + "English,Vietnamese,Filipino,Chinese": 0.08, + "English,Spanish,Indonesian,Filipino": 0.16666666666666666, + "English,Spanish,Indonesian,Chinese": 0.14666666666666667, + "English,Spanish,Filipino,Chinese": 0.12666666666666668, + "English,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Vietnamese,Spanish,Indonesian,Filipino": 0.1, + "Vietnamese,Spanish,Indonesian,Chinese": 0.06666666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.08, + "Vietnamese,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.06, + "Malay,English,Vietnamese,Spanish,Chinese": 0.05333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.04666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.13333333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.06, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.04666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.04, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.1, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.06, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.04, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.06, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.04, + "English,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.04666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.04, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.04666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.04, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 + } + }, + "AC3_2": 0.31543450060018063, + "AC3_3": 0.22273987793249928, + "AC3_4": 0.1546991209865363, + "AC3_5": 0.10909090905836778, + "AC3_6": 0.07943262408812433, + "AC3_7": 0.05925925923950617 + }, + "prompt_2": { + "overall_acc": 0.29714285714285715, + "language_acc": { + "Malay": 0.3, + "English": 0.36, + "Vietnamese": 0.24666666666666667, + "Spanish": 0.30666666666666664, + "Indonesian": 0.2733333333333333, + "Filipino": 0.2866666666666667, + "Chinese": 0.30666666666666664 + }, + "consistency_score_2": 0.3196825396825396, + "consistency_score_3": 0.12857142857142856, + "consistency_score_4": 0.06057142857142856, + "consistency_score_5": 0.033015873015873005, + "consistency_score_6": 0.02, + "consistency_score_7": 0.013333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.34, + "Malay,Vietnamese": 0.3333333333333333, + "Malay,Spanish": 0.3, + "Malay,Indonesian": 0.36666666666666664, + "Malay,Filipino": 0.38, + "Malay,Chinese": 0.31333333333333335, + "English,Vietnamese": 0.34, + "English,Spanish": 0.3466666666666667, + "English,Indonesian": 0.3933333333333333, + "English,Filipino": 0.32666666666666666, + "English,Chinese": 0.32666666666666666, + "Vietnamese,Spanish": 0.25333333333333335, + "Vietnamese,Indonesian": 0.37333333333333335, + "Vietnamese,Filipino": 0.26666666666666666, + "Vietnamese,Chinese": 0.3, + "Spanish,Indonesian": 0.3, + "Spanish,Filipino": 0.22666666666666666, + "Spanish,Chinese": 0.26666666666666666, + "Indonesian,Filipino": 0.3466666666666667, + "Indonesian,Chinese": 0.3, + "Filipino,Chinese": 0.31333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.15333333333333332, + "Malay,English,Spanish": 0.14, + "Malay,English,Indonesian": 0.18, + "Malay,English,Filipino": 0.16, + "Malay,English,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish": 0.1, + "Malay,Vietnamese,Indonesian": 0.14666666666666667, + "Malay,Vietnamese,Filipino": 0.12, + "Malay,Vietnamese,Chinese": 0.13333333333333333, + "Malay,Spanish,Indonesian": 0.13333333333333333, + "Malay,Spanish,Filipino": 0.13333333333333333, + "Malay,Spanish,Chinese": 0.12, + "Malay,Indonesian,Filipino": 0.16, + "Malay,Indonesian,Chinese": 0.14666666666666667, + "Malay,Filipino,Chinese": 0.14, + "English,Vietnamese,Spanish": 0.11333333333333333, + "English,Vietnamese,Indonesian": 0.16666666666666666, + "English,Vietnamese,Filipino": 0.12666666666666668, + "English,Vietnamese,Chinese": 0.10666666666666667, + "English,Spanish,Indonesian": 0.17333333333333334, + "English,Spanish,Filipino": 0.11333333333333333, + "English,Spanish,Chinese": 0.13333333333333333, + "English,Indonesian,Filipino": 0.16, + "English,Indonesian,Chinese": 0.14666666666666667, + "English,Filipino,Chinese": 0.11333333333333333, + "Vietnamese,Spanish,Indonesian": 0.12666666666666668, + "Vietnamese,Spanish,Filipino": 0.08, + "Vietnamese,Spanish,Chinese": 0.09333333333333334, + "Vietnamese,Indonesian,Filipino": 0.14, + "Vietnamese,Indonesian,Chinese": 0.12666666666666668, + "Vietnamese,Filipino,Chinese": 0.08666666666666667, + "Spanish,Indonesian,Filipino": 0.11333333333333333, + "Spanish,Indonesian,Chinese": 0.10666666666666667, + "Spanish,Filipino,Chinese": 0.06666666666666667, + "Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.04666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.08, + "Malay,English,Vietnamese,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Chinese": 0.05333333333333334, + "Malay,English,Spanish,Indonesian": 0.08, + "Malay,English,Spanish,Filipino": 0.08, + "Malay,English,Spanish,Chinese": 0.05333333333333334, + "Malay,English,Indonesian,Filipino": 0.08666666666666667, + "Malay,English,Indonesian,Chinese": 0.08666666666666667, + "Malay,English,Filipino,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.05333333333333334, + "Malay,Vietnamese,Spanish,Filipino": 0.04666666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.07333333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.05333333333333334, + "Malay,Spanish,Indonesian,Filipino": 0.06666666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.06, + "Malay,Spanish,Filipino,Chinese": 0.04, + "Malay,Indonesian,Filipino,Chinese": 0.08, + "English,Vietnamese,Spanish,Indonesian": 0.07333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.05333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.04, + "English,Vietnamese,Indonesian,Filipino": 0.07333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.05333333333333334, + "English,Vietnamese,Filipino,Chinese": 0.04, + "English,Spanish,Indonesian,Filipino": 0.06666666666666667, + "English,Spanish,Indonesian,Chinese": 0.08, + "English,Spanish,Filipino,Chinese": 0.05333333333333334, + "English,Indonesian,Filipino,Chinese": 0.06, + "Vietnamese,Spanish,Indonesian,Filipino": 0.06666666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.04, + "Vietnamese,Spanish,Filipino,Chinese": 0.03333333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Spanish,Indonesian,Filipino,Chinese": 0.04 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.03333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.03333333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.013333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.04666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.03333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.02666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.04666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.04, + "Malay,English,Spanish,Filipino,Chinese": 0.03333333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.04, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.02666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.02, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.04, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.04, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.02666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.02666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.02666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.02666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.013333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.013333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.02, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334 + } + }, + "AC3_2": 0.3080008822381338, + "AC3_3": 0.17948226266157893, + "AC3_4": 0.10062984935571163, + "AC3_5": 0.05942857141057142, + "AC3_6": 0.03747747746566025, + "AC3_7": 0.02552147238441793 + }, + "prompt_3": { + "overall_acc": 0.2676190476190476, + "language_acc": { + "Malay": 0.28, + "English": 0.29333333333333333, + "Vietnamese": 0.24, + "Spanish": 0.24, + "Indonesian": 0.26, + "Filipino": 0.24, + "Chinese": 0.32 + }, + "consistency_score_2": 0.48952380952380975, + "consistency_score_3": 0.3100952380952381, + "consistency_score_4": 0.2196190476190476, + "consistency_score_5": 0.16666666666666666, + "consistency_score_6": 0.13142857142857142, + "consistency_score_7": 0.10666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.6266666666666667, + "Malay,Vietnamese": 0.47333333333333333, + "Malay,Spanish": 0.4866666666666667, + "Malay,Indonesian": 0.7, + "Malay,Filipino": 0.5333333333333333, + "Malay,Chinese": 0.4266666666666667, + "English,Vietnamese": 0.4533333333333333, + "English,Spanish": 0.49333333333333335, + "English,Indonesian": 0.6266666666666667, + "English,Filipino": 0.5466666666666666, + "English,Chinese": 0.47333333333333333, + "Vietnamese,Spanish": 0.3933333333333333, + "Vietnamese,Indonesian": 0.5, + "Vietnamese,Filipino": 0.44, + "Vietnamese,Chinese": 0.32, + "Spanish,Indonesian": 0.47333333333333333, + "Spanish,Filipino": 0.49333333333333335, + "Spanish,Chinese": 0.43333333333333335, + "Indonesian,Filipino": 0.54, + "Indonesian,Chinese": 0.41333333333333333, + "Filipino,Chinese": 0.43333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.3466666666666667, + "Malay,English,Spanish": 0.35333333333333333, + "Malay,English,Indonesian": 0.52, + "Malay,English,Filipino": 0.41333333333333333, + "Malay,English,Chinese": 0.32666666666666666, + "Malay,Vietnamese,Spanish": 0.28, + "Malay,Vietnamese,Indonesian": 0.38, + "Malay,Vietnamese,Filipino": 0.30666666666666664, + "Malay,Vietnamese,Chinese": 0.21333333333333335, + "Malay,Spanish,Indonesian": 0.36666666666666664, + "Malay,Spanish,Filipino": 0.32666666666666666, + "Malay,Spanish,Chinese": 0.26, + "Malay,Indonesian,Filipino": 0.44, + "Malay,Indonesian,Chinese": 0.29333333333333333, + "Malay,Filipino,Chinese": 0.28, + "English,Vietnamese,Spanish": 0.2733333333333333, + "English,Vietnamese,Indonesian": 0.34, + "English,Vietnamese,Filipino": 0.3, + "English,Vietnamese,Chinese": 0.22666666666666666, + "English,Spanish,Indonesian": 0.36, + "English,Spanish,Filipino": 0.3333333333333333, + "English,Spanish,Chinese": 0.26666666666666666, + "English,Indonesian,Filipino": 0.4266666666666667, + "English,Indonesian,Chinese": 0.30666666666666664, + "English,Filipino,Chinese": 0.31333333333333335, + "Vietnamese,Spanish,Indonesian": 0.28, + "Vietnamese,Spanish,Filipino": 0.2733333333333333, + "Vietnamese,Spanish,Chinese": 0.20666666666666667, + "Vietnamese,Indonesian,Filipino": 0.31333333333333335, + "Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "Vietnamese,Filipino,Chinese": 0.19333333333333333, + "Spanish,Indonesian,Filipino": 0.3333333333333333, + "Spanish,Indonesian,Chinese": 0.24666666666666667, + "Spanish,Filipino,Chinese": 0.26, + "Indonesian,Filipino,Chinese": 0.2866666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.23333333333333334, + "Malay,English,Vietnamese,Indonesian": 0.30666666666666664, + "Malay,English,Vietnamese,Filipino": 0.25333333333333335, + "Malay,English,Vietnamese,Chinese": 0.18, + "Malay,English,Spanish,Indonesian": 0.31333333333333335, + "Malay,English,Spanish,Filipino": 0.26666666666666666, + "Malay,English,Spanish,Chinese": 0.19333333333333333, + "Malay,English,Indonesian,Filipino": 0.37333333333333335, + "Malay,English,Indonesian,Chinese": 0.26, + "Malay,English,Filipino,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.23333333333333334, + "Malay,Vietnamese,Spanish,Filipino": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "Malay,Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Filipino,Chinese": 0.15333333333333332, + "Malay,Spanish,Indonesian,Filipino": 0.28, + "Malay,Spanish,Indonesian,Chinese": 0.19333333333333333, + "Malay,Spanish,Filipino,Chinese": 0.17333333333333334, + "Malay,Indonesian,Filipino,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish,Indonesian": 0.23333333333333334, + "English,Vietnamese,Spanish,Filipino": 0.22, + "English,Vietnamese,Spanish,Chinese": 0.16, + "English,Vietnamese,Indonesian,Filipino": 0.26, + "English,Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "English,Vietnamese,Filipino,Chinese": 0.15333333333333332, + "English,Spanish,Indonesian,Filipino": 0.2733333333333333, + "English,Spanish,Indonesian,Chinese": 0.20666666666666667, + "English,Spanish,Filipino,Chinese": 0.2, + "English,Indonesian,Filipino,Chinese": 0.24, + "Vietnamese,Spanish,Indonesian,Filipino": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Spanish,Indonesian,Filipino,Chinese": 0.18666666666666668 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.22, + "Malay,English,Vietnamese,Spanish,Filipino": 0.2, + "Malay,English,Vietnamese,Spanish,Chinese": 0.13333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.24, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.16, + "Malay,English,Vietnamese,Filipino,Chinese": 0.14, + "Malay,English,Spanish,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.17333333333333334, + "Malay,English,Spanish,Filipino,Chinese": 0.15333333333333332, + "Malay,English,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.2, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.19333333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.14, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.14, + "English,Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + } + }, + "AC3_2": 0.346053309328359, + "AC3_3": 0.28729562113313184, + "AC3_4": 0.24125470042332486, + "AC3_5": 0.20540935667784796, + "AC3_6": 0.1762836685545283, + "AC3_7": 0.15253604745712565 + }, + "prompt_4": { + "overall_acc": 0.24000000000000002, + "language_acc": { + "Malay": 0.22, + "English": 0.32, + "Vietnamese": 0.2, + "Spanish": 0.3, + "Indonesian": 0.23333333333333334, + "Filipino": 0.22, + "Chinese": 0.18666666666666668 + }, + "consistency_score_2": 0.26571428571428574, + "consistency_score_3": 0.08000000000000002, + "consistency_score_4": 0.02876190476190476, + "consistency_score_5": 0.012698412698412695, + "consistency_score_6": 0.00761904761904762, + "consistency_score_7": 0.006666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.30666666666666664, + "Malay,Vietnamese": 0.24666666666666667, + "Malay,Spanish": 0.2866666666666667, + "Malay,Indonesian": 0.3, + "Malay,Filipino": 0.23333333333333334, + "Malay,Chinese": 0.26666666666666666, + "English,Vietnamese": 0.24, + "English,Spanish": 0.3333333333333333, + "English,Indonesian": 0.2866666666666667, + "English,Filipino": 0.2733333333333333, + "English,Chinese": 0.28, + "Vietnamese,Spanish": 0.3, + "Vietnamese,Indonesian": 0.19333333333333333, + "Vietnamese,Filipino": 0.22, + "Vietnamese,Chinese": 0.16, + "Spanish,Indonesian": 0.3333333333333333, + "Spanish,Filipino": 0.2866666666666667, + "Spanish,Chinese": 0.25333333333333335, + "Indonesian,Filipino": 0.22666666666666666, + "Indonesian,Chinese": 0.31333333333333335, + "Filipino,Chinese": 0.24 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.08666666666666667, + "Malay,English,Spanish": 0.1, + "Malay,English,Indonesian": 0.12, + "Malay,English,Filipino": 0.06666666666666667, + "Malay,English,Chinese": 0.08, + "Malay,Vietnamese,Spanish": 0.09333333333333334, + "Malay,Vietnamese,Indonesian": 0.08, + "Malay,Vietnamese,Filipino": 0.06, + "Malay,Vietnamese,Chinese": 0.05333333333333334, + "Malay,Spanish,Indonesian": 0.12666666666666668, + "Malay,Spanish,Filipino": 0.08666666666666667, + "Malay,Spanish,Chinese": 0.09333333333333334, + "Malay,Indonesian,Filipino": 0.09333333333333334, + "Malay,Indonesian,Chinese": 0.08666666666666667, + "Malay,Filipino,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish": 0.10666666666666667, + "English,Vietnamese,Indonesian": 0.07333333333333333, + "English,Vietnamese,Filipino": 0.07333333333333333, + "English,Vietnamese,Chinese": 0.04, + "English,Spanish,Indonesian": 0.10666666666666667, + "English,Spanish,Filipino": 0.10666666666666667, + "English,Spanish,Chinese": 0.08666666666666667, + "English,Indonesian,Filipino": 0.1, + "English,Indonesian,Chinese": 0.10666666666666667, + "English,Filipino,Chinese": 0.06666666666666667, + "Vietnamese,Spanish,Indonesian": 0.08, + "Vietnamese,Spanish,Filipino": 0.06666666666666667, + "Vietnamese,Spanish,Chinese": 0.04666666666666667, + "Vietnamese,Indonesian,Filipino": 0.05333333333333334, + "Vietnamese,Indonesian,Chinese": 0.02666666666666667, + "Vietnamese,Filipino,Chinese": 0.04, + "Spanish,Indonesian,Filipino": 0.10666666666666667, + "Spanish,Indonesian,Chinese": 0.11333333333333333, + "Spanish,Filipino,Chinese": 0.06666666666666667, + "Indonesian,Filipino,Chinese": 0.06 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.04666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.04666666666666667, + "Malay,English,Vietnamese,Filipino": 0.03333333333333333, + "Malay,English,Vietnamese,Chinese": 0.013333333333333334, + "Malay,English,Spanish,Indonesian": 0.04, + "Malay,English,Spanish,Filipino": 0.04, + "Malay,English,Spanish,Chinese": 0.02, + "Malay,English,Indonesian,Filipino": 0.05333333333333334, + "Malay,English,Indonesian,Chinese": 0.02, + "Malay,English,Filipino,Chinese": 0.013333333333333334, + "Malay,Vietnamese,Spanish,Indonesian": 0.04, + "Malay,Vietnamese,Spanish,Filipino": 0.02666666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.02666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.03333333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.02, + "Malay,Vietnamese,Filipino,Chinese": 0.013333333333333334, + "Malay,Spanish,Indonesian,Filipino": 0.05333333333333334, + "Malay,Spanish,Indonesian,Chinese": 0.04, + "Malay,Spanish,Filipino,Chinese": 0.02666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.02, + "English,Vietnamese,Spanish,Indonesian": 0.03333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.03333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.02, + "English,Vietnamese,Indonesian,Filipino": 0.04, + "English,Vietnamese,Indonesian,Chinese": 0.006666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.013333333333333334, + "English,Spanish,Indonesian,Filipino": 0.05333333333333334, + "English,Spanish,Indonesian,Chinese": 0.04666666666666667, + "English,Spanish,Filipino,Chinese": 0.02666666666666667, + "English,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.02666666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.006666666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.013333333333333334, + "Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.02666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.02, + "Malay,English,Vietnamese,Spanish,Chinese": 0.006666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.02666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.006666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.006666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.02666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.006666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.013333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.006666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.013333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.006666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.013333333333333334, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.02, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.006666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.02, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.013333333333333334, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.006666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667 + } + }, + "AC3_2": 0.2522033897806377, + "AC3_3": 0.1199999999625, + "AC3_4": 0.05136782421901622, + "AC3_5": 0.024120603005530157, + "AC3_6": 0.014769230763266274, + "AC3_7": 0.012972972967713662 + }, + "prompt_5": { + "overall_acc": 0.27999999999999997, + "language_acc": { + "Malay": 0.26666666666666666, + "English": 0.32, + "Vietnamese": 0.24666666666666667, + "Spanish": 0.26666666666666666, + "Indonesian": 0.26666666666666666, + "Filipino": 0.28, + "Chinese": 0.31333333333333335 + }, + "consistency_score_2": 0.44857142857142857, + "consistency_score_3": 0.2615238095238096, + "consistency_score_4": 0.17847619047619043, + "consistency_score_5": 0.1349206349206349, + "consistency_score_6": 0.10952380952380954, + "consistency_score_7": 0.09333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.52, + "Malay,Vietnamese": 0.44666666666666666, + "Malay,Spanish": 0.5133333333333333, + "Malay,Indonesian": 0.5, + "Malay,Filipino": 0.36, + "Malay,Chinese": 0.44, + "English,Vietnamese": 0.4266666666666667, + "English,Spanish": 0.48, + "English,Indonesian": 0.47333333333333333, + "English,Filipino": 0.4066666666666667, + "English,Chinese": 0.5333333333333333, + "Vietnamese,Spanish": 0.5066666666666667, + "Vietnamese,Indonesian": 0.43333333333333335, + "Vietnamese,Filipino": 0.38666666666666666, + "Vietnamese,Chinese": 0.44666666666666666, + "Spanish,Indonesian": 0.52, + "Spanish,Filipino": 0.35333333333333333, + "Spanish,Chinese": 0.42, + "Indonesian,Filipino": 0.43333333333333335, + "Indonesian,Chinese": 0.44666666666666666, + "Filipino,Chinese": 0.37333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.28, + "Malay,English,Spanish": 0.32666666666666666, + "Malay,English,Indonesian": 0.32666666666666666, + "Malay,English,Filipino": 0.24, + "Malay,English,Chinese": 0.32666666666666666, + "Malay,Vietnamese,Spanish": 0.30666666666666664, + "Malay,Vietnamese,Indonesian": 0.2733333333333333, + "Malay,Vietnamese,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Chinese": 0.23333333333333334, + "Malay,Spanish,Indonesian": 0.32666666666666666, + "Malay,Spanish,Filipino": 0.22, + "Malay,Spanish,Chinese": 0.26, + "Malay,Indonesian,Filipino": 0.26, + "Malay,Indonesian,Chinese": 0.2733333333333333, + "Malay,Filipino,Chinese": 0.17333333333333334, + "English,Vietnamese,Spanish": 0.2866666666666667, + "English,Vietnamese,Indonesian": 0.25333333333333335, + "English,Vietnamese,Filipino": 0.24, + "English,Vietnamese,Chinese": 0.2733333333333333, + "English,Spanish,Indonesian": 0.29333333333333333, + "English,Spanish,Filipino": 0.23333333333333334, + "English,Spanish,Chinese": 0.30666666666666664, + "English,Indonesian,Filipino": 0.28, + "English,Indonesian,Chinese": 0.3, + "English,Filipino,Chinese": 0.24666666666666667, + "Vietnamese,Spanish,Indonesian": 0.3, + "Vietnamese,Spanish,Filipino": 0.22, + "Vietnamese,Spanish,Chinese": 0.26666666666666666, + "Vietnamese,Indonesian,Filipino": 0.22666666666666666, + "Vietnamese,Indonesian,Chinese": 0.24, + "Vietnamese,Filipino,Chinese": 0.20666666666666667, + "Spanish,Indonesian,Filipino": 0.28, + "Spanish,Indonesian,Chinese": 0.26666666666666666, + "Spanish,Filipino,Chinese": 0.17333333333333334, + "Indonesian,Filipino,Chinese": 0.22 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.22, + "Malay,English,Vietnamese,Indonesian": 0.20666666666666667, + "Malay,English,Vietnamese,Filipino": 0.17333333333333334, + "Malay,English,Vietnamese,Chinese": 0.19333333333333333, + "Malay,English,Spanish,Indonesian": 0.22, + "Malay,English,Spanish,Filipino": 0.16666666666666666, + "Malay,English,Spanish,Chinese": 0.22, + "Malay,English,Indonesian,Filipino": 0.2, + "Malay,English,Indonesian,Chinese": 0.22, + "Malay,English,Filipino,Chinese": 0.16, + "Malay,Vietnamese,Spanish,Indonesian": 0.22, + "Malay,Vietnamese,Spanish,Filipino": 0.16666666666666666, + "Malay,Vietnamese,Spanish,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.16666666666666666, + "Malay,Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Filipino,Chinese": 0.12666666666666668, + "Malay,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Malay,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Malay,Spanish,Filipino,Chinese": 0.12, + "Malay,Indonesian,Filipino,Chinese": 0.14, + "English,Vietnamese,Spanish,Indonesian": 0.2, + "English,Vietnamese,Spanish,Filipino": 0.18, + "English,Vietnamese,Spanish,Chinese": 0.2, + "English,Vietnamese,Indonesian,Filipino": 0.18666666666666668, + "English,Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "English,Vietnamese,Filipino,Chinese": 0.15333333333333332, + "English,Spanish,Indonesian,Filipino": 0.2, + "English,Spanish,Indonesian,Chinese": 0.2, + "English,Spanish,Filipino,Chinese": 0.15333333333333332, + "English,Indonesian,Filipino,Chinese": 0.18, + "Vietnamese,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Vietnamese,Spanish,Indonesian,Chinese": 0.18, + "Vietnamese,Spanish,Filipino,Chinese": 0.13333333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.14, + "Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.16666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino": 0.14666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.12, + "Malay,English,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.16, + "Malay,English,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.14, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.12, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.12666666666666668, + "English,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.13333333333333333, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.12, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334 + } + }, + "AC3_2": 0.34478431367816686, + "AC3_3": 0.27044671117059993, + "AC3_4": 0.21799750722291283, + "AC3_5": 0.18209640393468982, + "AC3_6": 0.15745721267351345, + "AC3_7": 0.1399999999625 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.26379870129870125, + "language_acc": { + "English": 0.3522727272727273, + "Vietnamese": 0.23295454545454544, + "Chinese": 0.2556818181818182, + "Indonesian": 0.22727272727272727, + "Filipino": 0.23295454545454544, + "Spanish": 0.26704545454545453, + "Malay": 0.2784090909090909 + }, + "consistency_score_2": 0.3506493506493506, + "consistency_score_3": 0.14545454545454548, + "consistency_score_4": 0.06250000000000001, + "consistency_score_5": 0.025974025974025972, + "consistency_score_6": 0.008928571428571428, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.2215909090909091, + "English,Chinese": 0.4318181818181818, + "English,Indonesian": 0.4375, + "English,Filipino": 0.35795454545454547, + "English,Spanish": 0.38636363636363635, + "English,Malay": 0.4375, + "Vietnamese,Chinese": 0.2727272727272727, + "Vietnamese,Indonesian": 0.26136363636363635, + "Vietnamese,Filipino": 0.2897727272727273, + "Vietnamese,Spanish": 0.3181818181818182, + "Vietnamese,Malay": 0.2215909090909091, + "Chinese,Indonesian": 0.35795454545454547, + "Chinese,Filipino": 0.3181818181818182, + "Chinese,Spanish": 0.3693181818181818, + "Chinese,Malay": 0.3522727272727273, + "Indonesian,Filipino": 0.3409090909090909, + "Indonesian,Spanish": 0.3465909090909091, + "Indonesian,Malay": 0.5511363636363636, + "Filipino,Spanish": 0.3465909090909091, + "Filipino,Malay": 0.3693181818181818, + "Spanish,Malay": 0.375 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.10227272727272728, + "English,Vietnamese,Indonesian": 0.09659090909090909, + "English,Vietnamese,Filipino": 0.0625, + "English,Vietnamese,Spanish": 0.09659090909090909, + "English,Vietnamese,Malay": 0.07954545454545454, + "English,Chinese,Indonesian": 0.1875, + "English,Chinese,Filipino": 0.1590909090909091, + "English,Chinese,Spanish": 0.18181818181818182, + "English,Chinese,Malay": 0.20454545454545456, + "English,Indonesian,Filipino": 0.16477272727272727, + "English,Indonesian,Spanish": 0.17045454545454544, + "English,Indonesian,Malay": 0.2784090909090909, + "English,Filipino,Spanish": 0.16477272727272727, + "English,Filipino,Malay": 0.19318181818181818, + "English,Spanish,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian": 0.10227272727272728, + "Vietnamese,Chinese,Filipino": 0.08522727272727272, + "Vietnamese,Chinese,Spanish": 0.14772727272727273, + "Vietnamese,Chinese,Malay": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino": 0.11363636363636363, + "Vietnamese,Indonesian,Spanish": 0.11363636363636363, + "Vietnamese,Indonesian,Malay": 0.125, + "Vietnamese,Filipino,Spanish": 0.125, + "Vietnamese,Filipino,Malay": 0.08522727272727272, + "Vietnamese,Spanish,Malay": 0.13068181818181818, + "Chinese,Indonesian,Filipino": 0.11363636363636363, + "Chinese,Indonesian,Spanish": 0.14772727272727273, + "Chinese,Indonesian,Malay": 0.2159090909090909, + "Chinese,Filipino,Spanish": 0.1534090909090909, + "Chinese,Filipino,Malay": 0.13068181818181818, + "Chinese,Spanish,Malay": 0.13636363636363635, + "Indonesian,Filipino,Spanish": 0.14204545454545456, + "Indonesian,Filipino,Malay": 0.2159090909090909, + "Indonesian,Spanish,Malay": 0.21022727272727273, + "Filipino,Spanish,Malay": 0.16477272727272727 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.056818181818181816, + "English,Vietnamese,Chinese,Filipino": 0.022727272727272728, + "English,Vietnamese,Chinese,Spanish": 0.056818181818181816, + "English,Vietnamese,Chinese,Malay": 0.0625, + "English,Vietnamese,Indonesian,Filipino": 0.028409090909090908, + "English,Vietnamese,Indonesian,Spanish": 0.045454545454545456, + "English,Vietnamese,Indonesian,Malay": 0.056818181818181816, + "English,Vietnamese,Filipino,Spanish": 0.03977272727272727, + "English,Vietnamese,Filipino,Malay": 0.017045454545454544, + "English,Vietnamese,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino": 0.05113636363636364, + "English,Chinese,Indonesian,Spanish": 0.08522727272727272, + "English,Chinese,Indonesian,Malay": 0.11363636363636363, + "English,Chinese,Filipino,Spanish": 0.07954545454545454, + "English,Chinese,Filipino,Malay": 0.06818181818181818, + "English,Chinese,Spanish,Malay": 0.09090909090909091, + "English,Indonesian,Filipino,Spanish": 0.06818181818181818, + "English,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Indonesian,Spanish,Malay": 0.11363636363636363, + "English,Filipino,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Filipino": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Spanish": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Filipino,Spanish": 0.045454545454545456, + "Vietnamese,Chinese,Filipino,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Indonesian,Filipino,Spanish": 0.0625, + "Vietnamese,Indonesian,Filipino,Malay": 0.05113636363636364, + "Vietnamese,Indonesian,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Filipino,Spanish,Malay": 0.056818181818181816, + "Chinese,Indonesian,Filipino,Spanish": 0.0625, + "Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "Chinese,Indonesian,Spanish,Malay": 0.07386363636363637, + "Chinese,Filipino,Spanish,Malay": 0.0625, + "Indonesian,Filipino,Spanish,Malay": 0.08522727272727272 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.005681818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.03977272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.022727272727272728, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.028409090909090908, + "English,Vietnamese,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "English,Chinese,Indonesian,Filipino,Malay": 0.03409090909090909, + "English,Chinese,Indonesian,Spanish,Malay": 0.05113636363636364, + "English,Chinese,Filipino,Spanish,Malay": 0.03977272727272727, + "English,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + } + }, + "AC3_2": 0.30108596814396843, + "AC3_3": 0.18751577652192108, + "AC3_4": 0.10105721389937751, + "AC3_5": 0.04729164391397914, + "AC3_6": 0.017272534007272175, + "AC3_7": 0.0 + }, + "prompt_2": { + "overall_acc": 0.2581168831168831, + "language_acc": { + "English": 0.22727272727272727, + "Vietnamese": 0.2727272727272727, + "Chinese": 0.3125, + "Indonesian": 0.26136363636363635, + "Filipino": 0.2215909090909091, + "Spanish": 0.2556818181818182, + "Malay": 0.2556818181818182 + }, + "consistency_score_2": 0.2491883116883117, + "consistency_score_3": 0.0603896103896104, + "consistency_score_4": 0.012499999999999995, + "consistency_score_5": 0.0016233766233766235, + "consistency_score_6": 0.0, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.21022727272727273, + "English,Chinese": 0.29545454545454547, + "English,Indonesian": 0.26136363636363635, + "English,Filipino": 0.2840909090909091, + "English,Spanish": 0.20454545454545456, + "English,Malay": 0.19886363636363635, + "Vietnamese,Chinese": 0.20454545454545456, + "Vietnamese,Indonesian": 0.29545454545454547, + "Vietnamese,Filipino": 0.1875, + "Vietnamese,Spanish": 0.3125, + "Vietnamese,Malay": 0.2897727272727273, + "Chinese,Indonesian": 0.2897727272727273, + "Chinese,Filipino": 0.21022727272727273, + "Chinese,Spanish": 0.25, + "Chinese,Malay": 0.19318181818181818, + "Indonesian,Filipino": 0.24431818181818182, + "Indonesian,Spanish": 0.32386363636363635, + "Indonesian,Malay": 0.2840909090909091, + "Filipino,Spanish": 0.2215909090909091, + "Filipino,Malay": 0.25, + "Spanish,Malay": 0.2215909090909091 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.03977272727272727, + "English,Vietnamese,Indonesian": 0.07954545454545454, + "English,Vietnamese,Filipino": 0.05113636363636364, + "English,Vietnamese,Spanish": 0.045454545454545456, + "English,Vietnamese,Malay": 0.056818181818181816, + "English,Chinese,Indonesian": 0.08522727272727272, + "English,Chinese,Filipino": 0.08522727272727272, + "English,Chinese,Spanish": 0.06818181818181818, + "English,Chinese,Malay": 0.045454545454545456, + "English,Indonesian,Filipino": 0.0625, + "English,Indonesian,Spanish": 0.05113636363636364, + "English,Indonesian,Malay": 0.056818181818181816, + "English,Filipino,Spanish": 0.0625, + "English,Filipino,Malay": 0.056818181818181816, + "English,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian": 0.07386363636363637, + "Vietnamese,Chinese,Filipino": 0.028409090909090908, + "Vietnamese,Chinese,Spanish": 0.05113636363636364, + "Vietnamese,Chinese,Malay": 0.03409090909090909, + "Vietnamese,Indonesian,Filipino": 0.056818181818181816, + "Vietnamese,Indonesian,Spanish": 0.13068181818181818, + "Vietnamese,Indonesian,Malay": 0.09090909090909091, + "Vietnamese,Filipino,Spanish": 0.056818181818181816, + "Vietnamese,Filipino,Malay": 0.045454545454545456, + "Vietnamese,Spanish,Malay": 0.07954545454545454, + "Chinese,Indonesian,Filipino": 0.06818181818181818, + "Chinese,Indonesian,Spanish": 0.07386363636363637, + "Chinese,Indonesian,Malay": 0.07954545454545454, + "Chinese,Filipino,Spanish": 0.045454545454545456, + "Chinese,Filipino,Malay": 0.03409090909090909, + "Chinese,Spanish,Malay": 0.045454545454545456, + "Indonesian,Filipino,Spanish": 0.07386363636363637, + "Indonesian,Filipino,Malay": 0.03409090909090909, + "Indonesian,Spanish,Malay": 0.08522727272727272, + "Filipino,Spanish,Malay": 0.05113636363636364 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.011363636363636364, + "English,Vietnamese,Chinese,Filipino": 0.005681818181818182, + "English,Vietnamese,Chinese,Spanish": 0.011363636363636364, + "English,Vietnamese,Chinese,Malay": 0.0, + "English,Vietnamese,Indonesian,Filipino": 0.017045454545454544, + "English,Vietnamese,Indonesian,Spanish": 0.022727272727272728, + "English,Vietnamese,Indonesian,Malay": 0.028409090909090908, + "English,Vietnamese,Filipino,Spanish": 0.011363636363636364, + "English,Vietnamese,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Spanish,Malay": 0.0, + "English,Chinese,Indonesian,Filipino": 0.017045454545454544, + "English,Chinese,Indonesian,Spanish": 0.017045454545454544, + "English,Chinese,Indonesian,Malay": 0.017045454545454544, + "English,Chinese,Filipino,Spanish": 0.017045454545454544, + "English,Chinese,Filipino,Malay": 0.005681818181818182, + "English,Chinese,Spanish,Malay": 0.005681818181818182, + "English,Indonesian,Filipino,Spanish": 0.022727272727272728, + "English,Indonesian,Filipino,Malay": 0.0, + "English,Indonesian,Spanish,Malay": 0.011363636363636364, + "English,Filipino,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Indonesian,Filipino": 0.011363636363636364, + "Vietnamese,Chinese,Indonesian,Spanish": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Malay": 0.011363636363636364, + "Vietnamese,Chinese,Filipino,Spanish": 0.005681818181818182, + "Vietnamese,Chinese,Filipino,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "Vietnamese,Indonesian,Filipino,Malay": 0.0, + "Vietnamese,Indonesian,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Filipino,Spanish,Malay": 0.011363636363636364, + "Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "Chinese,Indonesian,Filipino,Malay": 0.011363636363636364, + "Chinese,Indonesian,Spanish,Malay": 0.022727272727272728, + "Chinese,Filipino,Spanish,Malay": 0.005681818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.005681818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.005681818181818182, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.0, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.0, + "English,Vietnamese,Chinese,Filipino,Malay": 0.0, + "English,Vietnamese,Chinese,Spanish,Malay": 0.0, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.005681818181818182, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.0, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.0, + "English,Vietnamese,Filipino,Spanish,Malay": 0.0, + "English,Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, + "English,Chinese,Indonesian,Filipino,Malay": 0.0, + "English,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, + "English,Chinese,Filipino,Spanish,Malay": 0.0, + "English,Indonesian,Filipino,Spanish,Malay": 0.0, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + } + }, + "AC3_2": 0.25357402592404144, + "AC3_3": 0.09787918500441768, + "AC3_4": 0.023845230944997794, + "AC3_5": 0.0032264610377188514, + "AC3_6": 0.0, + "AC3_7": 0.0 + }, + "prompt_3": { + "overall_acc": 0.27759740259740256, + "language_acc": { + "English": 0.3181818181818182, + "Vietnamese": 0.32954545454545453, + "Chinese": 0.22727272727272727, + "Indonesian": 0.29545454545454547, + "Filipino": 0.2840909090909091, + "Spanish": 0.2159090909090909, + "Malay": 0.2727272727272727 + }, + "consistency_score_2": 0.34442640692640697, + "consistency_score_3": 0.14042207792207792, + "consistency_score_4": 0.062499999999999986, + "consistency_score_5": 0.030573593073593076, + "consistency_score_6": 0.017045454545454548, + "consistency_score_7": 0.011363636363636364, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.21022727272727273, + "English,Chinese": 0.42613636363636365, + "English,Indonesian": 0.44886363636363635, + "English,Filipino": 0.39204545454545453, + "English,Spanish": 0.3522727272727273, + "English,Malay": 0.42613636363636365, + "Vietnamese,Chinese": 0.23863636363636365, + "Vietnamese,Indonesian": 0.23863636363636365, + "Vietnamese,Filipino": 0.2159090909090909, + "Vietnamese,Spanish": 0.3352272727272727, + "Vietnamese,Malay": 0.24431818181818182, + "Chinese,Indonesian": 0.4034090909090909, + "Chinese,Filipino": 0.26136363636363635, + "Chinese,Spanish": 0.3465909090909091, + "Chinese,Malay": 0.3806818181818182, + "Indonesian,Filipino": 0.36363636363636365, + "Indonesian,Spanish": 0.32386363636363635, + "Indonesian,Malay": 0.6647727272727273, + "Filipino,Spanish": 0.25, + "Filipino,Malay": 0.3977272727272727, + "Spanish,Malay": 0.3125 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.09659090909090909, + "English,Vietnamese,Indonesian": 0.08522727272727272, + "English,Vietnamese,Filipino": 0.06818181818181818, + "English,Vietnamese,Spanish": 0.11363636363636363, + "English,Vietnamese,Malay": 0.07386363636363637, + "English,Chinese,Indonesian": 0.2215909090909091, + "English,Chinese,Filipino": 0.14772727272727273, + "English,Chinese,Spanish": 0.18181818181818182, + "English,Chinese,Malay": 0.21022727272727273, + "English,Indonesian,Filipino": 0.2215909090909091, + "English,Indonesian,Spanish": 0.17045454545454544, + "English,Indonesian,Malay": 0.32386363636363635, + "English,Filipino,Spanish": 0.125, + "English,Filipino,Malay": 0.22727272727272727, + "English,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian": 0.08522727272727272, + "Vietnamese,Chinese,Filipino": 0.03977272727272727, + "Vietnamese,Chinese,Spanish": 0.10795454545454546, + "Vietnamese,Chinese,Malay": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino": 0.06818181818181818, + "Vietnamese,Indonesian,Spanish": 0.10227272727272728, + "Vietnamese,Indonesian,Malay": 0.14772727272727273, + "Vietnamese,Filipino,Spanish": 0.0625, + "Vietnamese,Filipino,Malay": 0.08522727272727272, + "Vietnamese,Spanish,Malay": 0.11931818181818182, + "Chinese,Indonesian,Filipino": 0.125, + "Chinese,Indonesian,Spanish": 0.125, + "Chinese,Indonesian,Malay": 0.2840909090909091, + "Chinese,Filipino,Spanish": 0.07954545454545454, + "Chinese,Filipino,Malay": 0.14772727272727273, + "Chinese,Spanish,Malay": 0.11931818181818182, + "Indonesian,Filipino,Spanish": 0.10795454545454546, + "Indonesian,Filipino,Malay": 0.2727272727272727, + "Indonesian,Spanish,Malay": 0.2159090909090909, + "Filipino,Spanish,Malay": 0.10795454545454546 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino": 0.022727272727272728, + "English,Vietnamese,Chinese,Spanish": 0.056818181818181816, + "English,Vietnamese,Chinese,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino": 0.022727272727272728, + "English,Vietnamese,Indonesian,Spanish": 0.05113636363636364, + "English,Vietnamese,Indonesian,Malay": 0.056818181818181816, + "English,Vietnamese,Filipino,Spanish": 0.03409090909090909, + "English,Vietnamese,Filipino,Malay": 0.022727272727272728, + "English,Vietnamese,Spanish,Malay": 0.05113636363636364, + "English,Chinese,Indonesian,Filipino": 0.09659090909090909, + "English,Chinese,Indonesian,Spanish": 0.09090909090909091, + "English,Chinese,Indonesian,Malay": 0.1590909090909091, + "English,Chinese,Filipino,Spanish": 0.056818181818181816, + "English,Chinese,Filipino,Malay": 0.09659090909090909, + "English,Chinese,Spanish,Malay": 0.08522727272727272, + "English,Indonesian,Filipino,Spanish": 0.07386363636363637, + "English,Indonesian,Filipino,Malay": 0.17613636363636365, + "English,Indonesian,Spanish,Malay": 0.13068181818181818, + "English,Filipino,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Filipino": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Spanish": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Filipino,Spanish": 0.017045454545454544, + "Vietnamese,Chinese,Filipino,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Spanish,Malay": 0.05113636363636364, + "Vietnamese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "Vietnamese,Indonesian,Filipino,Malay": 0.045454545454545456, + "Vietnamese,Indonesian,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Filipino,Spanish,Malay": 0.03409090909090909, + "Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "Chinese,Indonesian,Filipino,Malay": 0.10227272727272728, + "Chinese,Indonesian,Spanish,Malay": 0.07954545454545454, + "Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "Indonesian,Filipino,Spanish,Malay": 0.07386363636363637 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.011363636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.03409090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.011363636363636364, + "English,Vietnamese,Chinese,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.017045454545454544, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.03977272727272727, + "English,Vietnamese,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "English,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Chinese,Indonesian,Spanish,Malay": 0.0625, + "English,Chinese,Filipino,Spanish,Malay": 0.03977272727272727, + "English,Indonesian,Filipino,Spanish,Malay": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.011363636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.011363636363636364, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.028409090909090908, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364 + } + }, + "AC3_2": 0.3074219168766263, + "AC3_3": 0.18650233257422177, + "AC3_4": 0.10202863958813857, + "AC3_5": 0.05508078399179555, + "AC3_6": 0.03211870772764156, + "AC3_7": 0.021833503567520743 + }, + "prompt_4": { + "overall_acc": 0.25243506493506496, + "language_acc": { + "English": 0.22727272727272727, + "Vietnamese": 0.30113636363636365, + "Chinese": 0.23863636363636365, + "Indonesian": 0.24431818181818182, + "Filipino": 0.26136363636363635, + "Spanish": 0.23295454545454544, + "Malay": 0.26136363636363635 + }, + "consistency_score_2": 0.25730519480519487, + "consistency_score_3": 0.06590909090909092, + "consistency_score_4": 0.01688311688311688, + "consistency_score_5": 0.004329004329004329, + "consistency_score_6": 0.0008116883116883117, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.24431818181818182, + "English,Chinese": 0.3806818181818182, + "English,Indonesian": 0.23295454545454544, + "English,Filipino": 0.29545454545454547, + "English,Spanish": 0.29545454545454547, + "English,Malay": 0.26136363636363635, + "Vietnamese,Chinese": 0.24431818181818182, + "Vietnamese,Indonesian": 0.23295454545454544, + "Vietnamese,Filipino": 0.2727272727272727, + "Vietnamese,Spanish": 0.21022727272727273, + "Vietnamese,Malay": 0.23295454545454544, + "Chinese,Indonesian": 0.24431818181818182, + "Chinese,Filipino": 0.26136363636363635, + "Chinese,Spanish": 0.2840909090909091, + "Chinese,Malay": 0.1875, + "Indonesian,Filipino": 0.24431818181818182, + "Indonesian,Spanish": 0.29545454545454547, + "Indonesian,Malay": 0.23295454545454544, + "Filipino,Spanish": 0.23863636363636365, + "Filipino,Malay": 0.2556818181818182, + "Spanish,Malay": 0.2556818181818182 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.07954545454545454, + "English,Vietnamese,Indonesian": 0.0625, + "English,Vietnamese,Filipino": 0.07386363636363637, + "English,Vietnamese,Spanish": 0.056818181818181816, + "English,Vietnamese,Malay": 0.05113636363636364, + "English,Chinese,Indonesian": 0.09090909090909091, + "English,Chinese,Filipino": 0.09659090909090909, + "English,Chinese,Spanish": 0.13068181818181818, + "English,Chinese,Malay": 0.07386363636363637, + "English,Indonesian,Filipino": 0.045454545454545456, + "English,Indonesian,Spanish": 0.09090909090909091, + "English,Indonesian,Malay": 0.06818181818181818, + "English,Filipino,Spanish": 0.0625, + "English,Filipino,Malay": 0.07954545454545454, + "English,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian": 0.0625, + "Vietnamese,Chinese,Filipino": 0.0625, + "Vietnamese,Chinese,Spanish": 0.06818181818181818, + "Vietnamese,Chinese,Malay": 0.045454545454545456, + "Vietnamese,Indonesian,Filipino": 0.05113636363636364, + "Vietnamese,Indonesian,Spanish": 0.03409090909090909, + "Vietnamese,Indonesian,Malay": 0.056818181818181816, + "Vietnamese,Filipino,Spanish": 0.0625, + "Vietnamese,Filipino,Malay": 0.0625, + "Vietnamese,Spanish,Malay": 0.03977272727272727, + "Chinese,Indonesian,Filipino": 0.056818181818181816, + "Chinese,Indonesian,Spanish": 0.07954545454545454, + "Chinese,Indonesian,Malay": 0.03409090909090909, + "Chinese,Filipino,Spanish": 0.07954545454545454, + "Chinese,Filipino,Malay": 0.05113636363636364, + "Chinese,Spanish,Malay": 0.056818181818181816, + "Indonesian,Filipino,Spanish": 0.06818181818181818, + "Indonesian,Filipino,Malay": 0.045454545454545456, + "Indonesian,Spanish,Malay": 0.06818181818181818, + "Filipino,Spanish,Malay": 0.07386363636363637 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino": 0.022727272727272728, + "English,Vietnamese,Chinese,Spanish": 0.028409090909090908, + "English,Vietnamese,Chinese,Malay": 0.017045454545454544, + "English,Vietnamese,Indonesian,Filipino": 0.011363636363636364, + "English,Vietnamese,Indonesian,Spanish": 0.017045454545454544, + "English,Vietnamese,Indonesian,Malay": 0.011363636363636364, + "English,Vietnamese,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Spanish,Malay": 0.011363636363636364, + "English,Chinese,Indonesian,Filipino": 0.017045454545454544, + "English,Chinese,Indonesian,Spanish": 0.03977272727272727, + "English,Chinese,Indonesian,Malay": 0.011363636363636364, + "English,Chinese,Filipino,Spanish": 0.028409090909090908, + "English,Chinese,Filipino,Malay": 0.028409090909090908, + "English,Chinese,Spanish,Malay": 0.028409090909090908, + "English,Indonesian,Filipino,Spanish": 0.011363636363636364, + "English,Indonesian,Filipino,Malay": 0.022727272727272728, + "English,Indonesian,Spanish,Malay": 0.028409090909090908, + "English,Filipino,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Filipino": 0.005681818181818182, + "Vietnamese,Chinese,Indonesian,Spanish": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Malay": 0.011363636363636364, + "Vietnamese,Chinese,Filipino,Spanish": 0.011363636363636364, + "Vietnamese,Chinese,Filipino,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Spanish,Malay": 0.011363636363636364, + "Vietnamese,Indonesian,Filipino,Spanish": 0.005681818181818182, + "Vietnamese,Indonesian,Filipino,Malay": 0.017045454545454544, + "Vietnamese,Indonesian,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Filipino,Spanish,Malay": 0.017045454545454544, + "Chinese,Indonesian,Filipino,Spanish": 0.011363636363636364, + "Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.011363636363636364, + "Chinese,Filipino,Spanish,Malay": 0.022727272727272728, + "Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.0, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.011363636363636364, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.0, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.005681818181818182, + "English,Vietnamese,Chinese,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.0, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.0, + "English,Vietnamese,Filipino,Spanish,Malay": 0.005681818181818182, + "English,Chinese,Indonesian,Filipino,Spanish": 0.0, + "English,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, + "English,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, + "English,Chinese,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + } + }, + "AC3_2": 0.25484686486852554, + "AC3_3": 0.10452691109785016, + "AC3_4": 0.03164948371729801, + "AC3_5": 0.008512035905981064, + "AC3_6": 0.0016181734925345222, + "AC3_7": 0.0 + }, + "prompt_5": { + "overall_acc": 0.273538961038961, + "language_acc": { + "English": 0.3465909090909091, + "Vietnamese": 0.23863636363636365, + "Chinese": 0.2727272727272727, + "Indonesian": 0.2897727272727273, + "Filipino": 0.2556818181818182, + "Spanish": 0.24431818181818182, + "Malay": 0.26704545454545453 + }, + "consistency_score_2": 0.30681818181818177, + "consistency_score_3": 0.10925324675324674, + "consistency_score_4": 0.04042207792207792, + "consistency_score_5": 0.013528138528138525, + "consistency_score_6": 0.003246753246753247, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.2897727272727273, + "English,Chinese": 0.3806818181818182, + "English,Indonesian": 0.3125, + "English,Filipino": 0.21022727272727273, + "English,Spanish": 0.38636363636363635, + "English,Malay": 0.3352272727272727, + "Vietnamese,Chinese": 0.36363636363636365, + "Vietnamese,Indonesian": 0.2840909090909091, + "Vietnamese,Filipino": 0.26136363636363635, + "Vietnamese,Spanish": 0.4034090909090909, + "Vietnamese,Malay": 0.2727272727272727, + "Chinese,Indonesian": 0.32954545454545453, + "Chinese,Filipino": 0.26704545454545453, + "Chinese,Spanish": 0.39204545454545453, + "Chinese,Malay": 0.24431818181818182, + "Indonesian,Filipino": 0.2784090909090909, + "Indonesian,Spanish": 0.30113636363636365, + "Indonesian,Malay": 0.29545454545454547, + "Filipino,Spanish": 0.22727272727272727, + "Filipino,Malay": 0.2727272727272727, + "Spanish,Malay": 0.3352272727272727 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.13636363636363635, + "English,Vietnamese,Indonesian": 0.09659090909090909, + "English,Vietnamese,Filipino": 0.05113636363636364, + "English,Vietnamese,Spanish": 0.14772727272727273, + "English,Vietnamese,Malay": 0.11363636363636363, + "English,Chinese,Indonesian": 0.14772727272727273, + "English,Chinese,Filipino": 0.07954545454545454, + "English,Chinese,Spanish": 0.16477272727272727, + "English,Chinese,Malay": 0.11363636363636363, + "English,Indonesian,Filipino": 0.09659090909090909, + "English,Indonesian,Spanish": 0.125, + "English,Indonesian,Malay": 0.11363636363636363, + "English,Filipino,Spanish": 0.07954545454545454, + "English,Filipino,Malay": 0.10227272727272728, + "English,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian": 0.13068181818181818, + "Vietnamese,Chinese,Filipino": 0.11363636363636363, + "Vietnamese,Chinese,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Malay": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino": 0.09090909090909091, + "Vietnamese,Indonesian,Spanish": 0.10795454545454546, + "Vietnamese,Indonesian,Malay": 0.10795454545454546, + "Vietnamese,Filipino,Spanish": 0.09090909090909091, + "Vietnamese,Filipino,Malay": 0.06818181818181818, + "Vietnamese,Spanish,Malay": 0.11363636363636363, + "Chinese,Indonesian,Filipino": 0.11363636363636363, + "Chinese,Indonesian,Spanish": 0.14204545454545456, + "Chinese,Indonesian,Malay": 0.09659090909090909, + "Chinese,Filipino,Spanish": 0.11363636363636363, + "Chinese,Filipino,Malay": 0.07386363636363637, + "Chinese,Spanish,Malay": 0.09659090909090909, + "Indonesian,Filipino,Spanish": 0.09659090909090909, + "Indonesian,Filipino,Malay": 0.10227272727272728, + "Indonesian,Spanish,Malay": 0.10227272727272728, + "Filipino,Spanish,Malay": 0.09090909090909091 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino": 0.03409090909090909, + "English,Vietnamese,Chinese,Spanish": 0.07386363636363637, + "English,Vietnamese,Chinese,Malay": 0.03977272727272727, + "English,Vietnamese,Indonesian,Filipino": 0.022727272727272728, + "English,Vietnamese,Indonesian,Spanish": 0.03977272727272727, + "English,Vietnamese,Indonesian,Malay": 0.03977272727272727, + "English,Vietnamese,Filipino,Spanish": 0.022727272727272728, + "English,Vietnamese,Filipino,Malay": 0.022727272727272728, + "English,Vietnamese,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Indonesian,Filipino": 0.03977272727272727, + "English,Chinese,Indonesian,Spanish": 0.06818181818181818, + "English,Chinese,Indonesian,Malay": 0.05113636363636364, + "English,Chinese,Filipino,Spanish": 0.03409090909090909, + "English,Chinese,Filipino,Malay": 0.03409090909090909, + "English,Chinese,Spanish,Malay": 0.056818181818181816, + "English,Indonesian,Filipino,Spanish": 0.03977272727272727, + "English,Indonesian,Filipino,Malay": 0.03977272727272727, + "English,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Filipino,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Spanish": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Filipino,Spanish": 0.03977272727272727, + "Vietnamese,Chinese,Filipino,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "Vietnamese,Indonesian,Filipino,Malay": 0.03409090909090909, + "Vietnamese,Indonesian,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Filipino,Spanish,Malay": 0.022727272727272728, + "Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "Chinese,Indonesian,Filipino,Malay": 0.03409090909090909, + "Chinese,Indonesian,Spanish,Malay": 0.03977272727272727, + "Chinese,Filipino,Spanish,Malay": 0.03977272727272727, + "Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.011363636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.011363636363636364, + "English,Vietnamese,Chinese,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.005681818181818182, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.011363636363636364, + "English,Vietnamese,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "English,Chinese,Indonesian,Filipino,Malay": 0.017045454545454544, + "English,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "English,Chinese,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.011363636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.011363636363636364, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + } + }, + "AC3_2": 0.28922441190184905, + "AC3_3": 0.15614225676940538, + "AC3_4": 0.07043557526060717, + "AC3_5": 0.02578124042824444, + "AC3_6": 0.006417336327041269, + "AC3_7": 0.0 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.30097087378640774 + }, + "prompt_2": { + "accuracy": 0.3300970873786408 + }, + "prompt_3": { + "accuracy": 0.2621359223300971 + }, + "prompt_4": { + "accuracy": 0.22330097087378642 + }, + "prompt_5": { + "accuracy": 0.34951456310679613 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3142857142857143 + }, + "prompt_2": { + "accuracy": 0.2761904761904762 + }, + "prompt_3": { + "accuracy": 0.29523809523809524 + }, + "prompt_4": { + "accuracy": 0.3523809523809524 + }, + "prompt_5": { + "accuracy": 0.3523809523809524 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2616822429906542 + }, + "prompt_2": { + "accuracy": 0.37383177570093457 + }, + "prompt_3": { + "accuracy": 0.32710280373831774 + }, + "prompt_4": { + "accuracy": 0.16822429906542055 + }, + "prompt_5": { + "accuracy": 0.22429906542056074 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.28, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.2, + "history": 0.26666666666666666, + "literature": 0.1, + "politics": 0.6, + "culture": 0.2, + "film": 0.1, + "law": 0.3, + "geography": 0.4 + } + }, + "prompt_2": { + "accuracy": 0.33, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.4, + "history": 0.4666666666666667, + "literature": 0.2, + "politics": 0.5, + "culture": 0.3, + "film": 0.3, + "law": 0.2, + "geography": 0.2 + } + }, + "prompt_3": { + "accuracy": 0.35, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.3, + "history": 0.4666666666666667, + "literature": 0.2, + "politics": 0.4, + "culture": 0.5, + "film": 0.2, + "law": 0.3, + "geography": 0.4 + } + }, + "prompt_4": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.3, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.6, + "culture": 0.3, + "film": 0.1, + "law": 0.3, + "geography": 0.4 + } + }, + "prompt_5": { + "accuracy": 0.27, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.5, + "history": 0.2, + "literature": 0.0, + "politics": 0.4, + "culture": 0.2, + "film": 0.0, + "law": 0.2, + "geography": 0.6 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.021144113947468228 + }, + "prompt_2": { + "bleu_score": 0.022958993118697445 + }, + "prompt_3": { + "bleu_score": 0.021286229844316423 + }, + "prompt_4": { + "bleu_score": 0.02363993192960348 + }, + "prompt_5": { + "bleu_score": 0.02710789302102475 + } }, "indommlu": { "prompt_1": -1, @@ -6144,179 +51983,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.07454977177836561 + }, + "prompt_2": { + "bleu_score": 0.08310604498630976 + }, + "prompt_3": { + "bleu_score": 0.0772564557381875 + }, + "prompt_4": { + "bleu_score": 0.07781314070427331 + }, + "prompt_5": { + "bleu_score": 0.06576856126023248 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.024452497879287247 + }, + "prompt_2": { + "bleu_score": 0.025575811552228548 + }, + "prompt_3": { + "bleu_score": 0.024384466778407932 + }, + "prompt_4": { + "bleu_score": 0.026537642363609525 + }, + "prompt_5": { + "bleu_score": 0.022658280765940057 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.12605360193129247 + }, + "prompt_2": { + "bleu_score": 0.13498705539451716 + }, + "prompt_3": { + "bleu_score": 0.12294223265180326 + }, + "prompt_4": { + "bleu_score": 0.12119968741644492 + }, + "prompt_5": { + "bleu_score": 0.11048320873150336 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.06342771693899499 + }, + "prompt_2": { + "bleu_score": 0.06681047988260326 + }, + "prompt_3": { + "bleu_score": 0.06703394513494204 + }, + "prompt_4": { + "bleu_score": 0.06865733812126519 + }, + "prompt_5": { + "bleu_score": 0.057691658472745024 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3115519253208868 + }, + "prompt_2": { + "accuracy": 0.2928821470245041 + }, + "prompt_3": { + "accuracy": 0.29171528588098017 + }, + "prompt_4": { + "accuracy": 0.30688448074679114 + }, + "prompt_5": { + "accuracy": 0.28588098016336055 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3074007865570254, + "category_acc": { + "high_school_european_history": 0.38414634146341464, + "business_ethics": 0.26262626262626265, + "clinical_knowledge": 0.29545454545454547, + "medical_genetics": 0.25252525252525254, + "high_school_us_history": 0.3448275862068966, + "high_school_physics": 0.28, + "high_school_world_history": 0.3347457627118644, + "virology": 0.26666666666666666, + "high_school_microeconomics": 0.29957805907172996, + "econometrics": 0.21238938053097345, + "college_computer_science": 0.26262626262626265, + "high_school_biology": 0.34951456310679613, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.2918149466192171, + "philosophy": 0.3548387096774194, + "professional_medicine": 0.3025830258302583, + "nutrition": 0.3114754098360656, + "global_facts": 0.29292929292929293, + "machine_learning": 0.3153153153153153, + "security_studies": 0.28688524590163933, + "public_relations": 0.3302752293577982, + "professional_psychology": 0.29296235679214405, + "prehistory": 0.35294117647058826, + "anatomy": 0.373134328358209, + "human_sexuality": 0.3, + "college_medicine": 0.28488372093023256, + "high_school_government_and_politics": 0.40625, + "college_chemistry": 0.29292929292929293, + "logical_fallacies": 0.3395061728395062, + "high_school_geography": 0.3197969543147208, + "elementary_mathematics": 0.2519893899204244, + "human_aging": 0.25225225225225223, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.3713235294117647, + "formal_logic": 0.264, + "high_school_statistics": 0.2186046511627907, + "international_law": 0.38333333333333336, + "high_school_mathematics": 0.2936802973977695, + "high_school_computer_science": 0.2727272727272727, + "conceptual_physics": 0.2777777777777778, + "miscellaneous": 0.3900255754475703, + "high_school_chemistry": 0.23267326732673269, + "marketing": 0.3090128755364807, + "professional_law": 0.3013698630136986, + "management": 0.3235294117647059, + "college_physics": 0.18811881188118812, + "jurisprudence": 0.32710280373831774, + "world_religions": 0.4, + "sociology": 0.345, + "us_foreign_policy": 0.3838383838383838, + "high_school_macroeconomics": 0.30077120822622105, + "computer_security": 0.30303030303030304, + "moral_scenarios": 0.2360178970917226, + "moral_disputes": 0.2898550724637681, + "electrical_engineering": 0.2708333333333333, + "astronomy": 0.3509933774834437, + "college_biology": 0.34965034965034963 + } + }, + "prompt_2": { + "accuracy": 0.30268144440471934, + "category_acc": { + "high_school_european_history": 0.2865853658536585, + "business_ethics": 0.32323232323232326, + "clinical_knowledge": 0.26515151515151514, + "medical_genetics": 0.23232323232323232, + "high_school_us_history": 0.3645320197044335, + "high_school_physics": 0.32666666666666666, + "high_school_world_history": 0.3220338983050847, + "virology": 0.23030303030303031, + "high_school_microeconomics": 0.25316455696202533, + "econometrics": 0.2920353982300885, + "college_computer_science": 0.31313131313131315, + "high_school_biology": 0.28802588996763756, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.3274021352313167, + "philosophy": 0.3548387096774194, + "professional_medicine": 0.2952029520295203, + "nutrition": 0.2754098360655738, + "global_facts": 0.25252525252525254, + "machine_learning": 0.25225225225225223, + "security_studies": 0.3073770491803279, + "public_relations": 0.3302752293577982, + "professional_psychology": 0.31751227495908346, + "prehistory": 0.3281733746130031, + "anatomy": 0.4253731343283582, + "human_sexuality": 0.33076923076923076, + "college_medicine": 0.29651162790697677, + "high_school_government_and_politics": 0.359375, + "college_chemistry": 0.30303030303030304, + "logical_fallacies": 0.30246913580246915, + "high_school_geography": 0.3197969543147208, + "elementary_mathematics": 0.246684350132626, + "human_aging": 0.36036036036036034, + "college_mathematics": 0.23232323232323232, + "high_school_psychology": 0.3786764705882353, + "formal_logic": 0.264, + "high_school_statistics": 0.23255813953488372, + "international_law": 0.36666666666666664, + "high_school_mathematics": 0.25650557620817843, + "high_school_computer_science": 0.2727272727272727, + "conceptual_physics": 0.2905982905982906, + "miscellaneous": 0.40664961636828645, + "high_school_chemistry": 0.3118811881188119, + "marketing": 0.351931330472103, + "professional_law": 0.2622309197651663, + "management": 0.3137254901960784, + "college_physics": 0.22772277227722773, + "jurisprudence": 0.3925233644859813, + "world_religions": 0.3352941176470588, + "sociology": 0.415, + "us_foreign_policy": 0.24242424242424243, + "high_school_macroeconomics": 0.2647814910025707, + "computer_security": 0.3838383838383838, + "moral_scenarios": 0.2494407158836689, + "moral_disputes": 0.24057971014492754, + "electrical_engineering": 0.24305555555555555, + "astronomy": 0.31125827814569534, + "college_biology": 0.3356643356643357 + } + }, + "prompt_3": { + "accuracy": 0.30396853771898463, + "category_acc": { + "high_school_european_history": 0.2621951219512195, + "business_ethics": 0.2727272727272727, + "clinical_knowledge": 0.3409090909090909, + "medical_genetics": 0.31313131313131315, + "high_school_us_history": 0.3448275862068966, + "high_school_physics": 0.23333333333333334, + "high_school_world_history": 0.2584745762711864, + "virology": 0.20606060606060606, + "high_school_microeconomics": 0.2869198312236287, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.25252525252525254, + "high_school_biology": 0.3365695792880259, + "abstract_algebra": 0.25252525252525254, + "professional_accounting": 0.25622775800711745, + "philosophy": 0.36774193548387096, + "professional_medicine": 0.2693726937269373, + "nutrition": 0.3114754098360656, + "global_facts": 0.32323232323232326, + "machine_learning": 0.2882882882882883, + "security_studies": 0.3729508196721312, + "public_relations": 0.27522935779816515, + "professional_psychology": 0.32733224222585927, + "prehistory": 0.30959752321981426, + "anatomy": 0.3582089552238806, + "human_sexuality": 0.2923076923076923, + "college_medicine": 0.3488372093023256, + "high_school_government_and_politics": 0.328125, + "college_chemistry": 0.3434343434343434, + "logical_fallacies": 0.345679012345679, + "high_school_geography": 0.3350253807106599, + "elementary_mathematics": 0.27055702917771884, + "human_aging": 0.32882882882882886, + "college_mathematics": 0.32323232323232326, + "high_school_psychology": 0.3805147058823529, + "formal_logic": 0.24, + "high_school_statistics": 0.2837209302325581, + "international_law": 0.3333333333333333, + "high_school_mathematics": 0.2342007434944238, + "high_school_computer_science": 0.30303030303030304, + "conceptual_physics": 0.21794871794871795, + "miscellaneous": 0.4309462915601023, + "high_school_chemistry": 0.26732673267326734, + "marketing": 0.2832618025751073, + "professional_law": 0.25701239399869535, + "management": 0.3235294117647059, + "college_physics": 0.22772277227722773, + "jurisprudence": 0.3364485981308411, + "world_religions": 0.4, + "sociology": 0.36, + "us_foreign_policy": 0.31313131313131315, + "high_school_macroeconomics": 0.2776349614395887, + "computer_security": 0.3333333333333333, + "moral_scenarios": 0.23937360178970918, + "moral_disputes": 0.3130434782608696, + "electrical_engineering": 0.2708333333333333, + "astronomy": 0.37748344370860926, + "college_biology": 0.3146853146853147 + } + }, + "prompt_4": { + "accuracy": 0.3066857347157669, + "category_acc": { + "high_school_european_history": 0.4634146341463415, + "business_ethics": 0.3333333333333333, + "clinical_knowledge": 0.2727272727272727, + "medical_genetics": 0.2727272727272727, + "high_school_us_history": 0.3694581280788177, + "high_school_physics": 0.30666666666666664, + "high_school_world_history": 0.3389830508474576, + "virology": 0.23636363636363636, + "high_school_microeconomics": 0.3080168776371308, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.2727272727272727, + "high_school_biology": 0.34951456310679613, + "abstract_algebra": 0.29292929292929293, + "professional_accounting": 0.2846975088967972, + "philosophy": 0.3193548387096774, + "professional_medicine": 0.22140221402214022, + "nutrition": 0.3180327868852459, + "global_facts": 0.29292929292929293, + "machine_learning": 0.2882882882882883, + "security_studies": 0.3073770491803279, + "public_relations": 0.25688073394495414, + "professional_psychology": 0.2896890343698854, + "prehistory": 0.33746130030959753, + "anatomy": 0.3880597014925373, + "human_sexuality": 0.33076923076923076, + "college_medicine": 0.29651162790697677, + "high_school_government_and_politics": 0.3229166666666667, + "college_chemistry": 0.35353535353535354, + "logical_fallacies": 0.345679012345679, + "high_school_geography": 0.2893401015228426, + "elementary_mathematics": 0.2864721485411141, + "human_aging": 0.3108108108108108, + "college_mathematics": 0.30303030303030304, + "high_school_psychology": 0.3180147058823529, + "formal_logic": 0.248, + "high_school_statistics": 0.25116279069767444, + "international_law": 0.35, + "high_school_mathematics": 0.2342007434944238, + "high_school_computer_science": 0.40404040404040403, + "conceptual_physics": 0.24786324786324787, + "miscellaneous": 0.391304347826087, + "high_school_chemistry": 0.3069306930693069, + "marketing": 0.3605150214592275, + "professional_law": 0.27266797129810827, + "management": 0.29411764705882354, + "college_physics": 0.26732673267326734, + "jurisprudence": 0.2523364485981308, + "world_religions": 0.4764705882352941, + "sociology": 0.355, + "us_foreign_policy": 0.31313131313131315, + "high_school_macroeconomics": 0.2442159383033419, + "computer_security": 0.3333333333333333, + "moral_scenarios": 0.27069351230425054, + "moral_disputes": 0.3507246376811594, + "electrical_engineering": 0.2847222222222222, + "astronomy": 0.33774834437086093, + "college_biology": 0.32167832167832167 + } + }, + "prompt_5": { + "accuracy": 0.300464783696818, + "category_acc": { + "high_school_european_history": 0.35365853658536583, + "business_ethics": 0.32323232323232326, + "clinical_knowledge": 0.24621212121212122, + "medical_genetics": 0.2828282828282828, + "high_school_us_history": 0.3497536945812808, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.3389830508474576, + "virology": 0.2606060606060606, + "high_school_microeconomics": 0.28270042194092826, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.3333333333333333, + "high_school_biology": 0.2750809061488673, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.2597864768683274, + "philosophy": 0.3419354838709677, + "professional_medicine": 0.23985239852398524, + "nutrition": 0.23278688524590163, + "global_facts": 0.26262626262626265, + "machine_learning": 0.21621621621621623, + "security_studies": 0.29098360655737704, + "public_relations": 0.41284403669724773, + "professional_psychology": 0.2733224222585925, + "prehistory": 0.3560371517027864, + "anatomy": 0.3208955223880597, + "human_sexuality": 0.3384615384615385, + "college_medicine": 0.3430232558139535, + "high_school_government_and_politics": 0.3697916666666667, + "college_chemistry": 0.31313131313131315, + "logical_fallacies": 0.30864197530864196, + "high_school_geography": 0.3401015228426396, + "elementary_mathematics": 0.27055702917771884, + "human_aging": 0.25675675675675674, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.34191176470588236, + "formal_logic": 0.312, + "high_school_statistics": 0.2930232558139535, + "international_law": 0.3333333333333333, + "high_school_mathematics": 0.2788104089219331, + "high_school_computer_science": 0.25252525252525254, + "conceptual_physics": 0.25213675213675213, + "miscellaneous": 0.3938618925831202, + "high_school_chemistry": 0.27722772277227725, + "marketing": 0.3261802575107296, + "professional_law": 0.27788649706457924, + "management": 0.29411764705882354, + "college_physics": 0.27722772277227725, + "jurisprudence": 0.37383177570093457, + "world_religions": 0.4764705882352941, + "sociology": 0.425, + "us_foreign_policy": 0.26262626262626265, + "high_school_macroeconomics": 0.2622107969151671, + "computer_security": 0.3434343434343434, + "moral_scenarios": 0.21923937360178972, + "moral_disputes": 0.34202898550724636, + "electrical_engineering": 0.3402777777777778, + "astronomy": 0.304635761589404, + "college_biology": 0.3076923076923077 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2934621099554235 + }, + "prompt_2": { + "accuracy": 0.27934621099554235 + }, + "prompt_3": { + "accuracy": 0.2711738484398217 + }, + "prompt_4": { + "accuracy": 0.28454680534918275 + }, + "prompt_5": { + "accuracy": 0.27786032689450224 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2851805728518057, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.25, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.23809523809523808, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.3448275862068966, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.125, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.23076923076923078, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.25, + "college_economics": 0.26666666666666666, + "business_administration": 0.2631578947368421, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.4482758620689655, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.42857142857142855, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.125, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.5294117647058824, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.2962962962962963, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.4473684210526316, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.25, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.28, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.19230769230769232, + "sports_science": 0.16666666666666666, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.16666666666666666, + "clinical_medicine": 0.14814814814814814, + "urban_and_rural_planner": 0.3333333333333333, + "accountant": 0.2037037037037037, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.2777777777777778, + "physician": 0.3333333333333333 + } + }, + "prompt_2": { + "accuracy": 0.2783312577833126, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.20833333333333334, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.30952380952380953, + "college_physics": 0.4583333333333333, + "college_chemistry": 0.13793103448275862, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.25, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.16666666666666666, + "middle_school_mathematics": 0.125, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.5416666666666666, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.2, + "business_administration": 0.2894736842105263, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.38235294117647056, + "teacher_qualification": 0.5102040816326531, + "high_school_politics": 0.20833333333333334, + "high_school_geography": 0.16666666666666666, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.2962962962962963, + "law": 0.1724137931034483, + "chinese_language_and_literature": 0.25, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.14705882352941177, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.16666666666666666, + "high_school_history": 0.48, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.36538461538461536, + "sports_science": 0.16666666666666666, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.16666666666666666, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.3333333333333333, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.14814814814814814, + "physician": 0.2777777777777778 + } + }, + "prompt_3": { + "accuracy": 0.2764632627646326, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.25, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.11904761904761904, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.041666666666666664, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.041666666666666664, + "middle_school_biology": 0.38461538461538464, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.31666666666666665, + "business_administration": 0.21052631578947367, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.4827586206896552, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.3673469387755102, + "high_school_politics": 0.20833333333333334, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.18518518518518517, + "law": 0.1724137931034483, + "chinese_language_and_literature": 0.03571428571428571, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.16666666666666666, + "high_school_history": 0.24, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.21153846153846154, + "sports_science": 0.4166666666666667, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.18518518518518517, + "urban_and_rural_planner": 0.29411764705882354, + "accountant": 0.2777777777777778, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.2222222222222222, + "physician": 0.2222222222222222 + } + }, + "prompt_4": { + "accuracy": 0.2957658779576588, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.35714285714285715, + "college_physics": 0.25, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.08695652173913043, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.4583333333333333, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.46153846153846156, + "middle_school_physics": 0.25, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.26666666666666666, + "business_administration": 0.2894736842105263, + "marxism": 0.16666666666666666, + "mao_zedong_thought": 0.3793103448275862, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.4897959183673469, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.17857142857142858, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.25925925925925924, + "law": 0.1724137931034483, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.2894736842105263, + "professional_tour_guide": 0.47058823529411764, + "legal_professional": 0.14285714285714285, + "high_school_chinese": 0.125, + "high_school_history": 0.28, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.3269230769230769, + "sports_science": 0.25, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.16666666666666666, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.3148148148148148, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.2962962962962963, + "physician": 0.25925925925925924 + } + }, + "prompt_5": { + "accuracy": 0.2577833125778331, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.16666666666666666, + "computer_architecture": 0.11538461538461539, + "college_programming": 0.14285714285714285, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.6086956521739131, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.25, + "high_school_chemistry": 0.125, + "high_school_biology": 0.16666666666666666, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.23076923076923078, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.23333333333333334, + "business_administration": 0.2894736842105263, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.13793103448275862, + "education_science": 0.38235294117647056, + "teacher_qualification": 0.2653061224489796, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.25, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.25, + "logic": 0.14814814814814814, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.44, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.17307692307692307, + "sports_science": 0.25, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.25, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.19607843137254902, + "accountant": 0.3148148148148148, + "fire_engineer": 0.19444444444444445, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.2777777777777778, + "physician": 0.2222222222222222 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.30824372759856633 + }, + "prompt_2": { + "accuracy": 0.3046594982078853 + }, + "prompt_3": { + "accuracy": 0.25089605734767023 + }, + "prompt_4": { + "accuracy": 0.30824372759856633 + }, + "prompt_5": { + "accuracy": 0.23655913978494625 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.30003453634950783, + "category_acc": { + "agronomy": 0.2958579881656805, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.32926829268292684, + "arts": 0.40625, + "astronomy": 0.19393939393939394, + "business_ethics": 0.22009569377990432, + "chinese_civil_service_exam": 0.28125, + "chinese_driving_rule": 0.366412213740458, + "chinese_food_culture": 0.3235294117647059, + "chinese_foreign_policy": 0.3177570093457944, + "chinese_history": 0.26625386996904027, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.3128491620111732, + "clinical_knowledge": 0.31223628691983124, + "college_actuarial_science": 0.29245283018867924, + "college_education": 0.308411214953271, + "college_engineering_hydrology": 0.16981132075471697, + "college_law": 0.23148148148148148, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.3113207547169811, + "college_medicine": 0.31868131868131866, + "computer_science": 0.28431372549019607, + "computer_security": 0.30409356725146197, + "conceptual_physics": 0.29931972789115646, + "construction_project_management": 0.2805755395683453, + "economics": 0.2578616352201258, + "education": 0.3496932515337423, + "electrical_engineering": 0.27325581395348836, + "elementary_chinese": 0.3412698412698413, + "elementary_commonsense": 0.36363636363636365, + "elementary_information_and_technology": 0.33613445378151263, + "elementary_mathematics": 0.2782608695652174, + "ethnology": 0.34814814814814815, + "food_science": 0.3076923076923077, + "genetics": 0.24431818181818182, + "global_facts": 0.30201342281879195, + "high_school_biology": 0.20710059171597633, + "high_school_chemistry": 0.2803030303030303, + "high_school_geography": 0.288135593220339, + "high_school_mathematics": 0.25, + "high_school_physics": 0.21818181818181817, + "high_school_politics": 0.25874125874125875, + "human_sexuality": 0.2619047619047619, + "international_law": 0.34054054054054056, + "journalism": 0.31976744186046513, + "jurisprudence": 0.3236009732360097, + "legal_and_moral_basis": 0.411214953271028, + "logical": 0.3008130081300813, + "machine_learning": 0.2459016393442623, + "management": 0.3, + "marketing": 0.36666666666666664, + "marxist_theory": 0.3439153439153439, + "modern_chinese": 0.28448275862068967, + "nutrition": 0.2827586206896552, + "philosophy": 0.23809523809523808, + "professional_accounting": 0.32, + "professional_law": 0.3080568720379147, + "professional_medicine": 0.2925531914893617, + "professional_psychology": 0.27155172413793105, + "public_relations": 0.3390804597701149, + "security_study": 0.3851851851851852, + "sociology": 0.3230088495575221, + "sports_science": 0.296969696969697, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.24260355029585798, + "world_history": 0.3167701863354037, + "world_religions": 0.3875 + } + }, + "prompt_2": { + "accuracy": 0.27266447936453114, + "category_acc": { + "agronomy": 0.2781065088757396, + "anatomy": 0.2702702702702703, + "ancient_chinese": 0.2621951219512195, + "arts": 0.38125, + "astronomy": 0.2606060606060606, + "business_ethics": 0.32057416267942584, + "chinese_civil_service_exam": 0.2375, + "chinese_driving_rule": 0.31297709923664124, + "chinese_food_culture": 0.27205882352941174, + "chinese_foreign_policy": 0.2897196261682243, + "chinese_history": 0.21981424148606812, + "chinese_literature": 0.25980392156862747, + "chinese_teacher_qualification": 0.2569832402234637, + "clinical_knowledge": 0.26582278481012656, + "college_actuarial_science": 0.22641509433962265, + "college_education": 0.3364485981308411, + "college_engineering_hydrology": 0.25471698113207547, + "college_law": 0.2777777777777778, + "college_mathematics": 0.2, + "college_medical_statistics": 0.24528301886792453, + "college_medicine": 0.2857142857142857, + "computer_science": 0.24019607843137256, + "computer_security": 0.2573099415204678, + "conceptual_physics": 0.25170068027210885, + "construction_project_management": 0.2446043165467626, + "economics": 0.24528301886792453, + "education": 0.3067484662576687, + "electrical_engineering": 0.32558139534883723, + "elementary_chinese": 0.2857142857142857, + "elementary_commonsense": 0.32323232323232326, + "elementary_information_and_technology": 0.2647058823529412, + "elementary_mathematics": 0.2782608695652174, + "ethnology": 0.32592592592592595, + "food_science": 0.2937062937062937, + "genetics": 0.26704545454545453, + "global_facts": 0.28859060402684567, + "high_school_biology": 0.22485207100591717, + "high_school_chemistry": 0.21212121212121213, + "high_school_geography": 0.3050847457627119, + "high_school_mathematics": 0.24390243902439024, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.26573426573426573, + "human_sexuality": 0.3253968253968254, + "international_law": 0.23783783783783785, + "journalism": 0.31976744186046513, + "jurisprudence": 0.24574209245742093, + "legal_and_moral_basis": 0.2570093457943925, + "logical": 0.3008130081300813, + "machine_learning": 0.22950819672131148, + "management": 0.24761904761904763, + "marketing": 0.3111111111111111, + "marxist_theory": 0.25396825396825395, + "modern_chinese": 0.29310344827586204, + "nutrition": 0.2482758620689655, + "philosophy": 0.26666666666666666, + "professional_accounting": 0.26285714285714284, + "professional_law": 0.26540284360189575, + "professional_medicine": 0.21808510638297873, + "professional_psychology": 0.2974137931034483, + "public_relations": 0.3218390804597701, + "security_study": 0.35555555555555557, + "sociology": 0.29646017699115046, + "sports_science": 0.24242424242424243, + "traditional_chinese_medicine": 0.31351351351351353, + "virology": 0.28402366863905326, + "world_history": 0.2732919254658385, + "world_religions": 0.3125 + } + }, + "prompt_3": { + "accuracy": 0.27732688654809184, + "category_acc": { + "agronomy": 0.26627218934911245, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.23780487804878048, + "arts": 0.2875, + "astronomy": 0.26666666666666666, + "business_ethics": 0.35406698564593303, + "chinese_civil_service_exam": 0.225, + "chinese_driving_rule": 0.29770992366412213, + "chinese_food_culture": 0.2647058823529412, + "chinese_foreign_policy": 0.308411214953271, + "chinese_history": 0.3281733746130031, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.26256983240223464, + "clinical_knowledge": 0.270042194092827, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.29906542056074764, + "college_engineering_hydrology": 0.22641509433962265, + "college_law": 0.21296296296296297, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.29245283018867924, + "college_medicine": 0.358974358974359, + "computer_science": 0.28921568627450983, + "computer_security": 0.28654970760233917, + "conceptual_physics": 0.21768707482993196, + "construction_project_management": 0.2589928057553957, + "economics": 0.27044025157232704, + "education": 0.2883435582822086, + "electrical_engineering": 0.3023255813953488, + "elementary_chinese": 0.3055555555555556, + "elementary_commonsense": 0.3434343434343434, + "elementary_information_and_technology": 0.23949579831932774, + "elementary_mathematics": 0.3, + "ethnology": 0.3111111111111111, + "food_science": 0.3006993006993007, + "genetics": 0.32954545454545453, + "global_facts": 0.24161073825503357, + "high_school_biology": 0.1952662721893491, + "high_school_chemistry": 0.19696969696969696, + "high_school_geography": 0.1694915254237288, + "high_school_mathematics": 0.31097560975609756, + "high_school_physics": 0.2818181818181818, + "high_school_politics": 0.2937062937062937, + "human_sexuality": 0.2857142857142857, + "international_law": 0.2918918918918919, + "journalism": 0.27906976744186046, + "jurisprudence": 0.2360097323600973, + "legal_and_moral_basis": 0.3317757009345794, + "logical": 0.25203252032520324, + "machine_learning": 0.23770491803278687, + "management": 0.28095238095238095, + "marketing": 0.3333333333333333, + "marxist_theory": 0.24867724867724866, + "modern_chinese": 0.3275862068965517, + "nutrition": 0.20689655172413793, + "philosophy": 0.3333333333333333, + "professional_accounting": 0.2742857142857143, + "professional_law": 0.26540284360189575, + "professional_medicine": 0.2579787234042553, + "professional_psychology": 0.2801724137931034, + "public_relations": 0.27586206896551724, + "security_study": 0.2740740740740741, + "sociology": 0.26991150442477874, + "sports_science": 0.2909090909090909, + "traditional_chinese_medicine": 0.25405405405405407, + "virology": 0.27218934911242604, + "world_history": 0.21739130434782608, + "world_religions": 0.31875 + } + }, + "prompt_4": { + "accuracy": 0.2966672422724918, + "category_acc": { + "agronomy": 0.3076923076923077, + "anatomy": 0.28378378378378377, + "ancient_chinese": 0.32926829268292684, + "arts": 0.36875, + "astronomy": 0.2787878787878788, + "business_ethics": 0.27751196172248804, + "chinese_civil_service_exam": 0.28125, + "chinese_driving_rule": 0.29770992366412213, + "chinese_food_culture": 0.22058823529411764, + "chinese_foreign_policy": 0.2803738317757009, + "chinese_history": 0.2631578947368421, + "chinese_literature": 0.30392156862745096, + "chinese_teacher_qualification": 0.31843575418994413, + "clinical_knowledge": 0.25738396624472576, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.2803738317757009, + "college_engineering_hydrology": 0.25471698113207547, + "college_law": 0.3333333333333333, + "college_mathematics": 0.3333333333333333, + "college_medical_statistics": 0.32075471698113206, + "college_medicine": 0.27106227106227104, + "computer_science": 0.23039215686274508, + "computer_security": 0.3508771929824561, + "conceptual_physics": 0.2585034013605442, + "construction_project_management": 0.35251798561151076, + "economics": 0.2830188679245283, + "education": 0.36809815950920244, + "electrical_engineering": 0.32558139534883723, + "elementary_chinese": 0.3531746031746032, + "elementary_commonsense": 0.3333333333333333, + "elementary_information_and_technology": 0.28991596638655465, + "elementary_mathematics": 0.2826086956521739, + "ethnology": 0.32592592592592595, + "food_science": 0.25874125874125875, + "genetics": 0.3181818181818182, + "global_facts": 0.348993288590604, + "high_school_biology": 0.2485207100591716, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.3389830508474576, + "high_school_mathematics": 0.21951219512195122, + "high_school_physics": 0.2818181818181818, + "high_school_politics": 0.2867132867132867, + "human_sexuality": 0.30158730158730157, + "international_law": 0.2756756756756757, + "journalism": 0.27906976744186046, + "jurisprudence": 0.30413625304136255, + "legal_and_moral_basis": 0.3598130841121495, + "logical": 0.2682926829268293, + "machine_learning": 0.32786885245901637, + "management": 0.26666666666666666, + "marketing": 0.2833333333333333, + "marxist_theory": 0.291005291005291, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.2206896551724138, + "philosophy": 0.2571428571428571, + "professional_accounting": 0.3485714285714286, + "professional_law": 0.2985781990521327, + "professional_medicine": 0.2765957446808511, + "professional_psychology": 0.33620689655172414, + "public_relations": 0.3045977011494253, + "security_study": 0.3111111111111111, + "sociology": 0.29646017699115046, + "sports_science": 0.34545454545454546, + "traditional_chinese_medicine": 0.2864864864864865, + "virology": 0.2485207100591716, + "world_history": 0.3105590062111801, + "world_religions": 0.35625 + } + }, + "prompt_5": { + "accuracy": 0.27611811431531685, + "category_acc": { + "agronomy": 0.2485207100591716, + "anatomy": 0.30405405405405406, + "ancient_chinese": 0.27439024390243905, + "arts": 0.40625, + "astronomy": 0.2545454545454545, + "business_ethics": 0.2631578947368421, + "chinese_civil_service_exam": 0.225, + "chinese_driving_rule": 0.2595419847328244, + "chinese_food_culture": 0.3088235294117647, + "chinese_foreign_policy": 0.2523364485981308, + "chinese_history": 0.25696594427244585, + "chinese_literature": 0.30392156862745096, + "chinese_teacher_qualification": 0.3743016759776536, + "clinical_knowledge": 0.26582278481012656, + "college_actuarial_science": 0.1792452830188679, + "college_education": 0.2803738317757009, + "college_engineering_hydrology": 0.24528301886792453, + "college_law": 0.2962962962962963, + "college_mathematics": 0.2761904761904762, + "college_medical_statistics": 0.3584905660377358, + "college_medicine": 0.2893772893772894, + "computer_science": 0.2696078431372549, + "computer_security": 0.2573099415204678, + "conceptual_physics": 0.272108843537415, + "construction_project_management": 0.23741007194244604, + "economics": 0.22641509433962265, + "education": 0.27607361963190186, + "electrical_engineering": 0.25, + "elementary_chinese": 0.3531746031746032, + "elementary_commonsense": 0.3282828282828283, + "elementary_information_and_technology": 0.36554621848739494, + "elementary_mathematics": 0.22608695652173913, + "ethnology": 0.31851851851851853, + "food_science": 0.2727272727272727, + "genetics": 0.26704545454545453, + "global_facts": 0.28187919463087246, + "high_school_biology": 0.23668639053254437, + "high_school_chemistry": 0.17424242424242425, + "high_school_geography": 0.22033898305084745, + "high_school_mathematics": 0.18292682926829268, + "high_school_physics": 0.3090909090909091, + "high_school_politics": 0.26573426573426573, + "human_sexuality": 0.25396825396825395, + "international_law": 0.2918918918918919, + "journalism": 0.29651162790697677, + "jurisprudence": 0.24817518248175183, + "legal_and_moral_basis": 0.29439252336448596, + "logical": 0.22764227642276422, + "machine_learning": 0.22950819672131148, + "management": 0.29523809523809524, + "marketing": 0.3611111111111111, + "marxist_theory": 0.26455026455026454, + "modern_chinese": 0.25862068965517243, + "nutrition": 0.1793103448275862, + "philosophy": 0.29523809523809524, + "professional_accounting": 0.2742857142857143, + "professional_law": 0.3175355450236967, + "professional_medicine": 0.2579787234042553, + "professional_psychology": 0.33189655172413796, + "public_relations": 0.22988505747126436, + "security_study": 0.2518518518518518, + "sociology": 0.2920353982300885, + "sports_science": 0.2787878787878788, + "traditional_chinese_medicine": 0.25405405405405407, + "virology": 0.27218934911242604, + "world_history": 0.2795031055900621, + "world_religions": 0.31875 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.21212121212121213 + }, + "prompt_2": { + "accuracy": 0.2727272727272727 + }, + "prompt_3": { + "accuracy": 0.24242424242424243 + }, + "prompt_4": { + "accuracy": 0.2727272727272727 + }, + "prompt_5": { + "accuracy": 0.24242424242424243 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.16818181818181818 + }, + "prompt_2": { + "accuracy": 0.2159090909090909 + }, + "prompt_3": { + "accuracy": 0.2159090909090909 + }, + "prompt_4": { + "accuracy": 0.1590909090909091 + }, + "prompt_5": { + "accuracy": 0.09318181818181819 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3461016949152542 + }, + "prompt_2": { + "accuracy": 0.31661016949152543 + }, + "prompt_3": { + "accuracy": 0.3461016949152542 + }, + "prompt_4": { + "accuracy": 0.3247457627118644 + }, + "prompt_5": { + "accuracy": 0.3288135593220339 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4005235602094241 + }, + "prompt_2": { + "accuracy": 0.4181002243829469 + }, + "prompt_3": { + "accuracy": 0.42408376963350786 + }, + "prompt_4": { + "accuracy": 0.40201944652206434 + }, + "prompt_5": { + "accuracy": 0.3145100972326103 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.39784419402253796 + }, + "prompt_2": { + "accuracy": 0.35521803037726607 + }, + "prompt_3": { + "accuracy": 0.45173934345908867 + }, + "prompt_4": { + "accuracy": 0.3811856932876041 + }, + "prompt_5": { + "accuracy": 0.37971582557569816 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.12607423589680386, + "rouge2": 0.015578846265454192, + "rougeL": 0.08532663868398548, + "avg_rouge": 0.07565990694874784 + }, + "prompt_2": { + "rouge1": 0.1356630540364643, + "rouge2": 0.012522847594516997, + "rougeL": 0.0859530124571699, + "avg_rouge": 0.0780463046960504 + }, + "prompt_3": { + "rouge1": 0.12803991131416526, + "rouge2": 0.010081604216709856, + "rougeL": 0.07980233609908952, + "avg_rouge": 0.07264128387665487 + }, + "prompt_4": { + "rouge1": 0.15396160380407656, + "rouge2": 0.01475404230651049, + "rougeL": 0.09897827645388753, + "avg_rouge": 0.08923130752149151 + }, + "prompt_5": { + "rouge1": 0.14400515083666787, + "rouge2": 0.014374762056825143, + "rougeL": 0.09453080096167288, + "avg_rouge": 0.08430357128505529 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.13216885302015785, + "rouge2": 0.022456871780656626, + "rougeL": 0.09938227782439747, + "avg_rouge": 0.08466933420840399 + }, + "prompt_2": { + "rouge1": 0.13459800914321937, + "rouge2": 0.022618310147703033, + "rougeL": 0.09754113947335037, + "avg_rouge": 0.08491915292142425 + }, + "prompt_3": { + "rouge1": 0.12827866756183498, + "rouge2": 0.022343756702968284, + "rougeL": 0.09392936191046984, + "avg_rouge": 0.08151726205842437 + }, + "prompt_4": { + "rouge1": 0.13398657759652913, + "rouge2": 0.02080990608031354, + "rougeL": 0.09819851902155226, + "avg_rouge": 0.08433166756613164 + }, + "prompt_5": { + "rouge1": 0.1437078549597196, + "rouge2": 0.027061546117823014, + "rougeL": 0.11619780076213451, + "avg_rouge": 0.09565573394655906 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.591743119266055 + }, + "prompt_2": { + "accuracy": 0.5756880733944955 + }, + "prompt_3": { + "accuracy": 0.5894495412844036 + }, + "prompt_4": { + "accuracy": 0.5309633027522935 + }, + "prompt_5": { + "accuracy": 0.5114678899082569 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6788111217641419 + }, + "prompt_2": { + "accuracy": 0.6423777564717162 + }, + "prompt_3": { + "accuracy": 0.5695110258868649 + }, + "prompt_4": { + "accuracy": 0.6912751677852349 + }, + "prompt_5": { + "accuracy": 0.6596356663470757 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.516 + }, + "prompt_2": { + "accuracy": 0.499 + }, + "prompt_3": { + "accuracy": 0.5145 + }, + "prompt_4": { + "accuracy": 0.4805 + }, + "prompt_5": { + "accuracy": 0.503 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.324 + }, + "prompt_2": { + "accuracy": 0.339 + }, + "prompt_3": { + "accuracy": 0.3365 + }, + "prompt_4": { + "accuracy": 0.332 + }, + "prompt_5": { + "accuracy": 0.3135 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5075 + }, + "prompt_2": { + "accuracy": 0.4875 + }, + "prompt_3": { + "accuracy": 0.506 + }, + "prompt_4": { + "accuracy": 0.493 + }, + "prompt_5": { + "accuracy": 0.498 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6197183098591549 + }, + "prompt_2": { + "accuracy": 0.5915492957746479 + }, + "prompt_3": { + "accuracy": 0.4507042253521127 + }, + "prompt_4": { + "accuracy": 0.5492957746478874 + }, + "prompt_5": { + "accuracy": 0.4788732394366197 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5415162454873647 + }, + "prompt_2": { + "accuracy": 0.4548736462093863 + }, + "prompt_3": { + "accuracy": 0.5018050541516246 + }, + "prompt_4": { + "accuracy": 0.5270758122743683 + }, + "prompt_5": { + "accuracy": 0.5090252707581228 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5294117647058824 + }, + "prompt_2": { + "accuracy": 0.49019607843137253 + }, + "prompt_3": { + "accuracy": 0.5024509803921569 + }, + "prompt_4": { + "accuracy": 0.5196078431372549 + }, + "prompt_5": { + "accuracy": 0.4852941176470588 + } } }, "five_shot": { @@ -6426,53 +53455,1733 @@ "model_link": "https://huggingface.co/baichuan-inc/Baichuan-13B-Base", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3523809523809524, + "language_acc": { + "Malay": 0.28, + "English": 0.4066666666666667, + "Vietnamese": 0.31333333333333335, + "Spanish": 0.41333333333333333, + "Indonesian": 0.34, + "Filipino": 0.28, + "Chinese": 0.43333333333333335 + }, + "consistency_score_2": 0.4234920634920635, + "consistency_score_3": 0.23485714285714282, + "consistency_score_4": 0.15085714285714286, + "consistency_score_5": 0.10476190476190475, + "consistency_score_6": 0.07714285714285715, + "consistency_score_7": 0.06, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.32666666666666666, + "Malay,Vietnamese": 0.3466666666666667, + "Malay,Spanish": 0.30666666666666664, + "Malay,Indonesian": 0.5, + "Malay,Filipino": 0.36, + "Malay,Chinese": 0.28, + "English,Vietnamese": 0.5266666666666666, + "English,Spanish": 0.5333333333333333, + "English,Indonesian": 0.4533333333333333, + "English,Filipino": 0.4666666666666667, + "English,Chinese": 0.47333333333333333, + "Vietnamese,Spanish": 0.5, + "Vietnamese,Indonesian": 0.43333333333333335, + "Vietnamese,Filipino": 0.5133333333333333, + "Vietnamese,Chinese": 0.4666666666666667, + "Spanish,Indonesian": 0.31333333333333335, + "Spanish,Filipino": 0.46, + "Spanish,Chinese": 0.5066666666666667, + "Indonesian,Filipino": 0.4266666666666667, + "Indonesian,Chinese": 0.3466666666666667, + "Filipino,Chinese": 0.35333333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.2, + "Malay,English,Spanish": 0.19333333333333333, + "Malay,English,Indonesian": 0.22666666666666666, + "Malay,English,Filipino": 0.18666666666666668, + "Malay,English,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Spanish": 0.17333333333333334, + "Malay,Vietnamese,Indonesian": 0.22666666666666666, + "Malay,Vietnamese,Filipino": 0.22, + "Malay,Vietnamese,Chinese": 0.14, + "Malay,Spanish,Indonesian": 0.18, + "Malay,Spanish,Filipino": 0.19333333333333333, + "Malay,Spanish,Chinese": 0.17333333333333334, + "Malay,Indonesian,Filipino": 0.21333333333333335, + "Malay,Indonesian,Chinese": 0.16666666666666666, + "Malay,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish": 0.35333333333333333, + "English,Vietnamese,Indonesian": 0.3, + "English,Vietnamese,Filipino": 0.34, + "English,Vietnamese,Chinese": 0.30666666666666664, + "English,Spanish,Indonesian": 0.26, + "English,Spanish,Filipino": 0.32666666666666666, + "English,Spanish,Chinese": 0.34, + "English,Indonesian,Filipino": 0.26666666666666666, + "English,Indonesian,Chinese": 0.22666666666666666, + "English,Filipino,Chinese": 0.26666666666666666, + "Vietnamese,Spanish,Indonesian": 0.22, + "Vietnamese,Spanish,Filipino": 0.29333333333333333, + "Vietnamese,Spanish,Chinese": 0.32, + "Vietnamese,Indonesian,Filipino": 0.2733333333333333, + "Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "Vietnamese,Filipino,Chinese": 0.25333333333333335, + "Spanish,Indonesian,Filipino": 0.20666666666666667, + "Spanish,Indonesian,Chinese": 0.2, + "Spanish,Filipino,Chinese": 0.2733333333333333, + "Indonesian,Filipino,Chinese": 0.17333333333333334 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.13333333333333333, + "Malay,English,Vietnamese,Indonesian": 0.16, + "Malay,English,Vietnamese,Filipino": 0.13333333333333333, + "Malay,English,Vietnamese,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Indonesian": 0.14666666666666667, + "Malay,English,Spanish,Filipino": 0.14, + "Malay,English,Spanish,Chinese": 0.14, + "Malay,English,Indonesian,Filipino": 0.14, + "Malay,English,Indonesian,Chinese": 0.12666666666666668, + "Malay,English,Filipino,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.11333333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Chinese": 0.12, + "Malay,Vietnamese,Indonesian,Filipino": 0.13333333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.1, + "Malay,Vietnamese,Filipino,Chinese": 0.1, + "Malay,Spanish,Indonesian,Filipino": 0.11333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Malay,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Indonesian": 0.19333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.24, + "English,Vietnamese,Spanish,Chinese": 0.24, + "English,Vietnamese,Indonesian,Filipino": 0.2, + "English,Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "English,Vietnamese,Filipino,Chinese": 0.20666666666666667, + "English,Spanish,Indonesian,Filipino": 0.18666666666666668, + "English,Spanish,Indonesian,Chinese": 0.18666666666666668, + "English,Spanish,Filipino,Chinese": 0.23333333333333334, + "English,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "Vietnamese,Spanish,Indonesian,Chinese": 0.16, + "Vietnamese,Spanish,Filipino,Chinese": 0.20666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.14 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.10666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.09333333333333334, + "Malay,English,Vietnamese,Spanish,Chinese": 0.1, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.1, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.10666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.08, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.08, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.06, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.08, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.14, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.17333333333333334, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.12, + "English,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.07333333333333333, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.08666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.06, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.08, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + } + }, + "AC3_2": 0.3846777335643239, + "AC3_3": 0.2818590426284309, + "AC3_4": 0.21126851947786246, + "AC3_5": 0.16150793647260664, + "AC3_6": 0.12657586313176014, + "AC3_7": 0.10254041567952253 + }, + "prompt_2": { + "overall_acc": 0.31714285714285717, + "language_acc": { + "Malay": 0.2, + "English": 0.48, + "Vietnamese": 0.29333333333333333, + "Spanish": 0.37333333333333335, + "Indonesian": 0.2733333333333333, + "Filipino": 0.22, + "Chinese": 0.38 + }, + "consistency_score_2": 0.3219047619047619, + "consistency_score_3": 0.1238095238095238, + "consistency_score_4": 0.052761904761904746, + "consistency_score_5": 0.024444444444444442, + "consistency_score_6": 0.012380952380952381, + "consistency_score_7": 0.006666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.32, + "Malay,Vietnamese": 0.22, + "Malay,Spanish": 0.30666666666666664, + "Malay,Indonesian": 0.36666666666666664, + "Malay,Filipino": 0.22, + "Malay,Chinese": 0.24666666666666667, + "English,Vietnamese": 0.35333333333333333, + "English,Spanish": 0.4666666666666667, + "English,Indonesian": 0.36, + "English,Filipino": 0.28, + "English,Chinese": 0.43333333333333335, + "Vietnamese,Spanish": 0.3466666666666667, + "Vietnamese,Indonesian": 0.28, + "Vietnamese,Filipino": 0.31333333333333335, + "Vietnamese,Chinese": 0.3933333333333333, + "Spanish,Indonesian": 0.35333333333333333, + "Spanish,Filipino": 0.25333333333333335, + "Spanish,Chinese": 0.44666666666666666, + "Indonesian,Filipino": 0.22, + "Indonesian,Chinese": 0.32, + "Filipino,Chinese": 0.26 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.09333333333333334, + "Malay,English,Spanish": 0.16, + "Malay,English,Indonesian": 0.16, + "Malay,English,Filipino": 0.06666666666666667, + "Malay,English,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish": 0.08, + "Malay,Vietnamese,Indonesian": 0.08, + "Malay,Vietnamese,Filipino": 0.06, + "Malay,Vietnamese,Chinese": 0.09333333333333334, + "Malay,Spanish,Indonesian": 0.15333333333333332, + "Malay,Spanish,Filipino": 0.06666666666666667, + "Malay,Spanish,Chinese": 0.16, + "Malay,Indonesian,Filipino": 0.07333333333333333, + "Malay,Indonesian,Chinese": 0.12, + "Malay,Filipino,Chinese": 0.04, + "English,Vietnamese,Spanish": 0.16666666666666666, + "English,Vietnamese,Indonesian": 0.13333333333333333, + "English,Vietnamese,Filipino": 0.10666666666666667, + "English,Vietnamese,Chinese": 0.18, + "English,Spanish,Indonesian": 0.19333333333333333, + "English,Spanish,Filipino": 0.14666666666666667, + "English,Spanish,Chinese": 0.26666666666666666, + "English,Indonesian,Filipino": 0.09333333333333334, + "English,Indonesian,Chinese": 0.17333333333333334, + "English,Filipino,Chinese": 0.11333333333333333, + "Vietnamese,Spanish,Indonesian": 0.14, + "Vietnamese,Spanish,Filipino": 0.09333333333333334, + "Vietnamese,Spanish,Chinese": 0.18666666666666668, + "Vietnamese,Indonesian,Filipino": 0.06666666666666667, + "Vietnamese,Indonesian,Chinese": 0.15333333333333332, + "Vietnamese,Filipino,Chinese": 0.11333333333333333, + "Spanish,Indonesian,Filipino": 0.07333333333333333, + "Spanish,Indonesian,Chinese": 0.2, + "Spanish,Filipino,Chinese": 0.12666666666666668, + "Indonesian,Filipino,Chinese": 0.06666666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.06, + "Malay,English,Vietnamese,Indonesian": 0.06, + "Malay,English,Vietnamese,Filipino": 0.02, + "Malay,English,Vietnamese,Chinese": 0.05333333333333334, + "Malay,English,Spanish,Indonesian": 0.09333333333333334, + "Malay,English,Spanish,Filipino": 0.04, + "Malay,English,Spanish,Chinese": 0.1, + "Malay,English,Indonesian,Filipino": 0.03333333333333333, + "Malay,English,Indonesian,Chinese": 0.08, + "Malay,English,Filipino,Chinese": 0.02666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.04, + "Malay,Vietnamese,Spanish,Filipino": 0.02, + "Malay,Vietnamese,Spanish,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.02, + "Malay,Vietnamese,Indonesian,Chinese": 0.04666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.02, + "Malay,Spanish,Indonesian,Filipino": 0.02666666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.09333333333333334, + "Malay,Spanish,Filipino,Chinese": 0.04, + "Malay,Indonesian,Filipino,Chinese": 0.02, + "English,Vietnamese,Spanish,Indonesian": 0.08, + "English,Vietnamese,Spanish,Filipino": 0.04666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.10666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.02666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.08, + "English,Vietnamese,Filipino,Chinese": 0.05333333333333334, + "English,Spanish,Indonesian,Filipino": 0.04666666666666667, + "English,Spanish,Indonesian,Chinese": 0.11333333333333333, + "English,Spanish,Filipino,Chinese": 0.07333333333333333, + "English,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.02666666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.09333333333333334, + "Vietnamese,Spanish,Filipino,Chinese": 0.05333333333333334, + "Vietnamese,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.04, + "Malay,English,Vietnamese,Spanish,Filipino": 0.013333333333333334, + "Malay,English,Vietnamese,Spanish,Chinese": 0.04, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.013333333333333334, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.04, + "Malay,English,Vietnamese,Filipino,Chinese": 0.013333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.02, + "Malay,English,Spanish,Indonesian,Chinese": 0.06666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.02666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.013333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.03333333333333333, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.02, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.02, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.013333333333333334, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.05333333333333334, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.02666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.02, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.03333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.013333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.006666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667 + } + }, + "AC3_2": 0.3195060676527918, + "AC3_3": 0.1780931810765509, + "AC3_4": 0.09047226715223732, + "AC3_5": 0.04539033455920264, + "AC3_6": 0.02383154417113287, + "AC3_7": 0.013058823525378896 + }, + "prompt_3": { + "overall_acc": 0.3590476190476191, + "language_acc": { + "Malay": 0.3, + "English": 0.4666666666666667, + "Vietnamese": 0.28, + "Spanish": 0.43333333333333335, + "Indonesian": 0.28, + "Filipino": 0.2866666666666667, + "Chinese": 0.4666666666666667 + }, + "consistency_score_2": 0.3736507936507936, + "consistency_score_3": 0.1798095238095238, + "consistency_score_4": 0.09542857142857142, + "consistency_score_5": 0.05206349206349206, + "consistency_score_6": 0.02761904761904762, + "consistency_score_7": 0.013333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.32666666666666666, + "Malay,Vietnamese": 0.22666666666666666, + "Malay,Spanish": 0.31333333333333335, + "Malay,Indonesian": 0.31333333333333335, + "Malay,Filipino": 0.28, + "Malay,Chinese": 0.3466666666666667, + "English,Vietnamese": 0.42, + "English,Spanish": 0.4066666666666667, + "English,Indonesian": 0.3466666666666667, + "English,Filipino": 0.4266666666666667, + "English,Chinese": 0.49333333333333335, + "Vietnamese,Spanish": 0.4066666666666667, + "Vietnamese,Indonesian": 0.37333333333333335, + "Vietnamese,Filipino": 0.4266666666666667, + "Vietnamese,Chinese": 0.4066666666666667, + "Spanish,Indonesian": 0.35333333333333333, + "Spanish,Filipino": 0.37333333333333335, + "Spanish,Chinese": 0.4666666666666667, + "Indonesian,Filipino": 0.35333333333333333, + "Indonesian,Chinese": 0.37333333333333335, + "Filipino,Chinese": 0.41333333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.14, + "Malay,English,Spanish": 0.17333333333333334, + "Malay,English,Indonesian": 0.16, + "Malay,English,Filipino": 0.16, + "Malay,English,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Spanish": 0.12, + "Malay,Vietnamese,Indonesian": 0.08666666666666667, + "Malay,Vietnamese,Filipino": 0.10666666666666667, + "Malay,Vietnamese,Chinese": 0.14666666666666667, + "Malay,Spanish,Indonesian": 0.13333333333333333, + "Malay,Spanish,Filipino": 0.12, + "Malay,Spanish,Chinese": 0.18666666666666668, + "Malay,Indonesian,Filipino": 0.12666666666666668, + "Malay,Indonesian,Chinese": 0.16666666666666666, + "Malay,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish": 0.18666666666666668, + "English,Vietnamese,Indonesian": 0.16666666666666666, + "English,Vietnamese,Filipino": 0.24666666666666667, + "English,Vietnamese,Chinese": 0.25333333333333335, + "English,Spanish,Indonesian": 0.16, + "English,Spanish,Filipino": 0.20666666666666667, + "English,Spanish,Chinese": 0.2866666666666667, + "English,Indonesian,Filipino": 0.18666666666666668, + "English,Indonesian,Chinese": 0.2, + "English,Filipino,Chinese": 0.26666666666666666, + "Vietnamese,Spanish,Indonesian": 0.18666666666666668, + "Vietnamese,Spanish,Filipino": 0.20666666666666667, + "Vietnamese,Spanish,Chinese": 0.22, + "Vietnamese,Indonesian,Filipino": 0.17333333333333334, + "Vietnamese,Indonesian,Chinese": 0.18, + "Vietnamese,Filipino,Chinese": 0.22666666666666666, + "Spanish,Indonesian,Filipino": 0.18, + "Spanish,Indonesian,Chinese": 0.19333333333333333, + "Spanish,Filipino,Chinese": 0.22666666666666666, + "Indonesian,Filipino,Chinese": 0.14666666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.08, + "Malay,English,Vietnamese,Indonesian": 0.06666666666666667, + "Malay,English,Vietnamese,Filipino": 0.08666666666666667, + "Malay,English,Vietnamese,Chinese": 0.12, + "Malay,English,Spanish,Indonesian": 0.1, + "Malay,English,Spanish,Filipino": 0.08666666666666667, + "Malay,English,Spanish,Chinese": 0.13333333333333333, + "Malay,English,Indonesian,Filipino": 0.08, + "Malay,English,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian": 0.05333333333333334, + "Malay,Vietnamese,Spanish,Filipino": 0.04, + "Malay,Vietnamese,Spanish,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.04666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.07333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.06, + "Malay,Spanish,Indonesian,Chinese": 0.1, + "Malay,Spanish,Filipino,Chinese": 0.08666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.07333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.08, + "English,Vietnamese,Spanish,Filipino": 0.10666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.14, + "English,Vietnamese,Indonesian,Filipino": 0.09333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.10666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.16, + "English,Spanish,Indonesian,Filipino": 0.10666666666666667, + "English,Spanish,Indonesian,Chinese": 0.12, + "English,Spanish,Filipino,Chinese": 0.14666666666666667, + "English,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.1, + "Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "Vietnamese,Spanish,Filipino,Chinese": 0.12, + "Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.04666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.04, + "Malay,English,Vietnamese,Spanish,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.03333333333333333, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.06, + "Malay,English,Vietnamese,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.05333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.08, + "Malay,English,Spanish,Filipino,Chinese": 0.06, + "Malay,English,Indonesian,Filipino,Chinese": 0.06, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.02, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.04666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.03333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.04, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.04666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.06, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.08, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.04666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.02, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.04, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.03333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334 + } + }, + "AC3_2": 0.3662036807290906, + "AC3_3": 0.23961891291484794, + "AC3_4": 0.1507819422491843, + "AC3_5": 0.09094024634669719, + "AC3_6": 0.05129251699353742, + "AC3_7": 0.02571184995046954 + }, + "prompt_4": { + "overall_acc": 0.36095238095238097, + "language_acc": { + "Malay": 0.28, + "English": 0.38666666666666666, + "Vietnamese": 0.32, + "Spanish": 0.4666666666666667, + "Indonesian": 0.26, + "Filipino": 0.32, + "Chinese": 0.49333333333333335 + }, + "consistency_score_2": 0.3374603174603175, + "consistency_score_3": 0.14419047619047617, + "consistency_score_4": 0.07066666666666666, + "consistency_score_5": 0.03777777777777777, + "consistency_score_6": 0.021904761904761903, + "consistency_score_7": 0.013333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.31333333333333335, + "Malay,Vietnamese": 0.26666666666666666, + "Malay,Spanish": 0.29333333333333333, + "Malay,Indonesian": 0.42, + "Malay,Filipino": 0.29333333333333333, + "Malay,Chinese": 0.31333333333333335, + "English,Vietnamese": 0.34, + "English,Spanish": 0.38, + "English,Indonesian": 0.3, + "English,Filipino": 0.36666666666666664, + "English,Chinese": 0.37333333333333335, + "Vietnamese,Spanish": 0.4, + "Vietnamese,Indonesian": 0.2866666666666667, + "Vietnamese,Filipino": 0.31333333333333335, + "Vietnamese,Chinese": 0.3333333333333333, + "Spanish,Indonesian": 0.28, + "Spanish,Filipino": 0.3466666666666667, + "Spanish,Chinese": 0.4866666666666667, + "Indonesian,Filipino": 0.32, + "Indonesian,Chinese": 0.31333333333333335, + "Filipino,Chinese": 0.3466666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.1, + "Malay,English,Spanish": 0.13333333333333333, + "Malay,English,Indonesian": 0.15333333333333332, + "Malay,English,Filipino": 0.15333333333333332, + "Malay,English,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Spanish": 0.14, + "Malay,Vietnamese,Indonesian": 0.14, + "Malay,Vietnamese,Filipino": 0.1, + "Malay,Vietnamese,Chinese": 0.09333333333333334, + "Malay,Spanish,Indonesian": 0.15333333333333332, + "Malay,Spanish,Filipino": 0.15333333333333332, + "Malay,Spanish,Chinese": 0.16, + "Malay,Indonesian,Filipino": 0.13333333333333333, + "Malay,Indonesian,Chinese": 0.16666666666666666, + "Malay,Filipino,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish": 0.16666666666666666, + "English,Vietnamese,Indonesian": 0.10666666666666667, + "English,Vietnamese,Filipino": 0.1, + "English,Vietnamese,Chinese": 0.14666666666666667, + "English,Spanish,Indonesian": 0.12666666666666668, + "English,Spanish,Filipino": 0.17333333333333334, + "English,Spanish,Chinese": 0.24, + "English,Indonesian,Filipino": 0.14666666666666667, + "English,Indonesian,Chinese": 0.12666666666666668, + "English,Filipino,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Indonesian": 0.12666666666666668, + "Vietnamese,Spanish,Filipino": 0.16, + "Vietnamese,Spanish,Chinese": 0.22, + "Vietnamese,Indonesian,Filipino": 0.10666666666666667, + "Vietnamese,Indonesian,Chinese": 0.08, + "Vietnamese,Filipino,Chinese": 0.13333333333333333, + "Spanish,Indonesian,Filipino": 0.14, + "Spanish,Indonesian,Chinese": 0.17333333333333334, + "Spanish,Filipino,Chinese": 0.20666666666666667, + "Indonesian,Filipino,Chinese": 0.14 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.05333333333333334, + "Malay,English,Vietnamese,Indonesian": 0.06666666666666667, + "Malay,English,Vietnamese,Filipino": 0.03333333333333333, + "Malay,English,Vietnamese,Chinese": 0.04, + "Malay,English,Spanish,Indonesian": 0.06666666666666667, + "Malay,English,Spanish,Filipino": 0.08, + "Malay,English,Spanish,Chinese": 0.09333333333333334, + "Malay,English,Indonesian,Filipino": 0.07333333333333333, + "Malay,English,Indonesian,Chinese": 0.07333333333333333, + "Malay,English,Filipino,Chinese": 0.08, + "Malay,Vietnamese,Spanish,Indonesian": 0.08666666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.08, + "Malay,Vietnamese,Spanish,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.06, + "Malay,Vietnamese,Indonesian,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Filipino,Chinese": 0.04666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.08666666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.1, + "Malay,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,Indonesian,Filipino,Chinese": 0.07333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.05333333333333334, + "English,Vietnamese,Spanish,Filipino": 0.06, + "English,Vietnamese,Spanish,Chinese": 0.11333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.03333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.04, + "English,Vietnamese,Filipino,Chinese": 0.04666666666666667, + "English,Spanish,Indonesian,Filipino": 0.07333333333333333, + "English,Spanish,Indonesian,Chinese": 0.09333333333333334, + "English,Spanish,Filipino,Chinese": 0.11333333333333333, + "English,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.06666666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.06, + "Vietnamese,Spanish,Filipino,Chinese": 0.09333333333333334, + "Vietnamese,Indonesian,Filipino,Chinese": 0.04, + "Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.03333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.02, + "Malay,English,Vietnamese,Spanish,Chinese": 0.04, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.02, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.02666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.013333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.04, + "Malay,English,Spanish,Indonesian,Chinese": 0.05333333333333334, + "Malay,English,Spanish,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.04, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.05333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.04666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.04, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.03333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.06, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.02666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.03333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.04, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.02, + "English,Spanish,Indonesian,Filipino,Chinese": 0.06, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.013333333333333334, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.02666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.013333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.013333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334 + } + }, + "AC3_2": 0.348811255361312, + "AC3_3": 0.20606406661150872, + "AC3_4": 0.11819358632154237, + "AC3_5": 0.06839702758369554, + "AC3_6": 0.04130300875490723, + "AC3_7": 0.02571670906861683 + }, + "prompt_5": { + "overall_acc": 0.3723809523809524, + "language_acc": { + "Malay": 0.31333333333333335, + "English": 0.44, + "Vietnamese": 0.32, + "Spanish": 0.4533333333333333, + "Indonesian": 0.32666666666666666, + "Filipino": 0.29333333333333333, + "Chinese": 0.46 + }, + "consistency_score_2": 0.4285714285714285, + "consistency_score_3": 0.24419047619047624, + "consistency_score_4": 0.1584761904761905, + "consistency_score_5": 0.1073015873015873, + "consistency_score_6": 0.07238095238095239, + "consistency_score_7": 0.04666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.38, + "Malay,Vietnamese": 0.37333333333333335, + "Malay,Spanish": 0.37333333333333335, + "Malay,Indonesian": 0.44666666666666666, + "Malay,Filipino": 0.42, + "Malay,Chinese": 0.36666666666666664, + "English,Vietnamese": 0.46, + "English,Spanish": 0.5266666666666666, + "English,Indonesian": 0.38666666666666666, + "English,Filipino": 0.5066666666666667, + "English,Chinese": 0.4533333333333333, + "Vietnamese,Spanish": 0.4666666666666667, + "Vietnamese,Indonesian": 0.31333333333333335, + "Vietnamese,Filipino": 0.5133333333333333, + "Vietnamese,Chinese": 0.41333333333333333, + "Spanish,Indonesian": 0.36666666666666664, + "Spanish,Filipino": 0.5266666666666666, + "Spanish,Chinese": 0.52, + "Indonesian,Filipino": 0.3933333333333333, + "Indonesian,Chinese": 0.35333333333333333, + "Filipino,Chinese": 0.44 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.22666666666666666, + "Malay,English,Spanish": 0.24, + "Malay,English,Indonesian": 0.22, + "Malay,English,Filipino": 0.26666666666666666, + "Malay,English,Chinese": 0.2, + "Malay,Vietnamese,Spanish": 0.22666666666666666, + "Malay,Vietnamese,Indonesian": 0.2, + "Malay,Vietnamese,Filipino": 0.26, + "Malay,Vietnamese,Chinese": 0.2, + "Malay,Spanish,Indonesian": 0.20666666666666667, + "Malay,Spanish,Filipino": 0.26, + "Malay,Spanish,Chinese": 0.24, + "Malay,Indonesian,Filipino": 0.22666666666666666, + "Malay,Indonesian,Chinese": 0.18, + "Malay,Filipino,Chinese": 0.21333333333333335, + "English,Vietnamese,Spanish": 0.3, + "English,Vietnamese,Indonesian": 0.18666666666666668, + "English,Vietnamese,Filipino": 0.3333333333333333, + "English,Vietnamese,Chinese": 0.26, + "English,Spanish,Indonesian": 0.24, + "English,Spanish,Filipino": 0.34, + "English,Spanish,Chinese": 0.32666666666666666, + "English,Indonesian,Filipino": 0.25333333333333335, + "English,Indonesian,Chinese": 0.2, + "English,Filipino,Chinese": 0.3, + "Vietnamese,Spanish,Indonesian": 0.19333333333333333, + "Vietnamese,Spanish,Filipino": 0.32666666666666666, + "Vietnamese,Spanish,Chinese": 0.26666666666666666, + "Vietnamese,Indonesian,Filipino": 0.22666666666666666, + "Vietnamese,Indonesian,Chinese": 0.16, + "Vietnamese,Filipino,Chinese": 0.2733333333333333, + "Spanish,Indonesian,Filipino": 0.24, + "Spanish,Indonesian,Chinese": 0.22666666666666666, + "Spanish,Filipino,Chinese": 0.3333333333333333, + "Indonesian,Filipino,Chinese": 0.19333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.17333333333333334, + "Malay,English,Vietnamese,Indonesian": 0.12666666666666668, + "Malay,English,Vietnamese,Filipino": 0.18666666666666668, + "Malay,English,Vietnamese,Chinese": 0.14, + "Malay,English,Spanish,Indonesian": 0.13333333333333333, + "Malay,English,Spanish,Filipino": 0.18666666666666668, + "Malay,English,Spanish,Chinese": 0.16, + "Malay,English,Indonesian,Filipino": 0.16, + "Malay,English,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Filipino,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Indonesian,Filipino": 0.14666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.1, + "Malay,Vietnamese,Filipino,Chinese": 0.15333333333333332, + "Malay,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Malay,Spanish,Filipino,Chinese": 0.17333333333333334, + "Malay,Indonesian,Filipino,Chinese": 0.12, + "English,Vietnamese,Spanish,Indonesian": 0.14, + "English,Vietnamese,Spanish,Filipino": 0.22666666666666666, + "English,Vietnamese,Spanish,Chinese": 0.19333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.16, + "English,Vietnamese,Indonesian,Chinese": 0.10666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.20666666666666667, + "English,Spanish,Indonesian,Filipino": 0.18, + "English,Spanish,Indonesian,Chinese": 0.16666666666666666, + "English,Spanish,Filipino,Chinese": 0.24666666666666667, + "English,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.21333333333333335, + "Vietnamese,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.09333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino": 0.14, + "Malay,English,Vietnamese,Spanish,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.1, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.06666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.1, + "Malay,English,Spanish,Indonesian,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,English,Indonesian,Filipino,Chinese": 0.08, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.08, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.16, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Spanish,Indonesian,Filipino,Chinese": 0.14, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.06, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667 + } + }, + "AC3_2": 0.3985051808573295, + "AC3_3": 0.29495976575530064, + "AC3_4": 0.22233294031043416, + "AC3_5": 0.16659796410128752, + "AC3_6": 0.12120322216098166, + "AC3_7": 0.08293939391960158 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3279220779220779, + "language_acc": { + "English": 0.32954545454545453, + "Vietnamese": 0.3522727272727273, + "Chinese": 0.3693181818181818, + "Indonesian": 0.32954545454545453, + "Filipino": 0.30113636363636365, + "Spanish": 0.3522727272727273, + "Malay": 0.26136363636363635 + }, + "consistency_score_2": 0.5119047619047619, + "consistency_score_3": 0.3384740259740259, + "consistency_score_4": 0.24334415584415592, + "consistency_score_5": 0.18290043290043292, + "consistency_score_6": 0.1436688311688312, + "consistency_score_7": 0.11931818181818182, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.6022727272727273, + "English,Chinese": 0.45454545454545453, + "English,Indonesian": 0.48863636363636365, + "English,Filipino": 0.6761363636363636, + "English,Spanish": 0.5568181818181818, + "English,Malay": 0.48295454545454547, + "Vietnamese,Chinese": 0.4715909090909091, + "Vietnamese,Indonesian": 0.5340909090909091, + "Vietnamese,Filipino": 0.6931818181818182, + "Vietnamese,Spanish": 0.5511363636363636, + "Vietnamese,Malay": 0.5511363636363636, + "Chinese,Indonesian": 0.36363636363636365, + "Chinese,Filipino": 0.4659090909090909, + "Chinese,Spanish": 0.4772727272727273, + "Chinese,Malay": 0.45454545454545453, + "Indonesian,Filipino": 0.5738636363636364, + "Indonesian,Spanish": 0.3977272727272727, + "Indonesian,Malay": 0.4772727272727273, + "Filipino,Spanish": 0.5284090909090909, + "Filipino,Malay": 0.5511363636363636, + "Spanish,Malay": 0.3977272727272727 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.3352272727272727, + "English,Vietnamese,Indonesian": 0.3806818181818182, + "English,Vietnamese,Filipino": 0.5056818181818182, + "English,Vietnamese,Spanish": 0.4090909090909091, + "English,Vietnamese,Malay": 0.375, + "English,Chinese,Indonesian": 0.26704545454545453, + "English,Chinese,Filipino": 0.3522727272727273, + "English,Chinese,Spanish": 0.3181818181818182, + "English,Chinese,Malay": 0.2897727272727273, + "English,Indonesian,Filipino": 0.42613636363636365, + "English,Indonesian,Spanish": 0.2897727272727273, + "English,Indonesian,Malay": 0.32386363636363635, + "English,Filipino,Spanish": 0.42045454545454547, + "English,Filipino,Malay": 0.4034090909090909, + "English,Spanish,Malay": 0.3068181818181818, + "Vietnamese,Chinese,Indonesian": 0.2727272727272727, + "Vietnamese,Chinese,Filipino": 0.36363636363636365, + "Vietnamese,Chinese,Spanish": 0.32386363636363635, + "Vietnamese,Chinese,Malay": 0.3125, + "Vietnamese,Indonesian,Filipino": 0.45454545454545453, + "Vietnamese,Indonesian,Spanish": 0.3181818181818182, + "Vietnamese,Indonesian,Malay": 0.3693181818181818, + "Vietnamese,Filipino,Spanish": 0.42045454545454547, + "Vietnamese,Filipino,Malay": 0.4318181818181818, + "Vietnamese,Spanish,Malay": 0.3125, + "Chinese,Indonesian,Filipino": 0.2784090909090909, + "Chinese,Indonesian,Spanish": 0.22727272727272727, + "Chinese,Indonesian,Malay": 0.2556818181818182, + "Chinese,Filipino,Spanish": 0.29545454545454547, + "Chinese,Filipino,Malay": 0.32954545454545453, + "Chinese,Spanish,Malay": 0.23863636363636365, + "Indonesian,Filipino,Spanish": 0.3181818181818182, + "Indonesian,Filipino,Malay": 0.375, + "Indonesian,Spanish,Malay": 0.23863636363636365, + "Filipino,Spanish,Malay": 0.3068181818181818 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.21022727272727273, + "English,Vietnamese,Chinese,Filipino": 0.2784090909090909, + "English,Vietnamese,Chinese,Spanish": 0.26136363636363635, + "English,Vietnamese,Chinese,Malay": 0.23863636363636365, + "English,Vietnamese,Indonesian,Filipino": 0.3465909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.2556818181818182, + "English,Vietnamese,Indonesian,Malay": 0.26136363636363635, + "English,Vietnamese,Filipino,Spanish": 0.3465909090909091, + "English,Vietnamese,Filipino,Malay": 0.3181818181818182, + "English,Vietnamese,Spanish,Malay": 0.25, + "English,Chinese,Indonesian,Filipino": 0.22727272727272727, + "English,Chinese,Indonesian,Spanish": 0.19318181818181818, + "English,Chinese,Indonesian,Malay": 0.19886363636363635, + "English,Chinese,Filipino,Spanish": 0.2556818181818182, + "English,Chinese,Filipino,Malay": 0.25, + "English,Chinese,Spanish,Malay": 0.20454545454545456, + "English,Indonesian,Filipino,Spanish": 0.2727272727272727, + "English,Indonesian,Filipino,Malay": 0.2897727272727273, + "English,Indonesian,Spanish,Malay": 0.19886363636363635, + "English,Filipino,Spanish,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Indonesian,Filipino": 0.23295454545454544, + "Vietnamese,Chinese,Indonesian,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Filipino,Spanish": 0.23863636363636365, + "Vietnamese,Chinese,Filipino,Malay": 0.2727272727272727, + "Vietnamese,Chinese,Spanish,Malay": 0.19318181818181818, + "Vietnamese,Indonesian,Filipino,Spanish": 0.2784090909090909, + "Vietnamese,Indonesian,Filipino,Malay": 0.3181818181818182, + "Vietnamese,Indonesian,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.25, + "Chinese,Indonesian,Filipino,Spanish": 0.1875, + "Chinese,Indonesian,Filipino,Malay": 0.22727272727272727, + "Chinese,Indonesian,Spanish,Malay": 0.1590909090909091, + "Chinese,Filipino,Spanish,Malay": 0.19886363636363635, + "Indonesian,Filipino,Spanish,Malay": 0.2159090909090909 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.1875, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.17045454545454544, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.17045454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.21022727272727273, + "English,Vietnamese,Chinese,Filipino,Malay": 0.20454545454545456, + "English,Vietnamese,Chinese,Spanish,Malay": 0.17045454545454544, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.23863636363636365, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.23863636363636365, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.17613636363636365, + "English,Vietnamese,Filipino,Spanish,Malay": 0.21022727272727273, + "English,Chinese,Indonesian,Filipino,Spanish": 0.17613636363636365, + "English,Chinese,Indonesian,Filipino,Malay": 0.18181818181818182, + "English,Chinese,Indonesian,Spanish,Malay": 0.14772727272727273, + "English,Chinese,Filipino,Spanish,Malay": 0.17613636363636365, + "English,Indonesian,Filipino,Spanish,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.19318181818181818, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.1534090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13068181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.14204545454545456, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.125 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182 + } + }, + "AC3_2": 0.3997606774192626, + "AC3_3": 0.33311451028742173, + "AC3_4": 0.2793720913436478, + "AC3_5": 0.23482555575416197, + "AC3_6": 0.19980105948330484, + "AC3_7": 0.1749711268376306 + }, + "prompt_2": { + "overall_acc": 0.31655844155844154, + "language_acc": { + "English": 0.3125, + "Vietnamese": 0.32954545454545453, + "Chinese": 0.3522727272727273, + "Indonesian": 0.2556818181818182, + "Filipino": 0.26704545454545453, + "Spanish": 0.4090909090909091, + "Malay": 0.2897727272727273 + }, + "consistency_score_2": 0.40503246753246747, + "consistency_score_3": 0.214448051948052, + "consistency_score_4": 0.13116883116883118, + "consistency_score_5": 0.08955627705627704, + "consistency_score_6": 0.06737012987012987, + "consistency_score_7": 0.056818181818181816, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5170454545454546, + "English,Chinese": 0.3977272727272727, + "English,Indonesian": 0.4318181818181818, + "English,Filipino": 0.5852272727272727, + "English,Spanish": 0.3181818181818182, + "English,Malay": 0.4715909090909091, + "Vietnamese,Chinese": 0.3806818181818182, + "Vietnamese,Indonesian": 0.4034090909090909, + "Vietnamese,Filipino": 0.5909090909090909, + "Vietnamese,Spanish": 0.36363636363636365, + "Vietnamese,Malay": 0.45454545454545453, + "Chinese,Indonesian": 0.2727272727272727, + "Chinese,Filipino": 0.39204545454545453, + "Chinese,Spanish": 0.36363636363636365, + "Chinese,Malay": 0.29545454545454547, + "Indonesian,Filipino": 0.4715909090909091, + "Indonesian,Spanish": 0.25, + "Indonesian,Malay": 0.4090909090909091, + "Filipino,Spanish": 0.2897727272727273, + "Filipino,Malay": 0.5511363636363636, + "Spanish,Malay": 0.29545454545454547 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.23863636363636365, + "English,Vietnamese,Indonesian": 0.2784090909090909, + "English,Vietnamese,Filipino": 0.42045454545454547, + "English,Vietnamese,Spanish": 0.19318181818181818, + "English,Vietnamese,Malay": 0.3125, + "English,Chinese,Indonesian": 0.16477272727272727, + "English,Chinese,Filipino": 0.23863636363636365, + "English,Chinese,Spanish": 0.1590909090909091, + "English,Chinese,Malay": 0.1875, + "English,Indonesian,Filipino": 0.3465909090909091, + "English,Indonesian,Spanish": 0.11931818181818182, + "English,Indonesian,Malay": 0.2556818181818182, + "English,Filipino,Spanish": 0.1590909090909091, + "English,Filipino,Malay": 0.3693181818181818, + "English,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian": 0.14772727272727273, + "Vietnamese,Chinese,Filipino": 0.2556818181818182, + "Vietnamese,Chinese,Spanish": 0.1875, + "Vietnamese,Chinese,Malay": 0.18181818181818182, + "Vietnamese,Indonesian,Filipino": 0.32386363636363635, + "Vietnamese,Indonesian,Spanish": 0.13068181818181818, + "Vietnamese,Indonesian,Malay": 0.2556818181818182, + "Vietnamese,Filipino,Spanish": 0.2215909090909091, + "Vietnamese,Filipino,Malay": 0.3806818181818182, + "Vietnamese,Spanish,Malay": 0.18181818181818182, + "Chinese,Indonesian,Filipino": 0.17613636363636365, + "Chinese,Indonesian,Spanish": 0.10795454545454546, + "Chinese,Indonesian,Malay": 0.14772727272727273, + "Chinese,Filipino,Spanish": 0.1534090909090909, + "Chinese,Filipino,Malay": 0.19886363636363635, + "Chinese,Spanish,Malay": 0.13636363636363635, + "Indonesian,Filipino,Spanish": 0.125, + "Indonesian,Filipino,Malay": 0.3068181818181818, + "Indonesian,Spanish,Malay": 0.10795454545454546, + "Filipino,Spanish,Malay": 0.17045454545454544 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.125, + "English,Vietnamese,Chinese,Filipino": 0.17613636363636365, + "English,Vietnamese,Chinese,Spanish": 0.10795454545454546, + "English,Vietnamese,Chinese,Malay": 0.1534090909090909, + "English,Vietnamese,Indonesian,Filipino": 0.2556818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.09090909090909091, + "English,Vietnamese,Indonesian,Malay": 0.1875, + "English,Vietnamese,Filipino,Spanish": 0.14772727272727273, + "English,Vietnamese,Filipino,Malay": 0.2840909090909091, + "English,Vietnamese,Spanish,Malay": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino": 0.14772727272727273, + "English,Chinese,Indonesian,Spanish": 0.06818181818181818, + "English,Chinese,Indonesian,Malay": 0.11363636363636363, + "English,Chinese,Filipino,Spanish": 0.08522727272727272, + "English,Chinese,Filipino,Malay": 0.14772727272727273, + "English,Chinese,Spanish,Malay": 0.09090909090909091, + "English,Indonesian,Filipino,Spanish": 0.09090909090909091, + "English,Indonesian,Filipino,Malay": 0.23295454545454544, + "English,Indonesian,Spanish,Malay": 0.07954545454545454, + "English,Filipino,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Spanish": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Filipino,Spanish": 0.13068181818181818, + "Vietnamese,Chinese,Filipino,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "Vietnamese,Indonesian,Filipino,Malay": 0.22727272727272727, + "Vietnamese,Indonesian,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.1534090909090909, + "Chinese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.07386363636363637, + "Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "Indonesian,Filipino,Spanish,Malay": 0.09659090909090909 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.11363636363636363, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Chinese,Filipino,Malay": 0.13068181818181818, + "English,Vietnamese,Chinese,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.18181818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Filipino,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Chinese,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Spanish,Malay": 0.0625, + "English,Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.09659090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0625, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816 + } + }, + "AC3_2": 0.3553715687643902, + "AC3_3": 0.2556855403362581, + "AC3_4": 0.18548166839890676, + "AC3_5": 0.13961471569626027, + "AC3_6": 0.11109662005288413, + "AC3_7": 0.09634387349198316 + }, + "prompt_3": { + "overall_acc": 0.3206168831168831, + "language_acc": { + "English": 0.3409090909090909, + "Vietnamese": 0.3068181818181818, + "Chinese": 0.3522727272727273, + "Indonesian": 0.3409090909090909, + "Filipino": 0.2727272727272727, + "Spanish": 0.32386363636363635, + "Malay": 0.3068181818181818 + }, + "consistency_score_2": 0.4507575757575758, + "consistency_score_3": 0.25925324675324674, + "consistency_score_4": 0.15957792207792212, + "consistency_score_5": 0.10173160173160173, + "consistency_score_6": 0.06737012987012987, + "consistency_score_7": 0.045454545454545456, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5227272727272727, + "English,Chinese": 0.38636363636363635, + "English,Indonesian": 0.48863636363636365, + "English,Filipino": 0.5681818181818182, + "English,Spanish": 0.3522727272727273, + "English,Malay": 0.5170454545454546, + "Vietnamese,Chinese": 0.38636363636363635, + "Vietnamese,Indonesian": 0.5056818181818182, + "Vietnamese,Filipino": 0.6363636363636364, + "Vietnamese,Spanish": 0.4147727272727273, + "Vietnamese,Malay": 0.5113636363636364, + "Chinese,Indonesian": 0.3522727272727273, + "Chinese,Filipino": 0.38636363636363635, + "Chinese,Spanish": 0.3181818181818182, + "Chinese,Malay": 0.35795454545454547, + "Indonesian,Filipino": 0.5681818181818182, + "Indonesian,Spanish": 0.32954545454545453, + "Indonesian,Malay": 0.5738636363636364, + "Filipino,Spanish": 0.3409090909090909, + "Filipino,Malay": 0.6022727272727273, + "Spanish,Malay": 0.3465909090909091 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.23295454545454544, + "English,Vietnamese,Indonesian": 0.3181818181818182, + "English,Vietnamese,Filipino": 0.4034090909090909, + "English,Vietnamese,Spanish": 0.2215909090909091, + "English,Vietnamese,Malay": 0.32954545454545453, + "English,Chinese,Indonesian": 0.2215909090909091, + "English,Chinese,Filipino": 0.23863636363636365, + "English,Chinese,Spanish": 0.17045454545454544, + "English,Chinese,Malay": 0.23295454545454544, + "English,Indonesian,Filipino": 0.36363636363636365, + "English,Indonesian,Spanish": 0.19886363636363635, + "English,Indonesian,Malay": 0.35795454545454547, + "English,Filipino,Spanish": 0.20454545454545456, + "English,Filipino,Malay": 0.39204545454545453, + "English,Spanish,Malay": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian": 0.2215909090909091, + "Vietnamese,Chinese,Filipino": 0.26136363636363635, + "Vietnamese,Chinese,Spanish": 0.21022727272727273, + "Vietnamese,Chinese,Malay": 0.2159090909090909, + "Vietnamese,Indonesian,Filipino": 0.3977272727272727, + "Vietnamese,Indonesian,Spanish": 0.20454545454545456, + "Vietnamese,Indonesian,Malay": 0.36363636363636365, + "Vietnamese,Filipino,Spanish": 0.25, + "Vietnamese,Filipino,Malay": 0.4090909090909091, + "Vietnamese,Spanish,Malay": 0.2215909090909091, + "Chinese,Indonesian,Filipino": 0.2215909090909091, + "Chinese,Indonesian,Spanish": 0.1534090909090909, + "Chinese,Indonesian,Malay": 0.2215909090909091, + "Chinese,Filipino,Spanish": 0.1534090909090909, + "Chinese,Filipino,Malay": 0.24431818181818182, + "Chinese,Spanish,Malay": 0.1534090909090909, + "Indonesian,Filipino,Spanish": 0.21022727272727273, + "Indonesian,Filipino,Malay": 0.42045454545454547, + "Indonesian,Spanish,Malay": 0.2215909090909091, + "Filipino,Spanish,Malay": 0.23295454545454544 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.14772727272727273, + "English,Vietnamese,Chinese,Filipino": 0.16477272727272727, + "English,Vietnamese,Chinese,Spanish": 0.13068181818181818, + "English,Vietnamese,Chinese,Malay": 0.1534090909090909, + "English,Vietnamese,Indonesian,Filipino": 0.2556818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.13636363636363635, + "English,Vietnamese,Indonesian,Malay": 0.23295454545454544, + "English,Vietnamese,Filipino,Spanish": 0.1590909090909091, + "English,Vietnamese,Filipino,Malay": 0.26704545454545453, + "English,Vietnamese,Spanish,Malay": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino": 0.14772727272727273, + "English,Chinese,Indonesian,Spanish": 0.10795454545454546, + "English,Chinese,Indonesian,Malay": 0.1590909090909091, + "English,Chinese,Filipino,Spanish": 0.10795454545454546, + "English,Chinese,Filipino,Malay": 0.18181818181818182, + "English,Chinese,Spanish,Malay": 0.10795454545454546, + "English,Indonesian,Filipino,Spanish": 0.14204545454545456, + "English,Indonesian,Filipino,Malay": 0.2727272727272727, + "English,Indonesian,Spanish,Malay": 0.1590909090909091, + "English,Filipino,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian,Filipino": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Spanish": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Filipino,Spanish": 0.125, + "Vietnamese,Chinese,Filipino,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "Vietnamese,Indonesian,Filipino,Malay": 0.2897727272727273, + "Vietnamese,Indonesian,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.16477272727272727, + "Chinese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "Chinese,Indonesian,Filipino,Malay": 0.1590909090909091, + "Chinese,Indonesian,Spanish,Malay": 0.11363636363636363, + "Chinese,Filipino,Spanish,Malay": 0.10795454545454546, + "Indonesian,Filipino,Spanish,Malay": 0.16477272727272727 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.10795454545454546, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.11363636363636363, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Filipino,Malay": 0.125, + "English,Vietnamese,Chinese,Spanish,Malay": 0.07954545454545454, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.18181818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.10795454545454546, + "English,Vietnamese,Filipino,Spanish,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "English,Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.07954545454545454, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0625, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0625, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + } + }, + "AC3_2": 0.37470903346438517, + "AC3_3": 0.28668822074432376, + "AC3_4": 0.21309425019490408, + "AC3_5": 0.15445477004920538, + "AC3_6": 0.11134393845959209, + "AC3_7": 0.07962104412257559 + }, + "prompt_4": { + "overall_acc": 0.31737012987012986, + "language_acc": { + "English": 0.3409090909090909, + "Vietnamese": 0.3125, + "Chinese": 0.3465909090909091, + "Indonesian": 0.2840909090909091, + "Filipino": 0.2727272727272727, + "Spanish": 0.36363636363636365, + "Malay": 0.30113636363636365 + }, + "consistency_score_2": 0.3912337662337662, + "consistency_score_3": 0.19837662337662335, + "consistency_score_4": 0.11444805194805195, + "consistency_score_5": 0.07278138528138525, + "consistency_score_6": 0.049512987012987016, + "consistency_score_7": 0.03409090909090909, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5454545454545454, + "English,Chinese": 0.38636363636363635, + "English,Indonesian": 0.3465909090909091, + "English,Filipino": 0.5965909090909091, + "English,Spanish": 0.4318181818181818, + "English,Malay": 0.38636363636363635, + "Vietnamese,Chinese": 0.38636363636363635, + "Vietnamese,Indonesian": 0.32386363636363635, + "Vietnamese,Filipino": 0.6079545454545454, + "Vietnamese,Spanish": 0.4375, + "Vietnamese,Malay": 0.39204545454545453, + "Chinese,Indonesian": 0.2727272727272727, + "Chinese,Filipino": 0.3522727272727273, + "Chinese,Spanish": 0.3977272727272727, + "Chinese,Malay": 0.36363636363636365, + "Indonesian,Filipino": 0.3806818181818182, + "Indonesian,Spanish": 0.2556818181818182, + "Indonesian,Malay": 0.30113636363636365, + "Filipino,Spanish": 0.3409090909090909, + "Filipino,Malay": 0.38636363636363635, + "Spanish,Malay": 0.32386363636363635 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.24431818181818182, + "English,Vietnamese,Indonesian": 0.18181818181818182, + "English,Vietnamese,Filipino": 0.4090909090909091, + "English,Vietnamese,Spanish": 0.2897727272727273, + "English,Vietnamese,Malay": 0.24431818181818182, + "English,Chinese,Indonesian": 0.13068181818181818, + "English,Chinese,Filipino": 0.25, + "English,Chinese,Spanish": 0.23863636363636365, + "English,Chinese,Malay": 0.19318181818181818, + "English,Indonesian,Filipino": 0.25, + "English,Indonesian,Spanish": 0.13068181818181818, + "English,Indonesian,Malay": 0.1590909090909091, + "English,Filipino,Spanish": 0.2784090909090909, + "English,Filipino,Malay": 0.2784090909090909, + "English,Spanish,Malay": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian": 0.14204545454545456, + "Vietnamese,Chinese,Filipino": 0.25, + "Vietnamese,Chinese,Spanish": 0.2159090909090909, + "Vietnamese,Chinese,Malay": 0.1875, + "Vietnamese,Indonesian,Filipino": 0.23863636363636365, + "Vietnamese,Indonesian,Spanish": 0.11931818181818182, + "Vietnamese,Indonesian,Malay": 0.125, + "Vietnamese,Filipino,Spanish": 0.24431818181818182, + "Vietnamese,Filipino,Malay": 0.26704545454545453, + "Vietnamese,Spanish,Malay": 0.20454545454545456, + "Chinese,Indonesian,Filipino": 0.1534090909090909, + "Chinese,Indonesian,Spanish": 0.125, + "Chinese,Indonesian,Malay": 0.11363636363636363, + "Chinese,Filipino,Spanish": 0.17045454545454544, + "Chinese,Filipino,Malay": 0.18181818181818182, + "Chinese,Spanish,Malay": 0.18181818181818182, + "Indonesian,Filipino,Spanish": 0.125, + "Indonesian,Filipino,Malay": 0.1590909090909091, + "Indonesian,Spanish,Malay": 0.09090909090909091, + "Filipino,Spanish,Malay": 0.17045454545454544 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino": 0.17613636363636365, + "English,Vietnamese,Chinese,Spanish": 0.1590909090909091, + "English,Vietnamese,Chinese,Malay": 0.13636363636363635, + "English,Vietnamese,Indonesian,Filipino": 0.1534090909090909, + "English,Vietnamese,Indonesian,Spanish": 0.08522727272727272, + "English,Vietnamese,Indonesian,Malay": 0.07954545454545454, + "English,Vietnamese,Filipino,Spanish": 0.20454545454545456, + "English,Vietnamese,Filipino,Malay": 0.19318181818181818, + "English,Vietnamese,Spanish,Malay": 0.1534090909090909, + "English,Chinese,Indonesian,Filipino": 0.10795454545454546, + "English,Chinese,Indonesian,Spanish": 0.07954545454545454, + "English,Chinese,Indonesian,Malay": 0.07954545454545454, + "English,Chinese,Filipino,Spanish": 0.1534090909090909, + "English,Chinese,Filipino,Malay": 0.1534090909090909, + "English,Chinese,Spanish,Malay": 0.13068181818181818, + "English,Indonesian,Filipino,Spanish": 0.10795454545454546, + "English,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Indonesian,Spanish,Malay": 0.0625, + "English,Filipino,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian,Filipino": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Spanish": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Filipino,Spanish": 0.11931818181818182, + "Vietnamese,Chinese,Filipino,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino,Malay": 0.10227272727272728, + "Vietnamese,Indonesian,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Filipino,Spanish,Malay": 0.14204545454545456, + "Chinese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "Chinese,Indonesian,Filipino,Malay": 0.08522727272727272, + "Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "Chinese,Filipino,Spanish,Malay": 0.11363636363636363, + "Indonesian,Filipino,Spanish,Malay": 0.0625 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.10795454545454546, + "English,Vietnamese,Chinese,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Chinese,Spanish,Malay": 0.10227272727272728, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Filipino,Spanish,Malay": 0.11931818181818182, + "English,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "English,Chinese,Indonesian,Filipino,Malay": 0.06818181818181818, + "English,Chinese,Indonesian,Spanish,Malay": 0.05113636363636364, + "English,Chinese,Filipino,Spanish,Malay": 0.10795454545454546, + "English,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0625, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03977272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + } + }, + "AC3_2": 0.3504522395780406, + "AC3_3": 0.24414623772496163, + "AC3_4": 0.16823003120799, + "AC3_5": 0.11840855050372122, + "AC3_6": 0.08566184918938563, + "AC3_7": 0.061568339264447304 + }, + "prompt_5": { + "overall_acc": 0.31737012987012986, + "language_acc": { + "English": 0.3125, + "Vietnamese": 0.3409090909090909, + "Chinese": 0.375, + "Indonesian": 0.2784090909090909, + "Filipino": 0.30113636363636365, + "Spanish": 0.3409090909090909, + "Malay": 0.2727272727272727 + }, + "consistency_score_2": 0.458603896103896, + "consistency_score_3": 0.2689935064935065, + "consistency_score_4": 0.16801948051948049, + "consistency_score_5": 0.10579004329004327, + "consistency_score_6": 0.06574675324675326, + "consistency_score_7": 0.03977272727272727, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.625, + "English,Chinese": 0.35795454545454547, + "English,Indonesian": 0.5284090909090909, + "English,Filipino": 0.6988636363636364, + "English,Spanish": 0.2840909090909091, + "English,Malay": 0.5625, + "Vietnamese,Chinese": 0.4659090909090909, + "Vietnamese,Indonesian": 0.5170454545454546, + "Vietnamese,Filipino": 0.7159090909090909, + "Vietnamese,Spanish": 0.29545454545454547, + "Vietnamese,Malay": 0.5738636363636364, + "Chinese,Indonesian": 0.3693181818181818, + "Chinese,Filipino": 0.3977272727272727, + "Chinese,Spanish": 0.3352272727272727, + "Chinese,Malay": 0.38636363636363635, + "Indonesian,Filipino": 0.5965909090909091, + "Indonesian,Spanish": 0.2556818181818182, + "Indonesian,Malay": 0.5625, + "Filipino,Spanish": 0.23863636363636365, + "Filipino,Malay": 0.625, + "Spanish,Malay": 0.23863636363636365 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2840909090909091, + "English,Vietnamese,Indonesian": 0.3977272727272727, + "English,Vietnamese,Filipino": 0.5511363636363636, + "English,Vietnamese,Spanish": 0.1875, + "English,Vietnamese,Malay": 0.4431818181818182, + "English,Chinese,Indonesian": 0.1875, + "English,Chinese,Filipino": 0.2784090909090909, + "English,Chinese,Spanish": 0.10795454545454546, + "English,Chinese,Malay": 0.2556818181818182, + "English,Indonesian,Filipino": 0.44886363636363635, + "English,Indonesian,Spanish": 0.14772727272727273, + "English,Indonesian,Malay": 0.4090909090909091, + "English,Filipino,Spanish": 0.1875, + "English,Filipino,Malay": 0.4943181818181818, + "English,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian": 0.26136363636363635, + "Vietnamese,Chinese,Filipino": 0.3409090909090909, + "Vietnamese,Chinese,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Malay": 0.29545454545454547, + "Vietnamese,Indonesian,Filipino": 0.44886363636363635, + "Vietnamese,Indonesian,Spanish": 0.14772727272727273, + "Vietnamese,Indonesian,Malay": 0.3977272727272727, + "Vietnamese,Filipino,Spanish": 0.1875, + "Vietnamese,Filipino,Malay": 0.5056818181818182, + "Vietnamese,Spanish,Malay": 0.1590909090909091, + "Chinese,Indonesian,Filipino": 0.23863636363636365, + "Chinese,Indonesian,Spanish": 0.11931818181818182, + "Chinese,Indonesian,Malay": 0.22727272727272727, + "Chinese,Filipino,Spanish": 0.10795454545454546, + "Chinese,Filipino,Malay": 0.2897727272727273, + "Chinese,Spanish,Malay": 0.11931818181818182, + "Indonesian,Filipino,Spanish": 0.1534090909090909, + "Indonesian,Filipino,Malay": 0.4375, + "Indonesian,Spanish,Malay": 0.13636363636363635, + "Filipino,Spanish,Malay": 0.1534090909090909 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.17613636363636365, + "English,Vietnamese,Chinese,Filipino": 0.23863636363636365, + "English,Vietnamese,Chinese,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Malay": 0.2215909090909091, + "English,Vietnamese,Indonesian,Filipino": 0.3522727272727273, + "English,Vietnamese,Indonesian,Spanish": 0.11931818181818182, + "English,Vietnamese,Indonesian,Malay": 0.32386363636363635, + "English,Vietnamese,Filipino,Spanish": 0.16477272727272727, + "English,Vietnamese,Filipino,Malay": 0.3977272727272727, + "English,Vietnamese,Spanish,Malay": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino": 0.1534090909090909, + "English,Chinese,Indonesian,Spanish": 0.056818181818181816, + "English,Chinese,Indonesian,Malay": 0.16477272727272727, + "English,Chinese,Filipino,Spanish": 0.07954545454545454, + "English,Chinese,Filipino,Malay": 0.2159090909090909, + "English,Chinese,Spanish,Malay": 0.07386363636363637, + "English,Indonesian,Filipino,Spanish": 0.11931818181818182, + "English,Indonesian,Filipino,Malay": 0.3522727272727273, + "English,Indonesian,Spanish,Malay": 0.10795454545454546, + "English,Filipino,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Filipino": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Filipino,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Filipino,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino,Spanish": 0.125, + "Vietnamese,Indonesian,Filipino,Malay": 0.3465909090909091, + "Vietnamese,Indonesian,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Filipino,Spanish,Malay": 0.14204545454545456, + "Chinese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "Chinese,Indonesian,Filipino,Malay": 0.1875, + "Chinese,Indonesian,Spanish,Malay": 0.0625, + "Chinese,Filipino,Spanish,Malay": 0.07954545454545454, + "Indonesian,Filipino,Spanish,Malay": 0.11363636363636363 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.14204545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.07386363636363637, + "English,Vietnamese,Chinese,Filipino,Malay": 0.1875, + "English,Vietnamese,Chinese,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.2784090909090909, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.10227272727272728, + "English,Vietnamese,Filipino,Spanish,Malay": 0.125, + "English,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino,Malay": 0.13068181818181818, + "English,Chinese,Indonesian,Spanish,Malay": 0.05113636363636364, + "English,Chinese,Filipino,Spanish,Malay": 0.0625, + "English,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.17613636363636365, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0625, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.07954545454545454, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0625, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + } + }, + "AC3_2": 0.37513414927513616, + "AC3_3": 0.2911862154513398, + "AC3_4": 0.2197177821725159, + "AC3_5": 0.1586850648975649, + "AC3_6": 0.10892788352868883, + "AC3_7": 0.07068698345128202 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3786407766990291 + }, + "prompt_2": { + "accuracy": 0.4077669902912621 + }, + "prompt_3": { + "accuracy": 0.3883495145631068 + }, + "prompt_4": { + "accuracy": 0.46601941747572817 + }, + "prompt_5": { + "accuracy": 0.3883495145631068 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5142857142857142 + }, + "prompt_2": { + "accuracy": 0.3047619047619048 + }, + "prompt_3": { + "accuracy": 0.3904761904761905 + }, + "prompt_4": { + "accuracy": 0.5142857142857142 + }, + "prompt_5": { + "accuracy": 0.49523809523809526 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4392523364485981 + }, + "prompt_2": { + "accuracy": 0.34579439252336447 + }, + "prompt_3": { + "accuracy": 0.29906542056074764 + }, + "prompt_4": { + "accuracy": 0.3364485981308411 + }, + "prompt_5": { + "accuracy": 0.42990654205607476 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.37, + "category_acc": { + "brand": 0.1, + "demographics": 1.0, + "biology": 0.4, + "history": 0.2, + "literature": 0.3, + "politics": 0.6, + "culture": 0.5, + "film": 0.2, + "law": 0.4, + "geography": 0.4 + } + }, + "prompt_2": { + "accuracy": 0.35, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.4, + "history": 0.13333333333333333, + "literature": 0.1, + "politics": 0.5, + "culture": 0.4, + "film": 0.4, + "law": 0.4, + "geography": 0.5 + } + }, + "prompt_3": { + "accuracy": 0.4, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.5, + "history": 0.26666666666666666, + "literature": 0.3, + "politics": 0.4, + "culture": 0.4, + "film": 0.6, + "law": 0.2, + "geography": 0.7 + } + }, + "prompt_4": { + "accuracy": 0.41, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.6, + "history": 0.3333333333333333, + "literature": 0.4, + "politics": 0.5, + "culture": 0.3, + "film": 0.3, + "law": 0.4, + "geography": 0.5 + } + }, + "prompt_5": { + "accuracy": 0.39, + "category_acc": { + "brand": 0.2, + "demographics": 0.6, + "biology": 0.5, + "history": 0.4, + "literature": 0.5, + "politics": 0.6, + "culture": 0.3, + "film": 0.1, + "law": 0.5, + "geography": 0.3 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.01634444388111461 + }, + "prompt_2": { + "bleu_score": 0.01662217356239316 + }, + "prompt_3": { + "bleu_score": 0.01430073296845697 + }, + "prompt_4": { + "bleu_score": 0.014849799746178166 + }, + "prompt_5": { + "bleu_score": 0.014231294874622916 + } }, "indommlu": { "prompt_1": -1, @@ -6482,179 +55191,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.07810929577419859 + }, + "prompt_2": { + "bleu_score": 0.061754412763199165 + }, + "prompt_3": { + "bleu_score": 0.05331934507633124 + }, + "prompt_4": { + "bleu_score": 0.07592058403807185 + }, + "prompt_5": { + "bleu_score": 0.057939531759755594 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.03865466093688017 + }, + "prompt_2": { + "bleu_score": 0.01589931359462183 + }, + "prompt_3": { + "bleu_score": 0.014131706213244246 + }, + "prompt_4": { + "bleu_score": 0.03612950415095827 + }, + "prompt_5": { + "bleu_score": 0.018228041408202514 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.10432881759658119 + }, + "prompt_2": { + "bleu_score": 0.052645611125843095 + }, + "prompt_3": { + "bleu_score": 0.038657835173347796 + }, + "prompt_4": { + "bleu_score": 0.046287164209048444 + }, + "prompt_5": { + "bleu_score": 0.11249867231409513 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.068365463195887 + }, + "prompt_2": { + "bleu_score": 0.046361491347707375 + }, + "prompt_3": { + "bleu_score": 0.04408434092353076 + }, + "prompt_4": { + "bleu_score": 0.06364848283241047 + }, + "prompt_5": { + "bleu_score": 0.048744137768210946 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3372228704784131 + }, + "prompt_2": { + "accuracy": 0.3418903150525088 + }, + "prompt_3": { + "accuracy": 0.33255542590431736 + }, + "prompt_4": { + "accuracy": 0.3687281213535589 + }, + "prompt_5": { + "accuracy": 0.38156359393232203 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3415087593850554, + "category_acc": { + "high_school_european_history": 0.4573170731707317, + "business_ethics": 0.36363636363636365, + "clinical_knowledge": 0.25757575757575757, + "medical_genetics": 0.3333333333333333, + "high_school_us_history": 0.43349753694581283, + "high_school_physics": 0.30666666666666664, + "high_school_world_history": 0.3940677966101695, + "virology": 0.28484848484848485, + "high_school_microeconomics": 0.3333333333333333, + "econometrics": 0.2920353982300885, + "college_computer_science": 0.2828282828282828, + "high_school_biology": 0.40129449838187703, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.302491103202847, + "philosophy": 0.3935483870967742, + "professional_medicine": 0.2952029520295203, + "nutrition": 0.35081967213114756, + "global_facts": 0.26262626262626265, + "machine_learning": 0.2702702702702703, + "security_studies": 0.3442622950819672, + "public_relations": 0.3853211009174312, + "professional_psychology": 0.3436988543371522, + "prehistory": 0.32507739938080493, + "anatomy": 0.3283582089552239, + "human_sexuality": 0.3769230769230769, + "college_medicine": 0.31976744186046513, + "high_school_government_and_politics": 0.3854166666666667, + "college_chemistry": 0.30303030303030304, + "logical_fallacies": 0.42592592592592593, + "high_school_geography": 0.3553299492385787, + "elementary_mathematics": 0.26790450928381965, + "human_aging": 0.33783783783783783, + "college_mathematics": 0.36363636363636365, + "high_school_psychology": 0.47610294117647056, + "formal_logic": 0.368, + "high_school_statistics": 0.3023255813953488, + "international_law": 0.25833333333333336, + "high_school_mathematics": 0.26765799256505574, + "high_school_computer_science": 0.3333333333333333, + "conceptual_physics": 0.2863247863247863, + "miscellaneous": 0.48081841432225064, + "high_school_chemistry": 0.24257425742574257, + "marketing": 0.4592274678111588, + "professional_law": 0.2974559686888454, + "management": 0.39215686274509803, + "college_physics": 0.27722772277227725, + "jurisprudence": 0.37383177570093457, + "world_religions": 0.5176470588235295, + "sociology": 0.455, + "us_foreign_policy": 0.37373737373737376, + "high_school_macroeconomics": 0.27249357326478146, + "computer_security": 0.42424242424242425, + "moral_scenarios": 0.24496644295302014, + "moral_disputes": 0.37681159420289856, + "electrical_engineering": 0.2569444444444444, + "astronomy": 0.25165562913907286, + "college_biology": 0.3356643356643357 + } + }, + "prompt_2": { + "accuracy": 0.34472649267071864, + "category_acc": { + "high_school_european_history": 0.38414634146341464, + "business_ethics": 0.36363636363636365, + "clinical_knowledge": 0.2840909090909091, + "medical_genetics": 0.3434343434343434, + "high_school_us_history": 0.3251231527093596, + "high_school_physics": 0.25333333333333335, + "high_school_world_history": 0.3983050847457627, + "virology": 0.296969696969697, + "high_school_microeconomics": 0.34177215189873417, + "econometrics": 0.21238938053097345, + "college_computer_science": 0.26262626262626265, + "high_school_biology": 0.37216828478964403, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.25622775800711745, + "philosophy": 0.4258064516129032, + "professional_medicine": 0.3210332103321033, + "nutrition": 0.3114754098360656, + "global_facts": 0.25252525252525254, + "machine_learning": 0.3333333333333333, + "security_studies": 0.35655737704918034, + "public_relations": 0.44954128440366975, + "professional_psychology": 0.3158756137479542, + "prehistory": 0.3591331269349845, + "anatomy": 0.44029850746268656, + "human_sexuality": 0.34615384615384615, + "college_medicine": 0.3081395348837209, + "high_school_government_and_politics": 0.3697916666666667, + "college_chemistry": 0.2727272727272727, + "logical_fallacies": 0.37037037037037035, + "high_school_geography": 0.45685279187817257, + "elementary_mathematics": 0.2440318302387268, + "human_aging": 0.4099099099099099, + "college_mathematics": 0.26262626262626265, + "high_school_psychology": 0.4522058823529412, + "formal_logic": 0.288, + "high_school_statistics": 0.28837209302325584, + "international_law": 0.4583333333333333, + "high_school_mathematics": 0.241635687732342, + "high_school_computer_science": 0.29292929292929293, + "conceptual_physics": 0.4017094017094017, + "miscellaneous": 0.5690537084398977, + "high_school_chemistry": 0.29207920792079206, + "marketing": 0.3948497854077253, + "professional_law": 0.279191128506197, + "management": 0.4411764705882353, + "college_physics": 0.26732673267326734, + "jurisprudence": 0.42990654205607476, + "world_religions": 0.5529411764705883, + "sociology": 0.42, + "us_foreign_policy": 0.32323232323232326, + "high_school_macroeconomics": 0.2827763496143959, + "computer_security": 0.3939393939393939, + "moral_scenarios": 0.23825503355704697, + "moral_disputes": 0.35942028985507246, + "electrical_engineering": 0.3958333333333333, + "astronomy": 0.3708609271523179, + "college_biology": 0.3706293706293706 + } + }, + "prompt_3": { + "accuracy": 0.3373614587057562, + "category_acc": { + "high_school_european_history": 0.36585365853658536, + "business_ethics": 0.37373737373737376, + "clinical_knowledge": 0.36742424242424243, + "medical_genetics": 0.3434343434343434, + "high_school_us_history": 0.32019704433497537, + "high_school_physics": 0.22666666666666666, + "high_school_world_history": 0.3644067796610169, + "virology": 0.28484848484848485, + "high_school_microeconomics": 0.29535864978902954, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.2727272727272727, + "high_school_biology": 0.40129449838187703, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.31316725978647686, + "philosophy": 0.32903225806451614, + "professional_medicine": 0.25830258302583026, + "nutrition": 0.3770491803278688, + "global_facts": 0.30303030303030304, + "machine_learning": 0.32432432432432434, + "security_studies": 0.32786885245901637, + "public_relations": 0.44036697247706424, + "professional_psychology": 0.32242225859247137, + "prehistory": 0.3560371517027864, + "anatomy": 0.43283582089552236, + "human_sexuality": 0.3153846153846154, + "college_medicine": 0.3372093023255814, + "high_school_government_and_politics": 0.3854166666666667, + "college_chemistry": 0.32323232323232326, + "logical_fallacies": 0.37037037037037035, + "high_school_geography": 0.3756345177664975, + "elementary_mathematics": 0.26790450928381965, + "human_aging": 0.3333333333333333, + "college_mathematics": 0.24242424242424243, + "high_school_psychology": 0.47610294117647056, + "formal_logic": 0.288, + "high_school_statistics": 0.26046511627906976, + "international_law": 0.35, + "high_school_mathematics": 0.24535315985130113, + "high_school_computer_science": 0.31313131313131315, + "conceptual_physics": 0.3162393162393162, + "miscellaneous": 0.5447570332480819, + "high_school_chemistry": 0.24257425742574257, + "marketing": 0.38626609442060084, + "professional_law": 0.27723418134377037, + "management": 0.4803921568627451, + "college_physics": 0.2079207920792079, + "jurisprudence": 0.48598130841121495, + "world_religions": 0.5647058823529412, + "sociology": 0.36, + "us_foreign_policy": 0.41414141414141414, + "high_school_macroeconomics": 0.2647814910025707, + "computer_security": 0.35353535353535354, + "moral_scenarios": 0.2371364653243848, + "moral_disputes": 0.3710144927536232, + "electrical_engineering": 0.3055555555555556, + "astronomy": 0.3443708609271523, + "college_biology": 0.32867132867132864 + } + }, + "prompt_4": { + "accuracy": 0.3532356095816947, + "category_acc": { + "high_school_european_history": 0.4817073170731707, + "business_ethics": 0.30303030303030304, + "clinical_knowledge": 0.3068181818181818, + "medical_genetics": 0.45454545454545453, + "high_school_us_history": 0.49261083743842365, + "high_school_physics": 0.29333333333333333, + "high_school_world_history": 0.4576271186440678, + "virology": 0.28484848484848485, + "high_school_microeconomics": 0.3037974683544304, + "econometrics": 0.2743362831858407, + "college_computer_science": 0.3333333333333333, + "high_school_biology": 0.42394822006472493, + "abstract_algebra": 0.25252525252525254, + "professional_accounting": 0.27402135231316727, + "philosophy": 0.4161290322580645, + "professional_medicine": 0.3062730627306273, + "nutrition": 0.36065573770491804, + "global_facts": 0.24242424242424243, + "machine_learning": 0.27927927927927926, + "security_studies": 0.32786885245901637, + "public_relations": 0.4036697247706422, + "professional_psychology": 0.3911620294599018, + "prehistory": 0.38080495356037153, + "anatomy": 0.4253731343283582, + "human_sexuality": 0.38461538461538464, + "college_medicine": 0.3081395348837209, + "high_school_government_and_politics": 0.4114583333333333, + "college_chemistry": 0.26262626262626265, + "logical_fallacies": 0.43209876543209874, + "high_school_geography": 0.3756345177664975, + "elementary_mathematics": 0.3076923076923077, + "human_aging": 0.38288288288288286, + "college_mathematics": 0.26262626262626265, + "high_school_psychology": 0.4724264705882353, + "formal_logic": 0.192, + "high_school_statistics": 0.29767441860465116, + "international_law": 0.48333333333333334, + "high_school_mathematics": 0.241635687732342, + "high_school_computer_science": 0.40404040404040403, + "conceptual_physics": 0.3034188034188034, + "miscellaneous": 0.46930946291560105, + "high_school_chemistry": 0.3069306930693069, + "marketing": 0.4291845493562232, + "professional_law": 0.299412915851272, + "management": 0.3627450980392157, + "college_physics": 0.24752475247524752, + "jurisprudence": 0.38317757009345793, + "world_religions": 0.5470588235294118, + "sociology": 0.505, + "us_foreign_policy": 0.41414141414141414, + "high_school_macroeconomics": 0.2802056555269923, + "computer_security": 0.4444444444444444, + "moral_scenarios": 0.24720357941834453, + "moral_disputes": 0.3739130434782609, + "electrical_engineering": 0.2777777777777778, + "astronomy": 0.31788079470198677, + "college_biology": 0.2937062937062937 + } + }, + "prompt_5": { + "accuracy": 0.3610296746514122, + "category_acc": { + "high_school_european_history": 0.49390243902439024, + "business_ethics": 0.37373737373737376, + "clinical_knowledge": 0.3106060606060606, + "medical_genetics": 0.3939393939393939, + "high_school_us_history": 0.41379310344827586, + "high_school_physics": 0.26666666666666666, + "high_school_world_history": 0.4194915254237288, + "virology": 0.3333333333333333, + "high_school_microeconomics": 0.2911392405063291, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.37373737373737376, + "high_school_biology": 0.4045307443365696, + "abstract_algebra": 0.25252525252525254, + "professional_accounting": 0.3238434163701068, + "philosophy": 0.4161290322580645, + "professional_medicine": 0.2767527675276753, + "nutrition": 0.39672131147540984, + "global_facts": 0.2828282828282828, + "machine_learning": 0.32432432432432434, + "security_studies": 0.3442622950819672, + "public_relations": 0.3119266055045872, + "professional_psychology": 0.37479541734860883, + "prehistory": 0.4117647058823529, + "anatomy": 0.4701492537313433, + "human_sexuality": 0.3923076923076923, + "college_medicine": 0.313953488372093, + "high_school_government_and_politics": 0.4479166666666667, + "college_chemistry": 0.3838383838383838, + "logical_fallacies": 0.4382716049382716, + "high_school_geography": 0.39593908629441626, + "elementary_mathematics": 0.30238726790450926, + "human_aging": 0.33783783783783783, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.4834558823529412, + "formal_logic": 0.24, + "high_school_statistics": 0.25116279069767444, + "international_law": 0.475, + "high_school_mathematics": 0.2527881040892193, + "high_school_computer_science": 0.3939393939393939, + "conceptual_physics": 0.28205128205128205, + "miscellaneous": 0.5370843989769821, + "high_school_chemistry": 0.297029702970297, + "marketing": 0.4592274678111588, + "professional_law": 0.30528375733855184, + "management": 0.5, + "college_physics": 0.21782178217821782, + "jurisprudence": 0.38317757009345793, + "world_religions": 0.5235294117647059, + "sociology": 0.47, + "us_foreign_policy": 0.48484848484848486, + "high_school_macroeconomics": 0.2853470437017995, + "computer_security": 0.42424242424242425, + "moral_scenarios": 0.22371364653243847, + "moral_disputes": 0.3855072463768116, + "electrical_engineering": 0.3263888888888889, + "astronomy": 0.40397350993377484, + "college_biology": 0.40559440559440557 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3863298662704309 + }, + "prompt_2": { + "accuracy": 0.262258543833581 + }, + "prompt_3": { + "accuracy": 0.2800891530460624 + }, + "prompt_4": { + "accuracy": 0.41901931649331353 + }, + "prompt_5": { + "accuracy": 0.3135215453194651 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.412826899128269, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.5, + "college_programming": 0.38095238095238093, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.43478260869565216, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.14285714285714285, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.25, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.4166666666666667, + "middle_school_biology": 0.6538461538461539, + "middle_school_physics": 0.5833333333333334, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.36666666666666664, + "business_administration": 0.42105263157894735, + "marxism": 0.5833333333333334, + "mao_zedong_thought": 0.5862068965517241, + "education_science": 0.6176470588235294, + "teacher_qualification": 0.5714285714285714, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.5384615384615384, + "middle_school_geography": 0.8823529411764706, + "modern_chinese_history": 0.5357142857142857, + "ideological_and_moral_cultivation": 0.625, + "logic": 0.37037037037037035, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.6428571428571429, + "art_studies": 0.5526315789473685, + "professional_tour_guide": 0.5882352941176471, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.44, + "middle_school_history": 0.5925925925925926, + "civil_servant": 0.23076923076923078, + "sports_science": 0.375, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.4117647058823529, + "accountant": 0.35185185185185186, + "fire_engineer": 0.4166666666666667, + "environmental_impact_assessment_engineer": 0.4722222222222222, + "tax_accountant": 0.2037037037037037, + "physician": 0.3888888888888889 + } + }, + "prompt_2": { + "accuracy": 0.27334993773349936, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.25, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.3333333333333333, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.391304347826087, + "discrete_mathematics": 0.42857142857142855, + "electrical_engineer": 0.19047619047619047, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.13043478260869565, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.125, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.25, + "college_economics": 0.31666666666666665, + "business_administration": 0.23684210526315788, + "marxism": 0.20833333333333334, + "mao_zedong_thought": 0.2413793103448276, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.40816326530612246, + "high_school_politics": 0.25, + "high_school_geography": 0.25, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.375, + "logic": 0.2222222222222222, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.23684210526315788, + "professional_tour_guide": 0.4117647058823529, + "legal_professional": 0.25, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.16, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.25, + "sports_science": 0.375, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.18518518518518517, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.1111111111111111, + "tax_accountant": 0.2777777777777778, + "physician": 0.2777777777777778 + } + }, + "prompt_3": { + "accuracy": 0.25840597758405975, + "category_acc": { + "computer_network": 0.125, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.23809523809523808, + "college_physics": 0.375, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.1724137931034483, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.2692307692307692, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.08, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.23333333333333334, + "business_administration": 0.2631578947368421, + "marxism": 0.5, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.20588235294117646, + "teacher_qualification": 0.30612244897959184, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.125, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.058823529411764705, + "modern_chinese_history": 0.2857142857142857, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.2962962962962963, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.2894736842105263, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.16666666666666666, + "high_school_history": 0.36, + "middle_school_history": 0.2222222222222222, + "civil_servant": 0.3076923076923077, + "sports_science": 0.3333333333333333, + "plant_protection": 0.14814814814814814, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.29411764705882354, + "accountant": 0.2222222222222222, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.2222222222222222, + "physician": 0.18518518518518517 + } + }, + "prompt_4": { + "accuracy": 0.4246575342465753, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.5, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.4523809523809524, + "college_physics": 0.25, + "college_chemistry": 0.41379310344827586, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.09523809523809523, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.5172413793103449, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.6923076923076923, + "middle_school_physics": 0.5833333333333334, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.4, + "business_administration": 0.34210526315789475, + "marxism": 0.6666666666666666, + "mao_zedong_thought": 0.6551724137931034, + "education_science": 0.5882352941176471, + "teacher_qualification": 0.6530612244897959, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.5, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.6470588235294118, + "modern_chinese_history": 0.5, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.37037037037037035, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.6428571428571429, + "art_studies": 0.5526315789473685, + "professional_tour_guide": 0.6470588235294118, + "legal_professional": 0.4642857142857143, + "high_school_chinese": 0.25, + "high_school_history": 0.48, + "middle_school_history": 0.5925925925925926, + "civil_servant": 0.25, + "sports_science": 0.4583333333333333, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.4444444444444444, + "urban_and_rural_planner": 0.39215686274509803, + "accountant": 0.2777777777777778, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.4444444444444444, + "tax_accountant": 0.3148148148148148, + "physician": 0.5 + } + }, + "prompt_5": { + "accuracy": 0.31320049813200496, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.5, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.3333333333333333, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.3103448275862069, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.375, + "high_school_chemistry": 0.25, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.5, + "middle_school_physics": 0.4583333333333333, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.31666666666666665, + "business_administration": 0.2894736842105263, + "marxism": 0.25, + "mao_zedong_thought": 0.2413793103448276, + "education_science": 0.4411764705882353, + "teacher_qualification": 0.3673469387755102, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.25, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.39285714285714285, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.2222222222222222, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.5, + "art_studies": 0.6052631578947368, + "professional_tour_guide": 0.5, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.32, + "middle_school_history": 0.4074074074074074, + "civil_servant": 0.3076923076923077, + "sports_science": 0.375, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.20833333333333334, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.3333333333333333, + "accountant": 0.35185185185185186, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.2037037037037037, + "physician": 0.2962962962962963 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.44802867383512546 + }, + "prompt_2": { + "accuracy": 0.27956989247311825 + }, + "prompt_3": { + "accuracy": 0.2939068100358423 + }, + "prompt_4": { + "accuracy": 0.44802867383512546 + }, + "prompt_5": { + "accuracy": 0.34408602150537637 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.42479709894664136, + "category_acc": { + "agronomy": 0.38461538461538464, + "anatomy": 0.3581081081081081, + "ancient_chinese": 0.3353658536585366, + "arts": 0.725, + "astronomy": 0.34545454545454546, + "business_ethics": 0.45454545454545453, + "chinese_civil_service_exam": 0.30625, + "chinese_driving_rule": 0.5572519083969466, + "chinese_food_culture": 0.4264705882352941, + "chinese_foreign_policy": 0.40186915887850466, + "chinese_history": 0.3684210526315789, + "chinese_literature": 0.5294117647058824, + "chinese_teacher_qualification": 0.5754189944134078, + "clinical_knowledge": 0.379746835443038, + "college_actuarial_science": 0.3490566037735849, + "college_education": 0.5233644859813084, + "college_engineering_hydrology": 0.2641509433962264, + "college_law": 0.39814814814814814, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.4339622641509434, + "college_medicine": 0.42857142857142855, + "computer_science": 0.49019607843137253, + "computer_security": 0.4152046783625731, + "conceptual_physics": 0.4217687074829932, + "construction_project_management": 0.381294964028777, + "economics": 0.44654088050314467, + "education": 0.50920245398773, + "electrical_engineering": 0.38953488372093026, + "elementary_chinese": 0.5436507936507936, + "elementary_commonsense": 0.4696969696969697, + "elementary_information_and_technology": 0.5714285714285714, + "elementary_mathematics": 0.3, + "ethnology": 0.45185185185185184, + "food_science": 0.46153846153846156, + "genetics": 0.3693181818181818, + "global_facts": 0.436241610738255, + "high_school_biology": 0.22485207100591717, + "high_school_chemistry": 0.2878787878787879, + "high_school_geography": 0.4661016949152542, + "high_school_mathematics": 0.21951219512195122, + "high_school_physics": 0.3181818181818182, + "high_school_politics": 0.40559440559440557, + "human_sexuality": 0.48412698412698413, + "international_law": 0.34594594594594597, + "journalism": 0.38953488372093026, + "jurisprudence": 0.3746958637469586, + "legal_and_moral_basis": 0.6728971962616822, + "logical": 0.4146341463414634, + "machine_learning": 0.4016393442622951, + "management": 0.4857142857142857, + "marketing": 0.5166666666666667, + "marxist_theory": 0.5396825396825397, + "modern_chinese": 0.3879310344827586, + "nutrition": 0.4068965517241379, + "philosophy": 0.5904761904761905, + "professional_accounting": 0.46285714285714286, + "professional_law": 0.3127962085308057, + "professional_medicine": 0.27925531914893614, + "professional_psychology": 0.4870689655172414, + "public_relations": 0.40229885057471265, + "security_study": 0.5111111111111111, + "sociology": 0.5, + "sports_science": 0.4, + "traditional_chinese_medicine": 0.4, + "virology": 0.3727810650887574, + "world_history": 0.391304347826087, + "world_religions": 0.5375 + } + }, + "prompt_2": { + "accuracy": 0.2725781384907615, + "category_acc": { + "agronomy": 0.3136094674556213, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.29878048780487804, + "arts": 0.425, + "astronomy": 0.2545454545454545, + "business_ethics": 0.22488038277511962, + "chinese_civil_service_exam": 0.28125, + "chinese_driving_rule": 0.183206106870229, + "chinese_food_culture": 0.33088235294117646, + "chinese_foreign_policy": 0.3364485981308411, + "chinese_history": 0.23529411764705882, + "chinese_literature": 0.3284313725490196, + "chinese_teacher_qualification": 0.25139664804469275, + "clinical_knowledge": 0.24050632911392406, + "college_actuarial_science": 0.22641509433962265, + "college_education": 0.2336448598130841, + "college_engineering_hydrology": 0.2641509433962264, + "college_law": 0.2962962962962963, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.25471698113207547, + "college_medicine": 0.29304029304029305, + "computer_science": 0.29901960784313725, + "computer_security": 0.25146198830409355, + "conceptual_physics": 0.23809523809523808, + "construction_project_management": 0.2733812949640288, + "economics": 0.2641509433962264, + "education": 0.4171779141104294, + "electrical_engineering": 0.22093023255813954, + "elementary_chinese": 0.4523809523809524, + "elementary_commonsense": 0.2777777777777778, + "elementary_information_and_technology": 0.35294117647058826, + "elementary_mathematics": 0.2391304347826087, + "ethnology": 0.22962962962962963, + "food_science": 0.27972027972027974, + "genetics": 0.26136363636363635, + "global_facts": 0.2214765100671141, + "high_school_biology": 0.27218934911242604, + "high_school_chemistry": 0.20454545454545456, + "high_school_geography": 0.1864406779661017, + "high_school_mathematics": 0.23170731707317074, + "high_school_physics": 0.2818181818181818, + "high_school_politics": 0.2937062937062937, + "human_sexuality": 0.21428571428571427, + "international_law": 0.1891891891891892, + "journalism": 0.29069767441860467, + "jurisprudence": 0.2749391727493917, + "legal_and_moral_basis": 0.2757009345794392, + "logical": 0.2601626016260163, + "machine_learning": 0.18032786885245902, + "management": 0.2857142857142857, + "marketing": 0.2611111111111111, + "marxist_theory": 0.23809523809523808, + "modern_chinese": 0.2672413793103448, + "nutrition": 0.2482758620689655, + "philosophy": 0.22857142857142856, + "professional_accounting": 0.26285714285714284, + "professional_law": 0.23696682464454977, + "professional_medicine": 0.2712765957446808, + "professional_psychology": 0.25862068965517243, + "public_relations": 0.21839080459770116, + "security_study": 0.2962962962962963, + "sociology": 0.24778761061946902, + "sports_science": 0.32727272727272727, + "traditional_chinese_medicine": 0.2864864864864865, + "virology": 0.2603550295857988, + "world_history": 0.2732919254658385, + "world_religions": 0.36875 + } + }, + "prompt_3": { + "accuracy": 0.2737869107235365, + "category_acc": { + "agronomy": 0.23668639053254437, + "anatomy": 0.23648648648648649, + "ancient_chinese": 0.25, + "arts": 0.40625, + "astronomy": 0.2727272727272727, + "business_ethics": 0.2583732057416268, + "chinese_civil_service_exam": 0.28125, + "chinese_driving_rule": 0.22137404580152673, + "chinese_food_culture": 0.2867647058823529, + "chinese_foreign_policy": 0.24299065420560748, + "chinese_history": 0.25696594427244585, + "chinese_literature": 0.30392156862745096, + "chinese_teacher_qualification": 0.2569832402234637, + "clinical_knowledge": 0.23628691983122363, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.3364485981308411, + "college_engineering_hydrology": 0.3584905660377358, + "college_law": 0.2777777777777778, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.29245283018867924, + "college_medicine": 0.23076923076923078, + "computer_science": 0.3284313725490196, + "computer_security": 0.24561403508771928, + "conceptual_physics": 0.2925170068027211, + "construction_project_management": 0.30935251798561153, + "economics": 0.23270440251572327, + "education": 0.2883435582822086, + "electrical_engineering": 0.31976744186046513, + "elementary_chinese": 0.39285714285714285, + "elementary_commonsense": 0.32323232323232326, + "elementary_information_and_technology": 0.3445378151260504, + "elementary_mathematics": 0.25217391304347825, + "ethnology": 0.2962962962962963, + "food_science": 0.2727272727272727, + "genetics": 0.2556818181818182, + "global_facts": 0.2550335570469799, + "high_school_biology": 0.20118343195266272, + "high_school_chemistry": 0.25, + "high_school_geography": 0.2711864406779661, + "high_school_mathematics": 0.18902439024390244, + "high_school_physics": 0.3181818181818182, + "high_school_politics": 0.2517482517482518, + "human_sexuality": 0.23809523809523808, + "international_law": 0.2972972972972973, + "journalism": 0.2558139534883721, + "jurisprudence": 0.2871046228710462, + "legal_and_moral_basis": 0.32710280373831774, + "logical": 0.2764227642276423, + "machine_learning": 0.19672131147540983, + "management": 0.23333333333333334, + "marketing": 0.3055555555555556, + "marxist_theory": 0.26455026455026454, + "modern_chinese": 0.23275862068965517, + "nutrition": 0.2413793103448276, + "philosophy": 0.22857142857142856, + "professional_accounting": 0.24571428571428572, + "professional_law": 0.2559241706161137, + "professional_medicine": 0.23138297872340424, + "professional_psychology": 0.2672413793103448, + "public_relations": 0.26436781609195403, + "security_study": 0.32592592592592595, + "sociology": 0.26991150442477874, + "sports_science": 0.2606060606060606, + "traditional_chinese_medicine": 0.3027027027027027, + "virology": 0.21301775147928995, + "world_history": 0.3105590062111801, + "world_religions": 0.3625 + } + }, + "prompt_4": { + "accuracy": 0.43360386807114487, + "category_acc": { + "agronomy": 0.33727810650887574, + "anatomy": 0.3108108108108108, + "ancient_chinese": 0.31097560975609756, + "arts": 0.75625, + "astronomy": 0.34545454545454546, + "business_ethics": 0.37799043062200954, + "chinese_civil_service_exam": 0.34375, + "chinese_driving_rule": 0.6183206106870229, + "chinese_food_culture": 0.49264705882352944, + "chinese_foreign_policy": 0.4672897196261682, + "chinese_history": 0.47987616099071206, + "chinese_literature": 0.553921568627451, + "chinese_teacher_qualification": 0.5307262569832403, + "clinical_knowledge": 0.3459915611814346, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.5233644859813084, + "college_engineering_hydrology": 0.42452830188679247, + "college_law": 0.35185185185185186, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.3516483516483517, + "computer_science": 0.46078431372549017, + "computer_security": 0.4093567251461988, + "conceptual_physics": 0.40816326530612246, + "construction_project_management": 0.28776978417266186, + "economics": 0.49056603773584906, + "education": 0.4785276073619632, + "electrical_engineering": 0.4011627906976744, + "elementary_chinese": 0.5277777777777778, + "elementary_commonsense": 0.51010101010101, + "elementary_information_and_technology": 0.592436974789916, + "elementary_mathematics": 0.3130434782608696, + "ethnology": 0.5259259259259259, + "food_science": 0.40559440559440557, + "genetics": 0.3977272727272727, + "global_facts": 0.4966442953020134, + "high_school_biology": 0.3076923076923077, + "high_school_chemistry": 0.26515151515151514, + "high_school_geography": 0.4152542372881356, + "high_school_mathematics": 0.3170731707317073, + "high_school_physics": 0.33636363636363636, + "high_school_politics": 0.3776223776223776, + "human_sexuality": 0.42063492063492064, + "international_law": 0.3621621621621622, + "journalism": 0.4941860465116279, + "jurisprudence": 0.39902676399026765, + "legal_and_moral_basis": 0.7242990654205608, + "logical": 0.3902439024390244, + "machine_learning": 0.3524590163934426, + "management": 0.5333333333333333, + "marketing": 0.4777777777777778, + "marxist_theory": 0.5343915343915344, + "modern_chinese": 0.3448275862068966, + "nutrition": 0.45517241379310347, + "philosophy": 0.5523809523809524, + "professional_accounting": 0.49142857142857144, + "professional_law": 0.32701421800947866, + "professional_medicine": 0.2978723404255319, + "professional_psychology": 0.5387931034482759, + "public_relations": 0.46551724137931033, + "security_study": 0.5037037037037037, + "sociology": 0.4469026548672566, + "sports_science": 0.46060606060606063, + "traditional_chinese_medicine": 0.35135135135135137, + "virology": 0.40236686390532544, + "world_history": 0.45962732919254656, + "world_religions": 0.60625 + } + }, + "prompt_5": { + "accuracy": 0.3248143671213953, + "category_acc": { + "agronomy": 0.28402366863905326, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.27439024390243905, + "arts": 0.55625, + "astronomy": 0.3090909090909091, + "business_ethics": 0.21052631578947367, + "chinese_civil_service_exam": 0.25625, + "chinese_driving_rule": 0.3893129770992366, + "chinese_food_culture": 0.4117647058823529, + "chinese_foreign_policy": 0.2616822429906542, + "chinese_history": 0.3622291021671827, + "chinese_literature": 0.39705882352941174, + "chinese_teacher_qualification": 0.4022346368715084, + "clinical_knowledge": 0.26582278481012656, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.35514018691588783, + "college_engineering_hydrology": 0.330188679245283, + "college_law": 0.23148148148148148, + "college_mathematics": 0.34285714285714286, + "college_medical_statistics": 0.3113207547169811, + "college_medicine": 0.23443223443223443, + "computer_science": 0.37745098039215685, + "computer_security": 0.3157894736842105, + "conceptual_physics": 0.272108843537415, + "construction_project_management": 0.302158273381295, + "economics": 0.3081761006289308, + "education": 0.3496932515337423, + "electrical_engineering": 0.23837209302325582, + "elementary_chinese": 0.5357142857142857, + "elementary_commonsense": 0.35858585858585856, + "elementary_information_and_technology": 0.4117647058823529, + "elementary_mathematics": 0.32608695652173914, + "ethnology": 0.362962962962963, + "food_science": 0.3986013986013986, + "genetics": 0.2727272727272727, + "global_facts": 0.40268456375838924, + "high_school_biology": 0.22485207100591717, + "high_school_chemistry": 0.2196969696969697, + "high_school_geography": 0.3898305084745763, + "high_school_mathematics": 0.21341463414634146, + "high_school_physics": 0.3090909090909091, + "high_school_politics": 0.2517482517482518, + "human_sexuality": 0.2777777777777778, + "international_law": 0.31891891891891894, + "journalism": 0.36627906976744184, + "jurisprudence": 0.35766423357664234, + "legal_and_moral_basis": 0.4158878504672897, + "logical": 0.22764227642276422, + "machine_learning": 0.3114754098360656, + "management": 0.3333333333333333, + "marketing": 0.3277777777777778, + "marxist_theory": 0.25925925925925924, + "modern_chinese": 0.28448275862068967, + "nutrition": 0.2689655172413793, + "philosophy": 0.47619047619047616, + "professional_accounting": 0.28, + "professional_law": 0.26066350710900477, + "professional_medicine": 0.27925531914893614, + "professional_psychology": 0.3922413793103448, + "public_relations": 0.3045977011494253, + "security_study": 0.3111111111111111, + "sociology": 0.3185840707964602, + "sports_science": 0.3090909090909091, + "traditional_chinese_medicine": 0.2756756756756757, + "virology": 0.26627218934911245, + "world_history": 0.37267080745341613, + "world_religions": 0.46875 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.30303030303030304 + }, + "prompt_2": { + "accuracy": 0.2727272727272727 + }, + "prompt_3": { + "accuracy": 0.18181818181818182 + }, + "prompt_4": { + "accuracy": 0.3333333333333333 + }, + "prompt_5": { + "accuracy": 0.3333333333333333 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.18636363636363637 + }, + "prompt_2": { + "accuracy": 0.19772727272727272 + }, + "prompt_3": { + "accuracy": 0.175 + }, + "prompt_4": { + "accuracy": 0.2409090909090909 + }, + "prompt_5": { + "accuracy": 0.30227272727272725 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.31728813559322033 + }, + "prompt_2": { + "accuracy": 0.3335593220338983 + }, + "prompt_3": { + "accuracy": 0.35491525423728815 + }, + "prompt_4": { + "accuracy": 0.3410169491525424 + }, + "prompt_5": { + "accuracy": 0.32372881355932204 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6645474943904264 + }, + "prompt_2": { + "accuracy": 0.450261780104712 + }, + "prompt_3": { + "accuracy": 0.3612565445026178 + }, + "prompt_4": { + "accuracy": 0.6578160059835453 + }, + "prompt_5": { + "accuracy": 0.4525056095736724 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5242528172464478 + }, + "prompt_2": { + "accuracy": 0.4781969622733954 + }, + "prompt_3": { + "accuracy": 0.5923566878980892 + }, + "prompt_4": { + "accuracy": 0.5570798628123469 + }, + "prompt_5": { + "accuracy": 0.4977951984321411 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.12933706932140868, + "rouge2": 0.012207361661244972, + "rougeL": 0.08526526953428971, + "avg_rouge": 0.0756032335056478 + }, + "prompt_2": { + "rouge1": 0.13016811564594952, + "rouge2": 0.01214413167508352, + "rougeL": 0.08717258955549251, + "avg_rouge": 0.07649494562550851 + }, + "prompt_3": { + "rouge1": 0.13117867342187914, + "rouge2": 0.012449838080703138, + "rougeL": 0.08823936032809573, + "avg_rouge": 0.077289290610226 + }, + "prompt_4": { + "rouge1": 0.12115514469567917, + "rouge2": 0.01157424935126332, + "rougeL": 0.07908959607732913, + "avg_rouge": 0.07060633004142387 + }, + "prompt_5": { + "rouge1": 0.14321997401693895, + "rouge2": 0.018268063817769537, + "rougeL": 0.10068806932371115, + "avg_rouge": 0.08739203571947322 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.07832483411444602, + "rouge2": 0.005988932397215633, + "rougeL": 0.06069709779687164, + "avg_rouge": 0.0483369547695111 + }, + "prompt_2": { + "rouge1": 0.08840968599027747, + "rouge2": 0.007847027833629934, + "rougeL": 0.06665337777140212, + "avg_rouge": 0.05430336386510318 + }, + "prompt_3": { + "rouge1": 0.07807049229756861, + "rouge2": 0.0070257470027871504, + "rougeL": 0.05822044489757202, + "avg_rouge": 0.047772228065975926 + }, + "prompt_4": { + "rouge1": 0.09461172251538741, + "rouge2": 0.010162988565801313, + "rougeL": 0.07116584463738221, + "avg_rouge": 0.05864685190619031 + }, + "prompt_5": { + "rouge1": 0.08925434275789769, + "rouge2": 0.011234932681211458, + "rougeL": 0.07260172083837872, + "avg_rouge": 0.05769699875916262 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5963302752293578 + }, + "prompt_2": { + "accuracy": 0.694954128440367 + }, + "prompt_3": { + "accuracy": 0.7912844036697247 + }, + "prompt_4": { + "accuracy": 0.6685779816513762 + }, + "prompt_5": { + "accuracy": 0.5240825688073395 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.32502396931927136 + }, + "prompt_2": { + "accuracy": 0.46021093000958774 + }, + "prompt_3": { + "accuracy": 0.4956855225311601 + }, + "prompt_4": { + "accuracy": 0.36145733461169705 + }, + "prompt_5": { + "accuracy": 0.4813039309683605 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5045 + }, + "prompt_2": { + "accuracy": 0.516 + }, + "prompt_3": { + "accuracy": 0.515 + }, + "prompt_4": { + "accuracy": 0.4945 + }, + "prompt_5": { + "accuracy": 0.485 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3755 + }, + "prompt_2": { + "accuracy": 0.347 + }, + "prompt_3": { + "accuracy": 0.3675 + }, + "prompt_4": { + "accuracy": 0.341 + }, + "prompt_5": { + "accuracy": 0.3465 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.527 + }, + "prompt_2": { + "accuracy": 0.5035 + }, + "prompt_3": { + "accuracy": 0.497 + }, + "prompt_4": { + "accuracy": 0.5115 + }, + "prompt_5": { + "accuracy": 0.521 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6056338028169014 + }, + "prompt_2": { + "accuracy": 0.5070422535211268 + }, + "prompt_3": { + "accuracy": 0.5211267605633803 + }, + "prompt_4": { + "accuracy": 0.4647887323943662 + }, + "prompt_5": { + "accuracy": 0.5492957746478874 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.48014440433212996 + }, + "prompt_2": { + "accuracy": 0.5126353790613718 + }, + "prompt_3": { + "accuracy": 0.5379061371841155 + }, + "prompt_4": { + "accuracy": 0.5018050541516246 + }, + "prompt_5": { + "accuracy": 0.5126353790613718 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5318627450980392 + }, + "prompt_2": { + "accuracy": 0.5073529411764706 + }, + "prompt_3": { + "accuracy": 0.5024509803921569 + }, + "prompt_4": { + "accuracy": 0.4950980392156863 + }, + "prompt_5": { + "accuracy": 0.4803921568627451 + } } }, "five_shot": { @@ -6764,55 +56663,1735 @@ "model_link": "https://huggingface.co/baichuan-inc/Baichuan-13B-Chat", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3704761904761904, + "language_acc": { + "Malay": 0.2866666666666667, + "English": 0.52, + "Vietnamese": 0.21333333333333335, + "Spanish": 0.4, + "Indonesian": 0.2866666666666667, + "Filipino": 0.34, + "Chinese": 0.5466666666666666 + }, + "consistency_score_2": 0.33269841269841277, + "consistency_score_3": 0.13904761904761903, + "consistency_score_4": 0.06933333333333333, + "consistency_score_5": 0.040634920634920635, + "consistency_score_6": 0.026666666666666665, + "consistency_score_7": 0.02, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.38666666666666666, + "Malay,Vietnamese": 0.30666666666666664, + "Malay,Spanish": 0.31333333333333335, + "Malay,Indonesian": 0.32, + "Malay,Filipino": 0.34, + "Malay,Chinese": 0.2866666666666667, + "English,Vietnamese": 0.24666666666666667, + "English,Spanish": 0.42, + "English,Indonesian": 0.36, + "English,Filipino": 0.32, + "English,Chinese": 0.5533333333333333, + "Vietnamese,Spanish": 0.2866666666666667, + "Vietnamese,Indonesian": 0.28, + "Vietnamese,Filipino": 0.2733333333333333, + "Vietnamese,Chinese": 0.24666666666666667, + "Spanish,Indonesian": 0.36, + "Spanish,Filipino": 0.38, + "Spanish,Chinese": 0.44, + "Indonesian,Filipino": 0.2866666666666667, + "Indonesian,Chinese": 0.32, + "Filipino,Chinese": 0.26 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.11333333333333333, + "Malay,English,Spanish": 0.18666666666666668, + "Malay,English,Indonesian": 0.16666666666666666, + "Malay,English,Filipino": 0.15333333333333332, + "Malay,English,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Spanish": 0.12666666666666668, + "Malay,Vietnamese,Indonesian": 0.12, + "Malay,Vietnamese,Filipino": 0.14, + "Malay,Vietnamese,Chinese": 0.08666666666666667, + "Malay,Spanish,Indonesian": 0.16, + "Malay,Spanish,Filipino": 0.14666666666666667, + "Malay,Spanish,Chinese": 0.14666666666666667, + "Malay,Indonesian,Filipino": 0.12, + "Malay,Indonesian,Chinese": 0.12, + "Malay,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish": 0.12, + "English,Vietnamese,Indonesian": 0.09333333333333334, + "English,Vietnamese,Filipino": 0.1, + "English,Vietnamese,Chinese": 0.12, + "English,Spanish,Indonesian": 0.19333333333333333, + "English,Spanish,Filipino": 0.16, + "English,Spanish,Chinese": 0.3, + "English,Indonesian,Filipino": 0.12, + "English,Indonesian,Chinese": 0.24, + "English,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Indonesian": 0.09333333333333334, + "Vietnamese,Spanish,Filipino": 0.11333333333333333, + "Vietnamese,Spanish,Chinese": 0.10666666666666667, + "Vietnamese,Indonesian,Filipino": 0.1, + "Vietnamese,Indonesian,Chinese": 0.09333333333333334, + "Vietnamese,Filipino,Chinese": 0.06666666666666667, + "Spanish,Indonesian,Filipino": 0.14666666666666667, + "Spanish,Indonesian,Chinese": 0.18666666666666668, + "Spanish,Filipino,Chinese": 0.16, + "Indonesian,Filipino,Chinese": 0.08 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.07333333333333333, + "Malay,English,Vietnamese,Indonesian": 0.06, + "Malay,English,Vietnamese,Filipino": 0.06, + "Malay,English,Vietnamese,Chinese": 0.06666666666666667, + "Malay,English,Spanish,Indonesian": 0.1, + "Malay,English,Spanish,Filipino": 0.08, + "Malay,English,Spanish,Chinese": 0.13333333333333333, + "Malay,English,Indonesian,Filipino": 0.06, + "Malay,English,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Filipino,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.06666666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.06, + "Malay,Vietnamese,Indonesian,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Filipino,Chinese": 0.03333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.07333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.1, + "Malay,Spanish,Filipino,Chinese": 0.06666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.04, + "English,Vietnamese,Spanish,Indonesian": 0.05333333333333334, + "English,Vietnamese,Spanish,Filipino": 0.05333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.07333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.04, + "English,Vietnamese,Indonesian,Chinese": 0.06666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.04666666666666667, + "English,Spanish,Indonesian,Filipino": 0.06666666666666667, + "English,Spanish,Indonesian,Chinese": 0.16, + "English,Spanish,Filipino,Chinese": 0.11333333333333333, + "English,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.04666666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.04666666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.04, + "Vietnamese,Indonesian,Filipino,Chinese": 0.03333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.06 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.04666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.04, + "Malay,English,Vietnamese,Spanish,Chinese": 0.05333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.02666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.05333333333333334, + "Malay,English,Vietnamese,Filipino,Chinese": 0.02666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.04, + "Malay,English,Spanish,Indonesian,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Filipino,Chinese": 0.06, + "Malay,English,Indonesian,Filipino,Chinese": 0.04, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.04, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.04666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.02, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.02, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.04, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.02, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.03333333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.02666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.06, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.02, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.04666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.02, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.02, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.04, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02 + } + }, + "AC3_2": 0.3505725034436411, + "AC3_3": 0.20220382728563774, + "AC3_4": 0.11680669839988873, + "AC3_5": 0.07323699207632055, + "AC3_6": 0.04975219822887934, + "AC3_7": 0.03795121950247591 + }, + "prompt_2": { + "overall_acc": 0.36095238095238097, + "language_acc": { + "Malay": 0.22666666666666666, + "English": 0.43333333333333335, + "Vietnamese": 0.26666666666666666, + "Spanish": 0.44666666666666666, + "Indonesian": 0.30666666666666664, + "Filipino": 0.29333333333333333, + "Chinese": 0.5533333333333333 + }, + "consistency_score_2": 0.3060317460317461, + "consistency_score_3": 0.11752380952380952, + "consistency_score_4": 0.05638095238095238, + "consistency_score_5": 0.03333333333333333, + "consistency_score_6": 0.023809523809523808, + "consistency_score_7": 0.02, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.26, + "Malay,Vietnamese": 0.24, + "Malay,Spanish": 0.2866666666666667, + "Malay,Indonesian": 0.34, + "Malay,Filipino": 0.2866666666666667, + "Malay,Chinese": 0.26, + "English,Vietnamese": 0.34, + "English,Spanish": 0.4533333333333333, + "English,Indonesian": 0.29333333333333333, + "English,Filipino": 0.22, + "English,Chinese": 0.5066666666666667, + "Vietnamese,Spanish": 0.28, + "Vietnamese,Indonesian": 0.24666666666666667, + "Vietnamese,Filipino": 0.2733333333333333, + "Vietnamese,Chinese": 0.24, + "Spanish,Indonesian": 0.2866666666666667, + "Spanish,Filipino": 0.26, + "Spanish,Chinese": 0.43333333333333335, + "Indonesian,Filipino": 0.32, + "Indonesian,Chinese": 0.32666666666666666, + "Filipino,Chinese": 0.2733333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.09333333333333334, + "Malay,English,Spanish": 0.10666666666666667, + "Malay,English,Indonesian": 0.12666666666666668, + "Malay,English,Filipino": 0.06666666666666667, + "Malay,English,Chinese": 0.14, + "Malay,Vietnamese,Spanish": 0.09333333333333334, + "Malay,Vietnamese,Indonesian": 0.09333333333333334, + "Malay,Vietnamese,Filipino": 0.10666666666666667, + "Malay,Vietnamese,Chinese": 0.05333333333333334, + "Malay,Spanish,Indonesian": 0.11333333333333333, + "Malay,Spanish,Filipino": 0.10666666666666667, + "Malay,Spanish,Chinese": 0.14666666666666667, + "Malay,Indonesian,Filipino": 0.10666666666666667, + "Malay,Indonesian,Chinese": 0.11333333333333333, + "Malay,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish": 0.14666666666666667, + "English,Vietnamese,Indonesian": 0.10666666666666667, + "English,Vietnamese,Filipino": 0.09333333333333334, + "English,Vietnamese,Chinese": 0.15333333333333332, + "English,Spanish,Indonesian": 0.17333333333333334, + "English,Spanish,Filipino": 0.11333333333333333, + "English,Spanish,Chinese": 0.2733333333333333, + "English,Indonesian,Filipino": 0.09333333333333334, + "English,Indonesian,Chinese": 0.2, + "English,Filipino,Chinese": 0.12666666666666668, + "Vietnamese,Spanish,Indonesian": 0.09333333333333334, + "Vietnamese,Spanish,Filipino": 0.09333333333333334, + "Vietnamese,Spanish,Chinese": 0.1, + "Vietnamese,Indonesian,Filipino": 0.09333333333333334, + "Vietnamese,Indonesian,Chinese": 0.07333333333333333, + "Vietnamese,Filipino,Chinese": 0.08666666666666667, + "Spanish,Indonesian,Filipino": 0.10666666666666667, + "Spanish,Indonesian,Chinese": 0.17333333333333334, + "Spanish,Filipino,Chinese": 0.14666666666666667, + "Indonesian,Filipino,Chinese": 0.10666666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.04, + "Malay,English,Vietnamese,Indonesian": 0.05333333333333334, + "Malay,English,Vietnamese,Filipino": 0.04666666666666667, + "Malay,English,Vietnamese,Chinese": 0.04, + "Malay,English,Spanish,Indonesian": 0.06666666666666667, + "Malay,English,Spanish,Filipino": 0.04, + "Malay,English,Spanish,Chinese": 0.06666666666666667, + "Malay,English,Indonesian,Filipino": 0.04, + "Malay,English,Indonesian,Chinese": 0.08, + "Malay,English,Filipino,Chinese": 0.04666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.04, + "Malay,Vietnamese,Spanish,Filipino": 0.05333333333333334, + "Malay,Vietnamese,Spanish,Chinese": 0.03333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.05333333333333334, + "Malay,Vietnamese,Indonesian,Chinese": 0.02666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.04666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.04, + "Malay,Spanish,Indonesian,Chinese": 0.07333333333333333, + "Malay,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.04, + "English,Vietnamese,Spanish,Indonesian": 0.07333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.05333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.06666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.05333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.06666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.06, + "English,Spanish,Indonesian,Filipino": 0.06, + "English,Spanish,Indonesian,Chinese": 0.12666666666666668, + "English,Spanish,Filipino,Chinese": 0.08, + "English,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.04, + "Vietnamese,Spanish,Indonesian,Chinese": 0.05333333333333334, + "Vietnamese,Spanish,Filipino,Chinese": 0.06, + "Vietnamese,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.06 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.03333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.02666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.02, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.03333333333333333, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.02666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.03333333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.02666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.04666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.03333333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.03333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.02666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.02, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.03333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.03333333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.04, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.04666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.02, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.02, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.02, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02 + } + }, + "AC3_2": 0.331230933066089, + "AC3_3": 0.17731498328110976, + "AC3_4": 0.09752798241561265, + "AC3_5": 0.06103059579772573, + "AC3_6": 0.04467232436368429, + "AC3_7": 0.037899999990051246 + }, + "prompt_3": { + "overall_acc": 0.379047619047619, + "language_acc": { + "Malay": 0.29333333333333333, + "English": 0.4666666666666667, + "Vietnamese": 0.29333333333333333, + "Spanish": 0.44666666666666666, + "Indonesian": 0.32, + "Filipino": 0.3, + "Chinese": 0.5333333333333333 + }, + "consistency_score_2": 0.31269841269841275, + "consistency_score_3": 0.11942857142857143, + "consistency_score_4": 0.05447619047619047, + "consistency_score_5": 0.02857142857142857, + "consistency_score_6": 0.017142857142857144, + "consistency_score_7": 0.013333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.23333333333333334, + "Malay,Vietnamese": 0.24666666666666667, + "Malay,Spanish": 0.30666666666666664, + "Malay,Indonesian": 0.26666666666666666, + "Malay,Filipino": 0.29333333333333333, + "Malay,Chinese": 0.2866666666666667, + "English,Vietnamese": 0.28, + "English,Spanish": 0.42, + "English,Indonesian": 0.3333333333333333, + "English,Filipino": 0.35333333333333333, + "English,Chinese": 0.44666666666666666, + "Vietnamese,Spanish": 0.3333333333333333, + "Vietnamese,Indonesian": 0.28, + "Vietnamese,Filipino": 0.29333333333333333, + "Vietnamese,Chinese": 0.31333333333333335, + "Spanish,Indonesian": 0.32, + "Spanish,Filipino": 0.28, + "Spanish,Chinese": 0.37333333333333335, + "Indonesian,Filipino": 0.31333333333333335, + "Indonesian,Chinese": 0.26666666666666666, + "Filipino,Chinese": 0.32666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.06666666666666667, + "Malay,English,Spanish": 0.12666666666666668, + "Malay,English,Indonesian": 0.08, + "Malay,English,Filipino": 0.11333333333333333, + "Malay,English,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Spanish": 0.11333333333333333, + "Malay,Vietnamese,Indonesian": 0.08666666666666667, + "Malay,Vietnamese,Filipino": 0.08666666666666667, + "Malay,Vietnamese,Chinese": 0.08666666666666667, + "Malay,Spanish,Indonesian": 0.09333333333333334, + "Malay,Spanish,Filipino": 0.08, + "Malay,Spanish,Chinese": 0.12666666666666668, + "Malay,Indonesian,Filipino": 0.12666666666666668, + "Malay,Indonesian,Chinese": 0.06, + "Malay,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish": 0.13333333333333333, + "English,Vietnamese,Indonesian": 0.1, + "English,Vietnamese,Filipino": 0.12666666666666668, + "English,Vietnamese,Chinese": 0.14666666666666667, + "English,Spanish,Indonesian": 0.15333333333333332, + "English,Spanish,Filipino": 0.16, + "English,Spanish,Chinese": 0.25333333333333335, + "English,Indonesian,Filipino": 0.16666666666666666, + "English,Indonesian,Chinese": 0.16666666666666666, + "English,Filipino,Chinese": 0.19333333333333333, + "Vietnamese,Spanish,Indonesian": 0.10666666666666667, + "Vietnamese,Spanish,Filipino": 0.09333333333333334, + "Vietnamese,Spanish,Chinese": 0.12666666666666668, + "Vietnamese,Indonesian,Filipino": 0.09333333333333334, + "Vietnamese,Indonesian,Chinese": 0.10666666666666667, + "Vietnamese,Filipino,Chinese": 0.1, + "Spanish,Indonesian,Filipino": 0.11333333333333333, + "Spanish,Indonesian,Chinese": 0.12666666666666668, + "Spanish,Filipino,Chinese": 0.12, + "Indonesian,Filipino,Chinese": 0.12 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.04, + "Malay,English,Vietnamese,Indonesian": 0.02666666666666667, + "Malay,English,Vietnamese,Filipino": 0.04666666666666667, + "Malay,English,Vietnamese,Chinese": 0.04, + "Malay,English,Spanish,Indonesian": 0.04, + "Malay,English,Spanish,Filipino": 0.06, + "Malay,English,Spanish,Chinese": 0.08666666666666667, + "Malay,English,Indonesian,Filipino": 0.06, + "Malay,English,Indonesian,Chinese": 0.04666666666666667, + "Malay,English,Filipino,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.04666666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.03333333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.04, + "Malay,Vietnamese,Indonesian,Filipino": 0.04666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.02, + "Malay,Vietnamese,Filipino,Chinese": 0.03333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.04666666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.03333333333333333, + "Malay,Spanish,Filipino,Chinese": 0.05333333333333334, + "Malay,Indonesian,Filipino,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.04666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.04, + "English,Vietnamese,Spanish,Chinese": 0.09333333333333334, + "English,Vietnamese,Indonesian,Filipino": 0.06, + "English,Vietnamese,Indonesian,Chinese": 0.06666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.07333333333333333, + "English,Spanish,Indonesian,Filipino": 0.08, + "English,Spanish,Indonesian,Chinese": 0.10666666666666667, + "English,Spanish,Filipino,Chinese": 0.1, + "English,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino": 0.03333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.05333333333333334, + "Vietnamese,Spanish,Filipino,Chinese": 0.04, + "Vietnamese,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.013333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino": 0.02, + "Malay,English,Vietnamese,Spanish,Chinese": 0.03333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.02666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.02, + "Malay,English,Vietnamese,Filipino,Chinese": 0.02666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.03333333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.03333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.04, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.02, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.013333333333333334, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.02, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.02, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.02, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.04, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.03333333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.04, + "English,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.013333333333333334, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.013333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.02, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.02, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334 + } + }, + "AC3_2": 0.3426910552619834, + "AC3_3": 0.1816300016012069, + "AC3_4": 0.09526152813937863, + "AC3_5": 0.05313751667588248, + "AC3_6": 0.0328021977939184, + "AC3_7": 0.025760517792787575 + }, + "prompt_4": { + "overall_acc": 0.3923809523809524, + "language_acc": { + "Malay": 0.31333333333333335, + "English": 0.5, + "Vietnamese": 0.30666666666666664, + "Spanish": 0.42, + "Indonesian": 0.3466666666666667, + "Filipino": 0.2866666666666667, + "Chinese": 0.5733333333333334 + }, + "consistency_score_2": 0.34285714285714286, + "consistency_score_3": 0.15809523809523815, + "consistency_score_4": 0.09409523809523809, + "consistency_score_5": 0.06634920634920635, + "consistency_score_6": 0.05047619047619047, + "consistency_score_7": 0.04, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.32, + "Malay,Vietnamese": 0.32, + "Malay,Spanish": 0.38666666666666666, + "Malay,Indonesian": 0.30666666666666664, + "Malay,Filipino": 0.29333333333333333, + "Malay,Chinese": 0.36666666666666664, + "English,Vietnamese": 0.36, + "English,Spanish": 0.38, + "English,Indonesian": 0.36, + "English,Filipino": 0.37333333333333335, + "English,Chinese": 0.5533333333333333, + "Vietnamese,Spanish": 0.34, + "Vietnamese,Indonesian": 0.2866666666666667, + "Vietnamese,Filipino": 0.30666666666666664, + "Vietnamese,Chinese": 0.3, + "Spanish,Indonesian": 0.25333333333333335, + "Spanish,Filipino": 0.32, + "Spanish,Chinese": 0.42, + "Indonesian,Filipino": 0.24666666666666667, + "Indonesian,Chinese": 0.35333333333333333, + "Filipino,Chinese": 0.35333333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.15333333333333332, + "Malay,English,Spanish": 0.19333333333333333, + "Malay,English,Indonesian": 0.13333333333333333, + "Malay,English,Filipino": 0.14666666666666667, + "Malay,English,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Spanish": 0.17333333333333334, + "Malay,Vietnamese,Indonesian": 0.12666666666666668, + "Malay,Vietnamese,Filipino": 0.12, + "Malay,Vietnamese,Chinese": 0.14666666666666667, + "Malay,Spanish,Indonesian": 0.14, + "Malay,Spanish,Filipino": 0.14, + "Malay,Spanish,Chinese": 0.20666666666666667, + "Malay,Indonesian,Filipino": 0.09333333333333334, + "Malay,Indonesian,Chinese": 0.17333333333333334, + "Malay,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish": 0.15333333333333332, + "English,Vietnamese,Indonesian": 0.14666666666666667, + "English,Vietnamese,Filipino": 0.16, + "English,Vietnamese,Chinese": 0.2, + "English,Spanish,Indonesian": 0.14, + "English,Spanish,Filipino": 0.19333333333333333, + "English,Spanish,Chinese": 0.2866666666666667, + "English,Indonesian,Filipino": 0.13333333333333333, + "English,Indonesian,Chinese": 0.22, + "English,Filipino,Chinese": 0.24666666666666667, + "Vietnamese,Spanish,Indonesian": 0.1, + "Vietnamese,Spanish,Filipino": 0.13333333333333333, + "Vietnamese,Spanish,Chinese": 0.16666666666666666, + "Vietnamese,Indonesian,Filipino": 0.09333333333333334, + "Vietnamese,Indonesian,Chinese": 0.12666666666666668, + "Vietnamese,Filipino,Chinese": 0.13333333333333333, + "Spanish,Indonesian,Filipino": 0.11333333333333333, + "Spanish,Indonesian,Chinese": 0.15333333333333332, + "Spanish,Filipino,Chinese": 0.18, + "Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian": 0.08666666666666667, + "Malay,English,Vietnamese,Filipino": 0.08, + "Malay,English,Vietnamese,Chinese": 0.12, + "Malay,English,Spanish,Indonesian": 0.1, + "Malay,English,Spanish,Filipino": 0.10666666666666667, + "Malay,English,Spanish,Chinese": 0.15333333333333332, + "Malay,English,Indonesian,Filipino": 0.07333333333333333, + "Malay,English,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.05333333333333334, + "Malay,Vietnamese,Indonesian,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.07333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.06, + "Malay,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Malay,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.06666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.08, + "English,Vietnamese,Spanish,Filipino": 0.09333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "English,Vietnamese,Indonesian,Filipino": 0.06666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.1, + "English,Vietnamese,Filipino,Chinese": 0.10666666666666667, + "English,Spanish,Indonesian,Filipino": 0.08, + "English,Spanish,Indonesian,Chinese": 0.12, + "English,Spanish,Filipino,Chinese": 0.14666666666666667, + "English,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.04666666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.08666666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.08666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.06, + "Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.1, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.04666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.08, + "Malay,English,Vietnamese,Filipino,Chinese": 0.06666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.06, + "Malay,English,Spanish,Indonesian,Chinese": 0.08666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.04, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.04666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.07333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.08, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.06, + "English,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.04, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.06, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04 + } + }, + "AC3_2": 0.3659511472485244, + "AC3_3": 0.22538144665538262, + "AC3_4": 0.15179028225242405, + "AC3_5": 0.11350535505846637, + "AC3_6": 0.08944598052255726, + "AC3_7": 0.07259911892594073 + }, + "prompt_5": { + "overall_acc": 0.3733333333333334, + "language_acc": { + "Malay": 0.2866666666666667, + "English": 0.5266666666666666, + "Vietnamese": 0.26, + "Spanish": 0.4266666666666667, + "Indonesian": 0.29333333333333333, + "Filipino": 0.26, + "Chinese": 0.56 + }, + "consistency_score_2": 0.33587301587301577, + "consistency_score_3": 0.14438095238095236, + "consistency_score_4": 0.07485714285714289, + "consistency_score_5": 0.04603174603174602, + "consistency_score_6": 0.03333333333333334, + "consistency_score_7": 0.02666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.29333333333333333, + "Malay,Vietnamese": 0.32, + "Malay,Spanish": 0.2866666666666667, + "Malay,Indonesian": 0.3466666666666667, + "Malay,Filipino": 0.3333333333333333, + "Malay,Chinese": 0.35333333333333333, + "English,Vietnamese": 0.28, + "English,Spanish": 0.44, + "English,Indonesian": 0.29333333333333333, + "English,Filipino": 0.3, + "English,Chinese": 0.5266666666666666, + "Vietnamese,Spanish": 0.3933333333333333, + "Vietnamese,Indonesian": 0.25333333333333335, + "Vietnamese,Filipino": 0.26666666666666666, + "Vietnamese,Chinese": 0.34, + "Spanish,Indonesian": 0.34, + "Spanish,Filipino": 0.25333333333333335, + "Spanish,Chinese": 0.5, + "Indonesian,Filipino": 0.2733333333333333, + "Indonesian,Chinese": 0.3333333333333333, + "Filipino,Chinese": 0.32666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.1, + "Malay,English,Spanish": 0.15333333333333332, + "Malay,English,Indonesian": 0.11333333333333333, + "Malay,English,Filipino": 0.13333333333333333, + "Malay,English,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Spanish": 0.12, + "Malay,Vietnamese,Indonesian": 0.12, + "Malay,Vietnamese,Filipino": 0.12, + "Malay,Vietnamese,Chinese": 0.14, + "Malay,Spanish,Indonesian": 0.12666666666666668, + "Malay,Spanish,Filipino": 0.11333333333333333, + "Malay,Spanish,Chinese": 0.21333333333333335, + "Malay,Indonesian,Filipino": 0.11333333333333333, + "Malay,Indonesian,Chinese": 0.15333333333333332, + "Malay,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish": 0.18, + "English,Vietnamese,Indonesian": 0.08666666666666667, + "English,Vietnamese,Filipino": 0.09333333333333334, + "English,Vietnamese,Chinese": 0.18666666666666668, + "English,Spanish,Indonesian": 0.17333333333333334, + "English,Spanish,Filipino": 0.14, + "English,Spanish,Chinese": 0.34, + "English,Indonesian,Filipino": 0.10666666666666667, + "English,Indonesian,Chinese": 0.20666666666666667, + "English,Filipino,Chinese": 0.2, + "Vietnamese,Spanish,Indonesian": 0.13333333333333333, + "Vietnamese,Spanish,Filipino": 0.10666666666666667, + "Vietnamese,Spanish,Chinese": 0.22666666666666666, + "Vietnamese,Indonesian,Filipino": 0.06666666666666667, + "Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "Vietnamese,Filipino,Chinese": 0.12, + "Spanish,Indonesian,Filipino": 0.07333333333333333, + "Spanish,Indonesian,Chinese": 0.17333333333333334, + "Spanish,Filipino,Chinese": 0.16, + "Indonesian,Filipino,Chinese": 0.1 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.06666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.04, + "Malay,English,Vietnamese,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Chinese": 0.08666666666666667, + "Malay,English,Spanish,Indonesian": 0.08666666666666667, + "Malay,English,Spanish,Filipino": 0.08666666666666667, + "Malay,English,Spanish,Chinese": 0.14666666666666667, + "Malay,English,Indonesian,Filipino": 0.05333333333333334, + "Malay,English,Indonesian,Chinese": 0.1, + "Malay,English,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Indonesian": 0.05333333333333334, + "Malay,Vietnamese,Spanish,Filipino": 0.06, + "Malay,Vietnamese,Spanish,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.04, + "Malay,Vietnamese,Indonesian,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.07333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.04666666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.1, + "Malay,Spanish,Filipino,Chinese": 0.1, + "Malay,Indonesian,Filipino,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.06666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.06666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.14, + "English,Vietnamese,Indonesian,Filipino": 0.03333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.06666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.08, + "English,Spanish,Indonesian,Filipino": 0.04666666666666667, + "English,Spanish,Indonesian,Chinese": 0.14, + "English,Spanish,Filipino,Chinese": 0.12, + "English,Indonesian,Filipino,Chinese": 0.08, + "Vietnamese,Spanish,Indonesian,Filipino": 0.03333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.07333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.08, + "Vietnamese,Indonesian,Filipino,Chinese": 0.04, + "Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.03333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.05333333333333334, + "Malay,English,Vietnamese,Spanish,Chinese": 0.06666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.02666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.04, + "Malay,English,Vietnamese,Filipino,Chinese": 0.06, + "Malay,English,Spanish,Indonesian,Filipino": 0.04, + "Malay,English,Spanish,Indonesian,Chinese": 0.08666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.08, + "Malay,English,Indonesian,Filipino,Chinese": 0.04, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.02666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.04666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.06, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.04, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.02666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.06, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.03333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.02666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.03333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.04, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667 + } + }, + "AC3_2": 0.35361384656309086, + "AC3_3": 0.20823154276085787, + "AC3_4": 0.12470888225011811, + "AC3_5": 0.08195811252146089, + "AC3_6": 0.06120218577730002, + "AC3_7": 0.049777777765333334 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.32061688311688313, + "language_acc": { + "English": 0.3977272727272727, + "Vietnamese": 0.2840909090909091, + "Chinese": 0.39204545454545453, + "Indonesian": 0.2897727272727273, + "Filipino": 0.29545454545454547, + "Spanish": 0.30113636363636365, + "Malay": 0.2840909090909091 + }, + "consistency_score_2": 0.30167748917748916, + "consistency_score_3": 0.11152597402597401, + "consistency_score_4": 0.05000000000000001, + "consistency_score_5": 0.027056277056277053, + "consistency_score_6": 0.017045454545454544, + "consistency_score_7": 0.011363636363636364, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.2727272727272727, + "English,Chinese": 0.375, + "English,Indonesian": 0.3352272727272727, + "English,Filipino": 0.23863636363636365, + "English,Spanish": 0.32386363636363635, + "English,Malay": 0.3125, + "Vietnamese,Chinese": 0.29545454545454547, + "Vietnamese,Indonesian": 0.2784090909090909, + "Vietnamese,Filipino": 0.3068181818181818, + "Vietnamese,Spanish": 0.2840909090909091, + "Vietnamese,Malay": 0.24431818181818182, + "Chinese,Indonesian": 0.3068181818181818, + "Chinese,Filipino": 0.23295454545454544, + "Chinese,Spanish": 0.39204545454545453, + "Chinese,Malay": 0.3522727272727273, + "Indonesian,Filipino": 0.24431818181818182, + "Indonesian,Spanish": 0.35795454545454547, + "Indonesian,Malay": 0.25, + "Filipino,Spanish": 0.2840909090909091, + "Filipino,Malay": 0.30113636363636365, + "Spanish,Malay": 0.3465909090909091 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.09659090909090909, + "English,Vietnamese,Indonesian": 0.10795454545454546, + "English,Vietnamese,Filipino": 0.0625, + "English,Vietnamese,Spanish": 0.09659090909090909, + "English,Vietnamese,Malay": 0.08522727272727272, + "English,Chinese,Indonesian": 0.1590909090909091, + "English,Chinese,Filipino": 0.07954545454545454, + "English,Chinese,Spanish": 0.17045454545454544, + "English,Chinese,Malay": 0.14772727272727273, + "English,Indonesian,Filipino": 0.09659090909090909, + "English,Indonesian,Spanish": 0.14204545454545456, + "English,Indonesian,Malay": 0.13068181818181818, + "English,Filipino,Spanish": 0.08522727272727272, + "English,Filipino,Malay": 0.10227272727272728, + "English,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian": 0.10227272727272728, + "Vietnamese,Chinese,Filipino": 0.09090909090909091, + "Vietnamese,Chinese,Spanish": 0.14772727272727273, + "Vietnamese,Chinese,Malay": 0.09659090909090909, + "Vietnamese,Indonesian,Filipino": 0.11931818181818182, + "Vietnamese,Indonesian,Spanish": 0.11363636363636363, + "Vietnamese,Indonesian,Malay": 0.0625, + "Vietnamese,Filipino,Spanish": 0.09090909090909091, + "Vietnamese,Filipino,Malay": 0.07386363636363637, + "Vietnamese,Spanish,Malay": 0.10227272727272728, + "Chinese,Indonesian,Filipino": 0.08522727272727272, + "Chinese,Indonesian,Spanish": 0.17045454545454544, + "Chinese,Indonesian,Malay": 0.11931818181818182, + "Chinese,Filipino,Spanish": 0.09659090909090909, + "Chinese,Filipino,Malay": 0.10227272727272728, + "Chinese,Spanish,Malay": 0.1534090909090909, + "Indonesian,Filipino,Spanish": 0.125, + "Indonesian,Filipino,Malay": 0.09090909090909091, + "Indonesian,Spanish,Malay": 0.13068181818181818, + "Filipino,Spanish,Malay": 0.125 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino": 0.022727272727272728, + "English,Vietnamese,Chinese,Spanish": 0.056818181818181816, + "English,Vietnamese,Chinese,Malay": 0.03409090909090909, + "English,Vietnamese,Indonesian,Filipino": 0.05113636363636364, + "English,Vietnamese,Indonesian,Spanish": 0.05113636363636364, + "English,Vietnamese,Indonesian,Malay": 0.03977272727272727, + "English,Vietnamese,Filipino,Spanish": 0.028409090909090908, + "English,Vietnamese,Filipino,Malay": 0.022727272727272728, + "English,Vietnamese,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino": 0.045454545454545456, + "English,Chinese,Indonesian,Spanish": 0.10227272727272728, + "English,Chinese,Indonesian,Malay": 0.07386363636363637, + "English,Chinese,Filipino,Spanish": 0.03977272727272727, + "English,Chinese,Filipino,Malay": 0.045454545454545456, + "English,Chinese,Spanish,Malay": 0.07954545454545454, + "English,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Indonesian,Filipino,Malay": 0.056818181818181816, + "English,Indonesian,Spanish,Malay": 0.07954545454545454, + "English,Filipino,Spanish,Malay": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Spanish": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Filipino,Spanish": 0.056818181818181816, + "Vietnamese,Chinese,Filipino,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Spanish,Malay": 0.05113636363636364, + "Vietnamese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "Vietnamese,Indonesian,Filipino,Malay": 0.03977272727272727, + "Vietnamese,Indonesian,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.03977272727272727, + "Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "Chinese,Indonesian,Filipino,Malay": 0.03977272727272727, + "Chinese,Indonesian,Spanish,Malay": 0.07954545454545454, + "Chinese,Filipino,Spanish,Malay": 0.03409090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.03977272727272727, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.028409090909090908, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.022727272727272728, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.028409090909090908, + "English,Vietnamese,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "English,Chinese,Indonesian,Filipino,Malay": 0.028409090909090908, + "English,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, + "English,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364 + } + }, + "AC3_2": 0.31085897792858946, + "AC3_3": 0.1654874520287968, + "AC3_4": 0.08650897938971112, + "AC3_5": 0.049901460392944, + "AC3_6": 0.032369973766637285, + "AC3_7": 0.021949322064959833 + }, + "prompt_2": { + "overall_acc": 0.2922077922077922, + "language_acc": { + "English": 0.3125, + "Vietnamese": 0.25, + "Chinese": 0.3522727272727273, + "Indonesian": 0.29545454545454547, + "Filipino": 0.26704545454545453, + "Spanish": 0.3068181818181818, + "Malay": 0.26136363636363635 + }, + "consistency_score_2": 0.26948051948051943, + "consistency_score_3": 0.08019480519480522, + "consistency_score_4": 0.0275974025974026, + "consistency_score_5": 0.011363636363636359, + "consistency_score_6": 0.006493506493506495, + "consistency_score_7": 0.005681818181818182, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.22727272727272727, + "English,Chinese": 0.32386363636363635, + "English,Indonesian": 0.3181818181818182, + "English,Filipino": 0.2840909090909091, + "English,Spanish": 0.29545454545454547, + "English,Malay": 0.3068181818181818, + "Vietnamese,Chinese": 0.24431818181818182, + "Vietnamese,Indonesian": 0.2159090909090909, + "Vietnamese,Filipino": 0.25, + "Vietnamese,Spanish": 0.24431818181818182, + "Vietnamese,Malay": 0.26704545454545453, + "Chinese,Indonesian": 0.2556818181818182, + "Chinese,Filipino": 0.19886363636363635, + "Chinese,Spanish": 0.3409090909090909, + "Chinese,Malay": 0.26704545454545453, + "Indonesian,Filipino": 0.2727272727272727, + "Indonesian,Spanish": 0.2784090909090909, + "Indonesian,Malay": 0.2159090909090909, + "Filipino,Spanish": 0.26136363636363635, + "Filipino,Malay": 0.2784090909090909, + "Spanish,Malay": 0.3125 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.07386363636363637, + "English,Vietnamese,Indonesian": 0.06818181818181818, + "English,Vietnamese,Filipino": 0.056818181818181816, + "English,Vietnamese,Spanish": 0.056818181818181816, + "English,Vietnamese,Malay": 0.06818181818181818, + "English,Chinese,Indonesian": 0.10227272727272728, + "English,Chinese,Filipino": 0.07954545454545454, + "English,Chinese,Spanish": 0.1534090909090909, + "English,Chinese,Malay": 0.10795454545454546, + "English,Indonesian,Filipino": 0.10227272727272728, + "English,Indonesian,Spanish": 0.11363636363636363, + "English,Indonesian,Malay": 0.09659090909090909, + "English,Filipino,Spanish": 0.07386363636363637, + "English,Filipino,Malay": 0.08522727272727272, + "English,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian": 0.056818181818181816, + "Vietnamese,Chinese,Filipino": 0.03977272727272727, + "Vietnamese,Chinese,Spanish": 0.08522727272727272, + "Vietnamese,Chinese,Malay": 0.06818181818181818, + "Vietnamese,Indonesian,Filipino": 0.0625, + "Vietnamese,Indonesian,Spanish": 0.06818181818181818, + "Vietnamese,Indonesian,Malay": 0.03977272727272727, + "Vietnamese,Filipino,Spanish": 0.0625, + "Vietnamese,Filipino,Malay": 0.07954545454545454, + "Vietnamese,Spanish,Malay": 0.08522727272727272, + "Chinese,Indonesian,Filipino": 0.07386363636363637, + "Chinese,Indonesian,Spanish": 0.09659090909090909, + "Chinese,Indonesian,Malay": 0.0625, + "Chinese,Filipino,Spanish": 0.08522727272727272, + "Chinese,Filipino,Malay": 0.056818181818181816, + "Chinese,Spanish,Malay": 0.11363636363636363, + "Indonesian,Filipino,Spanish": 0.07386363636363637, + "Indonesian,Filipino,Malay": 0.08522727272727272, + "Indonesian,Spanish,Malay": 0.09090909090909091, + "Filipino,Spanish,Malay": 0.08522727272727272 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino": 0.011363636363636364, + "English,Vietnamese,Chinese,Spanish": 0.028409090909090908, + "English,Vietnamese,Chinese,Malay": 0.028409090909090908, + "English,Vietnamese,Indonesian,Filipino": 0.022727272727272728, + "English,Vietnamese,Indonesian,Spanish": 0.028409090909090908, + "English,Vietnamese,Indonesian,Malay": 0.011363636363636364, + "English,Vietnamese,Filipino,Spanish": 0.011363636363636364, + "English,Vietnamese,Filipino,Malay": 0.028409090909090908, + "English,Vietnamese,Spanish,Malay": 0.017045454545454544, + "English,Chinese,Indonesian,Filipino": 0.028409090909090908, + "English,Chinese,Indonesian,Spanish": 0.05113636363636364, + "English,Chinese,Indonesian,Malay": 0.045454545454545456, + "English,Chinese,Filipino,Spanish": 0.03409090909090909, + "English,Chinese,Filipino,Malay": 0.022727272727272728, + "English,Chinese,Spanish,Malay": 0.0625, + "English,Indonesian,Filipino,Spanish": 0.03409090909090909, + "English,Indonesian,Filipino,Malay": 0.045454545454545456, + "English,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Spanish": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Filipino,Spanish": 0.022727272727272728, + "Vietnamese,Chinese,Filipino,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Indonesian,Filipino,Spanish": 0.022727272727272728, + "Vietnamese,Indonesian,Filipino,Malay": 0.017045454545454544, + "Vietnamese,Indonesian,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Filipino,Spanish,Malay": 0.028409090909090908, + "Chinese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "Chinese,Indonesian,Filipino,Malay": 0.028409090909090908, + "Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, + "Chinese,Filipino,Spanish,Malay": 0.028409090909090908, + "Indonesian,Filipino,Spanish,Malay": 0.028409090909090908 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.011363636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.011363636363636364, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.005681818181818182, + "English,Vietnamese,Chinese,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.011363636363636364, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Filipino,Spanish,Malay": 0.005681818181818182, + "English,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "English,Chinese,Indonesian,Filipino,Malay": 0.017045454545454544, + "English,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "English,Chinese,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.011363636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.005681818181818182, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 + } + }, + "AC3_2": 0.280384355478946, + "AC3_3": 0.12585060968778325, + "AC3_4": 0.050431801685067695, + "AC3_5": 0.02187651919551602, + "AC3_6": 0.01270468661347679, + "AC3_7": 0.011146891252141128 + }, + "prompt_3": { + "overall_acc": 0.3141233766233766, + "language_acc": { + "English": 0.3522727272727273, + "Vietnamese": 0.26136363636363635, + "Chinese": 0.4034090909090909, + "Indonesian": 0.2784090909090909, + "Filipino": 0.23295454545454544, + "Spanish": 0.32386363636363635, + "Malay": 0.3465909090909091 + }, + "consistency_score_2": 0.288961038961039, + "consistency_score_3": 0.0961038961038961, + "consistency_score_4": 0.03733766233766233, + "consistency_score_5": 0.01758658008658008, + "consistency_score_6": 0.00974025974025974, + "consistency_score_7": 0.005681818181818182, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3068181818181818, + "English,Chinese": 0.3693181818181818, + "English,Indonesian": 0.30113636363636365, + "English,Filipino": 0.21022727272727273, + "English,Spanish": 0.3465909090909091, + "English,Malay": 0.25, + "Vietnamese,Chinese": 0.3181818181818182, + "Vietnamese,Indonesian": 0.24431818181818182, + "Vietnamese,Filipino": 0.22727272727272727, + "Vietnamese,Spanish": 0.3125, + "Vietnamese,Malay": 0.26136363636363635, + "Chinese,Indonesian": 0.3181818181818182, + "Chinese,Filipino": 0.19886363636363635, + "Chinese,Spanish": 0.42613636363636365, + "Chinese,Malay": 0.3125, + "Indonesian,Filipino": 0.3068181818181818, + "Indonesian,Spanish": 0.26136363636363635, + "Indonesian,Malay": 0.32386363636363635, + "Filipino,Spanish": 0.2556818181818182, + "Filipino,Malay": 0.25, + "Spanish,Malay": 0.26704545454545453 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.13636363636363635, + "English,Vietnamese,Indonesian": 0.07954545454545454, + "English,Vietnamese,Filipino": 0.0625, + "English,Vietnamese,Spanish": 0.14204545454545456, + "English,Vietnamese,Malay": 0.09659090909090909, + "English,Chinese,Indonesian": 0.125, + "English,Chinese,Filipino": 0.06818181818181818, + "English,Chinese,Spanish": 0.1875, + "English,Chinese,Malay": 0.09659090909090909, + "English,Indonesian,Filipino": 0.07386363636363637, + "English,Indonesian,Spanish": 0.09659090909090909, + "English,Indonesian,Malay": 0.07954545454545454, + "English,Filipino,Spanish": 0.06818181818181818, + "English,Filipino,Malay": 0.05113636363636364, + "English,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian": 0.07386363636363637, + "Vietnamese,Chinese,Filipino": 0.07386363636363637, + "Vietnamese,Chinese,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Malay": 0.11931818181818182, + "Vietnamese,Indonesian,Filipino": 0.0625, + "Vietnamese,Indonesian,Spanish": 0.10227272727272728, + "Vietnamese,Indonesian,Malay": 0.09659090909090909, + "Vietnamese,Filipino,Spanish": 0.06818181818181818, + "Vietnamese,Filipino,Malay": 0.05113636363636364, + "Vietnamese,Spanish,Malay": 0.10795454545454546, + "Chinese,Indonesian,Filipino": 0.056818181818181816, + "Chinese,Indonesian,Spanish": 0.13636363636363635, + "Chinese,Indonesian,Malay": 0.11931818181818182, + "Chinese,Filipino,Spanish": 0.08522727272727272, + "Chinese,Filipino,Malay": 0.07386363636363637, + "Chinese,Spanish,Malay": 0.14204545454545456, + "Indonesian,Filipino,Spanish": 0.07954545454545454, + "Indonesian,Filipino,Malay": 0.11931818181818182, + "Indonesian,Spanish,Malay": 0.10795454545454546, + "Filipino,Spanish,Malay": 0.07386363636363637 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.03977272727272727, + "English,Vietnamese,Chinese,Filipino": 0.03977272727272727, + "English,Vietnamese,Chinese,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Malay": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino": 0.022727272727272728, + "English,Vietnamese,Indonesian,Spanish": 0.045454545454545456, + "English,Vietnamese,Indonesian,Malay": 0.022727272727272728, + "English,Vietnamese,Filipino,Spanish": 0.028409090909090908, + "English,Vietnamese,Filipino,Malay": 0.017045454545454544, + "English,Vietnamese,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino": 0.022727272727272728, + "English,Chinese,Indonesian,Spanish": 0.06818181818181818, + "English,Chinese,Indonesian,Malay": 0.028409090909090908, + "English,Chinese,Filipino,Spanish": 0.028409090909090908, + "English,Chinese,Filipino,Malay": 0.022727272727272728, + "English,Chinese,Spanish,Malay": 0.06818181818181818, + "English,Indonesian,Filipino,Spanish": 0.028409090909090908, + "English,Indonesian,Filipino,Malay": 0.017045454545454544, + "English,Indonesian,Spanish,Malay": 0.03409090909090909, + "English,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Spanish": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Filipino,Spanish": 0.028409090909090908, + "Vietnamese,Chinese,Filipino,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "Vietnamese,Indonesian,Filipino,Malay": 0.028409090909090908, + "Vietnamese,Indonesian,Spanish,Malay": 0.0625, + "Vietnamese,Filipino,Spanish,Malay": 0.022727272727272728, + "Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "Chinese,Indonesian,Filipino,Malay": 0.028409090909090908, + "Chinese,Indonesian,Spanish,Malay": 0.05113636363636364, + "Chinese,Filipino,Spanish,Malay": 0.028409090909090908, + "Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.03977272727272727, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Spanish,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "English,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, + "English,Chinese,Indonesian,Spanish,Malay": 0.022727272727272728, + "English,Chinese,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.011363636363636364, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 + } + }, + "AC3_2": 0.30101728683547624, + "AC3_3": 0.14717929474927402, + "AC3_4": 0.06674214928666977, + "AC3_5": 0.033308351507934895, + "AC3_6": 0.01889463918955694, + "AC3_7": 0.011161744343533364 + }, + "prompt_4": { + "overall_acc": 0.31493506493506496, + "language_acc": { + "English": 0.3465909090909091, + "Vietnamese": 0.32954545454545453, + "Chinese": 0.42045454545454547, + "Indonesian": 0.32386363636363635, + "Filipino": 0.2159090909090909, + "Spanish": 0.32954545454545453, + "Malay": 0.23863636363636365 + }, + "consistency_score_2": 0.31114718614718617, + "consistency_score_3": 0.12305194805194808, + "consistency_score_4": 0.0599025974025974, + "consistency_score_5": 0.03382034632034632, + "consistency_score_6": 0.021915584415584412, + "consistency_score_7": 0.017045454545454544, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.26136363636363635, + "English,Chinese": 0.3806818181818182, + "English,Indonesian": 0.30113636363636365, + "English,Filipino": 0.26136363636363635, + "English,Spanish": 0.3977272727272727, + "English,Malay": 0.2784090909090909, + "Vietnamese,Chinese": 0.2840909090909091, + "Vietnamese,Indonesian": 0.3068181818181818, + "Vietnamese,Filipino": 0.32954545454545453, + "Vietnamese,Spanish": 0.30113636363636365, + "Vietnamese,Malay": 0.26704545454545453, + "Chinese,Indonesian": 0.38636363636363635, + "Chinese,Filipino": 0.3409090909090909, + "Chinese,Spanish": 0.4602272727272727, + "Chinese,Malay": 0.30113636363636365, + "Indonesian,Filipino": 0.2556818181818182, + "Indonesian,Spanish": 0.29545454545454547, + "Indonesian,Malay": 0.2897727272727273, + "Filipino,Spanish": 0.3352272727272727, + "Filipino,Malay": 0.2556818181818182, + "Spanish,Malay": 0.24431818181818182 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.125, + "English,Vietnamese,Indonesian": 0.10227272727272728, + "English,Vietnamese,Filipino": 0.08522727272727272, + "English,Vietnamese,Spanish": 0.13068181818181818, + "English,Vietnamese,Malay": 0.08522727272727272, + "English,Chinese,Indonesian": 0.17613636363636365, + "English,Chinese,Filipino": 0.14204545454545456, + "English,Chinese,Spanish": 0.23295454545454544, + "English,Chinese,Malay": 0.11931818181818182, + "English,Indonesian,Filipino": 0.10227272727272728, + "English,Indonesian,Spanish": 0.16477272727272727, + "English,Indonesian,Malay": 0.13068181818181818, + "English,Filipino,Spanish": 0.14204545454545456, + "English,Filipino,Malay": 0.06818181818181818, + "English,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian": 0.14772727272727273, + "Vietnamese,Chinese,Filipino": 0.10795454545454546, + "Vietnamese,Chinese,Spanish": 0.14772727272727273, + "Vietnamese,Chinese,Malay": 0.09659090909090909, + "Vietnamese,Indonesian,Filipino": 0.11363636363636363, + "Vietnamese,Indonesian,Spanish": 0.09659090909090909, + "Vietnamese,Indonesian,Malay": 0.11363636363636363, + "Vietnamese,Filipino,Spanish": 0.11931818181818182, + "Vietnamese,Filipino,Malay": 0.07954545454545454, + "Vietnamese,Spanish,Malay": 0.07954545454545454, + "Chinese,Indonesian,Filipino": 0.14204545454545456, + "Chinese,Indonesian,Spanish": 0.1875, + "Chinese,Indonesian,Malay": 0.14204545454545456, + "Chinese,Filipino,Spanish": 0.19886363636363635, + "Chinese,Filipino,Malay": 0.11363636363636363, + "Chinese,Spanish,Malay": 0.14204545454545456, + "Indonesian,Filipino,Spanish": 0.125, + "Indonesian,Filipino,Malay": 0.06818181818181818, + "Indonesian,Spanish,Malay": 0.07386363636363637, + "Filipino,Spanish,Malay": 0.08522727272727272 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino": 0.056818181818181816, + "English,Vietnamese,Chinese,Spanish": 0.09090909090909091, + "English,Vietnamese,Chinese,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino": 0.03409090909090909, + "English,Vietnamese,Indonesian,Spanish": 0.0625, + "English,Vietnamese,Indonesian,Malay": 0.0625, + "English,Vietnamese,Filipino,Spanish": 0.056818181818181816, + "English,Vietnamese,Filipino,Malay": 0.022727272727272728, + "English,Vietnamese,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino": 0.06818181818181818, + "English,Chinese,Indonesian,Spanish": 0.125, + "English,Chinese,Indonesian,Malay": 0.09090909090909091, + "English,Chinese,Filipino,Spanish": 0.09659090909090909, + "English,Chinese,Filipino,Malay": 0.03977272727272727, + "English,Chinese,Spanish,Malay": 0.08522727272727272, + "English,Indonesian,Filipino,Spanish": 0.07954545454545454, + "English,Indonesian,Filipino,Malay": 0.03409090909090909, + "English,Indonesian,Spanish,Malay": 0.0625, + "English,Filipino,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Spanish": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Malay": 0.0625, + "Vietnamese,Chinese,Filipino,Spanish": 0.06818181818181818, + "Vietnamese,Chinese,Filipino,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Spanish,Malay": 0.05113636363636364, + "Vietnamese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "Vietnamese,Indonesian,Filipino,Malay": 0.03977272727272727, + "Vietnamese,Indonesian,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Filipino,Spanish,Malay": 0.028409090909090908, + "Chinese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "Chinese,Indonesian,Filipino,Malay": 0.03977272727272727, + "Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "Chinese,Filipino,Spanish,Malay": 0.0625, + "Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Spanish,Malay": 0.03977272727272727, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.017045454545454544, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Chinese,Indonesian,Filipino,Malay": 0.028409090909090908, + "English,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + } + }, + "AC3_2": 0.3130296669122531, + "AC3_3": 0.1769612892262974, + "AC3_4": 0.10065919352969764, + "AC3_5": 0.0610812771227235, + "AC3_6": 0.040979502413120074, + "AC3_7": 0.03234052010583955 + }, + "prompt_5": { + "overall_acc": 0.30681818181818177, + "language_acc": { + "English": 0.3465909090909091, + "Vietnamese": 0.30113636363636365, + "Chinese": 0.42613636363636365, + "Indonesian": 0.22727272727272727, + "Filipino": 0.2784090909090909, + "Spanish": 0.3181818181818182, + "Malay": 0.25 + }, + "consistency_score_2": 0.29545454545454547, + "consistency_score_3": 0.10324675324675321, + "consistency_score_4": 0.04107142857142856, + "consistency_score_5": 0.01677489177489177, + "consistency_score_6": 0.005681818181818182, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.2556818181818182, + "English,Chinese": 0.39204545454545453, + "English,Indonesian": 0.2784090909090909, + "English,Filipino": 0.23295454545454544, + "English,Spanish": 0.36363636363636365, + "English,Malay": 0.2784090909090909, + "Vietnamese,Chinese": 0.3181818181818182, + "Vietnamese,Indonesian": 0.29545454545454547, + "Vietnamese,Filipino": 0.2840909090909091, + "Vietnamese,Spanish": 0.2897727272727273, + "Vietnamese,Malay": 0.30113636363636365, + "Chinese,Indonesian": 0.3125, + "Chinese,Filipino": 0.29545454545454547, + "Chinese,Spanish": 0.39204545454545453, + "Chinese,Malay": 0.32386363636363635, + "Indonesian,Filipino": 0.2215909090909091, + "Indonesian,Spanish": 0.29545454545454547, + "Indonesian,Malay": 0.2556818181818182, + "Filipino,Spanish": 0.26136363636363635, + "Filipino,Malay": 0.25, + "Spanish,Malay": 0.3068181818181818 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.125, + "English,Vietnamese,Indonesian": 0.07954545454545454, + "English,Vietnamese,Filipino": 0.0625, + "English,Vietnamese,Spanish": 0.11363636363636363, + "English,Vietnamese,Malay": 0.08522727272727272, + "English,Chinese,Indonesian": 0.13636363636363635, + "English,Chinese,Filipino": 0.09659090909090909, + "English,Chinese,Spanish": 0.19886363636363635, + "English,Chinese,Malay": 0.11363636363636363, + "English,Indonesian,Filipino": 0.06818181818181818, + "English,Indonesian,Spanish": 0.125, + "English,Indonesian,Malay": 0.10227272727272728, + "English,Filipino,Spanish": 0.09659090909090909, + "English,Filipino,Malay": 0.06818181818181818, + "English,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian": 0.11363636363636363, + "Vietnamese,Chinese,Filipino": 0.11363636363636363, + "Vietnamese,Chinese,Spanish": 0.1534090909090909, + "Vietnamese,Chinese,Malay": 0.10795454545454546, + "Vietnamese,Indonesian,Filipino": 0.07386363636363637, + "Vietnamese,Indonesian,Spanish": 0.10795454545454546, + "Vietnamese,Indonesian,Malay": 0.07386363636363637, + "Vietnamese,Filipino,Spanish": 0.10227272727272728, + "Vietnamese,Filipino,Malay": 0.07386363636363637, + "Vietnamese,Spanish,Malay": 0.09090909090909091, + "Chinese,Indonesian,Filipino": 0.09659090909090909, + "Chinese,Indonesian,Spanish": 0.14772727272727273, + "Chinese,Indonesian,Malay": 0.10227272727272728, + "Chinese,Filipino,Spanish": 0.14204545454545456, + "Chinese,Filipino,Malay": 0.09090909090909091, + "Chinese,Spanish,Malay": 0.14204545454545456, + "Indonesian,Filipino,Spanish": 0.056818181818181816, + "Indonesian,Filipino,Malay": 0.045454545454545456, + "Indonesian,Spanish,Malay": 0.08522727272727272, + "Filipino,Spanish,Malay": 0.10227272727272728 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino": 0.03977272727272727, + "English,Vietnamese,Chinese,Spanish": 0.07954545454545454, + "English,Vietnamese,Chinese,Malay": 0.03977272727272727, + "English,Vietnamese,Indonesian,Filipino": 0.017045454545454544, + "English,Vietnamese,Indonesian,Spanish": 0.03409090909090909, + "English,Vietnamese,Indonesian,Malay": 0.03409090909090909, + "English,Vietnamese,Filipino,Spanish": 0.03409090909090909, + "English,Vietnamese,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino": 0.03409090909090909, + "English,Chinese,Indonesian,Spanish": 0.09659090909090909, + "English,Chinese,Indonesian,Malay": 0.05113636363636364, + "English,Chinese,Filipino,Spanish": 0.056818181818181816, + "English,Chinese,Filipino,Malay": 0.028409090909090908, + "English,Chinese,Spanish,Malay": 0.0625, + "English,Indonesian,Filipino,Spanish": 0.03409090909090909, + "English,Indonesian,Filipino,Malay": 0.022727272727272728, + "English,Indonesian,Spanish,Malay": 0.05113636363636364, + "English,Filipino,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Spanish": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Filipino,Spanish": 0.06818181818181818, + "Vietnamese,Chinese,Filipino,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "Vietnamese,Indonesian,Filipino,Malay": 0.005681818181818182, + "Vietnamese,Indonesian,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Filipino,Spanish,Malay": 0.011363636363636364, + "Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, + "Chinese,Indonesian,Spanish,Malay": 0.0625, + "Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.011363636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.028409090909090908, + "English,Vietnamese,Chinese,Filipino,Malay": 0.005681818181818182, + "English,Vietnamese,Chinese,Spanish,Malay": 0.028409090909090908, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.011363636363636364, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.0, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Filipino,Spanish,Malay": 0.0, + "English,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "English,Chinese,Indonesian,Filipino,Malay": 0.011363636363636364, + "English,Chinese,Indonesian,Spanish,Malay": 0.03977272727272727, + "English,Chinese,Filipino,Spanish,Malay": 0.017045454545454544, + "English,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.005681818181818182, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.005681818181818182, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.011363636363636364, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0 + } + }, + "AC3_2": 0.30102915946974335, + "AC3_3": 0.1545022672911931, + "AC3_4": 0.07244517030115433, + "AC3_5": 0.03181058071379376, + "AC3_6": 0.011157024789818183, + "AC3_7": 0.0 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4854368932038835 + }, + "prompt_2": { + "accuracy": 0.47572815533980584 + }, + "prompt_3": { + "accuracy": 0.36893203883495146 + }, + "prompt_4": { + "accuracy": 0.49514563106796117 + }, + "prompt_5": { + "accuracy": 0.4854368932038835 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 - }, - "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 - }, - "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 - }, - "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 - }, - "indommlu": { + "prompt_1": { + "accuracy": 0.5142857142857142 + }, + "prompt_2": { + "accuracy": 0.5523809523809524 + }, + "prompt_3": { + "accuracy": 0.45714285714285713 + }, + "prompt_4": { + "accuracy": 0.5333333333333333 + }, + "prompt_5": { + "accuracy": 0.44761904761904764 + } + }, + "us_eval": { + "prompt_1": { + "accuracy": 0.5046728971962616 + }, + "prompt_2": { + "accuracy": 0.5046728971962616 + }, + "prompt_3": { + "accuracy": 0.411214953271028 + }, + "prompt_4": { + "accuracy": 0.5420560747663551 + }, + "prompt_5": { + "accuracy": 0.6074766355140186 + } + }, + "ph_eval": { + "prompt_1": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.3, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.7, + "culture": 0.4, + "film": 0.2, + "law": 0.2, + "geography": 0.2 + } + }, + "prompt_2": { + "accuracy": 0.27, + "category_acc": { + "brand": 0.1, + "demographics": 0.6, + "biology": 0.3, + "history": 0.26666666666666666, + "literature": 0.1, + "politics": 0.4, + "culture": 0.6, + "film": 0.1, + "law": 0.1, + "geography": 0.3 + } + }, + "prompt_3": { + "accuracy": 0.31, + "category_acc": { + "brand": 0.5, + "demographics": 0.4, + "biology": 0.1, + "history": 0.26666666666666666, + "literature": 0.3, + "politics": 0.5, + "culture": 0.3, + "film": 0.1, + "law": 0.3, + "geography": 0.4 + } + }, + "prompt_4": { + "accuracy": 0.34, + "category_acc": { + "brand": 0.3, + "demographics": 0.6, + "biology": 0.3, + "history": 0.2, + "literature": 0.3, + "politics": 0.5, + "culture": 0.5, + "film": 0.3, + "law": 0.3, + "geography": 0.3 + } + }, + "prompt_5": { + "accuracy": 0.36, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.3, + "history": 0.5333333333333333, + "literature": 0.3, + "politics": 0.5, + "culture": 0.4, + "film": 0.2, + "law": 0.2, + "geography": 0.3 + } + } + }, + "sing2eng": { + "prompt_1": { + "bleu_score": 0.05135955042137952 + }, + "prompt_2": { + "bleu_score": 0.04665386853067187 + }, + "prompt_3": { + "bleu_score": 0.06072396333900724 + }, + "prompt_4": { + "bleu_score": 0.055158709227604796 + }, + "prompt_5": { + "bleu_score": 0.03028144411690789 + } + }, + "indommlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, @@ -6820,179 +58399,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.21576065630334945 + }, + "prompt_2": { + "bleu_score": 0.22746933355547022 + }, + "prompt_3": { + "bleu_score": 0.21846526865682628 + }, + "prompt_4": { + "bleu_score": 0.2180025571294565 + }, + "prompt_5": { + "bleu_score": 0.20775291109258173 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.0955705213226969 + }, + "prompt_2": { + "bleu_score": 0.09752255730017337 + }, + "prompt_3": { + "bleu_score": 0.09568938237063238 + }, + "prompt_4": { + "bleu_score": 0.09408791685712983 + }, + "prompt_5": { + "bleu_score": 0.09055053682770213 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.19297114800274695 + }, + "prompt_2": { + "bleu_score": 0.2015714843133947 + }, + "prompt_3": { + "bleu_score": 0.1931282154783705 + }, + "prompt_4": { + "bleu_score": 0.18726947040130554 + }, + "prompt_5": { + "bleu_score": 0.1988242911758256 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.18703785852491603 + }, + "prompt_2": { + "bleu_score": 0.19388537459962543 + }, + "prompt_3": { + "bleu_score": 0.18419666481731237 + }, + "prompt_4": { + "bleu_score": 0.1907756253576178 + }, + "prompt_5": { + "bleu_score": 0.17865821281278932 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4329054842473746 + }, + "prompt_2": { + "accuracy": 0.4364060676779463 + }, + "prompt_3": { + "accuracy": 0.4130688448074679 + }, + "prompt_4": { + "accuracy": 0.441073512252042 + }, + "prompt_5": { + "accuracy": 0.43173862310385064 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.440757954951734, + "category_acc": { + "high_school_european_history": 0.5670731707317073, + "business_ethics": 0.47474747474747475, + "clinical_knowledge": 0.4810606060606061, + "medical_genetics": 0.45454545454545453, + "high_school_us_history": 0.458128078817734, + "high_school_physics": 0.25333333333333335, + "high_school_world_history": 0.4788135593220339, + "virology": 0.43636363636363634, + "high_school_microeconomics": 0.3881856540084388, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.2727272727272727, + "high_school_biology": 0.4692556634304207, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.2775800711743772, + "philosophy": 0.4806451612903226, + "professional_medicine": 0.5018450184501845, + "nutrition": 0.5147540983606558, + "global_facts": 0.25252525252525254, + "machine_learning": 0.2972972972972973, + "security_studies": 0.48360655737704916, + "public_relations": 0.42201834862385323, + "professional_psychology": 0.4402618657937807, + "prehistory": 0.49226006191950467, + "anatomy": 0.43283582089552236, + "human_sexuality": 0.5384615384615384, + "college_medicine": 0.38953488372093026, + "high_school_government_and_politics": 0.640625, + "college_chemistry": 0.36363636363636365, + "logical_fallacies": 0.5370370370370371, + "high_school_geography": 0.5736040609137056, + "elementary_mathematics": 0.259946949602122, + "human_aging": 0.42342342342342343, + "college_mathematics": 0.30303030303030304, + "high_school_psychology": 0.6691176470588235, + "formal_logic": 0.296, + "high_school_statistics": 0.3813953488372093, + "international_law": 0.6416666666666667, + "high_school_mathematics": 0.2342007434944238, + "high_school_computer_science": 0.4444444444444444, + "conceptual_physics": 0.3418803418803419, + "miscellaneous": 0.6739130434782609, + "high_school_chemistry": 0.40594059405940597, + "marketing": 0.5622317596566524, + "professional_law": 0.33659491193737767, + "management": 0.5784313725490197, + "college_physics": 0.25742574257425743, + "jurisprudence": 0.4485981308411215, + "world_religions": 0.6411764705882353, + "sociology": 0.685, + "us_foreign_policy": 0.6868686868686869, + "high_school_macroeconomics": 0.43444730077120824, + "computer_security": 0.5151515151515151, + "moral_scenarios": 0.26733780760626397, + "moral_disputes": 0.4811594202898551, + "electrical_engineering": 0.3680555555555556, + "astronomy": 0.44370860927152317, + "college_biology": 0.5034965034965035 + } + }, + "prompt_2": { + "accuracy": 0.42745799070432605, + "category_acc": { + "high_school_european_history": 0.5426829268292683, + "business_ethics": 0.47474747474747475, + "clinical_knowledge": 0.4621212121212121, + "medical_genetics": 0.45454545454545453, + "high_school_us_history": 0.5073891625615764, + "high_school_physics": 0.24666666666666667, + "high_school_world_history": 0.4830508474576271, + "virology": 0.42424242424242425, + "high_school_microeconomics": 0.37130801687763715, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.2727272727272727, + "high_school_biology": 0.4401294498381877, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.3238434163701068, + "philosophy": 0.4645161290322581, + "professional_medicine": 0.2915129151291513, + "nutrition": 0.4885245901639344, + "global_facts": 0.2727272727272727, + "machine_learning": 0.23423423423423423, + "security_studies": 0.3729508196721312, + "public_relations": 0.42201834862385323, + "professional_psychology": 0.41734860883797054, + "prehistory": 0.5263157894736842, + "anatomy": 0.41044776119402987, + "human_sexuality": 0.5461538461538461, + "college_medicine": 0.38372093023255816, + "high_school_government_and_politics": 0.6041666666666666, + "college_chemistry": 0.30303030303030304, + "logical_fallacies": 0.5061728395061729, + "high_school_geography": 0.5685279187817259, + "elementary_mathematics": 0.29708222811671087, + "human_aging": 0.47297297297297297, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.6213235294117647, + "formal_logic": 0.36, + "high_school_statistics": 0.2558139534883721, + "international_law": 0.6416666666666667, + "high_school_mathematics": 0.25650557620817843, + "high_school_computer_science": 0.45454545454545453, + "conceptual_physics": 0.358974358974359, + "miscellaneous": 0.6675191815856778, + "high_school_chemistry": 0.33663366336633666, + "marketing": 0.5622317596566524, + "professional_law": 0.34442270058708413, + "management": 0.6176470588235294, + "college_physics": 0.26732673267326734, + "jurisprudence": 0.5046728971962616, + "world_religions": 0.6705882352941176, + "sociology": 0.595, + "us_foreign_policy": 0.6565656565656566, + "high_school_macroeconomics": 0.41131105398457585, + "computer_security": 0.5252525252525253, + "moral_scenarios": 0.24496644295302014, + "moral_disputes": 0.5101449275362319, + "electrical_engineering": 0.3333333333333333, + "astronomy": 0.47019867549668876, + "college_biology": 0.45454545454545453 + } + }, + "prompt_3": { + "accuracy": 0.41637468716481946, + "category_acc": { + "high_school_european_history": 0.4329268292682927, + "business_ethics": 0.5151515151515151, + "clinical_knowledge": 0.4621212121212121, + "medical_genetics": 0.46464646464646464, + "high_school_us_history": 0.47783251231527096, + "high_school_physics": 0.2733333333333333, + "high_school_world_history": 0.4830508474576271, + "virology": 0.4303030303030303, + "high_school_microeconomics": 0.3628691983122363, + "econometrics": 0.2831858407079646, + "college_computer_science": 0.29292929292929293, + "high_school_biology": 0.47249190938511326, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.3202846975088968, + "philosophy": 0.4032258064516129, + "professional_medicine": 0.3800738007380074, + "nutrition": 0.4557377049180328, + "global_facts": 0.2828282828282828, + "machine_learning": 0.32432432432432434, + "security_studies": 0.46311475409836067, + "public_relations": 0.44036697247706424, + "professional_psychology": 0.38461538461538464, + "prehistory": 0.5139318885448917, + "anatomy": 0.417910447761194, + "human_sexuality": 0.4076923076923077, + "college_medicine": 0.43023255813953487, + "high_school_government_and_politics": 0.625, + "college_chemistry": 0.35353535353535354, + "logical_fallacies": 0.49382716049382713, + "high_school_geography": 0.5634517766497462, + "elementary_mathematics": 0.33421750663129973, + "human_aging": 0.47297297297297297, + "college_mathematics": 0.21212121212121213, + "high_school_psychology": 0.5919117647058824, + "formal_logic": 0.328, + "high_school_statistics": 0.3116279069767442, + "international_law": 0.45, + "high_school_mathematics": 0.1970260223048327, + "high_school_computer_science": 0.45454545454545453, + "conceptual_physics": 0.34615384615384615, + "miscellaneous": 0.670076726342711, + "high_school_chemistry": 0.297029702970297, + "marketing": 0.6351931330472103, + "professional_law": 0.294194390084801, + "management": 0.5686274509803921, + "college_physics": 0.31683168316831684, + "jurisprudence": 0.4392523364485981, + "world_religions": 0.6529411764705882, + "sociology": 0.565, + "us_foreign_policy": 0.5959595959595959, + "high_school_macroeconomics": 0.38303341902313626, + "computer_security": 0.5656565656565656, + "moral_scenarios": 0.24720357941834453, + "moral_disputes": 0.4115942028985507, + "electrical_engineering": 0.3888888888888889, + "astronomy": 0.46357615894039733, + "college_biology": 0.48951048951048953 + } + }, + "prompt_4": { + "accuracy": 0.41580264569181263, + "category_acc": { + "high_school_european_history": 0.45121951219512196, + "business_ethics": 0.43434343434343436, + "clinical_knowledge": 0.4659090909090909, + "medical_genetics": 0.43434343434343436, + "high_school_us_history": 0.4827586206896552, + "high_school_physics": 0.34, + "high_school_world_history": 0.4661016949152542, + "virology": 0.38181818181818183, + "high_school_microeconomics": 0.35443037974683544, + "econometrics": 0.26548672566371684, + "college_computer_science": 0.23232323232323232, + "high_school_biology": 0.4563106796116505, + "abstract_algebra": 0.15151515151515152, + "professional_accounting": 0.33451957295373663, + "philosophy": 0.4258064516129032, + "professional_medicine": 0.30996309963099633, + "nutrition": 0.42295081967213116, + "global_facts": 0.24242424242424243, + "machine_learning": 0.3063063063063063, + "security_studies": 0.45081967213114754, + "public_relations": 0.3669724770642202, + "professional_psychology": 0.3993453355155483, + "prehistory": 0.4891640866873065, + "anatomy": 0.417910447761194, + "human_sexuality": 0.4846153846153846, + "college_medicine": 0.45930232558139533, + "high_school_government_and_politics": 0.640625, + "college_chemistry": 0.31313131313131315, + "logical_fallacies": 0.4691358024691358, + "high_school_geography": 0.6142131979695431, + "elementary_mathematics": 0.259946949602122, + "human_aging": 0.4369369369369369, + "college_mathematics": 0.20202020202020202, + "high_school_psychology": 0.5753676470588235, + "formal_logic": 0.344, + "high_school_statistics": 0.3395348837209302, + "international_law": 0.525, + "high_school_mathematics": 0.23048327137546468, + "high_school_computer_science": 0.43434343434343436, + "conceptual_physics": 0.3888888888888889, + "miscellaneous": 0.680306905370844, + "high_school_chemistry": 0.3811881188118812, + "marketing": 0.6008583690987125, + "professional_law": 0.30071754729288974, + "management": 0.6372549019607843, + "college_physics": 0.2871287128712871, + "jurisprudence": 0.514018691588785, + "world_religions": 0.6470588235294118, + "sociology": 0.575, + "us_foreign_policy": 0.6161616161616161, + "high_school_macroeconomics": 0.4087403598971722, + "computer_security": 0.5252525252525253, + "moral_scenarios": 0.2539149888143177, + "moral_disputes": 0.43478260869565216, + "electrical_engineering": 0.3402777777777778, + "astronomy": 0.4966887417218543, + "college_biology": 0.4125874125874126 + } + }, + "prompt_5": { + "accuracy": 0.4155166249553093, + "category_acc": { + "high_school_european_history": 0.4573170731707317, + "business_ethics": 0.48484848484848486, + "clinical_knowledge": 0.4810606060606061, + "medical_genetics": 0.42424242424242425, + "high_school_us_history": 0.4088669950738916, + "high_school_physics": 0.25333333333333335, + "high_school_world_history": 0.4661016949152542, + "virology": 0.3575757575757576, + "high_school_microeconomics": 0.3755274261603376, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.4444444444444444, + "high_school_biology": 0.46601941747572817, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.302491103202847, + "philosophy": 0.4290322580645161, + "professional_medicine": 0.3505535055350554, + "nutrition": 0.4360655737704918, + "global_facts": 0.30303030303030304, + "machine_learning": 0.27927927927927926, + "security_studies": 0.48360655737704916, + "public_relations": 0.4954128440366973, + "professional_psychology": 0.37643207855973815, + "prehistory": 0.5046439628482973, + "anatomy": 0.4253731343283582, + "human_sexuality": 0.47692307692307695, + "college_medicine": 0.3953488372093023, + "high_school_government_and_politics": 0.6145833333333334, + "college_chemistry": 0.35353535353535354, + "logical_fallacies": 0.5432098765432098, + "high_school_geography": 0.5888324873096447, + "elementary_mathematics": 0.3183023872679045, + "human_aging": 0.4774774774774775, + "college_mathematics": 0.3333333333333333, + "high_school_psychology": 0.5974264705882353, + "formal_logic": 0.4, + "high_school_statistics": 0.3116279069767442, + "international_law": 0.5, + "high_school_mathematics": 0.26394052044609667, + "high_school_computer_science": 0.4444444444444444, + "conceptual_physics": 0.3418803418803419, + "miscellaneous": 0.6611253196930946, + "high_school_chemistry": 0.3069306930693069, + "marketing": 0.6523605150214592, + "professional_law": 0.2948467058056099, + "management": 0.6176470588235294, + "college_physics": 0.24752475247524752, + "jurisprudence": 0.3925233644859813, + "world_religions": 0.6470588235294118, + "sociology": 0.6, + "us_foreign_policy": 0.6262626262626263, + "high_school_macroeconomics": 0.3496143958868895, + "computer_security": 0.5151515151515151, + "moral_scenarios": 0.23825503355704697, + "moral_disputes": 0.4057971014492754, + "electrical_engineering": 0.3611111111111111, + "astronomy": 0.45695364238410596, + "college_biology": 0.4195804195804196 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4606240713224368 + }, + "prompt_2": { + "accuracy": 0.4866270430906389 + }, + "prompt_3": { + "accuracy": 0.475482912332838 + }, + "prompt_4": { + "accuracy": 0.4784546805349183 + }, + "prompt_5": { + "accuracy": 0.4888558692421991 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.48567870485678705, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.5238095238095238, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.5517241379310345, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.4583333333333333, + "high_school_chemistry": 0.5, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.6923076923076923, + "middle_school_physics": 0.8333333333333334, + "middle_school_chemistry": 0.72, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.38333333333333336, + "business_administration": 0.42105263157894735, + "marxism": 0.7083333333333334, + "mao_zedong_thought": 0.7586206896551724, + "education_science": 0.5882352941176471, + "teacher_qualification": 0.7142857142857143, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.6923076923076923, + "middle_school_geography": 0.7058823529411765, + "modern_chinese_history": 0.5, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.4444444444444444, + "law": 0.5172413793103449, + "chinese_language_and_literature": 0.6428571428571429, + "art_studies": 0.6578947368421053, + "professional_tour_guide": 0.6176470588235294, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.64, + "middle_school_history": 0.8148148148148148, + "civil_servant": 0.38461538461538464, + "sports_science": 0.4583333333333333, + "plant_protection": 0.5185185185185185, + "basic_medicine": 0.5416666666666666, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.43137254901960786, + "accountant": 0.5, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.4722222222222222, + "tax_accountant": 0.46296296296296297, + "physician": 0.48148148148148145 + } + }, + "prompt_2": { + "accuracy": 0.47696139476961397, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.4523809523809524, + "college_physics": 0.25, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.125, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.047619047619047616, + "electrical_engineer": 0.35714285714285715, + "metrology_engineer": 0.5517241379310345, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.7692307692307693, + "middle_school_physics": 0.7083333333333334, + "middle_school_chemistry": 0.6, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.45, + "business_administration": 0.3684210526315789, + "marxism": 0.6666666666666666, + "mao_zedong_thought": 0.7931034482758621, + "education_science": 0.6470588235294118, + "teacher_qualification": 0.7346938775510204, + "high_school_politics": 0.5, + "high_school_geography": 0.5, + "middle_school_politics": 0.6923076923076923, + "middle_school_geography": 0.8235294117647058, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.4444444444444444, + "law": 0.5172413793103449, + "chinese_language_and_literature": 0.6428571428571429, + "art_studies": 0.6842105263157895, + "professional_tour_guide": 0.5882352941176471, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.6, + "middle_school_history": 0.7407407407407407, + "civil_servant": 0.34615384615384615, + "sports_science": 0.375, + "plant_protection": 0.5555555555555556, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.48148148148148145, + "urban_and_rural_planner": 0.47058823529411764, + "accountant": 0.5, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.5555555555555556, + "tax_accountant": 0.3888888888888889, + "physician": 0.5185185185185185 + } + }, + "prompt_3": { + "accuracy": 0.475093399750934, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.42857142857142855, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.13043478260869565, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.5517241379310345, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.375, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.4166666666666667, + "middle_school_biology": 0.7307692307692307, + "middle_school_physics": 0.625, + "middle_school_chemistry": 0.72, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.31666666666666665, + "business_administration": 0.42105263157894735, + "marxism": 0.5833333333333334, + "mao_zedong_thought": 0.7241379310344828, + "education_science": 0.5882352941176471, + "teacher_qualification": 0.6530612244897959, + "high_school_politics": 0.625, + "high_school_geography": 0.5, + "middle_school_politics": 0.7307692307692307, + "middle_school_geography": 0.7058823529411765, + "modern_chinese_history": 0.5357142857142857, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.4074074074074074, + "law": 0.4827586206896552, + "chinese_language_and_literature": 0.6071428571428571, + "art_studies": 0.6842105263157895, + "professional_tour_guide": 0.5882352941176471, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.52, + "middle_school_history": 0.8148148148148148, + "civil_servant": 0.4423076923076923, + "sports_science": 0.375, + "plant_protection": 0.4444444444444444, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.5185185185185185, + "urban_and_rural_planner": 0.5294117647058824, + "accountant": 0.5185185185185185, + "fire_engineer": 0.3888888888888889, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.3888888888888889, + "physician": 0.48148148148148145 + } + }, + "prompt_4": { + "accuracy": 0.4775840597758406, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.5833333333333334, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.4523809523809524, + "college_physics": 0.125, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.13043478260869565, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.5517241379310345, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.375, + "high_school_chemistry": 0.5416666666666666, + "high_school_biology": 0.4583333333333333, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.6923076923076923, + "middle_school_physics": 0.75, + "middle_school_chemistry": 0.68, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.4, + "business_administration": 0.39473684210526316, + "marxism": 0.7916666666666666, + "mao_zedong_thought": 0.7586206896551724, + "education_science": 0.5882352941176471, + "teacher_qualification": 0.6938775510204082, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.5416666666666666, + "middle_school_politics": 0.6153846153846154, + "middle_school_geography": 0.6470588235294118, + "modern_chinese_history": 0.5357142857142857, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.37037037037037035, + "law": 0.5862068965517241, + "chinese_language_and_literature": 0.6785714285714286, + "art_studies": 0.631578947368421, + "professional_tour_guide": 0.5882352941176471, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.25, + "high_school_history": 0.6, + "middle_school_history": 0.8148148148148148, + "civil_servant": 0.36538461538461536, + "sports_science": 0.4583333333333333, + "plant_protection": 0.4444444444444444, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.45098039215686275, + "accountant": 0.42592592592592593, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.4444444444444444, + "tax_accountant": 0.48148148148148145, + "physician": 0.5 + } + }, + "prompt_5": { + "accuracy": 0.4850560398505604, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.5714285714285714, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.4482758620689655, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.047619047619047616, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.5517241379310345, + "high_school_mathematics": 0.043478260869565216, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.5, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.8076923076923077, + "middle_school_physics": 0.625, + "middle_school_chemistry": 0.68, + "veterinary_medicine": 0.35714285714285715, + "college_economics": 0.43333333333333335, + "business_administration": 0.39473684210526316, + "marxism": 0.75, + "mao_zedong_thought": 0.7586206896551724, + "education_science": 0.6176470588235294, + "teacher_qualification": 0.7142857142857143, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.5416666666666666, + "middle_school_politics": 0.6538461538461539, + "middle_school_geography": 0.7058823529411765, + "modern_chinese_history": 0.5357142857142857, + "ideological_and_moral_cultivation": 0.6666666666666666, + "logic": 0.5185185185185185, + "law": 0.5172413793103449, + "chinese_language_and_literature": 0.5714285714285714, + "art_studies": 0.6578947368421053, + "professional_tour_guide": 0.5882352941176471, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.6, + "middle_school_history": 0.8148148148148148, + "civil_servant": 0.38461538461538464, + "sports_science": 0.5416666666666666, + "plant_protection": 0.5185185185185185, + "basic_medicine": 0.5416666666666666, + "clinical_medicine": 0.48148148148148145, + "urban_and_rural_planner": 0.45098039215686275, + "accountant": 0.4444444444444444, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.5277777777777778, + "tax_accountant": 0.3333333333333333, + "physician": 0.4074074074074074 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4731182795698925 + }, + "prompt_2": { + "accuracy": 0.5089605734767025 + }, + "prompt_3": { + "accuracy": 0.4982078853046595 + }, + "prompt_4": { + "accuracy": 0.4982078853046595 + }, + "prompt_5": { + "accuracy": 0.5376344086021505 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5018131583491625, + "category_acc": { + "agronomy": 0.44970414201183434, + "anatomy": 0.3918918918918919, + "ancient_chinese": 0.3170731707317073, + "arts": 0.8125, + "astronomy": 0.3939393939393939, + "business_ethics": 0.49760765550239233, + "chinese_civil_service_exam": 0.475, + "chinese_driving_rule": 0.6793893129770993, + "chinese_food_culture": 0.5514705882352942, + "chinese_foreign_policy": 0.514018691588785, + "chinese_history": 0.5139318885448917, + "chinese_literature": 0.5637254901960784, + "chinese_teacher_qualification": 0.6312849162011173, + "clinical_knowledge": 0.42616033755274263, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.5420560747663551, + "college_engineering_hydrology": 0.41509433962264153, + "college_law": 0.42592592592592593, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.41509433962264153, + "college_medicine": 0.4542124542124542, + "computer_science": 0.5637254901960784, + "computer_security": 0.5906432748538012, + "conceptual_physics": 0.4897959183673469, + "construction_project_management": 0.381294964028777, + "economics": 0.5220125786163522, + "education": 0.5153374233128835, + "electrical_engineering": 0.5116279069767442, + "elementary_chinese": 0.5357142857142857, + "elementary_commonsense": 0.5404040404040404, + "elementary_information_and_technology": 0.6764705882352942, + "elementary_mathematics": 0.3565217391304348, + "ethnology": 0.5851851851851851, + "food_science": 0.5314685314685315, + "genetics": 0.4318181818181818, + "global_facts": 0.5503355704697986, + "high_school_biology": 0.4556213017751479, + "high_school_chemistry": 0.4090909090909091, + "high_school_geography": 0.5677966101694916, + "high_school_mathematics": 0.2804878048780488, + "high_school_physics": 0.39090909090909093, + "high_school_politics": 0.38461538461538464, + "human_sexuality": 0.5, + "international_law": 0.42162162162162165, + "journalism": 0.5232558139534884, + "jurisprudence": 0.46715328467153283, + "legal_and_moral_basis": 0.7850467289719626, + "logical": 0.4634146341463415, + "machine_learning": 0.48360655737704916, + "management": 0.580952380952381, + "marketing": 0.5611111111111111, + "marxist_theory": 0.544973544973545, + "modern_chinese": 0.4224137931034483, + "nutrition": 0.5172413793103449, + "philosophy": 0.6285714285714286, + "professional_accounting": 0.5428571428571428, + "professional_law": 0.4028436018957346, + "professional_medicine": 0.40425531914893614, + "professional_psychology": 0.5818965517241379, + "public_relations": 0.5114942528735632, + "security_study": 0.6370370370370371, + "sociology": 0.5619469026548672, + "sports_science": 0.4909090909090909, + "traditional_chinese_medicine": 0.5081081081081081, + "virology": 0.4911242603550296, + "world_history": 0.4658385093167702, + "world_religions": 0.68125 + } + }, + "prompt_2": { + "accuracy": 0.49766879640821965, + "category_acc": { + "agronomy": 0.4556213017751479, + "anatomy": 0.3783783783783784, + "ancient_chinese": 0.29878048780487804, + "arts": 0.80625, + "astronomy": 0.3575757575757576, + "business_ethics": 0.49282296650717705, + "chinese_civil_service_exam": 0.425, + "chinese_driving_rule": 0.7022900763358778, + "chinese_food_culture": 0.5588235294117647, + "chinese_foreign_policy": 0.5046728971962616, + "chinese_history": 0.49226006191950467, + "chinese_literature": 0.5637254901960784, + "chinese_teacher_qualification": 0.6703910614525139, + "clinical_knowledge": 0.4810126582278481, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.5794392523364486, + "college_engineering_hydrology": 0.41509433962264153, + "college_law": 0.4166666666666667, + "college_mathematics": 0.2, + "college_medical_statistics": 0.42452830188679247, + "college_medicine": 0.45787545787545786, + "computer_science": 0.5784313725490197, + "computer_security": 0.5847953216374269, + "conceptual_physics": 0.46258503401360546, + "construction_project_management": 0.37410071942446044, + "economics": 0.5345911949685535, + "education": 0.558282208588957, + "electrical_engineering": 0.47093023255813954, + "elementary_chinese": 0.5634920634920635, + "elementary_commonsense": 0.5606060606060606, + "elementary_information_and_technology": 0.6932773109243697, + "elementary_mathematics": 0.34782608695652173, + "ethnology": 0.5333333333333333, + "food_science": 0.5244755244755245, + "genetics": 0.4318181818181818, + "global_facts": 0.5436241610738255, + "high_school_biology": 0.42011834319526625, + "high_school_chemistry": 0.3787878787878788, + "high_school_geography": 0.559322033898305, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.38181818181818183, + "high_school_politics": 0.45454545454545453, + "human_sexuality": 0.5, + "international_law": 0.4, + "journalism": 0.5581395348837209, + "jurisprudence": 0.4744525547445255, + "legal_and_moral_basis": 0.7663551401869159, + "logical": 0.4634146341463415, + "machine_learning": 0.4098360655737705, + "management": 0.5761904761904761, + "marketing": 0.55, + "marxist_theory": 0.5661375661375662, + "modern_chinese": 0.3793103448275862, + "nutrition": 0.496551724137931, + "philosophy": 0.6, + "professional_accounting": 0.56, + "professional_law": 0.4265402843601896, + "professional_medicine": 0.39361702127659576, + "professional_psychology": 0.5732758620689655, + "public_relations": 0.4827586206896552, + "security_study": 0.6074074074074074, + "sociology": 0.5663716814159292, + "sports_science": 0.45454545454545453, + "traditional_chinese_medicine": 0.43783783783783786, + "virology": 0.4970414201183432, + "world_history": 0.45962732919254656, + "world_religions": 0.6625 + } + }, + "prompt_3": { + "accuracy": 0.48963909514764287, + "category_acc": { + "agronomy": 0.47928994082840237, + "anatomy": 0.3716216216216216, + "ancient_chinese": 0.2865853658536585, + "arts": 0.80625, + "astronomy": 0.3939393939393939, + "business_ethics": 0.48325358851674644, + "chinese_civil_service_exam": 0.45, + "chinese_driving_rule": 0.6870229007633588, + "chinese_food_culture": 0.5661764705882353, + "chinese_foreign_policy": 0.411214953271028, + "chinese_history": 0.43653250773993807, + "chinese_literature": 0.5245098039215687, + "chinese_teacher_qualification": 0.6368715083798883, + "clinical_knowledge": 0.39662447257383965, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.5607476635514018, + "college_engineering_hydrology": 0.330188679245283, + "college_law": 0.4166666666666667, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.4056603773584906, + "college_medicine": 0.4358974358974359, + "computer_science": 0.5882352941176471, + "computer_security": 0.5614035087719298, + "conceptual_physics": 0.46938775510204084, + "construction_project_management": 0.38848920863309355, + "economics": 0.5345911949685535, + "education": 0.5705521472392638, + "electrical_engineering": 0.45930232558139533, + "elementary_chinese": 0.5238095238095238, + "elementary_commonsense": 0.5252525252525253, + "elementary_information_and_technology": 0.6554621848739496, + "elementary_mathematics": 0.3869565217391304, + "ethnology": 0.5703703703703704, + "food_science": 0.5244755244755245, + "genetics": 0.4375, + "global_facts": 0.5570469798657718, + "high_school_biology": 0.40236686390532544, + "high_school_chemistry": 0.3712121212121212, + "high_school_geography": 0.559322033898305, + "high_school_mathematics": 0.3231707317073171, + "high_school_physics": 0.4, + "high_school_politics": 0.42657342657342656, + "human_sexuality": 0.4603174603174603, + "international_law": 0.4, + "journalism": 0.5465116279069767, + "jurisprudence": 0.46472019464720193, + "legal_and_moral_basis": 0.7757009345794392, + "logical": 0.4715447154471545, + "machine_learning": 0.4262295081967213, + "management": 0.6, + "marketing": 0.55, + "marxist_theory": 0.5238095238095238, + "modern_chinese": 0.39655172413793105, + "nutrition": 0.503448275862069, + "philosophy": 0.580952380952381, + "professional_accounting": 0.56, + "professional_law": 0.3696682464454976, + "professional_medicine": 0.39361702127659576, + "professional_psychology": 0.5560344827586207, + "public_relations": 0.5114942528735632, + "security_study": 0.6148148148148148, + "sociology": 0.5398230088495575, + "sports_science": 0.4727272727272727, + "traditional_chinese_medicine": 0.4918918918918919, + "virology": 0.48520710059171596, + "world_history": 0.45962732919254656, + "world_religions": 0.64375 + } + }, + "prompt_4": { + "accuracy": 0.5012951131065446, + "category_acc": { + "agronomy": 0.47337278106508873, + "anatomy": 0.3918918918918919, + "ancient_chinese": 0.35365853658536583, + "arts": 0.80625, + "astronomy": 0.34545454545454546, + "business_ethics": 0.49282296650717705, + "chinese_civil_service_exam": 0.4875, + "chinese_driving_rule": 0.6259541984732825, + "chinese_food_culture": 0.5735294117647058, + "chinese_foreign_policy": 0.5233644859813084, + "chinese_history": 0.5913312693498453, + "chinese_literature": 0.5392156862745098, + "chinese_teacher_qualification": 0.6424581005586593, + "clinical_knowledge": 0.4767932489451477, + "college_actuarial_science": 0.3018867924528302, + "college_education": 0.5794392523364486, + "college_engineering_hydrology": 0.4056603773584906, + "college_law": 0.5, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.4056603773584906, + "college_medicine": 0.45787545787545786, + "computer_science": 0.5392156862745098, + "computer_security": 0.5906432748538012, + "conceptual_physics": 0.54421768707483, + "construction_project_management": 0.41007194244604317, + "economics": 0.559748427672956, + "education": 0.5214723926380368, + "electrical_engineering": 0.4883720930232558, + "elementary_chinese": 0.5357142857142857, + "elementary_commonsense": 0.5353535353535354, + "elementary_information_and_technology": 0.6638655462184874, + "elementary_mathematics": 0.3391304347826087, + "ethnology": 0.5555555555555556, + "food_science": 0.4755244755244755, + "genetics": 0.3977272727272727, + "global_facts": 0.5369127516778524, + "high_school_biology": 0.378698224852071, + "high_school_chemistry": 0.4015151515151515, + "high_school_geography": 0.5338983050847458, + "high_school_mathematics": 0.21341463414634146, + "high_school_physics": 0.4, + "high_school_politics": 0.4965034965034965, + "human_sexuality": 0.5079365079365079, + "international_law": 0.42162162162162165, + "journalism": 0.5348837209302325, + "jurisprudence": 0.49635036496350365, + "legal_and_moral_basis": 0.7990654205607477, + "logical": 0.5121951219512195, + "machine_learning": 0.4426229508196721, + "management": 0.5857142857142857, + "marketing": 0.55, + "marxist_theory": 0.5555555555555556, + "modern_chinese": 0.4051724137931034, + "nutrition": 0.5241379310344828, + "philosophy": 0.6285714285714286, + "professional_accounting": 0.5257142857142857, + "professional_law": 0.3791469194312796, + "professional_medicine": 0.3829787234042553, + "professional_psychology": 0.5818965517241379, + "public_relations": 0.4827586206896552, + "security_study": 0.6222222222222222, + "sociology": 0.5707964601769911, + "sports_science": 0.44242424242424244, + "traditional_chinese_medicine": 0.43783783783783786, + "virology": 0.48520710059171596, + "world_history": 0.4968944099378882, + "world_religions": 0.64375 + } + }, + "prompt_5": { + "accuracy": 0.516663788637541, + "category_acc": { + "agronomy": 0.44970414201183434, + "anatomy": 0.4189189189189189, + "ancient_chinese": 0.3048780487804878, + "arts": 0.80625, + "astronomy": 0.37575757575757573, + "business_ethics": 0.5454545454545454, + "chinese_civil_service_exam": 0.4625, + "chinese_driving_rule": 0.6946564885496184, + "chinese_food_culture": 0.5514705882352942, + "chinese_foreign_policy": 0.5514018691588785, + "chinese_history": 0.6346749226006192, + "chinese_literature": 0.5343137254901961, + "chinese_teacher_qualification": 0.7206703910614525, + "clinical_knowledge": 0.45147679324894513, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.6635514018691588, + "college_engineering_hydrology": 0.42452830188679247, + "college_law": 0.4444444444444444, + "college_mathematics": 0.20952380952380953, + "college_medical_statistics": 0.37735849056603776, + "college_medicine": 0.4725274725274725, + "computer_science": 0.5294117647058824, + "computer_security": 0.5847953216374269, + "conceptual_physics": 0.5306122448979592, + "construction_project_management": 0.3669064748201439, + "economics": 0.559748427672956, + "education": 0.5705521472392638, + "electrical_engineering": 0.46511627906976744, + "elementary_chinese": 0.5396825396825397, + "elementary_commonsense": 0.5454545454545454, + "elementary_information_and_technology": 0.680672268907563, + "elementary_mathematics": 0.3695652173913043, + "ethnology": 0.562962962962963, + "food_science": 0.5174825174825175, + "genetics": 0.4602272727272727, + "global_facts": 0.5503355704697986, + "high_school_biology": 0.38461538461538464, + "high_school_chemistry": 0.3787878787878788, + "high_school_geography": 0.559322033898305, + "high_school_mathematics": 0.2621951219512195, + "high_school_physics": 0.42727272727272725, + "high_school_politics": 0.48951048951048953, + "human_sexuality": 0.47619047619047616, + "international_law": 0.4594594594594595, + "journalism": 0.5872093023255814, + "jurisprudence": 0.49148418491484186, + "legal_and_moral_basis": 0.822429906542056, + "logical": 0.5121951219512195, + "machine_learning": 0.4098360655737705, + "management": 0.6095238095238096, + "marketing": 0.5833333333333334, + "marxist_theory": 0.6190476190476191, + "modern_chinese": 0.3620689655172414, + "nutrition": 0.5517241379310345, + "philosophy": 0.6857142857142857, + "professional_accounting": 0.5657142857142857, + "professional_law": 0.3412322274881517, + "professional_medicine": 0.42819148936170215, + "professional_psychology": 0.5905172413793104, + "public_relations": 0.5172413793103449, + "security_study": 0.6148148148148148, + "sociology": 0.5796460176991151, + "sports_science": 0.47878787878787876, + "traditional_chinese_medicine": 0.518918918918919, + "virology": 0.5502958579881657, + "world_history": 0.5403726708074534, + "world_religions": 0.675 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3939393939393939 + }, + "prompt_2": { + "accuracy": 0.3333333333333333 + }, + "prompt_3": { + "accuracy": 0.3333333333333333 + }, + "prompt_4": { + "accuracy": 0.2727272727272727 + }, + "prompt_5": { + "accuracy": 0.24242424242424243 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2818181818181818 + }, + "prompt_2": { + "accuracy": 0.26136363636363635 + }, + "prompt_3": { + "accuracy": 0.27954545454545454 + }, + "prompt_4": { + "accuracy": 0.4431818181818182 + }, + "prompt_5": { + "accuracy": 0.33636363636363636 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.36033898305084744 + }, + "prompt_2": { + "accuracy": 0.36474576271186443 + }, + "prompt_3": { + "accuracy": 0.3566101694915254 + }, + "prompt_4": { + "accuracy": 0.3983050847457627 + }, + "prompt_5": { + "accuracy": 0.4010169491525424 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7643979057591623 + }, + "prompt_2": { + "accuracy": 0.7744951383694839 + }, + "prompt_3": { + "accuracy": 0.7572924457741211 + }, + "prompt_4": { + "accuracy": 0.7756170531039641 + }, + "prompt_5": { + "accuracy": 0.7288706058339566 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6771190592846644 + }, + "prompt_2": { + "accuracy": 0.6153846153846154 + }, + "prompt_3": { + "accuracy": 0.6678098971092602 + }, + "prompt_4": { + "accuracy": 0.6918177364037237 + }, + "prompt_5": { + "accuracy": 0.7172954434100931 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2206559248918071, + "rouge2": 0.04547037262802864, + "rougeL": 0.1568201123649577, + "avg_rouge": 0.14098213662826448 + }, + "prompt_2": { + "rouge1": 0.24378232983556597, + "rouge2": 0.04816210007059821, + "rougeL": 0.17867042180005663, + "avg_rouge": 0.15687161723540696 + }, + "prompt_3": { + "rouge1": 0.23515422819186563, + "rouge2": 0.051553558422738864, + "rougeL": 0.17124721329427042, + "avg_rouge": 0.15265166663629162 + }, + "prompt_4": { + "rouge1": 0.22205046039228635, + "rouge2": 0.04397942764328708, + "rougeL": 0.16002346447218707, + "avg_rouge": 0.1420177841692535 + }, + "prompt_5": { + "rouge1": 0.22493500568041366, + "rouge2": 0.03933105101795148, + "rougeL": 0.16316773894068273, + "avg_rouge": 0.1424779318796826 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.12907960040837113, + "rouge2": 0.02193595745447887, + "rougeL": 0.09446148261736474, + "avg_rouge": 0.08182568016007158 + }, + "prompt_2": { + "rouge1": 0.12884313556929558, + "rouge2": 0.019011700278715567, + "rougeL": 0.093859867646741, + "avg_rouge": 0.08057156783158405 + }, + "prompt_3": { + "rouge1": 0.1451642384740487, + "rouge2": 0.031300189339175936, + "rougeL": 0.10688632332588778, + "avg_rouge": 0.09445025037970413 + }, + "prompt_4": { + "rouge1": 0.1337579656954374, + "rouge2": 0.026666881579080894, + "rougeL": 0.09933145764124457, + "avg_rouge": 0.08658543497192095 + }, + "prompt_5": { + "rouge1": 0.11948269870432729, + "rouge2": 0.012834806504803413, + "rougeL": 0.0899854488250709, + "avg_rouge": 0.07410098467806721 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.783256880733945 + }, + "prompt_2": { + "accuracy": 0.676605504587156 + }, + "prompt_3": { + "accuracy": 0.7557339449541285 + }, + "prompt_4": { + "accuracy": 0.7786697247706422 + }, + "prompt_5": { + "accuracy": 0.7672018348623854 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.538830297219559 + }, + "prompt_2": { + "accuracy": 0.5062320230105465 + }, + "prompt_3": { + "accuracy": 0.5455417066155321 + }, + "prompt_4": { + "accuracy": 0.5560882070949185 + }, + "prompt_5": { + "accuracy": 0.6912751677852349 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4935 + }, + "prompt_2": { + "accuracy": 0.494 + }, + "prompt_3": { + "accuracy": 0.4945 + }, + "prompt_4": { + "accuracy": 0.5245 + }, + "prompt_5": { + "accuracy": 0.531 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.439 + }, + "prompt_2": { + "accuracy": 0.337 + }, + "prompt_3": { + "accuracy": 0.3545 + }, + "prompt_4": { + "accuracy": 0.36 + }, + "prompt_5": { + "accuracy": 0.3675 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.569 + }, + "prompt_2": { + "accuracy": 0.5085 + }, + "prompt_3": { + "accuracy": 0.4985 + }, + "prompt_4": { + "accuracy": 0.545 + }, + "prompt_5": { + "accuracy": 0.5495 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4507042253521127 + }, + "prompt_2": { + "accuracy": 0.5211267605633803 + }, + "prompt_3": { + "accuracy": 0.5352112676056338 + }, + "prompt_4": { + "accuracy": 0.4647887323943662 + }, + "prompt_5": { + "accuracy": 0.5352112676056338 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5451263537906137 + }, + "prompt_2": { + "accuracy": 0.5342960288808665 + }, + "prompt_3": { + "accuracy": 0.5018050541516246 + }, + "prompt_4": { + "accuracy": 0.5631768953068592 + }, + "prompt_5": { + "accuracy": 0.5379061371841155 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4803921568627451 + }, + "prompt_2": { + "accuracy": 0.47794117647058826 + }, + "prompt_3": { + "accuracy": 0.5049019607843137 + }, + "prompt_4": { + "accuracy": 0.4950980392156863 + }, + "prompt_5": { + "accuracy": 0.5024509803921569 + } } }, "five_shot": { @@ -7102,53 +59871,1733 @@ "model_link": "https://huggingface.co/baichuan-inc/Baichuan2-7B-Base", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.4438095238095238, + "language_acc": { + "Malay": 0.34, + "English": 0.5533333333333333, + "Vietnamese": 0.36666666666666664, + "Spanish": 0.5333333333333333, + "Indonesian": 0.38, + "Filipino": 0.37333333333333335, + "Chinese": 0.56 + }, + "consistency_score_2": 0.4847619047619047, + "consistency_score_3": 0.29390476190476195, + "consistency_score_4": 0.19828571428571426, + "consistency_score_5": 0.14634920634920634, + "consistency_score_6": 0.11714285714285713, + "consistency_score_7": 0.1, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.47333333333333333, + "Malay,Vietnamese": 0.41333333333333333, + "Malay,Spanish": 0.44666666666666666, + "Malay,Indonesian": 0.5666666666666667, + "Malay,Filipino": 0.49333333333333335, + "Malay,Chinese": 0.4666666666666667, + "English,Vietnamese": 0.41333333333333333, + "English,Spanish": 0.6466666666666666, + "English,Indonesian": 0.49333333333333335, + "English,Filipino": 0.44, + "English,Chinese": 0.6533333333333333, + "Vietnamese,Spanish": 0.44666666666666666, + "Vietnamese,Indonesian": 0.5133333333333333, + "Vietnamese,Filipino": 0.44, + "Vietnamese,Chinese": 0.42, + "Spanish,Indonesian": 0.4866666666666667, + "Spanish,Filipino": 0.4, + "Spanish,Chinese": 0.58, + "Indonesian,Filipino": 0.4866666666666667, + "Indonesian,Chinese": 0.48, + "Filipino,Chinese": 0.42 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.26, + "Malay,English,Spanish": 0.3333333333333333, + "Malay,English,Indonesian": 0.31333333333333335, + "Malay,English,Filipino": 0.26, + "Malay,English,Chinese": 0.3466666666666667, + "Malay,Vietnamese,Spanish": 0.22666666666666666, + "Malay,Vietnamese,Indonesian": 0.32, + "Malay,Vietnamese,Filipino": 0.26666666666666666, + "Malay,Vietnamese,Chinese": 0.23333333333333334, + "Malay,Spanish,Indonesian": 0.30666666666666664, + "Malay,Spanish,Filipino": 0.23333333333333334, + "Malay,Spanish,Chinese": 0.30666666666666664, + "Malay,Indonesian,Filipino": 0.34, + "Malay,Indonesian,Chinese": 0.31333333333333335, + "Malay,Filipino,Chinese": 0.26666666666666666, + "English,Vietnamese,Spanish": 0.32, + "English,Vietnamese,Indonesian": 0.2866666666666667, + "English,Vietnamese,Filipino": 0.24, + "English,Vietnamese,Chinese": 0.30666666666666664, + "English,Spanish,Indonesian": 0.3466666666666667, + "English,Spanish,Filipino": 0.30666666666666664, + "English,Spanish,Chinese": 0.49333333333333335, + "English,Indonesian,Filipino": 0.2733333333333333, + "English,Indonesian,Chinese": 0.36, + "English,Filipino,Chinese": 0.32, + "Vietnamese,Spanish,Indonesian": 0.3, + "Vietnamese,Spanish,Filipino": 0.21333333333333335, + "Vietnamese,Spanish,Chinese": 0.29333333333333333, + "Vietnamese,Indonesian,Filipino": 0.2866666666666667, + "Vietnamese,Indonesian,Chinese": 0.26666666666666666, + "Vietnamese,Filipino,Chinese": 0.23333333333333334, + "Spanish,Indonesian,Filipino": 0.24, + "Spanish,Indonesian,Chinese": 0.3333333333333333, + "Spanish,Filipino,Chinese": 0.26666666666666666, + "Indonesian,Filipino,Chinese": 0.2733333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.19333333333333333, + "Malay,English,Vietnamese,Indonesian": 0.22, + "Malay,English,Vietnamese,Filipino": 0.18, + "Malay,English,Vietnamese,Chinese": 0.2, + "Malay,English,Spanish,Indonesian": 0.22, + "Malay,English,Spanish,Filipino": 0.18666666666666668, + "Malay,English,Spanish,Chinese": 0.26666666666666666, + "Malay,English,Indonesian,Filipino": 0.2, + "Malay,English,Indonesian,Chinese": 0.22666666666666666, + "Malay,English,Filipino,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.2, + "Malay,Vietnamese,Spanish,Filipino": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.2, + "Malay,Vietnamese,Indonesian,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.16, + "Malay,Spanish,Indonesian,Filipino": 0.18, + "Malay,Spanish,Indonesian,Chinese": 0.20666666666666667, + "Malay,Spanish,Filipino,Chinese": 0.16666666666666666, + "Malay,Indonesian,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.23333333333333334, + "English,Vietnamese,Spanish,Filipino": 0.18, + "English,Vietnamese,Spanish,Chinese": 0.25333333333333335, + "English,Vietnamese,Indonesian,Filipino": 0.16666666666666666, + "English,Vietnamese,Indonesian,Chinese": 0.21333333333333335, + "English,Vietnamese,Filipino,Chinese": 0.18666666666666668, + "English,Spanish,Indonesian,Filipino": 0.18666666666666668, + "English,Spanish,Indonesian,Chinese": 0.2733333333333333, + "English,Spanish,Filipino,Chinese": 0.24666666666666667, + "English,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Vietnamese,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Vietnamese,Spanish,Indonesian,Chinese": 0.22, + "Vietnamese,Spanish,Filipino,Chinese": 0.16, + "Vietnamese,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.16666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino": 0.13333333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.16, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.14666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "Malay,English,Vietnamese,Filipino,Chinese": 0.14, + "Malay,English,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.17333333333333334, + "Malay,English,Spanish,Filipino,Chinese": 0.15333333333333332, + "Malay,English,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.13333333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.18666666666666668, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.15333333333333332, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.12666666666666668, + "English,Spanish,Indonesian,Filipino,Chinese": 0.16, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.11333333333333333, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.14, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1 + } + }, + "AC3_2": 0.463382661732759, + "AC3_3": 0.3536266951434383, + "AC3_4": 0.27410603038490744, + "AC3_5": 0.2201142446923319, + "AC3_6": 0.18536017459707985, + "AC3_7": 0.1632224167825948 + }, + "prompt_2": { + "overall_acc": 0.42, + "language_acc": { + "Malay": 0.36, + "English": 0.5133333333333333, + "Vietnamese": 0.36666666666666664, + "Spanish": 0.4533333333333333, + "Indonesian": 0.3333333333333333, + "Filipino": 0.4, + "Chinese": 0.5133333333333333 + }, + "consistency_score_2": 0.4028571428571428, + "consistency_score_3": 0.21123809523809528, + "consistency_score_4": 0.13047619047619047, + "consistency_score_5": 0.08984126984126982, + "consistency_score_6": 0.06666666666666667, + "consistency_score_7": 0.05333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.38666666666666666, + "Malay,Vietnamese": 0.3466666666666667, + "Malay,Spanish": 0.3933333333333333, + "Malay,Indonesian": 0.5, + "Malay,Filipino": 0.4, + "Malay,Chinese": 0.3, + "English,Vietnamese": 0.34, + "English,Spanish": 0.49333333333333335, + "English,Indonesian": 0.4066666666666667, + "English,Filipino": 0.35333333333333333, + "English,Chinese": 0.5133333333333333, + "Vietnamese,Spanish": 0.36, + "Vietnamese,Indonesian": 0.36666666666666664, + "Vietnamese,Filipino": 0.3466666666666667, + "Vietnamese,Chinese": 0.4066666666666667, + "Spanish,Indonesian": 0.4533333333333333, + "Spanish,Filipino": 0.44666666666666666, + "Spanish,Chinese": 0.44666666666666666, + "Indonesian,Filipino": 0.5133333333333333, + "Indonesian,Chinese": 0.32666666666666666, + "Filipino,Chinese": 0.36 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.15333333333333332, + "Malay,English,Spanish": 0.23333333333333334, + "Malay,English,Indonesian": 0.23333333333333334, + "Malay,English,Filipino": 0.17333333333333334, + "Malay,English,Chinese": 0.2, + "Malay,Vietnamese,Spanish": 0.17333333333333334, + "Malay,Vietnamese,Indonesian": 0.22, + "Malay,Vietnamese,Filipino": 0.16666666666666666, + "Malay,Vietnamese,Chinese": 0.15333333333333332, + "Malay,Spanish,Indonesian": 0.25333333333333335, + "Malay,Spanish,Filipino": 0.22, + "Malay,Spanish,Chinese": 0.20666666666666667, + "Malay,Indonesian,Filipino": 0.2866666666666667, + "Malay,Indonesian,Chinese": 0.17333333333333334, + "Malay,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish": 0.21333333333333335, + "English,Vietnamese,Indonesian": 0.19333333333333333, + "English,Vietnamese,Filipino": 0.14, + "English,Vietnamese,Chinese": 0.24666666666666667, + "English,Spanish,Indonesian": 0.26666666666666666, + "English,Spanish,Filipino": 0.24666666666666667, + "English,Spanish,Chinese": 0.32, + "English,Indonesian,Filipino": 0.21333333333333335, + "English,Indonesian,Chinese": 0.24, + "English,Filipino,Chinese": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian": 0.18666666666666668, + "Vietnamese,Spanish,Filipino": 0.17333333333333334, + "Vietnamese,Spanish,Chinese": 0.24, + "Vietnamese,Indonesian,Filipino": 0.19333333333333333, + "Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "Vietnamese,Filipino,Chinese": 0.18, + "Spanish,Indonesian,Filipino": 0.28, + "Spanish,Indonesian,Chinese": 0.23333333333333334, + "Spanish,Filipino,Chinese": 0.24666666666666667, + "Indonesian,Filipino,Chinese": 0.18666666666666668 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian": 0.12666666666666668, + "Malay,English,Vietnamese,Filipino": 0.08, + "Malay,English,Vietnamese,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Indonesian": 0.17333333333333334, + "Malay,English,Spanish,Filipino": 0.12666666666666668, + "Malay,English,Spanish,Chinese": 0.15333333333333332, + "Malay,English,Indonesian,Filipino": 0.14, + "Malay,English,Indonesian,Chinese": 0.13333333333333333, + "Malay,English,Filipino,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Indonesian": 0.12, + "Malay,Vietnamese,Spanish,Filipino": 0.1, + "Malay,Vietnamese,Spanish,Chinese": 0.12, + "Malay,Vietnamese,Indonesian,Filipino": 0.12666666666666668, + "Malay,Vietnamese,Indonesian,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.08666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.17333333333333334, + "Malay,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Malay,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,Indonesian,Filipino,Chinese": 0.1, + "English,Vietnamese,Spanish,Indonesian": 0.13333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.11333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.18, + "English,Vietnamese,Indonesian,Filipino": 0.10666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.14, + "English,Vietnamese,Filipino,Chinese": 0.11333333333333333, + "English,Spanish,Indonesian,Filipino": 0.16666666666666666, + "English,Spanish,Indonesian,Chinese": 0.18, + "English,Spanish,Filipino,Chinese": 0.18, + "English,Indonesian,Filipino,Chinese": 0.14, + "Vietnamese,Spanish,Indonesian,Filipino": 0.12, + "Vietnamese,Spanish,Indonesian,Chinese": 0.14, + "Vietnamese,Spanish,Filipino,Chinese": 0.13333333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.1, + "Spanish,Indonesian,Filipino,Chinese": 0.16 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.1, + "Malay,English,Vietnamese,Spanish,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.08, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Filipino,Chinese": 0.06, + "Malay,English,Spanish,Indonesian,Filipino": 0.11333333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.08666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.08, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.08, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.06666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.08666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.08, + "English,Spanish,Indonesian,Filipino,Chinese": 0.12, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.08, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.06, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334 + } + }, + "AC3_2": 0.41124999995002165, + "AC3_3": 0.281098370504654, + "AC3_4": 0.19910034598459248, + "AC3_5": 0.14801992525116667, + "AC3_6": 0.11506849312704072, + "AC3_7": 0.09464788730394763 + }, + "prompt_3": { + "overall_acc": 0.40761904761904766, + "language_acc": { + "Malay": 0.30666666666666664, + "English": 0.54, + "Vietnamese": 0.34, + "Spanish": 0.46, + "Indonesian": 0.36, + "Filipino": 0.35333333333333333, + "Chinese": 0.49333333333333335 + }, + "consistency_score_2": 0.4212698412698413, + "consistency_score_3": 0.22819047619047622, + "consistency_score_4": 0.14342857142857138, + "consistency_score_5": 0.09714285714285714, + "consistency_score_6": 0.06761904761904762, + "consistency_score_7": 0.04666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.38666666666666666, + "Malay,Vietnamese": 0.38666666666666666, + "Malay,Spanish": 0.4066666666666667, + "Malay,Indonesian": 0.48, + "Malay,Filipino": 0.38666666666666666, + "Malay,Chinese": 0.38, + "English,Vietnamese": 0.36666666666666664, + "English,Spanish": 0.5466666666666666, + "English,Indonesian": 0.44666666666666666, + "English,Filipino": 0.38666666666666666, + "English,Chinese": 0.5666666666666667, + "Vietnamese,Spanish": 0.36, + "Vietnamese,Indonesian": 0.4, + "Vietnamese,Filipino": 0.37333333333333335, + "Vietnamese,Chinese": 0.41333333333333333, + "Spanish,Indonesian": 0.52, + "Spanish,Filipino": 0.3466666666666667, + "Spanish,Chinese": 0.48, + "Indonesian,Filipino": 0.43333333333333335, + "Indonesian,Chinese": 0.43333333333333335, + "Filipino,Chinese": 0.3466666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.19333333333333333, + "Malay,English,Spanish": 0.25333333333333335, + "Malay,English,Indonesian": 0.25333333333333335, + "Malay,English,Filipino": 0.18666666666666668, + "Malay,English,Chinese": 0.25333333333333335, + "Malay,Vietnamese,Spanish": 0.2, + "Malay,Vietnamese,Indonesian": 0.24666666666666667, + "Malay,Vietnamese,Filipino": 0.18666666666666668, + "Malay,Vietnamese,Chinese": 0.19333333333333333, + "Malay,Spanish,Indonesian": 0.2733333333333333, + "Malay,Spanish,Filipino": 0.17333333333333334, + "Malay,Spanish,Chinese": 0.24, + "Malay,Indonesian,Filipino": 0.24, + "Malay,Indonesian,Chinese": 0.23333333333333334, + "Malay,Filipino,Chinese": 0.18666666666666668, + "English,Vietnamese,Spanish": 0.23333333333333334, + "English,Vietnamese,Indonesian": 0.22, + "English,Vietnamese,Filipino": 0.16, + "English,Vietnamese,Chinese": 0.25333333333333335, + "English,Spanish,Indonesian": 0.3333333333333333, + "English,Spanish,Filipino": 0.22, + "English,Spanish,Chinese": 0.3466666666666667, + "English,Indonesian,Filipino": 0.22, + "English,Indonesian,Chinese": 0.31333333333333335, + "English,Filipino,Chinese": 0.24, + "Vietnamese,Spanish,Indonesian": 0.22666666666666666, + "Vietnamese,Spanish,Filipino": 0.16666666666666666, + "Vietnamese,Spanish,Chinese": 0.22666666666666666, + "Vietnamese,Indonesian,Filipino": 0.2, + "Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "Vietnamese,Filipino,Chinese": 0.19333333333333333, + "Spanish,Indonesian,Filipino": 0.20666666666666667, + "Spanish,Indonesian,Chinese": 0.30666666666666664, + "Spanish,Filipino,Chinese": 0.19333333333333333, + "Indonesian,Filipino,Chinese": 0.20666666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.14, + "Malay,English,Vietnamese,Indonesian": 0.14666666666666667, + "Malay,English,Vietnamese,Filipino": 0.1, + "Malay,English,Vietnamese,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Indonesian": 0.18666666666666668, + "Malay,English,Spanish,Filipino": 0.11333333333333333, + "Malay,English,Spanish,Chinese": 0.19333333333333333, + "Malay,English,Indonesian,Filipino": 0.14666666666666667, + "Malay,English,Indonesian,Chinese": 0.19333333333333333, + "Malay,English,Filipino,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Indonesian": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Indonesian,Filipino": 0.14, + "Malay,Vietnamese,Indonesian,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.10666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.13333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.17333333333333334, + "Malay,Spanish,Filipino,Chinese": 0.12, + "Malay,Indonesian,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.16666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.10666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.16, + "English,Vietnamese,Indonesian,Filipino": 0.12, + "English,Vietnamese,Indonesian,Chinese": 0.16, + "English,Vietnamese,Filipino,Chinese": 0.12, + "English,Spanish,Indonesian,Filipino": 0.14666666666666667, + "English,Spanish,Indonesian,Chinese": 0.22666666666666666, + "English,Spanish,Filipino,Chinese": 0.14, + "English,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino": 0.11333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.12, + "Vietnamese,Indonesian,Filipino,Chinese": 0.12, + "Spanish,Indonesian,Filipino,Chinese": 0.14 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.11333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.12, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.08666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.12, + "Malay,English,Vietnamese,Filipino,Chinese": 0.08, + "Malay,English,Spanish,Indonesian,Filipino": 0.09333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.08, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.1, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.08, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.07333333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.06, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.08, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667 + } + }, + "AC3_2": 0.4143320383954185, + "AC3_3": 0.2925869496529749, + "AC3_4": 0.21219297808602983, + "AC3_5": 0.15689487867511653, + "AC3_6": 0.11599580110165371, + "AC3_7": 0.08374563240644313 + }, + "prompt_4": { + "overall_acc": 0.4114285714285714, + "language_acc": { + "Malay": 0.32, + "English": 0.5333333333333333, + "Vietnamese": 0.37333333333333335, + "Spanish": 0.49333333333333335, + "Indonesian": 0.35333333333333333, + "Filipino": 0.30666666666666664, + "Chinese": 0.5 + }, + "consistency_score_2": 0.40730158730158716, + "consistency_score_3": 0.21657142857142858, + "consistency_score_4": 0.13599999999999998, + "consistency_score_5": 0.09714285714285714, + "consistency_score_6": 0.07523809523809524, + "consistency_score_7": 0.06, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.3933333333333333, + "Malay,Vietnamese": 0.3466666666666667, + "Malay,Spanish": 0.3333333333333333, + "Malay,Indonesian": 0.54, + "Malay,Filipino": 0.44, + "Malay,Chinese": 0.37333333333333335, + "English,Vietnamese": 0.35333333333333333, + "English,Spanish": 0.5333333333333333, + "English,Indonesian": 0.4066666666666667, + "English,Filipino": 0.32, + "English,Chinese": 0.5333333333333333, + "Vietnamese,Spanish": 0.38666666666666666, + "Vietnamese,Indonesian": 0.37333333333333335, + "Vietnamese,Filipino": 0.36666666666666664, + "Vietnamese,Chinese": 0.37333333333333335, + "Spanish,Indonesian": 0.34, + "Spanish,Filipino": 0.38, + "Spanish,Chinese": 0.5333333333333333, + "Indonesian,Filipino": 0.44666666666666666, + "Indonesian,Chinese": 0.3933333333333333, + "Filipino,Chinese": 0.38666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.17333333333333334, + "Malay,English,Spanish": 0.23333333333333334, + "Malay,English,Indonesian": 0.26666666666666666, + "Malay,English,Filipino": 0.17333333333333334, + "Malay,English,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Spanish": 0.16, + "Malay,Vietnamese,Indonesian": 0.25333333333333335, + "Malay,Vietnamese,Filipino": 0.18, + "Malay,Vietnamese,Chinese": 0.17333333333333334, + "Malay,Spanish,Indonesian": 0.22666666666666666, + "Malay,Spanish,Filipino": 0.18, + "Malay,Spanish,Chinese": 0.20666666666666667, + "Malay,Indonesian,Filipino": 0.2866666666666667, + "Malay,Indonesian,Chinese": 0.28, + "Malay,Filipino,Chinese": 0.21333333333333335, + "English,Vietnamese,Spanish": 0.23333333333333334, + "English,Vietnamese,Indonesian": 0.17333333333333334, + "English,Vietnamese,Filipino": 0.13333333333333333, + "English,Vietnamese,Chinese": 0.22666666666666666, + "English,Spanish,Indonesian": 0.24, + "English,Spanish,Filipino": 0.22666666666666666, + "English,Spanish,Chinese": 0.36, + "English,Indonesian,Filipino": 0.18, + "English,Indonesian,Chinese": 0.26, + "English,Filipino,Chinese": 0.23333333333333334, + "Vietnamese,Spanish,Indonesian": 0.16666666666666666, + "Vietnamese,Spanish,Filipino": 0.16, + "Vietnamese,Spanish,Chinese": 0.26, + "Vietnamese,Indonesian,Filipino": 0.22, + "Vietnamese,Indonesian,Chinese": 0.18666666666666668, + "Vietnamese,Filipino,Chinese": 0.17333333333333334, + "Spanish,Indonesian,Filipino": 0.21333333333333335, + "Spanish,Indonesian,Chinese": 0.22, + "Spanish,Filipino,Chinese": 0.23333333333333334, + "Indonesian,Filipino,Chinese": 0.22666666666666666 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian": 0.13333333333333333, + "Malay,English,Vietnamese,Filipino": 0.08, + "Malay,English,Vietnamese,Chinese": 0.12, + "Malay,English,Spanish,Indonesian": 0.18, + "Malay,English,Spanish,Filipino": 0.12666666666666668, + "Malay,English,Spanish,Chinese": 0.18, + "Malay,English,Indonesian,Filipino": 0.13333333333333333, + "Malay,English,Indonesian,Chinese": 0.19333333333333333, + "Malay,English,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.08666666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Indonesian,Filipino": 0.14666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.16, + "Malay,Vietnamese,Filipino,Chinese": 0.10666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.13333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.16666666666666666, + "Malay,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,Indonesian,Filipino,Chinese": 0.16666666666666666, + "English,Vietnamese,Spanish,Indonesian": 0.13333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.10666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.18666666666666668, + "English,Vietnamese,Indonesian,Filipino": 0.08666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.12666666666666668, + "English,Vietnamese,Filipino,Chinese": 0.11333333333333333, + "English,Spanish,Indonesian,Filipino": 0.14666666666666667, + "English,Spanish,Indonesian,Chinese": 0.17333333333333334, + "English,Spanish,Filipino,Chinese": 0.18, + "English,Indonesian,Filipino,Chinese": 0.14, + "Vietnamese,Spanish,Indonesian,Filipino": 0.1, + "Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, + "Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.10666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.10666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.08, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.08, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.12, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.06, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.06666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.06, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + } + }, + "AC3_2": 0.4093546778428722, + "AC3_3": 0.28376966068534804, + "AC3_4": 0.20442588722779276, + "AC3_5": 0.1571749598406839, + "AC3_6": 0.1272127480868042, + "AC3_7": 0.10472727270505784 + }, + "prompt_5": { + "overall_acc": 0.43523809523809526, + "language_acc": { + "Malay": 0.32666666666666666, + "English": 0.58, + "Vietnamese": 0.37333333333333335, + "Spanish": 0.5266666666666666, + "Indonesian": 0.38666666666666666, + "Filipino": 0.32666666666666666, + "Chinese": 0.5266666666666666 + }, + "consistency_score_2": 0.48666666666666664, + "consistency_score_3": 0.3060952380952381, + "consistency_score_4": 0.21180952380952384, + "consistency_score_5": 0.15269841269841267, + "consistency_score_6": 0.11142857142857143, + "consistency_score_7": 0.08, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.46, + "Malay,Vietnamese": 0.4066666666666667, + "Malay,Spanish": 0.48, + "Malay,Indonesian": 0.5533333333333333, + "Malay,Filipino": 0.46, + "Malay,Chinese": 0.43333333333333335, + "English,Vietnamese": 0.42, + "English,Spanish": 0.6133333333333333, + "English,Indonesian": 0.5133333333333333, + "English,Filipino": 0.44, + "English,Chinese": 0.7133333333333334, + "Vietnamese,Spanish": 0.4866666666666667, + "Vietnamese,Indonesian": 0.44666666666666666, + "Vietnamese,Filipino": 0.38, + "Vietnamese,Chinese": 0.44666666666666666, + "Spanish,Indonesian": 0.52, + "Spanish,Filipino": 0.4266666666666667, + "Spanish,Chinese": 0.6, + "Indonesian,Filipino": 0.48, + "Indonesian,Chinese": 0.5066666666666667, + "Filipino,Chinese": 0.43333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.28, + "Malay,English,Spanish": 0.3466666666666667, + "Malay,English,Indonesian": 0.3333333333333333, + "Malay,English,Filipino": 0.2733333333333333, + "Malay,English,Chinese": 0.36, + "Malay,Vietnamese,Spanish": 0.28, + "Malay,Vietnamese,Indonesian": 0.2866666666666667, + "Malay,Vietnamese,Filipino": 0.24, + "Malay,Vietnamese,Chinese": 0.24, + "Malay,Spanish,Indonesian": 0.3466666666666667, + "Malay,Spanish,Filipino": 0.2733333333333333, + "Malay,Spanish,Chinese": 0.32666666666666666, + "Malay,Indonesian,Filipino": 0.3333333333333333, + "Malay,Indonesian,Chinese": 0.32, + "Malay,Filipino,Chinese": 0.25333333333333335, + "English,Vietnamese,Spanish": 0.32666666666666666, + "English,Vietnamese,Indonesian": 0.28, + "English,Vietnamese,Filipino": 0.24, + "English,Vietnamese,Chinese": 0.32666666666666666, + "English,Spanish,Indonesian": 0.38, + "English,Spanish,Filipino": 0.3, + "English,Spanish,Chinese": 0.5, + "English,Indonesian,Filipino": 0.28, + "English,Indonesian,Chinese": 0.4066666666666667, + "English,Filipino,Chinese": 0.34, + "Vietnamese,Spanish,Indonesian": 0.30666666666666664, + "Vietnamese,Spanish,Filipino": 0.23333333333333334, + "Vietnamese,Spanish,Chinese": 0.32666666666666666, + "Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "Vietnamese,Indonesian,Chinese": 0.26666666666666666, + "Vietnamese,Filipino,Chinese": 0.21333333333333335, + "Spanish,Indonesian,Filipino": 0.28, + "Spanish,Indonesian,Chinese": 0.36666666666666664, + "Spanish,Filipino,Chinese": 0.29333333333333333, + "Indonesian,Filipino,Chinese": 0.2866666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.22666666666666666, + "Malay,English,Vietnamese,Indonesian": 0.20666666666666667, + "Malay,English,Vietnamese,Filipino": 0.18, + "Malay,English,Vietnamese,Chinese": 0.21333333333333335, + "Malay,English,Spanish,Indonesian": 0.26, + "Malay,English,Spanish,Filipino": 0.20666666666666667, + "Malay,English,Spanish,Chinese": 0.28, + "Malay,English,Indonesian,Filipino": 0.2, + "Malay,English,Indonesian,Chinese": 0.26, + "Malay,English,Filipino,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Filipino": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Chinese": 0.2, + "Malay,Vietnamese,Indonesian,Filipino": 0.19333333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.18, + "Malay,Vietnamese,Filipino,Chinese": 0.13333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Malay,Spanish,Indonesian,Chinese": 0.25333333333333335, + "Malay,Spanish,Filipino,Chinese": 0.19333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.2, + "English,Vietnamese,Spanish,Indonesian": 0.23333333333333334, + "English,Vietnamese,Spanish,Filipino": 0.19333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.2733333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.18666666666666668, + "English,Vietnamese,Indonesian,Chinese": 0.22, + "English,Vietnamese,Filipino,Chinese": 0.18, + "English,Spanish,Indonesian,Filipino": 0.20666666666666667, + "English,Spanish,Indonesian,Chinese": 0.3, + "English,Spanish,Filipino,Chinese": 0.24666666666666667, + "English,Indonesian,Filipino,Chinese": 0.23333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Vietnamese,Spanish,Indonesian,Chinese": 0.23333333333333334, + "Vietnamese,Spanish,Filipino,Chinese": 0.16, + "Vietnamese,Indonesian,Filipino,Chinese": 0.16, + "Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.17333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino": 0.14666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.18, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.14, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.16, + "Malay,English,Vietnamese,Filipino,Chinese": 0.12, + "Malay,English,Spanish,Indonesian,Filipino": 0.15333333333333332, + "Malay,English,Spanish,Indonesian,Chinese": 0.20666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.16, + "Malay,English,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.14, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.16, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.19333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.14, + "English,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.11333333333333333, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.14, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08 + } + }, + "AC3_2": 0.4595179062862437, + "AC3_3": 0.35941809812603887, + "AC3_4": 0.2849483437326835, + "AC3_5": 0.22607939932388363, + "AC3_6": 0.1774315579565923, + "AC3_7": 0.13515711642478467 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.362012987012987, + "language_acc": { + "English": 0.44886363636363635, + "Vietnamese": 0.32386363636363635, + "Chinese": 0.4318181818181818, + "Indonesian": 0.35795454545454547, + "Filipino": 0.30113636363636365, + "Spanish": 0.3409090909090909, + "Malay": 0.32954545454545453 + }, + "consistency_score_2": 0.4691558441558441, + "consistency_score_3": 0.2772727272727272, + "consistency_score_4": 0.18068181818181817, + "consistency_score_5": 0.12527056277056275, + "consistency_score_6": 0.09090909090909093, + "consistency_score_7": 0.06818181818181818, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4147727272727273, + "English,Chinese": 0.5227272727272727, + "English,Indonesian": 0.4602272727272727, + "English,Filipino": 0.42613636363636365, + "English,Spanish": 0.5397727272727273, + "English,Malay": 0.4375, + "Vietnamese,Chinese": 0.3977272727272727, + "Vietnamese,Indonesian": 0.4943181818181818, + "Vietnamese,Filipino": 0.4772727272727273, + "Vietnamese,Spanish": 0.4147727272727273, + "Vietnamese,Malay": 0.4715909090909091, + "Chinese,Indonesian": 0.4602272727272727, + "Chinese,Filipino": 0.3806818181818182, + "Chinese,Spanish": 0.39204545454545453, + "Chinese,Malay": 0.38636363636363635, + "Indonesian,Filipino": 0.5738636363636364, + "Indonesian,Spanish": 0.5, + "Indonesian,Malay": 0.6647727272727273, + "Filipino,Spanish": 0.44886363636363635, + "Filipino,Malay": 0.5113636363636364, + "Spanish,Malay": 0.4772727272727273 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2556818181818182, + "English,Vietnamese,Indonesian": 0.2727272727272727, + "English,Vietnamese,Filipino": 0.25, + "English,Vietnamese,Spanish": 0.2556818181818182, + "English,Vietnamese,Malay": 0.25, + "English,Chinese,Indonesian": 0.2840909090909091, + "English,Chinese,Filipino": 0.23863636363636365, + "English,Chinese,Spanish": 0.2784090909090909, + "English,Chinese,Malay": 0.22727272727272727, + "English,Indonesian,Filipino": 0.2784090909090909, + "English,Indonesian,Spanish": 0.3181818181818182, + "English,Indonesian,Malay": 0.3409090909090909, + "English,Filipino,Spanish": 0.2784090909090909, + "English,Filipino,Malay": 0.25, + "English,Spanish,Malay": 0.2840909090909091, + "Vietnamese,Chinese,Indonesian": 0.2556818181818182, + "Vietnamese,Chinese,Filipino": 0.22727272727272727, + "Vietnamese,Chinese,Spanish": 0.20454545454545456, + "Vietnamese,Chinese,Malay": 0.21022727272727273, + "Vietnamese,Indonesian,Filipino": 0.32954545454545453, + "Vietnamese,Indonesian,Spanish": 0.2897727272727273, + "Vietnamese,Indonesian,Malay": 0.3693181818181818, + "Vietnamese,Filipino,Spanish": 0.24431818181818182, + "Vietnamese,Filipino,Malay": 0.30113636363636365, + "Vietnamese,Spanish,Malay": 0.26704545454545453, + "Chinese,Indonesian,Filipino": 0.2784090909090909, + "Chinese,Indonesian,Spanish": 0.2556818181818182, + "Chinese,Indonesian,Malay": 0.3125, + "Chinese,Filipino,Spanish": 0.2215909090909091, + "Chinese,Filipino,Malay": 0.2159090909090909, + "Chinese,Spanish,Malay": 0.2159090909090909, + "Indonesian,Filipino,Spanish": 0.32954545454545453, + "Indonesian,Filipino,Malay": 0.42045454545454547, + "Indonesian,Spanish,Malay": 0.3977272727272727, + "Filipino,Spanish,Malay": 0.29545454545454547 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.18181818181818182, + "English,Vietnamese,Chinese,Filipino": 0.1590909090909091, + "English,Vietnamese,Chinese,Spanish": 0.16477272727272727, + "English,Vietnamese,Chinese,Malay": 0.14204545454545456, + "English,Vietnamese,Indonesian,Filipino": 0.17613636363636365, + "English,Vietnamese,Indonesian,Spanish": 0.19318181818181818, + "English,Vietnamese,Indonesian,Malay": 0.21022727272727273, + "English,Vietnamese,Filipino,Spanish": 0.1590909090909091, + "English,Vietnamese,Filipino,Malay": 0.17045454545454544, + "English,Vietnamese,Spanish,Malay": 0.16477272727272727, + "English,Chinese,Indonesian,Filipino": 0.17613636363636365, + "English,Chinese,Indonesian,Spanish": 0.1875, + "English,Chinese,Indonesian,Malay": 0.20454545454545456, + "English,Chinese,Filipino,Spanish": 0.1590909090909091, + "English,Chinese,Filipino,Malay": 0.13636363636363635, + "English,Chinese,Spanish,Malay": 0.14204545454545456, + "English,Indonesian,Filipino,Spanish": 0.20454545454545456, + "English,Indonesian,Filipino,Malay": 0.2159090909090909, + "English,Indonesian,Spanish,Malay": 0.24431818181818182, + "English,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Malay": 0.17613636363636365, + "Vietnamese,Chinese,Filipino,Spanish": 0.13636363636363635, + "Vietnamese,Chinese,Filipino,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Spanish,Malay": 0.125, + "Vietnamese,Indonesian,Filipino,Spanish": 0.1875, + "Vietnamese,Indonesian,Filipino,Malay": 0.26136363636363635, + "Vietnamese,Indonesian,Spanish,Malay": 0.23863636363636365, + "Vietnamese,Filipino,Spanish,Malay": 0.17613636363636365, + "Chinese,Indonesian,Filipino,Spanish": 0.17613636363636365, + "Chinese,Indonesian,Filipino,Malay": 0.19886363636363635, + "Chinese,Indonesian,Spanish,Malay": 0.19318181818181818, + "Chinese,Filipino,Spanish,Malay": 0.13636363636363635, + "Indonesian,Filipino,Spanish,Malay": 0.26704545454545453 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.13068181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.125, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.13068181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.11363636363636363, + "English,Vietnamese,Chinese,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Chinese,Spanish,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.125, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.14772727272727273, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.1534090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Filipino,Spanish": 0.13068181818181818, + "English,Chinese,Indonesian,Filipino,Malay": 0.13068181818181818, + "English,Chinese,Indonesian,Spanish,Malay": 0.13636363636363635, + "English,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.1590909090909091, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818 + } + }, + "AC3_2": 0.4086787235709607, + "AC3_3": 0.3140265016360902, + "AC3_4": 0.24105321834951246, + "AC3_5": 0.18613216319002301, + "AC3_6": 0.14532420980825295, + "AC3_7": 0.11475128642272539 + }, + "prompt_2": { + "overall_acc": 0.3376623376623377, + "language_acc": { + "English": 0.4147727272727273, + "Vietnamese": 0.3125, + "Chinese": 0.38636363636363635, + "Indonesian": 0.3068181818181818, + "Filipino": 0.26136363636363635, + "Spanish": 0.39204545454545453, + "Malay": 0.2897727272727273 + }, + "consistency_score_2": 0.37743506493506496, + "consistency_score_3": 0.18230519480519478, + "consistency_score_4": 0.10081168831168831, + "consistency_score_5": 0.06033549783549784, + "consistency_score_6": 0.037337662337662336, + "consistency_score_7": 0.022727272727272728, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3352272727272727, + "English,Chinese": 0.3806818181818182, + "English,Indonesian": 0.4034090909090909, + "English,Filipino": 0.2897727272727273, + "English,Spanish": 0.42045454545454547, + "English,Malay": 0.3409090909090909, + "Vietnamese,Chinese": 0.3352272727272727, + "Vietnamese,Indonesian": 0.4375, + "Vietnamese,Filipino": 0.3693181818181818, + "Vietnamese,Spanish": 0.375, + "Vietnamese,Malay": 0.3409090909090909, + "Chinese,Indonesian": 0.3806818181818182, + "Chinese,Filipino": 0.3352272727272727, + "Chinese,Spanish": 0.4090909090909091, + "Chinese,Malay": 0.3465909090909091, + "Indonesian,Filipino": 0.38636363636363635, + "Indonesian,Spanish": 0.42613636363636365, + "Indonesian,Malay": 0.4943181818181818, + "Filipino,Spanish": 0.32386363636363635, + "Filipino,Malay": 0.42045454545454547, + "Spanish,Malay": 0.375 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.17045454545454544, + "English,Vietnamese,Indonesian": 0.20454545454545456, + "English,Vietnamese,Filipino": 0.14204545454545456, + "English,Vietnamese,Spanish": 0.19886363636363635, + "English,Vietnamese,Malay": 0.14204545454545456, + "English,Chinese,Indonesian": 0.19886363636363635, + "English,Chinese,Filipino": 0.14204545454545456, + "English,Chinese,Spanish": 0.2215909090909091, + "English,Chinese,Malay": 0.14772727272727273, + "English,Indonesian,Filipino": 0.1590909090909091, + "English,Indonesian,Spanish": 0.23863636363636365, + "English,Indonesian,Malay": 0.22727272727272727, + "English,Filipino,Spanish": 0.1590909090909091, + "English,Filipino,Malay": 0.14204545454545456, + "English,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian": 0.18181818181818182, + "Vietnamese,Chinese,Filipino": 0.1534090909090909, + "Vietnamese,Chinese,Spanish": 0.18181818181818182, + "Vietnamese,Chinese,Malay": 0.13636363636363635, + "Vietnamese,Indonesian,Filipino": 0.2159090909090909, + "Vietnamese,Indonesian,Spanish": 0.2215909090909091, + "Vietnamese,Indonesian,Malay": 0.2215909090909091, + "Vietnamese,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Filipino,Malay": 0.1875, + "Vietnamese,Spanish,Malay": 0.1534090909090909, + "Chinese,Indonesian,Filipino": 0.18181818181818182, + "Chinese,Indonesian,Spanish": 0.19886363636363635, + "Chinese,Indonesian,Malay": 0.2215909090909091, + "Chinese,Filipino,Spanish": 0.1534090909090909, + "Chinese,Filipino,Malay": 0.14204545454545456, + "Chinese,Spanish,Malay": 0.1590909090909091, + "Indonesian,Filipino,Spanish": 0.19318181818181818, + "Indonesian,Filipino,Malay": 0.23863636363636365, + "Indonesian,Spanish,Malay": 0.23863636363636365, + "Filipino,Spanish,Malay": 0.17045454545454544 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino": 0.07954545454545454, + "English,Vietnamese,Chinese,Spanish": 0.11931818181818182, + "English,Vietnamese,Chinese,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino": 0.09659090909090909, + "English,Vietnamese,Indonesian,Spanish": 0.14204545454545456, + "English,Vietnamese,Indonesian,Malay": 0.11931818181818182, + "English,Vietnamese,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Indonesian,Filipino": 0.08522727272727272, + "English,Chinese,Indonesian,Spanish": 0.13068181818181818, + "English,Chinese,Indonesian,Malay": 0.09659090909090909, + "English,Chinese,Filipino,Spanish": 0.10227272727272728, + "English,Chinese,Filipino,Malay": 0.05113636363636364, + "English,Chinese,Spanish,Malay": 0.08522727272727272, + "English,Indonesian,Filipino,Spanish": 0.10227272727272728, + "English,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Indonesian,Spanish,Malay": 0.13068181818181818, + "English,Filipino,Spanish,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Filipino": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Spanish": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Filipino,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Filipino,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "Vietnamese,Indonesian,Filipino,Malay": 0.14204545454545456, + "Vietnamese,Indonesian,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Filipino,Spanish,Malay": 0.09090909090909091, + "Chinese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "Chinese,Indonesian,Filipino,Malay": 0.10227272727272728, + "Chinese,Indonesian,Spanish,Malay": 0.11931818181818182, + "Chinese,Filipino,Spanish,Malay": 0.07386363636363637, + "Indonesian,Filipino,Spanish,Malay": 0.125 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Malay": 0.03409090909090909, + "English,Vietnamese,Chinese,Spanish,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.07954545454545454, + "English,Vietnamese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "English,Chinese,Indonesian,Filipino,Malay": 0.03409090909090909, + "English,Chinese,Indonesian,Spanish,Malay": 0.0625, + "English,Chinese,Filipino,Spanish,Malay": 0.03977272727272727, + "English,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0625, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03977272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + } + }, + "AC3_2": 0.3564426492418392, + "AC3_3": 0.23677477684787923, + "AC3_4": 0.1552671689303883, + "AC3_5": 0.10237756802145741, + "AC3_6": 0.06724011917024043, + "AC3_7": 0.042588042576225366 + }, + "prompt_3": { + "overall_acc": 0.35064935064935066, + "language_acc": { + "English": 0.4147727272727273, + "Vietnamese": 0.32954545454545453, + "Chinese": 0.42613636363636365, + "Indonesian": 0.3522727272727273, + "Filipino": 0.24431818181818182, + "Spanish": 0.3693181818181818, + "Malay": 0.3181818181818182 + }, + "consistency_score_2": 0.47132034632034636, + "consistency_score_3": 0.28116883116883123, + "consistency_score_4": 0.1862012987012987, + "consistency_score_5": 0.1312229437229437, + "consistency_score_6": 0.09659090909090907, + "consistency_score_7": 0.07386363636363637, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4147727272727273, + "English,Chinese": 0.5284090909090909, + "English,Indonesian": 0.4659090909090909, + "English,Filipino": 0.3125, + "English,Spanish": 0.48295454545454547, + "English,Malay": 0.4147727272727273, + "Vietnamese,Chinese": 0.44886363636363635, + "Vietnamese,Indonesian": 0.5568181818181818, + "Vietnamese,Filipino": 0.4034090909090909, + "Vietnamese,Spanish": 0.4431818181818182, + "Vietnamese,Malay": 0.42613636363636365, + "Chinese,Indonesian": 0.5511363636363636, + "Chinese,Filipino": 0.3977272727272727, + "Chinese,Spanish": 0.5, + "Chinese,Malay": 0.4772727272727273, + "Indonesian,Filipino": 0.5397727272727273, + "Indonesian,Spanish": 0.5454545454545454, + "Indonesian,Malay": 0.5852272727272727, + "Filipino,Spanish": 0.45454545454545453, + "Filipino,Malay": 0.4659090909090909, + "Spanish,Malay": 0.48295454545454547 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2840909090909091, + "English,Vietnamese,Indonesian": 0.3181818181818182, + "English,Vietnamese,Filipino": 0.1875, + "English,Vietnamese,Spanish": 0.25, + "English,Vietnamese,Malay": 0.22727272727272727, + "English,Chinese,Indonesian": 0.3352272727272727, + "English,Chinese,Filipino": 0.19886363636363635, + "English,Chinese,Spanish": 0.3068181818181818, + "English,Chinese,Malay": 0.2727272727272727, + "English,Indonesian,Filipino": 0.23863636363636365, + "English,Indonesian,Spanish": 0.29545454545454547, + "English,Indonesian,Malay": 0.29545454545454547, + "English,Filipino,Spanish": 0.20454545454545456, + "English,Filipino,Malay": 0.18181818181818182, + "English,Spanish,Malay": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian": 0.36363636363636365, + "Vietnamese,Chinese,Filipino": 0.23295454545454544, + "Vietnamese,Chinese,Spanish": 0.2784090909090909, + "Vietnamese,Chinese,Malay": 0.26136363636363635, + "Vietnamese,Indonesian,Filipino": 0.29545454545454547, + "Vietnamese,Indonesian,Spanish": 0.3409090909090909, + "Vietnamese,Indonesian,Malay": 0.3409090909090909, + "Vietnamese,Filipino,Spanish": 0.23863636363636365, + "Vietnamese,Filipino,Malay": 0.23295454545454544, + "Vietnamese,Spanish,Malay": 0.2556818181818182, + "Chinese,Indonesian,Filipino": 0.3068181818181818, + "Chinese,Indonesian,Spanish": 0.3465909090909091, + "Chinese,Indonesian,Malay": 0.3522727272727273, + "Chinese,Filipino,Spanish": 0.26136363636363635, + "Chinese,Filipino,Malay": 0.24431818181818182, + "Chinese,Spanish,Malay": 0.3068181818181818, + "Indonesian,Filipino,Spanish": 0.3409090909090909, + "Indonesian,Filipino,Malay": 0.3522727272727273, + "Indonesian,Spanish,Malay": 0.35795454545454547, + "Filipino,Spanish,Malay": 0.2784090909090909 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.23863636363636365, + "English,Vietnamese,Chinese,Filipino": 0.14204545454545456, + "English,Vietnamese,Chinese,Spanish": 0.1875, + "English,Vietnamese,Chinese,Malay": 0.16477272727272727, + "English,Vietnamese,Indonesian,Filipino": 0.17045454545454544, + "English,Vietnamese,Indonesian,Spanish": 0.2159090909090909, + "English,Vietnamese,Indonesian,Malay": 0.20454545454545456, + "English,Vietnamese,Filipino,Spanish": 0.14204545454545456, + "English,Vietnamese,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Spanish,Malay": 0.14772727272727273, + "English,Chinese,Indonesian,Filipino": 0.17045454545454544, + "English,Chinese,Indonesian,Spanish": 0.2159090909090909, + "English,Chinese,Indonesian,Malay": 0.2215909090909091, + "English,Chinese,Filipino,Spanish": 0.14772727272727273, + "English,Chinese,Filipino,Malay": 0.11931818181818182, + "English,Chinese,Spanish,Malay": 0.18181818181818182, + "English,Indonesian,Filipino,Spanish": 0.1590909090909091, + "English,Indonesian,Filipino,Malay": 0.16477272727272727, + "English,Indonesian,Spanish,Malay": 0.19886363636363635, + "English,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Chinese,Indonesian,Filipino": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian,Spanish": 0.24431818181818182, + "Vietnamese,Chinese,Indonesian,Malay": 0.23863636363636365, + "Vietnamese,Chinese,Filipino,Spanish": 0.17613636363636365, + "Vietnamese,Chinese,Filipino,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Spanish,Malay": 0.1875, + "Vietnamese,Indonesian,Filipino,Spanish": 0.21022727272727273, + "Vietnamese,Indonesian,Filipino,Malay": 0.20454545454545456, + "Vietnamese,Indonesian,Spanish,Malay": 0.23295454545454544, + "Vietnamese,Filipino,Spanish,Malay": 0.1590909090909091, + "Chinese,Indonesian,Filipino,Spanish": 0.2215909090909091, + "Chinese,Indonesian,Filipino,Malay": 0.21022727272727273, + "Chinese,Indonesian,Spanish,Malay": 0.23863636363636365, + "Chinese,Filipino,Spanish,Malay": 0.16477272727272727, + "Indonesian,Filipino,Spanish,Malay": 0.23295454545454544 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.13068181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.16477272727272727, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.1590909090909091, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.11363636363636363, + "English,Vietnamese,Chinese,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Spanish,Malay": 0.11931818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.125, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.14204545454545456, + "English,Vietnamese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Filipino,Spanish": 0.125, + "English,Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Chinese,Indonesian,Spanish,Malay": 0.14772727272727273, + "English,Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.11363636363636363, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637 + } + }, + "AC3_2": 0.4021271683718603, + "AC3_3": 0.31208873341652216, + "AC3_4": 0.24323846696092397, + "AC3_5": 0.19097690626574115, + "AC3_6": 0.15146015505605365, + "AC3_7": 0.12202329216663203 + }, + "prompt_4": { + "overall_acc": 0.3327922077922078, + "language_acc": { + "English": 0.3977272727272727, + "Vietnamese": 0.3465909090909091, + "Chinese": 0.3522727272727273, + "Indonesian": 0.2897727272727273, + "Filipino": 0.26136363636363635, + "Spanish": 0.3693181818181818, + "Malay": 0.3125 + }, + "consistency_score_2": 0.41450216450216454, + "consistency_score_3": 0.2258116883116883, + "consistency_score_4": 0.14366883116883117, + "consistency_score_5": 0.10146103896103895, + "consistency_score_6": 0.07711038961038962, + "consistency_score_7": 0.0625, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.35795454545454547, + "English,Chinese": 0.4318181818181818, + "English,Indonesian": 0.44886363636363635, + "English,Filipino": 0.3693181818181818, + "English,Spanish": 0.39204545454545453, + "English,Malay": 0.35795454545454547, + "Vietnamese,Chinese": 0.35795454545454547, + "Vietnamese,Indonesian": 0.4375, + "Vietnamese,Filipino": 0.4431818181818182, + "Vietnamese,Spanish": 0.36363636363636365, + "Vietnamese,Malay": 0.38636363636363635, + "Chinese,Indonesian": 0.44886363636363635, + "Chinese,Filipino": 0.35795454545454547, + "Chinese,Spanish": 0.3693181818181818, + "Chinese,Malay": 0.42613636363636365, + "Indonesian,Filipino": 0.4943181818181818, + "Indonesian,Spanish": 0.4431818181818182, + "Indonesian,Malay": 0.5738636363636364, + "Filipino,Spanish": 0.4090909090909091, + "Filipino,Malay": 0.44886363636363635, + "Spanish,Malay": 0.38636363636363635 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.18181818181818182, + "English,Vietnamese,Indonesian": 0.21022727272727273, + "English,Vietnamese,Filipino": 0.17613636363636365, + "English,Vietnamese,Spanish": 0.17613636363636365, + "English,Vietnamese,Malay": 0.17045454545454544, + "English,Chinese,Indonesian": 0.25, + "English,Chinese,Filipino": 0.2159090909090909, + "English,Chinese,Spanish": 0.2159090909090909, + "English,Chinese,Malay": 0.22727272727272727, + "English,Indonesian,Filipino": 0.24431818181818182, + "English,Indonesian,Spanish": 0.25, + "English,Indonesian,Malay": 0.26136363636363635, + "English,Filipino,Spanish": 0.21022727272727273, + "English,Filipino,Malay": 0.19886363636363635, + "English,Spanish,Malay": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian": 0.2215909090909091, + "Vietnamese,Chinese,Filipino": 0.19318181818181818, + "Vietnamese,Chinese,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Malay": 0.18181818181818182, + "Vietnamese,Indonesian,Filipino": 0.2784090909090909, + "Vietnamese,Indonesian,Spanish": 0.2215909090909091, + "Vietnamese,Indonesian,Malay": 0.2897727272727273, + "Vietnamese,Filipino,Spanish": 0.22727272727272727, + "Vietnamese,Filipino,Malay": 0.25, + "Vietnamese,Spanish,Malay": 0.20454545454545456, + "Chinese,Indonesian,Filipino": 0.2215909090909091, + "Chinese,Indonesian,Spanish": 0.24431818181818182, + "Chinese,Indonesian,Malay": 0.3068181818181818, + "Chinese,Filipino,Spanish": 0.18181818181818182, + "Chinese,Filipino,Malay": 0.22727272727272727, + "Chinese,Spanish,Malay": 0.19318181818181818, + "Indonesian,Filipino,Spanish": 0.26704545454545453, + "Indonesian,Filipino,Malay": 0.32954545454545453, + "Indonesian,Spanish,Malay": 0.2784090909090909, + "Filipino,Spanish,Malay": 0.23863636363636365 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.13636363636363635, + "English,Vietnamese,Chinese,Filipino": 0.125, + "English,Vietnamese,Chinese,Spanish": 0.10795454545454546, + "English,Vietnamese,Chinese,Malay": 0.11931818181818182, + "English,Vietnamese,Indonesian,Filipino": 0.11931818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.11931818181818182, + "English,Vietnamese,Indonesian,Malay": 0.14204545454545456, + "English,Vietnamese,Filipino,Spanish": 0.11363636363636363, + "English,Vietnamese,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Spanish,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Filipino": 0.1590909090909091, + "English,Chinese,Indonesian,Spanish": 0.16477272727272727, + "English,Chinese,Indonesian,Malay": 0.18181818181818182, + "English,Chinese,Filipino,Spanish": 0.13636363636363635, + "English,Chinese,Filipino,Malay": 0.14772727272727273, + "English,Chinese,Spanish,Malay": 0.14204545454545456, + "English,Indonesian,Filipino,Spanish": 0.1590909090909091, + "English,Indonesian,Filipino,Malay": 0.16477272727272727, + "English,Indonesian,Spanish,Malay": 0.16477272727272727, + "English,Filipino,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Filipino": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.125, + "Vietnamese,Chinese,Indonesian,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Filipino,Spanish": 0.11931818181818182, + "Vietnamese,Chinese,Filipino,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Indonesian,Filipino,Malay": 0.21022727272727273, + "Vietnamese,Indonesian,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Filipino,Spanish,Malay": 0.14772727272727273, + "Chinese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "Chinese,Indonesian,Filipino,Malay": 0.16477272727272727, + "Chinese,Indonesian,Spanish,Malay": 0.16477272727272727, + "Chinese,Filipino,Spanish,Malay": 0.13636363636363635, + "Indonesian,Filipino,Spanish,Malay": 0.20454545454545456 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Spanish,Malay": 0.07954545454545454, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "English,Chinese,Indonesian,Filipino,Malay": 0.125, + "English,Chinese,Indonesian,Spanish,Malay": 0.125, + "English,Chinese,Filipino,Spanish,Malay": 0.11363636363636363, + "English,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625 + } + }, + "AC3_2": 0.3691800595723663, + "AC3_3": 0.2690578093322239, + "AC3_4": 0.20069581181212237, + "AC3_5": 0.1555103774371833, + "AC3_6": 0.1252089494358292, + "AC3_7": 0.10523613960376778 + }, + "prompt_5": { + "overall_acc": 0.3547077922077922, + "language_acc": { + "English": 0.44886363636363635, + "Vietnamese": 0.3522727272727273, + "Chinese": 0.3977272727272727, + "Indonesian": 0.3352272727272727, + "Filipino": 0.2727272727272727, + "Spanish": 0.3522727272727273, + "Malay": 0.32386363636363635 + }, + "consistency_score_2": 0.4707792207792207, + "consistency_score_3": 0.2810064935064935, + "consistency_score_4": 0.18035714285714288, + "consistency_score_5": 0.11823593073593071, + "consistency_score_6": 0.07792207792207793, + "consistency_score_7": 0.05113636363636364, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4602272727272727, + "English,Chinese": 0.4943181818181818, + "English,Indonesian": 0.4715909090909091, + "English,Filipino": 0.4034090909090909, + "English,Spanish": 0.5113636363636364, + "English,Malay": 0.4602272727272727, + "Vietnamese,Chinese": 0.375, + "Vietnamese,Indonesian": 0.5113636363636364, + "Vietnamese,Filipino": 0.39204545454545453, + "Vietnamese,Spanish": 0.45454545454545453, + "Vietnamese,Malay": 0.4659090909090909, + "Chinese,Indonesian": 0.5170454545454546, + "Chinese,Filipino": 0.42045454545454547, + "Chinese,Spanish": 0.4715909090909091, + "Chinese,Malay": 0.4375, + "Indonesian,Filipino": 0.5340909090909091, + "Indonesian,Spanish": 0.5511363636363636, + "Indonesian,Malay": 0.6306818181818182, + "Filipino,Spanish": 0.38636363636363635, + "Filipino,Malay": 0.4659090909090909, + "Spanish,Malay": 0.4715909090909091 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.24431818181818182, + "English,Vietnamese,Indonesian": 0.3068181818181818, + "English,Vietnamese,Filipino": 0.23295454545454544, + "English,Vietnamese,Spanish": 0.2840909090909091, + "English,Vietnamese,Malay": 0.2556818181818182, + "English,Chinese,Indonesian": 0.2897727272727273, + "English,Chinese,Filipino": 0.25, + "English,Chinese,Spanish": 0.29545454545454547, + "English,Chinese,Malay": 0.25, + "English,Indonesian,Filipino": 0.2784090909090909, + "English,Indonesian,Spanish": 0.3352272727272727, + "English,Indonesian,Malay": 0.3352272727272727, + "English,Filipino,Spanish": 0.23863636363636365, + "English,Filipino,Malay": 0.25, + "English,Spanish,Malay": 0.2840909090909091, + "Vietnamese,Chinese,Indonesian": 0.2784090909090909, + "Vietnamese,Chinese,Filipino": 0.21022727272727273, + "Vietnamese,Chinese,Spanish": 0.23863636363636365, + "Vietnamese,Chinese,Malay": 0.2215909090909091, + "Vietnamese,Indonesian,Filipino": 0.2897727272727273, + "Vietnamese,Indonesian,Spanish": 0.32954545454545453, + "Vietnamese,Indonesian,Malay": 0.35795454545454547, + "Vietnamese,Filipino,Spanish": 0.23295454545454544, + "Vietnamese,Filipino,Malay": 0.2556818181818182, + "Vietnamese,Spanish,Malay": 0.2784090909090909, + "Chinese,Indonesian,Filipino": 0.30113636363636365, + "Chinese,Indonesian,Spanish": 0.3409090909090909, + "Chinese,Indonesian,Malay": 0.3352272727272727, + "Chinese,Filipino,Spanish": 0.23863636363636365, + "Chinese,Filipino,Malay": 0.25, + "Chinese,Spanish,Malay": 0.26704545454545453, + "Indonesian,Filipino,Spanish": 0.29545454545454547, + "Indonesian,Filipino,Malay": 0.375, + "Indonesian,Spanish,Malay": 0.375, + "Filipino,Spanish,Malay": 0.23295454545454544 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.18181818181818182, + "English,Vietnamese,Chinese,Filipino": 0.13636363636363635, + "English,Vietnamese,Chinese,Spanish": 0.16477272727272727, + "English,Vietnamese,Chinese,Malay": 0.13636363636363635, + "English,Vietnamese,Indonesian,Filipino": 0.17613636363636365, + "English,Vietnamese,Indonesian,Spanish": 0.2159090909090909, + "English,Vietnamese,Indonesian,Malay": 0.19886363636363635, + "English,Vietnamese,Filipino,Spanish": 0.1590909090909091, + "English,Vietnamese,Filipino,Malay": 0.14204545454545456, + "English,Vietnamese,Spanish,Malay": 0.1590909090909091, + "English,Chinese,Indonesian,Filipino": 0.18181818181818182, + "English,Chinese,Indonesian,Spanish": 0.2159090909090909, + "English,Chinese,Indonesian,Malay": 0.19886363636363635, + "English,Chinese,Filipino,Spanish": 0.1590909090909091, + "English,Chinese,Filipino,Malay": 0.1590909090909091, + "English,Chinese,Spanish,Malay": 0.17045454545454544, + "English,Indonesian,Filipino,Spanish": 0.17613636363636365, + "English,Indonesian,Filipino,Malay": 0.21022727272727273, + "English,Indonesian,Spanish,Malay": 0.23295454545454544, + "English,Filipino,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian,Filipino": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Malay": 0.18181818181818182, + "Vietnamese,Chinese,Filipino,Spanish": 0.14204545454545456, + "Vietnamese,Chinese,Filipino,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Indonesian,Filipino,Spanish": 0.18181818181818182, + "Vietnamese,Indonesian,Filipino,Malay": 0.23295454545454544, + "Vietnamese,Indonesian,Spanish,Malay": 0.23295454545454544, + "Vietnamese,Filipino,Spanish,Malay": 0.1534090909090909, + "Chinese,Indonesian,Filipino,Spanish": 0.19886363636363635, + "Chinese,Indonesian,Filipino,Malay": 0.2159090909090909, + "Chinese,Indonesian,Spanish,Malay": 0.22727272727272727, + "Chinese,Filipino,Spanish,Malay": 0.1534090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.20454545454545456 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.10795454545454546, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.13068181818181818, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.10795454545454546, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Spanish,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.125, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.13068181818181818, + "English,Vietnamese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Filipino,Spanish": 0.125, + "English,Chinese,Indonesian,Filipino,Malay": 0.13068181818181818, + "English,Chinese,Indonesian,Spanish,Malay": 0.14772727272727273, + "English,Chinese,Filipino,Spanish,Malay": 0.10227272727272728, + "English,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.13636363636363635 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0625, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + } + }, + "AC3_2": 0.4045831257730524, + "AC3_3": 0.3135848765143535, + "AC3_4": 0.2391264302604407, + "AC3_5": 0.17735389606639607, + "AC3_6": 0.1277746643304958, + "AC3_7": 0.08938636361433884 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5631067961165048 + }, + "prompt_2": { + "accuracy": 0.42718446601941745 + }, + "prompt_3": { + "accuracy": 0.49514563106796117 + }, + "prompt_4": { + "accuracy": 0.5048543689320388 + }, + "prompt_5": { + "accuracy": 0.49514563106796117 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.45714285714285713 + }, + "prompt_2": { + "accuracy": 0.4 + }, + "prompt_3": { + "accuracy": 0.3619047619047619 + }, + "prompt_4": { + "accuracy": 0.38095238095238093 + }, + "prompt_5": { + "accuracy": 0.4095238095238095 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4485981308411215 + }, + "prompt_2": { + "accuracy": 0.3925233644859813 + }, + "prompt_3": { + "accuracy": 0.3925233644859813 + }, + "prompt_4": { + "accuracy": 0.5046728971962616 + }, + "prompt_5": { + "accuracy": 0.4953271028037383 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.36, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.3, + "history": 0.4, + "literature": 0.2, + "politics": 0.5, + "culture": 0.4, + "film": 0.4, + "law": 0.4, + "geography": 0.5 + } + }, + "prompt_2": { + "accuracy": 0.4, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.4, + "history": 0.4, + "literature": 0.2, + "politics": 0.5, + "culture": 0.6, + "film": 0.5, + "law": 0.3, + "geography": 0.6 + } + }, + "prompt_3": { + "accuracy": 0.43, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.2, + "history": 0.5333333333333333, + "literature": 0.4, + "politics": 0.6, + "culture": 0.6, + "film": 0.5, + "law": 0.4, + "geography": 0.5 + } + }, + "prompt_4": { + "accuracy": 0.47, + "category_acc": { + "brand": 0.5, + "demographics": 0.2, + "biology": 0.3, + "history": 0.3333333333333333, + "literature": 0.4, + "politics": 0.6, + "culture": 0.6, + "film": 0.5, + "law": 0.6, + "geography": 0.6 + } + }, + "prompt_5": { + "accuracy": 0.46, + "category_acc": { + "brand": 0.4, + "demographics": 0.0, + "biology": 0.3, + "history": 0.4666666666666667, + "literature": 0.4, + "politics": 0.5, + "culture": 0.6, + "film": 0.5, + "law": 0.6, + "geography": 0.6 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.032482702289577135 + }, + "prompt_2": { + "bleu_score": 0.038794160796413776 + }, + "prompt_3": { + "bleu_score": 0.04303413649846339 + }, + "prompt_4": { + "bleu_score": 0.04273623849088891 + }, + "prompt_5": { + "bleu_score": 0.04704760871993138 + } }, "indommlu": { "prompt_1": -1, @@ -7158,179 +61607,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.1234224600075249 + }, + "prompt_2": { + "bleu_score": 0.12720532398561427 + }, + "prompt_3": { + "bleu_score": 0.058218932238453154 + }, + "prompt_4": { + "bleu_score": 0.15968185258534648 + }, + "prompt_5": { + "bleu_score": 0.14189976316390143 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.064010671800851 + }, + "prompt_2": { + "bleu_score": 0.060828162597321676 + }, + "prompt_3": { + "bleu_score": 0.03250569787988255 + }, + "prompt_4": { + "bleu_score": 0.111566300419673 + }, + "prompt_5": { + "bleu_score": 0.09449740534415094 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.15850180337113418 + }, + "prompt_2": { + "bleu_score": 0.1624474737236067 + }, + "prompt_3": { + "bleu_score": 0.07946608870034674 + }, + "prompt_4": { + "bleu_score": 0.1518779168246733 + }, + "prompt_5": { + "bleu_score": 0.13963531403235094 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.11882358297195049 + }, + "prompt_2": { + "bleu_score": 0.10236892488347499 + }, + "prompt_3": { + "bleu_score": 0.042489668592323414 + }, + "prompt_4": { + "bleu_score": 0.1454691365297427 + }, + "prompt_5": { + "bleu_score": 0.12166015797615783 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3990665110851809 + }, + "prompt_2": { + "accuracy": 0.38273045507584597 + }, + "prompt_3": { + "accuracy": 0.4329054842473746 + }, + "prompt_4": { + "accuracy": 0.485414235705951 + }, + "prompt_5": { + "accuracy": 0.47607934655775963 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.38484090096532, + "category_acc": { + "high_school_european_history": 0.4634146341463415, + "business_ethics": 0.43434343434343436, + "clinical_knowledge": 0.3409090909090909, + "medical_genetics": 0.3939393939393939, + "high_school_us_history": 0.5615763546798029, + "high_school_physics": 0.2733333333333333, + "high_school_world_history": 0.4703389830508475, + "virology": 0.3575757575757576, + "high_school_microeconomics": 0.31645569620253167, + "econometrics": 0.21238938053097345, + "college_computer_science": 0.31313131313131315, + "high_school_biology": 0.44336569579288027, + "abstract_algebra": 0.31313131313131315, + "professional_accounting": 0.33451957295373663, + "philosophy": 0.432258064516129, + "professional_medicine": 0.45387453874538747, + "nutrition": 0.39672131147540984, + "global_facts": 0.29292929292929293, + "machine_learning": 0.3153153153153153, + "security_studies": 0.4672131147540984, + "public_relations": 0.44036697247706424, + "professional_psychology": 0.38461538461538464, + "prehistory": 0.3684210526315789, + "anatomy": 0.35074626865671643, + "human_sexuality": 0.38461538461538464, + "college_medicine": 0.313953488372093, + "high_school_government_and_politics": 0.484375, + "college_chemistry": 0.29292929292929293, + "logical_fallacies": 0.4444444444444444, + "high_school_geography": 0.38578680203045684, + "elementary_mathematics": 0.27586206896551724, + "human_aging": 0.40540540540540543, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.5202205882352942, + "formal_logic": 0.296, + "high_school_statistics": 0.3209302325581395, + "international_law": 0.35, + "high_school_mathematics": 0.26394052044609667, + "high_school_computer_science": 0.3434343434343434, + "conceptual_physics": 0.3418803418803419, + "miscellaneous": 0.4578005115089514, + "high_school_chemistry": 0.3910891089108911, + "marketing": 0.5364806866952789, + "professional_law": 0.36007827788649704, + "management": 0.4411764705882353, + "college_physics": 0.33663366336633666, + "jurisprudence": 0.40186915887850466, + "world_religions": 0.5588235294117647, + "sociology": 0.595, + "us_foreign_policy": 0.5050505050505051, + "high_school_macroeconomics": 0.2982005141388175, + "computer_security": 0.46464646464646464, + "moral_scenarios": 0.25279642058165547, + "moral_disputes": 0.4579710144927536, + "electrical_engineering": 0.3402777777777778, + "astronomy": 0.33774834437086093, + "college_biology": 0.3776223776223776 + } + }, + "prompt_2": { + "accuracy": 0.35016088666428313, + "category_acc": { + "high_school_european_history": 0.34146341463414637, + "business_ethics": 0.40404040404040403, + "clinical_knowledge": 0.2803030303030303, + "medical_genetics": 0.35353535353535354, + "high_school_us_history": 0.43349753694581283, + "high_school_physics": 0.3, + "high_school_world_history": 0.3389830508474576, + "virology": 0.2606060606060606, + "high_school_microeconomics": 0.35864978902953587, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.30303030303030304, + "high_school_biology": 0.3786407766990291, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.2669039145907473, + "philosophy": 0.4129032258064516, + "professional_medicine": 0.36531365313653136, + "nutrition": 0.3475409836065574, + "global_facts": 0.25252525252525254, + "machine_learning": 0.25225225225225223, + "security_studies": 0.4098360655737705, + "public_relations": 0.3486238532110092, + "professional_psychology": 0.36824877250409166, + "prehistory": 0.34055727554179566, + "anatomy": 0.417910447761194, + "human_sexuality": 0.3230769230769231, + "college_medicine": 0.27325581395348836, + "high_school_government_and_politics": 0.46875, + "college_chemistry": 0.31313131313131315, + "logical_fallacies": 0.4444444444444444, + "high_school_geography": 0.28426395939086296, + "elementary_mathematics": 0.2864721485411141, + "human_aging": 0.3918918918918919, + "college_mathematics": 0.24242424242424243, + "high_school_psychology": 0.5, + "formal_logic": 0.24, + "high_school_statistics": 0.30697674418604654, + "international_law": 0.3333333333333333, + "high_school_mathematics": 0.26394052044609667, + "high_school_computer_science": 0.36363636363636365, + "conceptual_physics": 0.28205128205128205, + "miscellaneous": 0.5127877237851662, + "high_school_chemistry": 0.3712871287128713, + "marketing": 0.4206008583690987, + "professional_law": 0.3176777560339204, + "management": 0.4215686274509804, + "college_physics": 0.26732673267326734, + "jurisprudence": 0.3177570093457944, + "world_religions": 0.5, + "sociology": 0.44, + "us_foreign_policy": 0.41414141414141414, + "high_school_macroeconomics": 0.2467866323907455, + "computer_security": 0.47474747474747475, + "moral_scenarios": 0.2651006711409396, + "moral_disputes": 0.39710144927536234, + "electrical_engineering": 0.3055555555555556, + "astronomy": 0.23841059602649006, + "college_biology": 0.3356643356643357 + } + }, + "prompt_3": { + "accuracy": 0.4318913121201287, + "category_acc": { + "high_school_european_history": 0.5548780487804879, + "business_ethics": 0.46464646464646464, + "clinical_knowledge": 0.4621212121212121, + "medical_genetics": 0.46464646464646464, + "high_school_us_history": 0.47783251231527096, + "high_school_physics": 0.34, + "high_school_world_history": 0.4364406779661017, + "virology": 0.40606060606060607, + "high_school_microeconomics": 0.4430379746835443, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.35353535353535354, + "high_school_biology": 0.5566343042071198, + "abstract_algebra": 0.21212121212121213, + "professional_accounting": 0.3665480427046263, + "philosophy": 0.5129032258064516, + "professional_medicine": 0.4132841328413284, + "nutrition": 0.5409836065573771, + "global_facts": 0.24242424242424243, + "machine_learning": 0.36936936936936937, + "security_studies": 0.5286885245901639, + "public_relations": 0.5045871559633027, + "professional_psychology": 0.44353518821603927, + "prehistory": 0.5046439628482973, + "anatomy": 0.40298507462686567, + "human_sexuality": 0.45384615384615384, + "college_medicine": 0.45348837209302323, + "high_school_government_and_politics": 0.5989583333333334, + "college_chemistry": 0.35353535353535354, + "logical_fallacies": 0.5185185185185185, + "high_school_geography": 0.47715736040609136, + "elementary_mathematics": 0.29973474801061006, + "human_aging": 0.4954954954954955, + "college_mathematics": 0.26262626262626265, + "high_school_psychology": 0.5974264705882353, + "formal_logic": 0.272, + "high_school_statistics": 0.33488372093023255, + "international_law": 0.425, + "high_school_mathematics": 0.2788104089219331, + "high_school_computer_science": 0.40404040404040403, + "conceptual_physics": 0.3162393162393162, + "miscellaneous": 0.6227621483375959, + "high_school_chemistry": 0.3415841584158416, + "marketing": 0.6051502145922747, + "professional_law": 0.33985649054142203, + "management": 0.5, + "college_physics": 0.25742574257425743, + "jurisprudence": 0.5233644859813084, + "world_religions": 0.6647058823529411, + "sociology": 0.54, + "us_foreign_policy": 0.5151515151515151, + "high_school_macroeconomics": 0.3856041131105398, + "computer_security": 0.5050505050505051, + "moral_scenarios": 0.2550335570469799, + "moral_disputes": 0.48405797101449277, + "electrical_engineering": 0.3263888888888889, + "astronomy": 0.4304635761589404, + "college_biology": 0.46153846153846156 + } + }, + "prompt_4": { + "accuracy": 0.4796567751161959, + "category_acc": { + "high_school_european_history": 0.5060975609756098, + "business_ethics": 0.48484848484848486, + "clinical_knowledge": 0.5492424242424242, + "medical_genetics": 0.5353535353535354, + "high_school_us_history": 0.5270935960591133, + "high_school_physics": 0.32666666666666666, + "high_school_world_history": 0.461864406779661, + "virology": 0.4909090909090909, + "high_school_microeconomics": 0.4810126582278481, + "econometrics": 0.2831858407079646, + "college_computer_science": 0.3838383838383838, + "high_school_biology": 0.5631067961165048, + "abstract_algebra": 0.30303030303030304, + "professional_accounting": 0.398576512455516, + "philosophy": 0.5967741935483871, + "professional_medicine": 0.4797047970479705, + "nutrition": 0.5442622950819672, + "global_facts": 0.2222222222222222, + "machine_learning": 0.34234234234234234, + "security_studies": 0.5778688524590164, + "public_relations": 0.5596330275229358, + "professional_psychology": 0.46153846153846156, + "prehistory": 0.544891640866873, + "anatomy": 0.5, + "human_sexuality": 0.4846153846153846, + "college_medicine": 0.48255813953488375, + "high_school_government_and_politics": 0.703125, + "college_chemistry": 0.36363636363636365, + "logical_fallacies": 0.5740740740740741, + "high_school_geography": 0.6395939086294417, + "elementary_mathematics": 0.3448275862068966, + "human_aging": 0.5495495495495496, + "college_mathematics": 0.1919191919191919, + "high_school_psychology": 0.6875, + "formal_logic": 0.32, + "high_school_statistics": 0.3581395348837209, + "international_law": 0.5666666666666667, + "high_school_mathematics": 0.30111524163568776, + "high_school_computer_science": 0.42424242424242425, + "conceptual_physics": 0.38461538461538464, + "miscellaneous": 0.6764705882352942, + "high_school_chemistry": 0.3910891089108911, + "marketing": 0.6909871244635193, + "professional_law": 0.3731245923026745, + "management": 0.6666666666666666, + "college_physics": 0.3069306930693069, + "jurisprudence": 0.5981308411214953, + "world_religions": 0.7, + "sociology": 0.67, + "us_foreign_policy": 0.6868686868686869, + "high_school_macroeconomics": 0.4498714652956298, + "computer_security": 0.5555555555555556, + "moral_scenarios": 0.2796420581655481, + "moral_disputes": 0.4927536231884058, + "electrical_engineering": 0.4236111111111111, + "astronomy": 0.5562913907284768, + "college_biology": 0.46153846153846156 + } + }, + "prompt_5": { + "accuracy": 0.4842331069002503, + "category_acc": { + "high_school_european_history": 0.5670731707317073, + "business_ethics": 0.45454545454545453, + "clinical_knowledge": 0.5151515151515151, + "medical_genetics": 0.5353535353535354, + "high_school_us_history": 0.6206896551724138, + "high_school_physics": 0.31333333333333335, + "high_school_world_history": 0.5211864406779662, + "virology": 0.42424242424242425, + "high_school_microeconomics": 0.4641350210970464, + "econometrics": 0.2920353982300885, + "college_computer_science": 0.40404040404040403, + "high_school_biology": 0.5857605177993528, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.4128113879003559, + "philosophy": 0.5516129032258065, + "professional_medicine": 0.5018450184501845, + "nutrition": 0.5770491803278689, + "global_facts": 0.24242424242424243, + "machine_learning": 0.3063063063063063, + "security_studies": 0.5655737704918032, + "public_relations": 0.5779816513761468, + "professional_psychology": 0.45662847790507366, + "prehistory": 0.544891640866873, + "anatomy": 0.5074626865671642, + "human_sexuality": 0.5538461538461539, + "college_medicine": 0.48255813953488375, + "high_school_government_and_politics": 0.703125, + "college_chemistry": 0.3838383838383838, + "logical_fallacies": 0.5123456790123457, + "high_school_geography": 0.6091370558375635, + "elementary_mathematics": 0.29708222811671087, + "human_aging": 0.5180180180180181, + "college_mathematics": 0.31313131313131315, + "high_school_psychology": 0.6654411764705882, + "formal_logic": 0.296, + "high_school_statistics": 0.40930232558139534, + "international_law": 0.5333333333333333, + "high_school_mathematics": 0.35687732342007433, + "high_school_computer_science": 0.5454545454545454, + "conceptual_physics": 0.3888888888888889, + "miscellaneous": 0.6790281329923273, + "high_school_chemistry": 0.4207920792079208, + "marketing": 0.7339055793991416, + "professional_law": 0.3789954337899543, + "management": 0.6568627450980392, + "college_physics": 0.3069306930693069, + "jurisprudence": 0.5700934579439252, + "world_religions": 0.7235294117647059, + "sociology": 0.665, + "us_foreign_policy": 0.7171717171717171, + "high_school_macroeconomics": 0.4524421593830334, + "computer_security": 0.5858585858585859, + "moral_scenarios": 0.2595078299776286, + "moral_disputes": 0.5246376811594203, + "electrical_engineering": 0.4791666666666667, + "astronomy": 0.5562913907284768, + "college_biology": 0.46853146853146854 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.39078751857355126 + }, + "prompt_2": { + "accuracy": 0.35958395245170877 + }, + "prompt_3": { + "accuracy": 0.34026745913818723 + }, + "prompt_4": { + "accuracy": 0.41901931649331353 + }, + "prompt_5": { + "accuracy": 0.3224368499257058 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3816936488169365, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.30952380952380953, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.4166666666666667, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.047619047619047616, + "electrical_engineer": 0.35714285714285715, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.16666666666666666, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.6153846153846154, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.48, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.38333333333333336, + "business_administration": 0.2631578947368421, + "marxism": 0.5833333333333334, + "mao_zedong_thought": 0.6551724137931034, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.5510204081632653, + "high_school_politics": 0.5833333333333334, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.5, + "middle_school_geography": 0.5294117647058824, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.625, + "logic": 0.2962962962962963, + "law": 0.4482758620689655, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.47368421052631576, + "professional_tour_guide": 0.5294117647058824, + "legal_professional": 0.4642857142857143, + "high_school_chinese": 0.375, + "high_school_history": 0.48, + "middle_school_history": 0.4074074074074074, + "civil_servant": 0.46153846153846156, + "sports_science": 0.375, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.375, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.4117647058823529, + "accountant": 0.2962962962962963, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.3333333333333333, + "physician": 0.35185185185185186 + } + }, + "prompt_2": { + "accuracy": 0.3437110834371108, + "category_acc": { + "computer_network": 0.125, + "operating_system": 0.16666666666666666, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.2857142857142857, + "college_physics": 0.25, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.08695652173913043, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.35714285714285715, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.25, + "high_school_chemistry": 0.25, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.125, + "middle_school_biology": 0.38461538461538464, + "middle_school_physics": 0.25, + "middle_school_chemistry": 0.48, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.4166666666666667, + "business_administration": 0.3684210526315789, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.7241379310344828, + "education_science": 0.38235294117647056, + "teacher_qualification": 0.3673469387755102, + "high_school_politics": 0.375, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.5, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.2962962962962963, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.39285714285714285, + "art_studies": 0.47368421052631576, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.25, + "high_school_history": 0.24, + "middle_school_history": 0.5185185185185185, + "civil_servant": 0.4230769230769231, + "sports_science": 0.375, + "plant_protection": 0.4444444444444444, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.37254901960784315, + "accountant": 0.3333333333333333, + "fire_engineer": 0.3888888888888889, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.2962962962962963, + "physician": 0.25925925925925924 + } + }, + "prompt_3": { + "accuracy": 0.3424657534246575, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.08333333333333333, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.42857142857142855, + "college_physics": 0.25, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.09523809523809523, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.3103448275862069, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.041666666666666664, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.25, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.36666666666666664, + "business_administration": 0.21052631578947367, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.6206896551724138, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.46938775510204084, + "high_school_politics": 0.5, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.3333333333333333, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.5, + "art_studies": 0.5789473684210527, + "professional_tour_guide": 0.5, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.44, + "middle_school_history": 0.48148148148148145, + "civil_servant": 0.38461538461538464, + "sports_science": 0.2916666666666667, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.37254901960784315, + "accountant": 0.3148148148148148, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.4722222222222222, + "tax_accountant": 0.2777777777777778, + "physician": 0.3148148148148148 + } + }, + "prompt_4": { + "accuracy": 0.4259028642590286, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.375, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.38095238095238093, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.42857142857142855, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.391304347826087, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.25, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.6538461538461539, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.52, + "veterinary_medicine": 0.25, + "college_economics": 0.31666666666666665, + "business_administration": 0.34210526315789475, + "marxism": 0.6666666666666666, + "mao_zedong_thought": 0.6206896551724138, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.5714285714285714, + "high_school_politics": 0.5833333333333334, + "high_school_geography": 0.5833333333333334, + "middle_school_politics": 0.46153846153846156, + "middle_school_geography": 0.6470588235294118, + "modern_chinese_history": 0.6071428571428571, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.4074074074074074, + "law": 0.3793103448275862, + "chinese_language_and_literature": 0.5, + "art_studies": 0.5789473684210527, + "professional_tour_guide": 0.5588235294117647, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.48, + "middle_school_history": 0.5185185185185185, + "civil_servant": 0.4807692307692308, + "sports_science": 0.4583333333333333, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.5833333333333334, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.43137254901960786, + "accountant": 0.3888888888888889, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.5, + "tax_accountant": 0.4074074074074074, + "physician": 0.42592592592592593 + } + }, + "prompt_5": { + "accuracy": 0.3318804483188045, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.20833333333333334, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.35714285714285715, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.2413793103448276, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.4230769230769231, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.35, + "business_administration": 0.18421052631578946, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.35294117647058826, + "teacher_qualification": 0.42857142857142855, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.46153846153846156, + "middle_school_geography": 0.5294117647058824, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.25, + "logic": 0.3333333333333333, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.5, + "professional_tour_guide": 0.5294117647058824, + "legal_professional": 0.25, + "high_school_chinese": 0.4583333333333333, + "high_school_history": 0.48, + "middle_school_history": 0.5185185185185185, + "civil_servant": 0.3076923076923077, + "sports_science": 0.16666666666666666, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.5833333333333334, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.24074074074074073, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.24074074074074073, + "physician": 0.3148148148148148 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4121863799283154 + }, + "prompt_2": { + "accuracy": 0.3906810035842294 + }, + "prompt_3": { + "accuracy": 0.36917562724014336 + }, + "prompt_4": { + "accuracy": 0.46236559139784944 + }, + "prompt_5": { + "accuracy": 0.3763440860215054 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4042479709894664, + "category_acc": { + "agronomy": 0.40828402366863903, + "anatomy": 0.3581081081081081, + "ancient_chinese": 0.31097560975609756, + "arts": 0.49375, + "astronomy": 0.3212121212121212, + "business_ethics": 0.3875598086124402, + "chinese_civil_service_exam": 0.3625, + "chinese_driving_rule": 0.48091603053435117, + "chinese_food_culture": 0.4485294117647059, + "chinese_foreign_policy": 0.45794392523364486, + "chinese_history": 0.5789473684210527, + "chinese_literature": 0.35294117647058826, + "chinese_teacher_qualification": 0.6201117318435754, + "clinical_knowledge": 0.4767932489451477, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.42990654205607476, + "college_engineering_hydrology": 0.4056603773584906, + "college_law": 0.42592592592592593, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.3490566037735849, + "college_medicine": 0.358974358974359, + "computer_science": 0.37745098039215685, + "computer_security": 0.4269005847953216, + "conceptual_physics": 0.36054421768707484, + "construction_project_management": 0.34532374100719426, + "economics": 0.34591194968553457, + "education": 0.3987730061349693, + "electrical_engineering": 0.36046511627906974, + "elementary_chinese": 0.4642857142857143, + "elementary_commonsense": 0.4393939393939394, + "elementary_information_and_technology": 0.42436974789915966, + "elementary_mathematics": 0.2826086956521739, + "ethnology": 0.4222222222222222, + "food_science": 0.43356643356643354, + "genetics": 0.3409090909090909, + "global_facts": 0.3624161073825503, + "high_school_biology": 0.33727810650887574, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.5, + "high_school_mathematics": 0.21951219512195122, + "high_school_physics": 0.3, + "high_school_politics": 0.3986013986013986, + "human_sexuality": 0.40476190476190477, + "international_law": 0.33513513513513515, + "journalism": 0.3313953488372093, + "jurisprudence": 0.41119221411192214, + "legal_and_moral_basis": 0.6308411214953271, + "logical": 0.3821138211382114, + "machine_learning": 0.4016393442622951, + "management": 0.46190476190476193, + "marketing": 0.4, + "marxist_theory": 0.48677248677248675, + "modern_chinese": 0.3103448275862069, + "nutrition": 0.3724137931034483, + "philosophy": 0.5142857142857142, + "professional_accounting": 0.37142857142857144, + "professional_law": 0.4028436018957346, + "professional_medicine": 0.34308510638297873, + "professional_psychology": 0.46120689655172414, + "public_relations": 0.40229885057471265, + "security_study": 0.42962962962962964, + "sociology": 0.3805309734513274, + "sports_science": 0.3939393939393939, + "traditional_chinese_medicine": 0.4540540540540541, + "virology": 0.3727810650887574, + "world_history": 0.5341614906832298, + "world_religions": 0.4 + } + }, + "prompt_2": { + "accuracy": 0.3638404420652737, + "category_acc": { + "agronomy": 0.33727810650887574, + "anatomy": 0.3716216216216216, + "ancient_chinese": 0.31097560975609756, + "arts": 0.40625, + "astronomy": 0.2727272727272727, + "business_ethics": 0.33014354066985646, + "chinese_civil_service_exam": 0.36875, + "chinese_driving_rule": 0.5343511450381679, + "chinese_food_culture": 0.34558823529411764, + "chinese_foreign_policy": 0.4766355140186916, + "chinese_history": 0.44891640866873067, + "chinese_literature": 0.3137254901960784, + "chinese_teacher_qualification": 0.46368715083798884, + "clinical_knowledge": 0.4388185654008439, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.37383177570093457, + "college_engineering_hydrology": 0.330188679245283, + "college_law": 0.37962962962962965, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.2641509433962264, + "college_medicine": 0.34798534798534797, + "computer_science": 0.4019607843137255, + "computer_security": 0.36257309941520466, + "conceptual_physics": 0.272108843537415, + "construction_project_management": 0.26618705035971224, + "economics": 0.37735849056603776, + "education": 0.34355828220858897, + "electrical_engineering": 0.28488372093023256, + "elementary_chinese": 0.38095238095238093, + "elementary_commonsense": 0.42424242424242425, + "elementary_information_and_technology": 0.4369747899159664, + "elementary_mathematics": 0.26956521739130435, + "ethnology": 0.42962962962962964, + "food_science": 0.38461538461538464, + "genetics": 0.2897727272727273, + "global_facts": 0.3422818791946309, + "high_school_biology": 0.35502958579881655, + "high_school_chemistry": 0.2878787878787879, + "high_school_geography": 0.4661016949152542, + "high_school_mathematics": 0.2804878048780488, + "high_school_physics": 0.3, + "high_school_politics": 0.36363636363636365, + "human_sexuality": 0.3253968253968254, + "international_law": 0.2864864864864865, + "journalism": 0.29651162790697677, + "jurisprudence": 0.35523114355231145, + "legal_and_moral_basis": 0.5747663551401869, + "logical": 0.34959349593495936, + "machine_learning": 0.30327868852459017, + "management": 0.3904761904761905, + "marketing": 0.3888888888888889, + "marxist_theory": 0.43915343915343913, + "modern_chinese": 0.3448275862068966, + "nutrition": 0.32413793103448274, + "philosophy": 0.4095238095238095, + "professional_accounting": 0.36, + "professional_law": 0.36018957345971564, + "professional_medicine": 0.3404255319148936, + "professional_psychology": 0.33189655172413796, + "public_relations": 0.3275862068965517, + "security_study": 0.37037037037037035, + "sociology": 0.30973451327433627, + "sports_science": 0.3878787878787879, + "traditional_chinese_medicine": 0.3891891891891892, + "virology": 0.4260355029585799, + "world_history": 0.4409937888198758, + "world_religions": 0.4125 + } + }, + "prompt_3": { + "accuracy": 0.3482127439129684, + "category_acc": { + "agronomy": 0.34911242603550297, + "anatomy": 0.33783783783783783, + "ancient_chinese": 0.2865853658536585, + "arts": 0.40625, + "astronomy": 0.2545454545454545, + "business_ethics": 0.3349282296650718, + "chinese_civil_service_exam": 0.28125, + "chinese_driving_rule": 0.5190839694656488, + "chinese_food_culture": 0.36764705882352944, + "chinese_foreign_policy": 0.4953271028037383, + "chinese_history": 0.5015479876160991, + "chinese_literature": 0.28921568627450983, + "chinese_teacher_qualification": 0.49162011173184356, + "clinical_knowledge": 0.4430379746835443, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.4205607476635514, + "college_engineering_hydrology": 0.29245283018867924, + "college_law": 0.24074074074074073, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.24528301886792453, + "college_medicine": 0.3663003663003663, + "computer_science": 0.29411764705882354, + "computer_security": 0.2631578947368421, + "conceptual_physics": 0.3333333333333333, + "construction_project_management": 0.2446043165467626, + "economics": 0.3018867924528302, + "education": 0.34355828220858897, + "electrical_engineering": 0.3023255813953488, + "elementary_chinese": 0.38492063492063494, + "elementary_commonsense": 0.3686868686868687, + "elementary_information_and_technology": 0.36554621848739494, + "elementary_mathematics": 0.26956521739130435, + "ethnology": 0.3925925925925926, + "food_science": 0.34965034965034963, + "genetics": 0.3068181818181818, + "global_facts": 0.3624161073825503, + "high_school_biology": 0.3076923076923077, + "high_school_chemistry": 0.25757575757575757, + "high_school_geography": 0.3898305084745763, + "high_school_mathematics": 0.23170731707317074, + "high_school_physics": 0.2636363636363636, + "high_school_politics": 0.46153846153846156, + "human_sexuality": 0.31746031746031744, + "international_law": 0.3027027027027027, + "journalism": 0.313953488372093, + "jurisprudence": 0.29683698296836986, + "legal_and_moral_basis": 0.5981308411214953, + "logical": 0.3089430894308943, + "machine_learning": 0.23770491803278687, + "management": 0.3619047619047619, + "marketing": 0.3111111111111111, + "marxist_theory": 0.4444444444444444, + "modern_chinese": 0.31896551724137934, + "nutrition": 0.30344827586206896, + "philosophy": 0.41904761904761906, + "professional_accounting": 0.33714285714285713, + "professional_law": 0.24170616113744076, + "professional_medicine": 0.3404255319148936, + "professional_psychology": 0.4051724137931034, + "public_relations": 0.3160919540229885, + "security_study": 0.34074074074074073, + "sociology": 0.3407079646017699, + "sports_science": 0.3393939393939394, + "traditional_chinese_medicine": 0.42162162162162165, + "virology": 0.3668639053254438, + "world_history": 0.4720496894409938, + "world_religions": 0.4 + } + }, + "prompt_4": { + "accuracy": 0.44456915903988947, + "category_acc": { + "agronomy": 0.4319526627218935, + "anatomy": 0.40540540540540543, + "ancient_chinese": 0.3231707317073171, + "arts": 0.5125, + "astronomy": 0.3212121212121212, + "business_ethics": 0.44976076555023925, + "chinese_civil_service_exam": 0.4, + "chinese_driving_rule": 0.5725190839694656, + "chinese_food_culture": 0.47794117647058826, + "chinese_foreign_policy": 0.5046728971962616, + "chinese_history": 0.6006191950464397, + "chinese_literature": 0.45098039215686275, + "chinese_teacher_qualification": 0.664804469273743, + "clinical_knowledge": 0.46835443037974683, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.40186915887850466, + "college_engineering_hydrology": 0.44339622641509435, + "college_law": 0.3611111111111111, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.41509433962264153, + "college_medicine": 0.4468864468864469, + "computer_science": 0.37254901960784315, + "computer_security": 0.43859649122807015, + "conceptual_physics": 0.4217687074829932, + "construction_project_management": 0.3237410071942446, + "economics": 0.4025157232704403, + "education": 0.4662576687116564, + "electrical_engineering": 0.32558139534883723, + "elementary_chinese": 0.48412698412698413, + "elementary_commonsense": 0.45454545454545453, + "elementary_information_and_technology": 0.5336134453781513, + "elementary_mathematics": 0.33043478260869563, + "ethnology": 0.5185185185185185, + "food_science": 0.48951048951048953, + "genetics": 0.4147727272727273, + "global_facts": 0.4429530201342282, + "high_school_biology": 0.378698224852071, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.5169491525423728, + "high_school_mathematics": 0.23170731707317074, + "high_school_physics": 0.3181818181818182, + "high_school_politics": 0.44755244755244755, + "human_sexuality": 0.42857142857142855, + "international_law": 0.4, + "journalism": 0.42441860465116277, + "jurisprudence": 0.49148418491484186, + "legal_and_moral_basis": 0.705607476635514, + "logical": 0.3821138211382114, + "machine_learning": 0.36885245901639346, + "management": 0.48095238095238096, + "marketing": 0.4666666666666667, + "marxist_theory": 0.582010582010582, + "modern_chinese": 0.3620689655172414, + "nutrition": 0.43448275862068964, + "philosophy": 0.47619047619047616, + "professional_accounting": 0.38285714285714284, + "professional_law": 0.35545023696682465, + "professional_medicine": 0.425531914893617, + "professional_psychology": 0.5560344827586207, + "public_relations": 0.41954022988505746, + "security_study": 0.4962962962962963, + "sociology": 0.4424778761061947, + "sports_science": 0.3939393939393939, + "traditional_chinese_medicine": 0.5135135135135135, + "virology": 0.47928994082840237, + "world_history": 0.5031055900621118, + "world_religions": 0.4375 + } + }, + "prompt_5": { + "accuracy": 0.3477810395441202, + "category_acc": { + "agronomy": 0.33136094674556216, + "anatomy": 0.3108108108108108, + "ancient_chinese": 0.24390243902439024, + "arts": 0.54375, + "astronomy": 0.34545454545454546, + "business_ethics": 0.2966507177033493, + "chinese_civil_service_exam": 0.3, + "chinese_driving_rule": 0.40458015267175573, + "chinese_food_culture": 0.4485294117647059, + "chinese_foreign_policy": 0.2803738317757009, + "chinese_history": 0.3622291021671827, + "chinese_literature": 0.3137254901960784, + "chinese_teacher_qualification": 0.43575418994413406, + "clinical_knowledge": 0.4092827004219409, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.2897196261682243, + "college_engineering_hydrology": 0.33962264150943394, + "college_law": 0.2962962962962963, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.36792452830188677, + "college_medicine": 0.3443223443223443, + "computer_science": 0.3137254901960784, + "computer_security": 0.34502923976608185, + "conceptual_physics": 0.3469387755102041, + "construction_project_management": 0.3237410071942446, + "economics": 0.3270440251572327, + "education": 0.3558282208588957, + "electrical_engineering": 0.27325581395348836, + "elementary_chinese": 0.5079365079365079, + "elementary_commonsense": 0.3838383838383838, + "elementary_information_and_technology": 0.42436974789915966, + "elementary_mathematics": 0.2826086956521739, + "ethnology": 0.37037037037037035, + "food_science": 0.3776223776223776, + "genetics": 0.3125, + "global_facts": 0.3825503355704698, + "high_school_biology": 0.3076923076923077, + "high_school_chemistry": 0.25, + "high_school_geography": 0.3389830508474576, + "high_school_mathematics": 0.25, + "high_school_physics": 0.3090909090909091, + "high_school_politics": 0.3146853146853147, + "human_sexuality": 0.31746031746031744, + "international_law": 0.3675675675675676, + "journalism": 0.3372093023255814, + "jurisprudence": 0.32116788321167883, + "legal_and_moral_basis": 0.4719626168224299, + "logical": 0.3252032520325203, + "machine_learning": 0.4098360655737705, + "management": 0.35714285714285715, + "marketing": 0.36666666666666664, + "marxist_theory": 0.36507936507936506, + "modern_chinese": 0.43103448275862066, + "nutrition": 0.32413793103448274, + "philosophy": 0.3333333333333333, + "professional_accounting": 0.2857142857142857, + "professional_law": 0.26540284360189575, + "professional_medicine": 0.3058510638297872, + "professional_psychology": 0.36637931034482757, + "public_relations": 0.3390804597701149, + "security_study": 0.3851851851851852, + "sociology": 0.3185840707964602, + "sports_science": 0.34545454545454546, + "traditional_chinese_medicine": 0.372972972972973, + "virology": 0.4260355029585799, + "world_history": 0.3105590062111801, + "world_religions": 0.425 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3333333333333333 + }, + "prompt_2": { + "accuracy": 0.24242424242424243 + }, + "prompt_3": { + "accuracy": 0.2727272727272727 + }, + "prompt_4": { + "accuracy": 0.36363636363636365 + }, + "prompt_5": { + "accuracy": 0.21212121212121213 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.16590909090909092 + }, + "prompt_2": { + "accuracy": 0.16590909090909092 + }, + "prompt_3": { + "accuracy": 0.1431818181818182 + }, + "prompt_4": { + "accuracy": 0.3977272727272727 + }, + "prompt_5": { + "accuracy": 0.3795454545454545 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.316271186440678 + }, + "prompt_2": { + "accuracy": 0.3359322033898305 + }, + "prompt_3": { + "accuracy": 0.33084745762711865 + }, + "prompt_4": { + "accuracy": 0.33322033898305087 + }, + "prompt_5": { + "accuracy": 0.3206779661016949 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7595362752430815 + }, + "prompt_2": { + "accuracy": 0.6544502617801047 + }, + "prompt_3": { + "accuracy": 0.6727748691099477 + }, + "prompt_4": { + "accuracy": 0.7393418100224383 + }, + "prompt_5": { + "accuracy": 0.6843679880329095 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.746202841744243 + }, + "prompt_2": { + "accuracy": 0.3806957373836355 + }, + "prompt_3": { + "accuracy": 0.6315531602155806 + }, + "prompt_4": { + "accuracy": 0.7413032827045566 + }, + "prompt_5": { + "accuracy": 0.5957863792258696 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.18584961486664905, + "rouge2": 0.02785783731475492, + "rougeL": 0.12344167689895469, + "avg_rouge": 0.11238304302678621 + }, + "prompt_2": { + "rouge1": 0.14033510761188592, + "rouge2": 0.019156262619744322, + "rougeL": 0.09305314512597507, + "avg_rouge": 0.08418150511920176 + }, + "prompt_3": { + "rouge1": 0.16169644230685543, + "rouge2": 0.02137765705963491, + "rougeL": 0.1079712896870687, + "avg_rouge": 0.09701512968451968 + }, + "prompt_4": { + "rouge1": 0.17024567372771598, + "rouge2": 0.023065249871530094, + "rougeL": 0.11120478332124126, + "avg_rouge": 0.10150523564016245 + }, + "prompt_5": { + "rouge1": 0.15052649329014947, + "rouge2": 0.019442799286862045, + "rougeL": 0.10030094876300699, + "avg_rouge": 0.09009008044667284 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.14688522587856012, + "rouge2": 0.030115184041075247, + "rougeL": 0.11181182247217723, + "avg_rouge": 0.0962707441306042 + }, + "prompt_2": { + "rouge1": 0.14241057605800392, + "rouge2": 0.02635513696692834, + "rougeL": 0.10360025812616681, + "avg_rouge": 0.09078865705036636 + }, + "prompt_3": { + "rouge1": 0.15140734298955014, + "rouge2": 0.03125358198905279, + "rougeL": 0.1081629914894675, + "avg_rouge": 0.09694130548935681 + }, + "prompt_4": { + "rouge1": 0.14502607123723982, + "rouge2": 0.0301740876323854, + "rougeL": 0.10631646162621711, + "avg_rouge": 0.09383887349861413 + }, + "prompt_5": { + "rouge1": 0.12787272980269718, + "rouge2": 0.028596526309901706, + "rougeL": 0.10071769496338355, + "avg_rouge": 0.08572898369199415 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7947247706422018 + }, + "prompt_2": { + "accuracy": 0.7293577981651376 + }, + "prompt_3": { + "accuracy": 0.6731651376146789 + }, + "prompt_4": { + "accuracy": 0.713302752293578 + }, + "prompt_5": { + "accuracy": 0.5 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5743048897411314 + }, + "prompt_2": { + "accuracy": 0.6116970278044104 + }, + "prompt_3": { + "accuracy": 0.6548418024928092 + }, + "prompt_4": { + "accuracy": 0.5800575263662512 + }, + "prompt_5": { + "accuracy": 0.551294343240652 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5375 + }, + "prompt_2": { + "accuracy": 0.543 + }, + "prompt_3": { + "accuracy": 0.5305 + }, + "prompt_4": { + "accuracy": 0.5665 + }, + "prompt_5": { + "accuracy": 0.4995 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.343 + }, + "prompt_2": { + "accuracy": 0.355 + }, + "prompt_3": { + "accuracy": 0.372 + }, + "prompt_4": { + "accuracy": 0.368 + }, + "prompt_5": { + "accuracy": 0.376 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.501 + }, + "prompt_2": { + "accuracy": 0.506 + }, + "prompt_3": { + "accuracy": 0.485 + }, + "prompt_4": { + "accuracy": 0.521 + }, + "prompt_5": { + "accuracy": 0.534 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.39436619718309857 + }, + "prompt_2": { + "accuracy": 0.4647887323943662 + }, + "prompt_3": { + "accuracy": 0.5774647887323944 + }, + "prompt_4": { + "accuracy": 0.49295774647887325 + }, + "prompt_5": { + "accuracy": 0.5211267605633803 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5379061371841155 + }, + "prompt_2": { + "accuracy": 0.5018050541516246 + }, + "prompt_3": { + "accuracy": 0.4729241877256318 + }, + "prompt_4": { + "accuracy": 0.5451263537906137 + }, + "prompt_5": { + "accuracy": 0.5451263537906137 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.47549019607843135 + }, + "prompt_2": { + "accuracy": 0.5049019607843137 + }, + "prompt_3": { + "accuracy": 0.48284313725490197 + }, + "prompt_4": { + "accuracy": 0.4681372549019608 + }, + "prompt_5": { + "accuracy": 0.46078431372549017 + } } }, "five_shot": { @@ -7440,53 +63079,1733 @@ "model_link": "https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.4276190476190476, + "language_acc": { + "Malay": 0.2866666666666667, + "English": 0.5866666666666667, + "Vietnamese": 0.38, + "Spanish": 0.43333333333333335, + "Indonesian": 0.36666666666666664, + "Filipino": 0.37333333333333335, + "Chinese": 0.5666666666666667 + }, + "consistency_score_2": 0.4085714285714286, + "consistency_score_3": 0.21257142857142855, + "consistency_score_4": 0.12552380952380957, + "consistency_score_5": 0.07904761904761906, + "consistency_score_6": 0.051428571428571435, + "consistency_score_7": 0.03333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.3466666666666667, + "Malay,Vietnamese": 0.36, + "Malay,Spanish": 0.35333333333333333, + "Malay,Indonesian": 0.42, + "Malay,Filipino": 0.30666666666666664, + "Malay,Chinese": 0.2866666666666667, + "English,Vietnamese": 0.38, + "English,Spanish": 0.52, + "English,Indonesian": 0.41333333333333333, + "English,Filipino": 0.44, + "English,Chinese": 0.62, + "Vietnamese,Spanish": 0.3933333333333333, + "Vietnamese,Indonesian": 0.4066666666666667, + "Vietnamese,Filipino": 0.4066666666666667, + "Vietnamese,Chinese": 0.37333333333333335, + "Spanish,Indonesian": 0.4066666666666667, + "Spanish,Filipino": 0.36, + "Spanish,Chinese": 0.5, + "Indonesian,Filipino": 0.41333333333333333, + "Indonesian,Chinese": 0.46, + "Filipino,Chinese": 0.41333333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.18, + "Malay,English,Spanish": 0.2, + "Malay,English,Indonesian": 0.2, + "Malay,English,Filipino": 0.16666666666666666, + "Malay,English,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Spanish": 0.17333333333333334, + "Malay,Vietnamese,Indonesian": 0.22666666666666666, + "Malay,Vietnamese,Filipino": 0.15333333333333332, + "Malay,Vietnamese,Chinese": 0.14, + "Malay,Spanish,Indonesian": 0.18, + "Malay,Spanish,Filipino": 0.14, + "Malay,Spanish,Chinese": 0.17333333333333334, + "Malay,Indonesian,Filipino": 0.19333333333333333, + "Malay,Indonesian,Chinese": 0.20666666666666667, + "Malay,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish": 0.25333333333333335, + "English,Vietnamese,Indonesian": 0.19333333333333333, + "English,Vietnamese,Filipino": 0.22, + "English,Vietnamese,Chinese": 0.2733333333333333, + "English,Spanish,Indonesian": 0.26, + "English,Spanish,Filipino": 0.23333333333333334, + "English,Spanish,Chinese": 0.38, + "English,Indonesian,Filipino": 0.23333333333333334, + "English,Indonesian,Chinese": 0.30666666666666664, + "English,Filipino,Chinese": 0.29333333333333333, + "Vietnamese,Spanish,Indonesian": 0.2, + "Vietnamese,Spanish,Filipino": 0.18, + "Vietnamese,Spanish,Chinese": 0.23333333333333334, + "Vietnamese,Indonesian,Filipino": 0.2, + "Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "Vietnamese,Filipino,Chinese": 0.2, + "Spanish,Indonesian,Filipino": 0.18, + "Spanish,Indonesian,Chinese": 0.26, + "Spanish,Filipino,Chinese": 0.21333333333333335, + "Indonesian,Filipino,Chinese": 0.23333333333333334 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.12, + "Malay,English,Vietnamese,Indonesian": 0.12, + "Malay,English,Vietnamese,Filipino": 0.1, + "Malay,English,Vietnamese,Chinese": 0.1, + "Malay,English,Spanish,Indonesian": 0.12, + "Malay,English,Spanish,Filipino": 0.10666666666666667, + "Malay,English,Spanish,Chinese": 0.14, + "Malay,English,Indonesian,Filipino": 0.13333333333333333, + "Malay,English,Indonesian,Chinese": 0.14, + "Malay,English,Filipino,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.08, + "Malay,Vietnamese,Spanish,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.12666666666666668, + "Malay,Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.08666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.09333333333333334, + "Malay,Spanish,Indonesian,Chinese": 0.12, + "Malay,Spanish,Filipino,Chinese": 0.1, + "Malay,Indonesian,Filipino,Chinese": 0.12, + "English,Vietnamese,Spanish,Indonesian": 0.13333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.14, + "English,Vietnamese,Spanish,Chinese": 0.18666666666666668, + "English,Vietnamese,Indonesian,Filipino": 0.12, + "English,Vietnamese,Indonesian,Chinese": 0.14, + "English,Vietnamese,Filipino,Chinese": 0.16, + "English,Spanish,Indonesian,Filipino": 0.15333333333333332, + "English,Spanish,Indonesian,Chinese": 0.21333333333333335, + "English,Spanish,Filipino,Chinese": 0.18, + "English,Indonesian,Filipino,Chinese": 0.16, + "Vietnamese,Spanish,Indonesian,Filipino": 0.09333333333333334, + "Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.10666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.08, + "Malay,English,Vietnamese,Spanish,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.08666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.08, + "Malay,English,Vietnamese,Filipino,Chinese": 0.06, + "Malay,English,Spanish,Indonesian,Filipino": 0.08, + "Malay,English,Spanish,Indonesian,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Filipino,Chinese": 0.08, + "Malay,English,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.06, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.08, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.08, + "English,Spanish,Indonesian,Filipino,Chinese": 0.12, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.05333333333333334, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.06, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.04, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.06, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 + } + }, + "AC3_2": 0.417878294775929, + "AC3_3": 0.28397670756817206, + "AC3_4": 0.1940777908608864, + "AC3_5": 0.13343000355404466, + "AC3_6": 0.0918148253145459, + "AC3_7": 0.06184573001413126 + }, + "prompt_2": { + "overall_acc": 0.42857142857142866, + "language_acc": { + "Malay": 0.31333333333333335, + "English": 0.58, + "Vietnamese": 0.38, + "Spanish": 0.4866666666666667, + "Indonesian": 0.36666666666666664, + "Filipino": 0.37333333333333335, + "Chinese": 0.5 + }, + "consistency_score_2": 0.4082539682539683, + "consistency_score_3": 0.20990476190476187, + "consistency_score_4": 0.124, + "consistency_score_5": 0.08285714285714285, + "consistency_score_6": 0.06285714285714286, + "consistency_score_7": 0.05333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.38, + "Malay,Vietnamese": 0.4533333333333333, + "Malay,Spanish": 0.32, + "Malay,Indonesian": 0.5066666666666667, + "Malay,Filipino": 0.4, + "Malay,Chinese": 0.36666666666666664, + "English,Vietnamese": 0.3933333333333333, + "English,Spanish": 0.4866666666666667, + "English,Indonesian": 0.36666666666666664, + "English,Filipino": 0.41333333333333333, + "English,Chinese": 0.6133333333333333, + "Vietnamese,Spanish": 0.32666666666666666, + "Vietnamese,Indonesian": 0.4, + "Vietnamese,Filipino": 0.42, + "Vietnamese,Chinese": 0.37333333333333335, + "Spanish,Indonesian": 0.41333333333333333, + "Spanish,Filipino": 0.36666666666666664, + "Spanish,Chinese": 0.48, + "Indonesian,Filipino": 0.3333333333333333, + "Indonesian,Chinese": 0.3933333333333333, + "Filipino,Chinese": 0.36666666666666664 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.24666666666666667, + "Malay,English,Spanish": 0.18, + "Malay,English,Indonesian": 0.22, + "Malay,English,Filipino": 0.2, + "Malay,English,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Spanish": 0.17333333333333334, + "Malay,Vietnamese,Indonesian": 0.26666666666666666, + "Malay,Vietnamese,Filipino": 0.22666666666666666, + "Malay,Vietnamese,Chinese": 0.20666666666666667, + "Malay,Spanish,Indonesian": 0.22, + "Malay,Spanish,Filipino": 0.18666666666666668, + "Malay,Spanish,Chinese": 0.16666666666666666, + "Malay,Indonesian,Filipino": 0.2, + "Malay,Indonesian,Chinese": 0.24666666666666667, + "Malay,Filipino,Chinese": 0.18, + "English,Vietnamese,Spanish": 0.20666666666666667, + "English,Vietnamese,Indonesian": 0.18666666666666668, + "English,Vietnamese,Filipino": 0.22, + "English,Vietnamese,Chinese": 0.26, + "English,Spanish,Indonesian": 0.22, + "English,Spanish,Filipino": 0.22, + "English,Spanish,Chinese": 0.36666666666666664, + "English,Indonesian,Filipino": 0.16, + "English,Indonesian,Chinese": 0.26, + "English,Filipino,Chinese": 0.26, + "Vietnamese,Spanish,Indonesian": 0.18, + "Vietnamese,Spanish,Filipino": 0.17333333333333334, + "Vietnamese,Spanish,Chinese": 0.19333333333333333, + "Vietnamese,Indonesian,Filipino": 0.17333333333333334, + "Vietnamese,Indonesian,Chinese": 0.18666666666666668, + "Vietnamese,Filipino,Chinese": 0.16, + "Spanish,Indonesian,Filipino": 0.18666666666666668, + "Spanish,Indonesian,Chinese": 0.22666666666666666, + "Spanish,Filipino,Chinese": 0.19333333333333333, + "Indonesian,Filipino,Chinese": 0.14666666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.12, + "Malay,English,Vietnamese,Indonesian": 0.15333333333333332, + "Malay,English,Vietnamese,Filipino": 0.14, + "Malay,English,Vietnamese,Chinese": 0.14, + "Malay,English,Spanish,Indonesian": 0.13333333333333333, + "Malay,English,Spanish,Filipino": 0.10666666666666667, + "Malay,English,Spanish,Chinese": 0.13333333333333333, + "Malay,English,Indonesian,Filipino": 0.10666666666666667, + "Malay,English,Indonesian,Chinese": 0.16666666666666666, + "Malay,English,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian": 0.12, + "Malay,Vietnamese,Spanish,Filipino": 0.12, + "Malay,Vietnamese,Spanish,Chinese": 0.1, + "Malay,Vietnamese,Indonesian,Filipino": 0.12, + "Malay,Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.09333333333333334, + "Malay,Spanish,Indonesian,Filipino": 0.14, + "Malay,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Malay,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.11333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.12666666666666668, + "English,Vietnamese,Spanish,Chinese": 0.16, + "English,Vietnamese,Indonesian,Filipino": 0.1, + "English,Vietnamese,Indonesian,Chinese": 0.13333333333333333, + "English,Vietnamese,Filipino,Chinese": 0.13333333333333333, + "English,Spanish,Indonesian,Filipino": 0.10666666666666667, + "English,Spanish,Indonesian,Chinese": 0.18666666666666668, + "English,Spanish,Filipino,Chinese": 0.15333333333333332, + "English,Indonesian,Filipino,Chinese": 0.1, + "Vietnamese,Spanish,Indonesian,Filipino": 0.1, + "Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.10666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.12 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.08666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.08666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.08, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.08666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.06666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.08666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.08666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.08, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.06666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.07333333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.08666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.06, + "English,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334 + } + }, + "AC3_2": 0.41816605241046745, + "AC3_3": 0.28179338556786604, + "AC3_4": 0.19234746635609015, + "AC3_5": 0.13886671984515372, + "AC3_6": 0.10963455147270726, + "AC3_7": 0.09486166005936666 + }, + "prompt_3": { + "overall_acc": 0.42857142857142855, + "language_acc": { + "Malay": 0.29333333333333333, + "English": 0.6066666666666667, + "Vietnamese": 0.38, + "Spanish": 0.5, + "Indonesian": 0.38, + "Filipino": 0.3466666666666667, + "Chinese": 0.49333333333333335 + }, + "consistency_score_2": 0.3866666666666667, + "consistency_score_3": 0.18704761904761902, + "consistency_score_4": 0.10247619047619047, + "consistency_score_5": 0.061269841269841266, + "consistency_score_6": 0.03904761904761905, + "consistency_score_7": 0.02666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.36666666666666664, + "Malay,Vietnamese": 0.38, + "Malay,Spanish": 0.3466666666666667, + "Malay,Indonesian": 0.4533333333333333, + "Malay,Filipino": 0.30666666666666664, + "Malay,Chinese": 0.36666666666666664, + "English,Vietnamese": 0.4066666666666667, + "English,Spanish": 0.4266666666666667, + "English,Indonesian": 0.41333333333333333, + "English,Filipino": 0.4, + "English,Chinese": 0.5666666666666667, + "Vietnamese,Spanish": 0.34, + "Vietnamese,Indonesian": 0.3933333333333333, + "Vietnamese,Filipino": 0.31333333333333335, + "Vietnamese,Chinese": 0.38, + "Spanish,Indonesian": 0.35333333333333333, + "Spanish,Filipino": 0.32666666666666666, + "Spanish,Chinese": 0.42, + "Indonesian,Filipino": 0.3333333333333333, + "Indonesian,Chinese": 0.41333333333333333, + "Filipino,Chinese": 0.41333333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.16666666666666666, + "Malay,English,Spanish": 0.16, + "Malay,English,Indonesian": 0.20666666666666667, + "Malay,English,Filipino": 0.15333333333333332, + "Malay,English,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Spanish": 0.15333333333333332, + "Malay,Vietnamese,Indonesian": 0.21333333333333335, + "Malay,Vietnamese,Filipino": 0.12, + "Malay,Vietnamese,Chinese": 0.19333333333333333, + "Malay,Spanish,Indonesian": 0.2, + "Malay,Spanish,Filipino": 0.12, + "Malay,Spanish,Chinese": 0.18666666666666668, + "Malay,Indonesian,Filipino": 0.17333333333333334, + "Malay,Indonesian,Chinese": 0.23333333333333334, + "Malay,Filipino,Chinese": 0.16666666666666666, + "English,Vietnamese,Spanish": 0.19333333333333333, + "English,Vietnamese,Indonesian": 0.19333333333333333, + "English,Vietnamese,Filipino": 0.16666666666666666, + "English,Vietnamese,Chinese": 0.23333333333333334, + "English,Spanish,Indonesian": 0.18, + "English,Spanish,Filipino": 0.18, + "English,Spanish,Chinese": 0.29333333333333333, + "English,Indonesian,Filipino": 0.18, + "English,Indonesian,Chinese": 0.28, + "English,Filipino,Chinese": 0.2733333333333333, + "Vietnamese,Spanish,Indonesian": 0.16666666666666666, + "Vietnamese,Spanish,Filipino": 0.11333333333333333, + "Vietnamese,Spanish,Chinese": 0.2, + "Vietnamese,Indonesian,Filipino": 0.13333333333333333, + "Vietnamese,Indonesian,Chinese": 0.18, + "Vietnamese,Filipino,Chinese": 0.17333333333333334, + "Spanish,Indonesian,Filipino": 0.14, + "Spanish,Indonesian,Chinese": 0.21333333333333335, + "Spanish,Filipino,Chinese": 0.16, + "Indonesian,Filipino,Chinese": 0.21333333333333335 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.08, + "Malay,English,Vietnamese,Indonesian": 0.11333333333333333, + "Malay,English,Vietnamese,Filipino": 0.08, + "Malay,English,Vietnamese,Chinese": 0.12666666666666668, + "Malay,English,Spanish,Indonesian": 0.09333333333333334, + "Malay,English,Spanish,Filipino": 0.06, + "Malay,English,Spanish,Chinese": 0.12666666666666668, + "Malay,English,Indonesian,Filipino": 0.10666666666666667, + "Malay,English,Indonesian,Chinese": 0.16, + "Malay,English,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian": 0.1, + "Malay,Vietnamese,Spanish,Filipino": 0.03333333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.08, + "Malay,Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.08, + "Malay,Spanish,Indonesian,Filipino": 0.09333333333333334, + "Malay,Spanish,Indonesian,Chinese": 0.12, + "Malay,Spanish,Filipino,Chinese": 0.08, + "Malay,Indonesian,Filipino,Chinese": 0.12666666666666668, + "English,Vietnamese,Spanish,Indonesian": 0.08666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.07333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.14, + "English,Vietnamese,Indonesian,Filipino": 0.1, + "English,Vietnamese,Indonesian,Chinese": 0.12666666666666668, + "English,Vietnamese,Filipino,Chinese": 0.11333333333333333, + "English,Spanish,Indonesian,Filipino": 0.08666666666666667, + "English,Spanish,Indonesian,Chinese": 0.16, + "English,Spanish,Filipino,Chinese": 0.12666666666666668, + "English,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.05333333333333334, + "Vietnamese,Spanish,Indonesian,Chinese": 0.09333333333333334, + "Vietnamese,Spanish,Filipino,Chinese": 0.06666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.1 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.05333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino": 0.02666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.08666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.06666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.05333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.08666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.06, + "Malay,English,Indonesian,Filipino,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.03333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.06, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.02666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.06666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.04, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.06666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.05333333333333334, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.08, + "English,Spanish,Indonesian,Filipino,Chinese": 0.08, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.02666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.04666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.02666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.06, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667 + } + }, + "AC3_2": 0.40654205602489846, + "AC3_3": 0.2604314002405814, + "AC3_4": 0.16540274643329292, + "AC3_5": 0.1072122951359686, + "AC3_6": 0.07157404711882255, + "AC3_7": 0.05020920500989128 + }, + "prompt_4": { + "overall_acc": 0.4428571428571428, + "language_acc": { + "Malay": 0.3466666666666667, + "English": 0.6266666666666667, + "Vietnamese": 0.3933333333333333, + "Spanish": 0.4533333333333333, + "Indonesian": 0.37333333333333335, + "Filipino": 0.38, + "Chinese": 0.5266666666666666 + }, + "consistency_score_2": 0.42603174603174604, + "consistency_score_3": 0.2331428571428572, + "consistency_score_4": 0.14666666666666664, + "consistency_score_5": 0.10253968253968254, + "consistency_score_6": 0.07714285714285715, + "consistency_score_7": 0.06, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.37333333333333335, + "Malay,Vietnamese": 0.41333333333333333, + "Malay,Spanish": 0.37333333333333335, + "Malay,Indonesian": 0.47333333333333333, + "Malay,Filipino": 0.37333333333333335, + "Malay,Chinese": 0.36666666666666664, + "English,Vietnamese": 0.41333333333333333, + "English,Spanish": 0.5066666666666667, + "English,Indonesian": 0.4066666666666667, + "English,Filipino": 0.4266666666666667, + "English,Chinese": 0.6133333333333333, + "Vietnamese,Spanish": 0.4533333333333333, + "Vietnamese,Indonesian": 0.44, + "Vietnamese,Filipino": 0.42, + "Vietnamese,Chinese": 0.41333333333333333, + "Spanish,Indonesian": 0.4066666666666667, + "Spanish,Filipino": 0.38, + "Spanish,Chinese": 0.47333333333333333, + "Indonesian,Filipino": 0.38, + "Indonesian,Chinese": 0.4066666666666667, + "Filipino,Chinese": 0.43333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.21333333333333335, + "Malay,English,Spanish": 0.2, + "Malay,English,Indonesian": 0.22666666666666666, + "Malay,English,Filipino": 0.18666666666666668, + "Malay,English,Chinese": 0.25333333333333335, + "Malay,Vietnamese,Spanish": 0.23333333333333334, + "Malay,Vietnamese,Indonesian": 0.26666666666666666, + "Malay,Vietnamese,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Chinese": 0.22, + "Malay,Spanish,Indonesian": 0.22, + "Malay,Spanish,Filipino": 0.18, + "Malay,Spanish,Chinese": 0.2, + "Malay,Indonesian,Filipino": 0.20666666666666667, + "Malay,Indonesian,Chinese": 0.24666666666666667, + "Malay,Filipino,Chinese": 0.18666666666666668, + "English,Vietnamese,Spanish": 0.2733333333333333, + "English,Vietnamese,Indonesian": 0.21333333333333335, + "English,Vietnamese,Filipino": 0.20666666666666667, + "English,Vietnamese,Chinese": 0.2866666666666667, + "English,Spanish,Indonesian": 0.24666666666666667, + "English,Spanish,Filipino": 0.24666666666666667, + "English,Spanish,Chinese": 0.36666666666666664, + "English,Indonesian,Filipino": 0.2, + "English,Indonesian,Chinese": 0.2866666666666667, + "English,Filipino,Chinese": 0.3, + "Vietnamese,Spanish,Indonesian": 0.26666666666666666, + "Vietnamese,Spanish,Filipino": 0.2, + "Vietnamese,Spanish,Chinese": 0.25333333333333335, + "Vietnamese,Indonesian,Filipino": 0.22666666666666666, + "Vietnamese,Indonesian,Chinese": 0.24, + "Vietnamese,Filipino,Chinese": 0.21333333333333335, + "Spanish,Indonesian,Filipino": 0.19333333333333333, + "Spanish,Indonesian,Chinese": 0.24, + "Spanish,Filipino,Chinese": 0.23333333333333334, + "Indonesian,Filipino,Chinese": 0.21333333333333335 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.14666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.15333333333333332, + "Malay,English,Vietnamese,Filipino": 0.10666666666666667, + "Malay,English,Vietnamese,Chinese": 0.14666666666666667, + "Malay,English,Spanish,Indonesian": 0.15333333333333332, + "Malay,English,Spanish,Filipino": 0.10666666666666667, + "Malay,English,Spanish,Chinese": 0.15333333333333332, + "Malay,English,Indonesian,Filipino": 0.12, + "Malay,English,Indonesian,Chinese": 0.17333333333333334, + "Malay,English,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.16, + "Malay,Vietnamese,Spanish,Filipino": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.14666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Filipino,Chinese": 0.12, + "Malay,Spanish,Indonesian,Filipino": 0.12, + "Malay,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Malay,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.16666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.12666666666666668, + "English,Vietnamese,Spanish,Chinese": 0.2, + "English,Vietnamese,Indonesian,Filipino": 0.12, + "English,Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "English,Vietnamese,Filipino,Chinese": 0.16, + "English,Spanish,Indonesian,Filipino": 0.15333333333333332, + "English,Spanish,Indonesian,Chinese": 0.19333333333333333, + "English,Spanish,Filipino,Chinese": 0.18666666666666668, + "English,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.14, + "Vietnamese,Spanish,Indonesian,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Filipino,Chinese": 0.13333333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.12, + "Malay,English,Vietnamese,Spanish,Filipino": 0.07333333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.08666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.08, + "Malay,English,Spanish,Indonesian,Filipino": 0.09333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.12, + "Malay,English,Spanish,Filipino,Chinese": 0.08, + "Malay,English,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.1, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.08, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.1, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.12, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.07333333333333333, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.06, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + } + }, + "AC3_2": 0.4342815386524562, + "AC3_3": 0.3054703537768143, + "AC3_4": 0.22035541191738714, + "AC3_5": 0.1665225344780324, + "AC3_6": 0.13139717422904842, + "AC3_7": 0.1056818181608019 + }, + "prompt_5": { + "overall_acc": 0.4476190476190477, + "language_acc": { + "Malay": 0.36, + "English": 0.5666666666666667, + "Vietnamese": 0.32666666666666666, + "Spanish": 0.52, + "Indonesian": 0.4266666666666667, + "Filipino": 0.3933333333333333, + "Chinese": 0.54 + }, + "consistency_score_2": 0.4149206349206348, + "consistency_score_3": 0.21638095238095234, + "consistency_score_4": 0.12761904761904763, + "consistency_score_5": 0.08095238095238096, + "consistency_score_6": 0.05333333333333333, + "consistency_score_7": 0.03333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.38, + "Malay,Vietnamese": 0.3333333333333333, + "Malay,Spanish": 0.36, + "Malay,Indonesian": 0.5066666666666667, + "Malay,Filipino": 0.36, + "Malay,Chinese": 0.38666666666666666, + "English,Vietnamese": 0.36666666666666664, + "English,Spanish": 0.5066666666666667, + "English,Indonesian": 0.44, + "English,Filipino": 0.4266666666666667, + "English,Chinese": 0.6466666666666666, + "Vietnamese,Spanish": 0.37333333333333335, + "Vietnamese,Indonesian": 0.38666666666666666, + "Vietnamese,Filipino": 0.36666666666666664, + "Vietnamese,Chinese": 0.37333333333333335, + "Spanish,Indonesian": 0.4066666666666667, + "Spanish,Filipino": 0.36666666666666664, + "Spanish,Chinese": 0.5066666666666667, + "Indonesian,Filipino": 0.36, + "Indonesian,Chinese": 0.43333333333333335, + "Filipino,Chinese": 0.4266666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.15333333333333332, + "Malay,English,Spanish": 0.22, + "Malay,English,Indonesian": 0.25333333333333335, + "Malay,English,Filipino": 0.18, + "Malay,English,Chinese": 0.2733333333333333, + "Malay,Vietnamese,Spanish": 0.14666666666666667, + "Malay,Vietnamese,Indonesian": 0.22666666666666666, + "Malay,Vietnamese,Filipino": 0.16, + "Malay,Vietnamese,Chinese": 0.16, + "Malay,Spanish,Indonesian": 0.23333333333333334, + "Malay,Spanish,Filipino": 0.13333333333333333, + "Malay,Spanish,Chinese": 0.21333333333333335, + "Malay,Indonesian,Filipino": 0.18666666666666668, + "Malay,Indonesian,Chinese": 0.26, + "Malay,Filipino,Chinese": 0.18, + "English,Vietnamese,Spanish": 0.21333333333333335, + "English,Vietnamese,Indonesian": 0.19333333333333333, + "English,Vietnamese,Filipino": 0.19333333333333333, + "English,Vietnamese,Chinese": 0.24, + "English,Spanish,Indonesian": 0.26, + "English,Spanish,Filipino": 0.23333333333333334, + "English,Spanish,Chinese": 0.3933333333333333, + "English,Indonesian,Filipino": 0.21333333333333335, + "English,Indonesian,Chinese": 0.3333333333333333, + "English,Filipino,Chinese": 0.30666666666666664, + "Vietnamese,Spanish,Indonesian": 0.18, + "Vietnamese,Spanish,Filipino": 0.18666666666666668, + "Vietnamese,Spanish,Chinese": 0.21333333333333335, + "Vietnamese,Indonesian,Filipino": 0.18666666666666668, + "Vietnamese,Indonesian,Chinese": 0.2, + "Vietnamese,Filipino,Chinese": 0.18, + "Spanish,Indonesian,Filipino": 0.17333333333333334, + "Spanish,Indonesian,Chinese": 0.26666666666666666, + "Spanish,Filipino,Chinese": 0.23333333333333334, + "Indonesian,Filipino,Chinese": 0.19333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.09333333333333334, + "Malay,English,Vietnamese,Indonesian": 0.11333333333333333, + "Malay,English,Vietnamese,Filipino": 0.08666666666666667, + "Malay,English,Vietnamese,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Indonesian": 0.16666666666666666, + "Malay,English,Spanish,Filipino": 0.08666666666666667, + "Malay,English,Spanish,Chinese": 0.16666666666666666, + "Malay,English,Indonesian,Filipino": 0.11333333333333333, + "Malay,English,Indonesian,Chinese": 0.19333333333333333, + "Malay,English,Filipino,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Indonesian": 0.11333333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.08, + "Malay,Vietnamese,Spanish,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.1, + "Malay,Vietnamese,Indonesian,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Filipino,Chinese": 0.08, + "Malay,Spanish,Indonesian,Filipino": 0.09333333333333334, + "Malay,Spanish,Indonesian,Chinese": 0.16, + "Malay,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,Indonesian,Filipino,Chinese": 0.12, + "English,Vietnamese,Spanish,Indonesian": 0.12666666666666668, + "English,Vietnamese,Spanish,Filipino": 0.13333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.16, + "English,Vietnamese,Indonesian,Filipino": 0.11333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.15333333333333332, + "English,Vietnamese,Filipino,Chinese": 0.14, + "English,Spanish,Indonesian,Filipino": 0.12666666666666668, + "English,Spanish,Indonesian,Chinese": 0.22666666666666666, + "English,Spanish,Filipino,Chinese": 0.2, + "English,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino": 0.1, + "Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Vietnamese,Spanish,Filipino,Chinese": 0.12, + "Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.08, + "Malay,English,Vietnamese,Spanish,Filipino": 0.05333333333333334, + "Malay,English,Vietnamese,Spanish,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.06, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.08666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.06666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.06666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.05333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.08, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.04666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.06, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.08, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.1, + "English,Spanish,Indonesian,Filipino,Chinese": 0.12, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.04, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.04, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.06, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 + } + }, + "AC3_2": 0.4306500516527551, + "AC3_3": 0.2917356500393904, + "AC3_4": 0.19861242506796437, + "AC3_5": 0.13710853708259765, + "AC3_6": 0.09531051962609453, + "AC3_7": 0.06204620460756135 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3141233766233766, + "language_acc": { + "English": 0.4318181818181818, + "Vietnamese": 0.23863636363636365, + "Chinese": 0.4318181818181818, + "Indonesian": 0.26704545454545453, + "Filipino": 0.2159090909090909, + "Spanish": 0.3522727272727273, + "Malay": 0.26136363636363635 + }, + "consistency_score_2": 0.3541666666666667, + "consistency_score_3": 0.16233766233766234, + "consistency_score_4": 0.09074675324675326, + "consistency_score_5": 0.05898268398268397, + "consistency_score_6": 0.04301948051948052, + "consistency_score_7": 0.03409090909090909, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.36363636363636365, + "English,Chinese": 0.44886363636363635, + "English,Indonesian": 0.3693181818181818, + "English,Filipino": 0.2784090909090909, + "English,Spanish": 0.38636363636363635, + "English,Malay": 0.3352272727272727, + "Vietnamese,Chinese": 0.3068181818181818, + "Vietnamese,Indonesian": 0.3806818181818182, + "Vietnamese,Filipino": 0.4034090909090909, + "Vietnamese,Spanish": 0.3125, + "Vietnamese,Malay": 0.3693181818181818, + "Chinese,Indonesian": 0.3465909090909091, + "Chinese,Filipino": 0.2784090909090909, + "Chinese,Spanish": 0.42045454545454547, + "Chinese,Malay": 0.32954545454545453, + "Indonesian,Filipino": 0.3409090909090909, + "Indonesian,Spanish": 0.3352272727272727, + "Indonesian,Malay": 0.4147727272727273, + "Filipino,Spanish": 0.29545454545454547, + "Filipino,Malay": 0.3806818181818182, + "Spanish,Malay": 0.3409090909090909 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.17613636363636365, + "English,Vietnamese,Indonesian": 0.20454545454545456, + "English,Vietnamese,Filipino": 0.17045454545454544, + "English,Vietnamese,Spanish": 0.17045454545454544, + "English,Vietnamese,Malay": 0.16477272727272727, + "English,Chinese,Indonesian": 0.19318181818181818, + "English,Chinese,Filipino": 0.14772727272727273, + "English,Chinese,Spanish": 0.23295454545454544, + "English,Chinese,Malay": 0.18181818181818182, + "English,Indonesian,Filipino": 0.13636363636363635, + "English,Indonesian,Spanish": 0.1875, + "English,Indonesian,Malay": 0.18181818181818182, + "English,Filipino,Spanish": 0.13068181818181818, + "English,Filipino,Malay": 0.14772727272727273, + "English,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian": 0.13636363636363635, + "Vietnamese,Chinese,Filipino": 0.13068181818181818, + "Vietnamese,Chinese,Spanish": 0.13068181818181818, + "Vietnamese,Chinese,Malay": 0.14204545454545456, + "Vietnamese,Indonesian,Filipino": 0.17613636363636365, + "Vietnamese,Indonesian,Spanish": 0.1534090909090909, + "Vietnamese,Indonesian,Malay": 0.2159090909090909, + "Vietnamese,Filipino,Spanish": 0.13636363636363635, + "Vietnamese,Filipino,Malay": 0.19318181818181818, + "Vietnamese,Spanish,Malay": 0.16477272727272727, + "Chinese,Indonesian,Filipino": 0.10795454545454546, + "Chinese,Indonesian,Spanish": 0.17045454545454544, + "Chinese,Indonesian,Malay": 0.1590909090909091, + "Chinese,Filipino,Spanish": 0.11931818181818182, + "Chinese,Filipino,Malay": 0.14204545454545456, + "Chinese,Spanish,Malay": 0.1875, + "Indonesian,Filipino,Spanish": 0.13068181818181818, + "Indonesian,Filipino,Malay": 0.17613636363636365, + "Indonesian,Spanish,Malay": 0.18181818181818182, + "Filipino,Spanish,Malay": 0.14204545454545456 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino": 0.09090909090909091, + "English,Vietnamese,Chinese,Spanish": 0.09090909090909091, + "English,Vietnamese,Chinese,Malay": 0.10795454545454546, + "English,Vietnamese,Indonesian,Filipino": 0.10227272727272728, + "English,Vietnamese,Indonesian,Spanish": 0.10795454545454546, + "English,Vietnamese,Indonesian,Malay": 0.11931818181818182, + "English,Vietnamese,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Spanish,Malay": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino": 0.06818181818181818, + "English,Chinese,Indonesian,Spanish": 0.11931818181818182, + "English,Chinese,Indonesian,Malay": 0.10227272727272728, + "English,Chinese,Filipino,Spanish": 0.08522727272727272, + "English,Chinese,Filipino,Malay": 0.10227272727272728, + "English,Chinese,Spanish,Malay": 0.11363636363636363, + "English,Indonesian,Filipino,Spanish": 0.07386363636363637, + "English,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Indonesian,Spanish,Malay": 0.09659090909090909, + "English,Filipino,Spanish,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Filipino": 0.0625, + "Vietnamese,Chinese,Indonesian,Spanish": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Filipino,Spanish": 0.0625, + "Vietnamese,Chinese,Filipino,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "Vietnamese,Indonesian,Filipino,Malay": 0.10795454545454546, + "Vietnamese,Indonesian,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Filipino,Spanish,Malay": 0.08522727272727272, + "Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "Chinese,Indonesian,Filipino,Malay": 0.07386363636363637, + "Chinese,Indonesian,Spanish,Malay": 0.10227272727272728, + "Chinese,Filipino,Spanish,Malay": 0.07954545454545454, + "Indonesian,Filipino,Spanish,Malay": 0.07386363636363637 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "English,Chinese,Indonesian,Filipino,Malay": 0.056818181818181816, + "English,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Chinese,Filipino,Spanish,Malay": 0.0625, + "English,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.05113636363636364, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + } + }, + "AC3_2": 0.33294534407973414, + "AC3_3": 0.21405340821631902, + "AC3_4": 0.14081392741707846, + "AC3_5": 0.09931674559179456, + "AC3_6": 0.07567517707444259, + "AC3_7": 0.06150667512537532 + }, + "prompt_2": { + "overall_acc": 0.3538961038961039, + "language_acc": { + "English": 0.4147727272727273, + "Vietnamese": 0.3181818181818182, + "Chinese": 0.4431818181818182, + "Indonesian": 0.29545454545454547, + "Filipino": 0.3125, + "Spanish": 0.3352272727272727, + "Malay": 0.35795454545454547 + }, + "consistency_score_2": 0.3844696969696971, + "consistency_score_3": 0.187987012987013, + "consistency_score_4": 0.10616883116883116, + "consistency_score_5": 0.06412337662337661, + "consistency_score_6": 0.03896103896103896, + "consistency_score_7": 0.022727272727272728, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4034090909090909, + "English,Chinese": 0.5454545454545454, + "English,Indonesian": 0.4090909090909091, + "English,Filipino": 0.3465909090909091, + "English,Spanish": 0.4772727272727273, + "English,Malay": 0.42613636363636365, + "Vietnamese,Chinese": 0.4147727272727273, + "Vietnamese,Indonesian": 0.35795454545454547, + "Vietnamese,Filipino": 0.4715909090909091, + "Vietnamese,Spanish": 0.30113636363636365, + "Vietnamese,Malay": 0.4034090909090909, + "Chinese,Indonesian": 0.3409090909090909, + "Chinese,Filipino": 0.4090909090909091, + "Chinese,Spanish": 0.375, + "Chinese,Malay": 0.3806818181818182, + "Indonesian,Filipino": 0.29545454545454547, + "Indonesian,Spanish": 0.38636363636363635, + "Indonesian,Malay": 0.35795454545454547, + "Filipino,Spanish": 0.3125, + "Filipino,Malay": 0.29545454545454547, + "Spanish,Malay": 0.36363636363636365 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2727272727272727, + "English,Vietnamese,Indonesian": 0.20454545454545456, + "English,Vietnamese,Filipino": 0.21022727272727273, + "English,Vietnamese,Spanish": 0.20454545454545456, + "English,Vietnamese,Malay": 0.22727272727272727, + "English,Chinese,Indonesian": 0.23863636363636365, + "English,Chinese,Filipino": 0.22727272727272727, + "English,Chinese,Spanish": 0.26704545454545453, + "English,Chinese,Malay": 0.2784090909090909, + "English,Indonesian,Filipino": 0.1590909090909091, + "English,Indonesian,Spanish": 0.24431818181818182, + "English,Indonesian,Malay": 0.2215909090909091, + "English,Filipino,Spanish": 0.1875, + "English,Filipino,Malay": 0.17045454545454544, + "English,Spanish,Malay": 0.2215909090909091, + "Vietnamese,Chinese,Indonesian": 0.16477272727272727, + "Vietnamese,Chinese,Filipino": 0.2215909090909091, + "Vietnamese,Chinese,Spanish": 0.17613636363636365, + "Vietnamese,Chinese,Malay": 0.19886363636363635, + "Vietnamese,Indonesian,Filipino": 0.16477272727272727, + "Vietnamese,Indonesian,Spanish": 0.1534090909090909, + "Vietnamese,Indonesian,Malay": 0.19318181818181818, + "Vietnamese,Filipino,Spanish": 0.125, + "Vietnamese,Filipino,Malay": 0.17613636363636365, + "Vietnamese,Spanish,Malay": 0.18181818181818182, + "Chinese,Indonesian,Filipino": 0.1534090909090909, + "Chinese,Indonesian,Spanish": 0.1590909090909091, + "Chinese,Indonesian,Malay": 0.1590909090909091, + "Chinese,Filipino,Spanish": 0.16477272727272727, + "Chinese,Filipino,Malay": 0.1534090909090909, + "Chinese,Spanish,Malay": 0.17045454545454544, + "Indonesian,Filipino,Spanish": 0.10795454545454546, + "Indonesian,Filipino,Malay": 0.11363636363636363, + "Indonesian,Spanish,Malay": 0.19318181818181818, + "Filipino,Spanish,Malay": 0.11363636363636363 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.14204545454545456, + "English,Vietnamese,Chinese,Filipino": 0.13636363636363635, + "English,Vietnamese,Chinese,Spanish": 0.14772727272727273, + "English,Vietnamese,Chinese,Malay": 0.1590909090909091, + "English,Vietnamese,Indonesian,Filipino": 0.10795454545454546, + "English,Vietnamese,Indonesian,Spanish": 0.11363636363636363, + "English,Vietnamese,Indonesian,Malay": 0.14204545454545456, + "English,Vietnamese,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Spanish,Malay": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino": 0.10795454545454546, + "English,Chinese,Indonesian,Spanish": 0.13636363636363635, + "English,Chinese,Indonesian,Malay": 0.14204545454545456, + "English,Chinese,Filipino,Spanish": 0.13068181818181818, + "English,Chinese,Filipino,Malay": 0.125, + "English,Chinese,Spanish,Malay": 0.13636363636363635, + "English,Indonesian,Filipino,Spanish": 0.07954545454545454, + "English,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Indonesian,Spanish,Malay": 0.13636363636363635, + "English,Filipino,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Filipino": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Filipino,Spanish": 0.09090909090909091, + "Vietnamese,Chinese,Filipino,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "Vietnamese,Indonesian,Filipino,Malay": 0.07386363636363637, + "Vietnamese,Indonesian,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Filipino,Spanish,Malay": 0.06818181818181818, + "Chinese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "Chinese,Indonesian,Filipino,Malay": 0.06818181818181818, + "Chinese,Indonesian,Spanish,Malay": 0.09090909090909091, + "Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.06818181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.10795454545454546, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Spanish,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.0625, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Filipino,Spanish,Malay": 0.0625, + "English,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "English,Chinese,Indonesian,Filipino,Malay": 0.0625, + "English,Chinese,Indonesian,Spanish,Malay": 0.07954545454545454, + "English,Chinese,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0625, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + } + }, + "AC3_2": 0.3685499183351086, + "AC3_3": 0.24554325238829297, + "AC3_4": 0.16333666330116037, + "AC3_5": 0.10857395029554971, + "AC3_6": 0.07019426852350834, + "AC3_7": 0.04271159873474085 + }, + "prompt_3": { + "overall_acc": 0.3474025974025974, + "language_acc": { + "English": 0.3693181818181818, + "Vietnamese": 0.32386363636363635, + "Chinese": 0.4431818181818182, + "Indonesian": 0.2897727272727273, + "Filipino": 0.32386363636363635, + "Spanish": 0.3409090909090909, + "Malay": 0.3409090909090909 + }, + "consistency_score_2": 0.4017857142857143, + "consistency_score_3": 0.2032467532467533, + "consistency_score_4": 0.11801948051948052, + "consistency_score_5": 0.0744047619047619, + "consistency_score_6": 0.049512987012987016, + "consistency_score_7": 0.03409090909090909, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4147727272727273, + "English,Chinese": 0.48863636363636365, + "English,Indonesian": 0.42613636363636365, + "English,Filipino": 0.4090909090909091, + "English,Spanish": 0.45454545454545453, + "English,Malay": 0.38636363636363635, + "Vietnamese,Chinese": 0.4034090909090909, + "Vietnamese,Indonesian": 0.3806818181818182, + "Vietnamese,Filipino": 0.4431818181818182, + "Vietnamese,Spanish": 0.3352272727272727, + "Vietnamese,Malay": 0.35795454545454547, + "Chinese,Indonesian": 0.39204545454545453, + "Chinese,Filipino": 0.42045454545454547, + "Chinese,Spanish": 0.42045454545454547, + "Chinese,Malay": 0.4602272727272727, + "Indonesian,Filipino": 0.3693181818181818, + "Indonesian,Spanish": 0.3977272727272727, + "Indonesian,Malay": 0.3806818181818182, + "Filipino,Spanish": 0.3409090909090909, + "Filipino,Malay": 0.3806818181818182, + "Spanish,Malay": 0.375 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.23295454545454544, + "English,Vietnamese,Indonesian": 0.19886363636363635, + "English,Vietnamese,Filipino": 0.2215909090909091, + "English,Vietnamese,Spanish": 0.1875, + "English,Vietnamese,Malay": 0.18181818181818182, + "English,Chinese,Indonesian": 0.23863636363636365, + "English,Chinese,Filipino": 0.2556818181818182, + "English,Chinese,Spanish": 0.2556818181818182, + "English,Chinese,Malay": 0.26136363636363635, + "English,Indonesian,Filipino": 0.19318181818181818, + "English,Indonesian,Spanish": 0.23863636363636365, + "English,Indonesian,Malay": 0.21022727272727273, + "English,Filipino,Spanish": 0.19886363636363635, + "English,Filipino,Malay": 0.21022727272727273, + "English,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian": 0.18181818181818182, + "Vietnamese,Chinese,Filipino": 0.2159090909090909, + "Vietnamese,Chinese,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Malay": 0.2215909090909091, + "Vietnamese,Indonesian,Filipino": 0.1875, + "Vietnamese,Indonesian,Spanish": 0.1875, + "Vietnamese,Indonesian,Malay": 0.19318181818181818, + "Vietnamese,Filipino,Spanish": 0.1590909090909091, + "Vietnamese,Filipino,Malay": 0.17045454545454544, + "Vietnamese,Spanish,Malay": 0.1534090909090909, + "Chinese,Indonesian,Filipino": 0.19318181818181818, + "Chinese,Indonesian,Spanish": 0.2159090909090909, + "Chinese,Indonesian,Malay": 0.2215909090909091, + "Chinese,Filipino,Spanish": 0.1875, + "Chinese,Filipino,Malay": 0.23295454545454544, + "Chinese,Spanish,Malay": 0.22727272727272727, + "Indonesian,Filipino,Spanish": 0.17045454545454544, + "Indonesian,Filipino,Malay": 0.1590909090909091, + "Indonesian,Spanish,Malay": 0.1875, + "Filipino,Spanish,Malay": 0.1534090909090909 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.125, + "English,Vietnamese,Chinese,Filipino": 0.14772727272727273, + "English,Vietnamese,Chinese,Spanish": 0.13068181818181818, + "English,Vietnamese,Chinese,Malay": 0.13068181818181818, + "English,Vietnamese,Indonesian,Filipino": 0.10795454545454546, + "English,Vietnamese,Indonesian,Spanish": 0.11931818181818182, + "English,Vietnamese,Indonesian,Malay": 0.11931818181818182, + "English,Vietnamese,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Spanish,Malay": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino": 0.125, + "English,Chinese,Indonesian,Spanish": 0.1590909090909091, + "English,Chinese,Indonesian,Malay": 0.14204545454545456, + "English,Chinese,Filipino,Spanish": 0.13068181818181818, + "English,Chinese,Filipino,Malay": 0.1590909090909091, + "English,Chinese,Spanish,Malay": 0.1534090909090909, + "English,Indonesian,Filipino,Spanish": 0.11931818181818182, + "English,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Indonesian,Spanish,Malay": 0.14772727272727273, + "English,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Spanish": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Filipino,Spanish": 0.10227272727272728, + "Vietnamese,Chinese,Filipino,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino,Malay": 0.07954545454545454, + "Vietnamese,Indonesian,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Filipino,Spanish,Malay": 0.06818181818181818, + "Chinese,Indonesian,Filipino,Spanish": 0.125, + "Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.125, + "Chinese,Filipino,Spanish,Malay": 0.11931818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.09090909090909091 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.09090909090909091, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.056818181818181816, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.09090909090909091, + "English,Vietnamese,Filipino,Spanish,Malay": 0.03977272727272727, + "English,Chinese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "English,Chinese,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Chinese,Indonesian,Spanish,Malay": 0.10227272727272728, + "English,Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + } + }, + "AC3_2": 0.37262033735933325, + "AC3_3": 0.2564552192624197, + "AC3_4": 0.1761853423490919, + "AC3_5": 0.1225602492149649, + "AC3_6": 0.08667304063893408, + "AC3_7": 0.06208897483865706 + }, + "prompt_4": { + "overall_acc": 0.35714285714285715, + "language_acc": { + "English": 0.4431818181818182, + "Vietnamese": 0.32954545454545453, + "Chinese": 0.4659090909090909, + "Indonesian": 0.2897727272727273, + "Filipino": 0.2840909090909091, + "Spanish": 0.375, + "Malay": 0.3125 + }, + "consistency_score_2": 0.39015151515151514, + "consistency_score_3": 0.19090909090909092, + "consistency_score_4": 0.10551948051948054, + "consistency_score_5": 0.06195887445887444, + "consistency_score_6": 0.037337662337662336, + "consistency_score_7": 0.022727272727272728, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.38636363636363635, + "English,Chinese": 0.5113636363636364, + "English,Indonesian": 0.4090909090909091, + "English,Filipino": 0.3522727272727273, + "English,Spanish": 0.48295454545454547, + "English,Malay": 0.3522727272727273, + "Vietnamese,Chinese": 0.4034090909090909, + "Vietnamese,Indonesian": 0.4090909090909091, + "Vietnamese,Filipino": 0.48863636363636365, + "Vietnamese,Spanish": 0.3181818181818182, + "Vietnamese,Malay": 0.4090909090909091, + "Chinese,Indonesian": 0.39204545454545453, + "Chinese,Filipino": 0.3806818181818182, + "Chinese,Spanish": 0.4090909090909091, + "Chinese,Malay": 0.3409090909090909, + "Indonesian,Filipino": 0.39204545454545453, + "Indonesian,Spanish": 0.36363636363636365, + "Indonesian,Malay": 0.4147727272727273, + "Filipino,Spanish": 0.2897727272727273, + "Filipino,Malay": 0.38636363636363635, + "Spanish,Malay": 0.30113636363636365 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.25, + "English,Vietnamese,Indonesian": 0.19886363636363635, + "English,Vietnamese,Filipino": 0.20454545454545456, + "English,Vietnamese,Spanish": 0.19886363636363635, + "English,Vietnamese,Malay": 0.1875, + "English,Chinese,Indonesian": 0.22727272727272727, + "English,Chinese,Filipino": 0.2159090909090909, + "English,Chinese,Spanish": 0.2784090909090909, + "English,Chinese,Malay": 0.19886363636363635, + "English,Indonesian,Filipino": 0.19318181818181818, + "English,Indonesian,Spanish": 0.22727272727272727, + "English,Indonesian,Malay": 0.1875, + "English,Filipino,Spanish": 0.19886363636363635, + "English,Filipino,Malay": 0.1590909090909091, + "English,Spanish,Malay": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian": 0.20454545454545456, + "Vietnamese,Chinese,Filipino": 0.21022727272727273, + "Vietnamese,Chinese,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Malay": 0.19318181818181818, + "Vietnamese,Indonesian,Filipino": 0.2215909090909091, + "Vietnamese,Indonesian,Spanish": 0.16477272727272727, + "Vietnamese,Indonesian,Malay": 0.2159090909090909, + "Vietnamese,Filipino,Spanish": 0.17045454545454544, + "Vietnamese,Filipino,Malay": 0.23295454545454544, + "Vietnamese,Spanish,Malay": 0.1534090909090909, + "Chinese,Indonesian,Filipino": 0.17045454545454544, + "Chinese,Indonesian,Spanish": 0.1875, + "Chinese,Indonesian,Malay": 0.1590909090909091, + "Chinese,Filipino,Spanish": 0.1590909090909091, + "Chinese,Filipino,Malay": 0.1534090909090909, + "Chinese,Spanish,Malay": 0.1534090909090909, + "Indonesian,Filipino,Spanish": 0.1534090909090909, + "Indonesian,Filipino,Malay": 0.19886363636363635, + "Indonesian,Spanish,Malay": 0.1590909090909091, + "Filipino,Spanish,Malay": 0.14772727272727273 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino": 0.125, + "English,Vietnamese,Chinese,Spanish": 0.125, + "English,Vietnamese,Chinese,Malay": 0.13068181818181818, + "English,Vietnamese,Indonesian,Filipino": 0.11931818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.11363636363636363, + "English,Vietnamese,Indonesian,Malay": 0.11931818181818182, + "English,Vietnamese,Filipino,Spanish": 0.11363636363636363, + "English,Vietnamese,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino": 0.09659090909090909, + "English,Chinese,Indonesian,Spanish": 0.14204545454545456, + "English,Chinese,Indonesian,Malay": 0.09659090909090909, + "English,Chinese,Filipino,Spanish": 0.11931818181818182, + "English,Chinese,Filipino,Malay": 0.09090909090909091, + "English,Chinese,Spanish,Malay": 0.125, + "English,Indonesian,Filipino,Spanish": 0.10795454545454546, + "English,Indonesian,Filipino,Malay": 0.10227272727272728, + "English,Indonesian,Spanish,Malay": 0.10227272727272728, + "English,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Filipino,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Filipino,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino,Malay": 0.11363636363636363, + "Vietnamese,Indonesian,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Filipino,Spanish,Malay": 0.08522727272727272, + "Chinese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "Chinese,Indonesian,Spanish,Malay": 0.07386363636363637, + "Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "Indonesian,Filipino,Spanish,Malay": 0.09090909090909091 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.06818181818181818, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.0625, + "English,Vietnamese,Chinese,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Spanish,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.0625, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Filipino,Spanish,Malay": 0.0625, + "English,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino,Malay": 0.045454545454545456, + "English,Chinese,Indonesian,Spanish,Malay": 0.0625, + "English,Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Indonesian,Filipino,Spanish,Malay": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0625, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0625, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0625, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.03977272727272727, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + } + }, + "AC3_2": 0.37291817518543424, + "AC3_3": 0.24881516583137733, + "AC3_4": 0.16290726813521528, + "AC3_5": 0.10559808168650708, + "AC3_6": 0.06760728981237374, + "AC3_7": 0.04273504272379283 + }, + "prompt_5": { + "overall_acc": 0.33116883116883117, + "language_acc": { + "English": 0.3977272727272727, + "Vietnamese": 0.29545454545454547, + "Chinese": 0.45454545454545453, + "Indonesian": 0.25, + "Filipino": 0.26704545454545453, + "Spanish": 0.30113636363636365, + "Malay": 0.3522727272727273 + }, + "consistency_score_2": 0.3758116883116883, + "consistency_score_3": 0.17759740259740261, + "consistency_score_4": 0.09350649350649352, + "consistency_score_5": 0.04924242424242425, + "consistency_score_6": 0.022727272727272724, + "consistency_score_7": 0.005681818181818182, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4090909090909091, + "English,Chinese": 0.4659090909090909, + "English,Indonesian": 0.3125, + "English,Filipino": 0.3522727272727273, + "English,Spanish": 0.375, + "English,Malay": 0.36363636363636365, + "Vietnamese,Chinese": 0.42613636363636365, + "Vietnamese,Indonesian": 0.4034090909090909, + "Vietnamese,Filipino": 0.4147727272727273, + "Vietnamese,Spanish": 0.32386363636363635, + "Vietnamese,Malay": 0.42613636363636365, + "Chinese,Indonesian": 0.3806818181818182, + "Chinese,Filipino": 0.3465909090909091, + "Chinese,Spanish": 0.3806818181818182, + "Chinese,Malay": 0.42045454545454547, + "Indonesian,Filipino": 0.32386363636363635, + "Indonesian,Spanish": 0.32954545454545453, + "Indonesian,Malay": 0.45454545454545453, + "Filipino,Spanish": 0.3181818181818182, + "Filipino,Malay": 0.32954545454545453, + "Spanish,Malay": 0.3352272727272727 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.24431818181818182, + "English,Vietnamese,Indonesian": 0.17613636363636365, + "English,Vietnamese,Filipino": 0.1875, + "English,Vietnamese,Spanish": 0.16477272727272727, + "English,Vietnamese,Malay": 0.19886363636363635, + "English,Chinese,Indonesian": 0.19318181818181818, + "English,Chinese,Filipino": 0.1875, + "English,Chinese,Spanish": 0.2215909090909091, + "English,Chinese,Malay": 0.23863636363636365, + "English,Indonesian,Filipino": 0.14204545454545456, + "English,Indonesian,Spanish": 0.13068181818181818, + "English,Indonesian,Malay": 0.1875, + "English,Filipino,Spanish": 0.14772727272727273, + "English,Filipino,Malay": 0.14772727272727273, + "English,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian": 0.19886363636363635, + "Vietnamese,Chinese,Filipino": 0.19886363636363635, + "Vietnamese,Chinese,Spanish": 0.17613636363636365, + "Vietnamese,Chinese,Malay": 0.25, + "Vietnamese,Indonesian,Filipino": 0.19318181818181818, + "Vietnamese,Indonesian,Spanish": 0.1590909090909091, + "Vietnamese,Indonesian,Malay": 0.23295454545454544, + "Vietnamese,Filipino,Spanish": 0.1534090909090909, + "Vietnamese,Filipino,Malay": 0.1875, + "Vietnamese,Spanish,Malay": 0.1590909090909091, + "Chinese,Indonesian,Filipino": 0.14772727272727273, + "Chinese,Indonesian,Spanish": 0.1534090909090909, + "Chinese,Indonesian,Malay": 0.2159090909090909, + "Chinese,Filipino,Spanish": 0.1534090909090909, + "Chinese,Filipino,Malay": 0.17045454545454544, + "Chinese,Spanish,Malay": 0.17045454545454544, + "Indonesian,Filipino,Spanish": 0.11931818181818182, + "Indonesian,Filipino,Malay": 0.16477272727272727, + "Indonesian,Spanish,Malay": 0.17613636363636365, + "Filipino,Spanish,Malay": 0.11931818181818182 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.11363636363636363, + "English,Vietnamese,Chinese,Filipino": 0.11363636363636363, + "English,Vietnamese,Chinese,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Malay": 0.14772727272727273, + "English,Vietnamese,Indonesian,Filipino": 0.10795454545454546, + "English,Vietnamese,Indonesian,Spanish": 0.08522727272727272, + "English,Vietnamese,Indonesian,Malay": 0.11931818181818182, + "English,Vietnamese,Filipino,Spanish": 0.07386363636363637, + "English,Vietnamese,Filipino,Malay": 0.09659090909090909, + "English,Vietnamese,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino": 0.08522727272727272, + "English,Chinese,Indonesian,Spanish": 0.09090909090909091, + "English,Chinese,Indonesian,Malay": 0.13636363636363635, + "English,Chinese,Filipino,Spanish": 0.08522727272727272, + "English,Chinese,Filipino,Malay": 0.10227272727272728, + "English,Chinese,Spanish,Malay": 0.11363636363636363, + "English,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Indonesian,Spanish,Malay": 0.07386363636363637, + "English,Filipino,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Filipino": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Filipino,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Filipino,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "Vietnamese,Indonesian,Filipino,Malay": 0.10795454545454546, + "Vietnamese,Indonesian,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Filipino,Spanish,Malay": 0.07386363636363637, + "Chinese,Indonesian,Filipino,Spanish": 0.0625, + "Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "Chinese,Filipino,Spanish,Malay": 0.07954545454545454, + "Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.045454545454545456, + "English,Vietnamese,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "English,Chinese,Indonesian,Filipino,Malay": 0.0625, + "English,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Filipino,Spanish,Malay": 0.03977272727272727, + "English,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.0625, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.05113636363636364, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0625, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.017045454545454544, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.005681818181818182 + } + }, + "AC3_2": 0.3520807550160291, + "AC3_3": 0.231205297528711, + "AC3_4": 0.14583581552624567, + "AC3_5": 0.08573645413496896, + "AC3_6": 0.04253544619315178, + "AC3_7": 0.01117196056623434 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5533980582524272 + }, + "prompt_2": { + "accuracy": 0.5242718446601942 + }, + "prompt_3": { + "accuracy": 0.49514563106796117 + }, + "prompt_4": { + "accuracy": 0.5922330097087378 + }, + "prompt_5": { + "accuracy": 0.5436893203883495 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6190476190476191 + }, + "prompt_2": { + "accuracy": 0.5904761904761905 + }, + "prompt_3": { + "accuracy": 0.6 + }, + "prompt_4": { + "accuracy": 0.6285714285714286 + }, + "prompt_5": { + "accuracy": 0.6571428571428571 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5514018691588785 + }, + "prompt_2": { + "accuracy": 0.5327102803738317 + }, + "prompt_3": { + "accuracy": 0.5420560747663551 + }, + "prompt_4": { + "accuracy": 0.5700934579439252 + }, + "prompt_5": { + "accuracy": 0.5514018691588785 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.43, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.3, + "history": 0.5333333333333333, + "literature": 0.3, + "politics": 0.6, + "culture": 0.4, + "film": 0.6, + "law": 0.4, + "geography": 0.6 + } + }, + "prompt_2": { + "accuracy": 0.37, + "category_acc": { + "brand": 0.2, + "demographics": 0.0, + "biology": 0.2, + "history": 0.4, + "literature": 0.2, + "politics": 0.5, + "culture": 0.4, + "film": 0.6, + "law": 0.3, + "geography": 0.7 + } + }, + "prompt_3": { + "accuracy": 0.39, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.4, + "history": 0.3333333333333333, + "literature": 0.3, + "politics": 0.5, + "culture": 0.5, + "film": 0.5, + "law": 0.4, + "geography": 0.5 + } + }, + "prompt_4": { + "accuracy": 0.41, + "category_acc": { + "brand": 0.4, + "demographics": 0.2, + "biology": 0.3, + "history": 0.4, + "literature": 0.3, + "politics": 0.6, + "culture": 0.4, + "film": 0.6, + "law": 0.4, + "geography": 0.4 + } + }, + "prompt_5": { + "accuracy": 0.41, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.4, + "history": 0.3333333333333333, + "literature": 0.3, + "politics": 0.6, + "culture": 0.5, + "film": 0.6, + "law": 0.3, + "geography": 0.6 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.19956552468355584 + }, + "prompt_2": { + "bleu_score": 0.19247481140586278 + }, + "prompt_3": { + "bleu_score": 0.207943541336382 + }, + "prompt_4": { + "bleu_score": 0.21379792586246837 + }, + "prompt_5": { + "bleu_score": 0.1533961734231915 + } }, "indommlu": { "prompt_1": -1, @@ -7496,179 +64815,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.2546496413072678 + }, + "prompt_2": { + "bleu_score": 0.26012465747946306 + }, + "prompt_3": { + "bleu_score": 0.2609338182169855 + }, + "prompt_4": { + "bleu_score": 0.2579920449034081 + }, + "prompt_5": { + "bleu_score": 0.2422056321348279 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.16822817258453684 + }, + "prompt_2": { + "bleu_score": 0.17750270654441427 + }, + "prompt_3": { + "bleu_score": 0.1749983172111752 + }, + "prompt_4": { + "bleu_score": 0.17369387876269585 + }, + "prompt_5": { + "bleu_score": 0.1637592005122268 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.2091251346952727 + }, + "prompt_2": { + "bleu_score": 0.21675772074960933 + }, + "prompt_3": { + "bleu_score": 0.2123688473081141 + }, + "prompt_4": { + "bleu_score": 0.21015870482468962 + }, + "prompt_5": { + "bleu_score": 0.21022909393234615 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.2334759972145647 + }, + "prompt_2": { + "bleu_score": 0.23392294465236774 + }, + "prompt_3": { + "bleu_score": 0.23237535868073753 + }, + "prompt_4": { + "bleu_score": 0.2298415486227988 + }, + "prompt_5": { + "bleu_score": 0.21288407444173865 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.48424737456242706 + }, + "prompt_2": { + "accuracy": 0.5075845974329055 + }, + "prompt_3": { + "accuracy": 0.5110851808634772 + }, + "prompt_4": { + "accuracy": 0.5029171528588098 + }, + "prompt_5": { + "accuracy": 0.5110851808634772 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4699320700750804, + "category_acc": { + "high_school_european_history": 0.6158536585365854, + "business_ethics": 0.6060606060606061, + "clinical_knowledge": 0.42045454545454547, + "medical_genetics": 0.6060606060606061, + "high_school_us_history": 0.6551724137931034, + "high_school_physics": 0.30666666666666664, + "high_school_world_history": 0.6398305084745762, + "virology": 0.45454545454545453, + "high_school_microeconomics": 0.5316455696202531, + "econometrics": 0.2743362831858407, + "college_computer_science": 0.35353535353535354, + "high_school_biology": 0.5566343042071198, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.3309608540925267, + "philosophy": 0.49032258064516127, + "professional_medicine": 0.4575645756457565, + "nutrition": 0.5278688524590164, + "global_facts": 0.3434343434343434, + "machine_learning": 0.2882882882882883, + "security_studies": 0.4057377049180328, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.4451718494271686, + "prehistory": 0.5479876160990712, + "anatomy": 0.417910447761194, + "human_sexuality": 0.6, + "college_medicine": 0.43023255813953487, + "high_school_government_and_politics": 0.6770833333333334, + "college_chemistry": 0.42424242424242425, + "logical_fallacies": 0.5987654320987654, + "high_school_geography": 0.5888324873096447, + "elementary_mathematics": 0.40848806366047746, + "human_aging": 0.5540540540540541, + "college_mathematics": 0.24242424242424243, + "high_school_psychology": 0.6544117647058824, + "formal_logic": 0.36, + "high_school_statistics": 0.3488372093023256, + "international_law": 0.6, + "high_school_mathematics": 0.275092936802974, + "high_school_computer_science": 0.43434343434343436, + "conceptual_physics": 0.38461538461538464, + "miscellaneous": 0.710997442455243, + "high_school_chemistry": 0.400990099009901, + "marketing": 0.5493562231759657, + "professional_law": 0.3411611219830398, + "management": 0.6176470588235294, + "college_physics": 0.21782178217821782, + "jurisprudence": 0.4672897196261682, + "world_religions": 0.7411764705882353, + "sociology": 0.68, + "us_foreign_policy": 0.6262626262626263, + "high_school_macroeconomics": 0.42159383033419023, + "computer_security": 0.6060606060606061, + "moral_scenarios": 0.27069351230425054, + "moral_disputes": 0.48695652173913045, + "electrical_engineering": 0.3958333333333333, + "astronomy": 0.5298013245033113, + "college_biology": 0.4965034965034965 + } + }, + "prompt_2": { + "accuracy": 0.4840900965319986, + "category_acc": { + "high_school_european_history": 0.6158536585365854, + "business_ethics": 0.5959595959595959, + "clinical_knowledge": 0.4734848484848485, + "medical_genetics": 0.6060606060606061, + "high_school_us_history": 0.6650246305418719, + "high_school_physics": 0.23333333333333334, + "high_school_world_history": 0.6652542372881356, + "virology": 0.46060606060606063, + "high_school_microeconomics": 0.4810126582278481, + "econometrics": 0.2831858407079646, + "college_computer_science": 0.37373737373737376, + "high_school_biology": 0.5857605177993528, + "abstract_algebra": 0.31313131313131315, + "professional_accounting": 0.34519572953736655, + "philosophy": 0.5258064516129032, + "professional_medicine": 0.46863468634686345, + "nutrition": 0.5377049180327869, + "global_facts": 0.23232323232323232, + "machine_learning": 0.32432432432432434, + "security_studies": 0.45491803278688525, + "public_relations": 0.5871559633027523, + "professional_psychology": 0.4533551554828151, + "prehistory": 0.5603715170278638, + "anatomy": 0.41044776119402987, + "human_sexuality": 0.6230769230769231, + "college_medicine": 0.5, + "high_school_government_and_politics": 0.7604166666666666, + "college_chemistry": 0.3838383838383838, + "logical_fallacies": 0.6234567901234568, + "high_school_geography": 0.6345177664974619, + "elementary_mathematics": 0.376657824933687, + "human_aging": 0.5315315315315315, + "college_mathematics": 0.3434343434343434, + "high_school_psychology": 0.6930147058823529, + "formal_logic": 0.368, + "high_school_statistics": 0.3813953488372093, + "international_law": 0.5916666666666667, + "high_school_mathematics": 0.21933085501858737, + "high_school_computer_science": 0.494949494949495, + "conceptual_physics": 0.43162393162393164, + "miscellaneous": 0.7135549872122762, + "high_school_chemistry": 0.42574257425742573, + "marketing": 0.6824034334763949, + "professional_law": 0.35746901500326156, + "management": 0.6078431372549019, + "college_physics": 0.3069306930693069, + "jurisprudence": 0.5607476635514018, + "world_religions": 0.6941176470588235, + "sociology": 0.68, + "us_foreign_policy": 0.696969696969697, + "high_school_macroeconomics": 0.4884318766066838, + "computer_security": 0.6161616161616161, + "moral_scenarios": 0.26174496644295303, + "moral_disputes": 0.41739130434782606, + "electrical_engineering": 0.4513888888888889, + "astronomy": 0.5298013245033113, + "college_biology": 0.5034965034965035 + } + }, + "prompt_3": { + "accuracy": 0.4899535216303182, + "category_acc": { + "high_school_european_history": 0.6158536585365854, + "business_ethics": 0.5757575757575758, + "clinical_knowledge": 0.4962121212121212, + "medical_genetics": 0.6161616161616161, + "high_school_us_history": 0.6699507389162561, + "high_school_physics": 0.26, + "high_school_world_history": 0.652542372881356, + "virology": 0.48484848484848486, + "high_school_microeconomics": 0.5147679324894515, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.42424242424242425, + "high_school_biology": 0.5760517799352751, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.3807829181494662, + "philosophy": 0.5419354838709678, + "professional_medicine": 0.44649446494464945, + "nutrition": 0.5508196721311476, + "global_facts": 0.30303030303030304, + "machine_learning": 0.36036036036036034, + "security_studies": 0.45491803278688525, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.4713584288052373, + "prehistory": 0.5913312693498453, + "anatomy": 0.4701492537313433, + "human_sexuality": 0.6615384615384615, + "college_medicine": 0.42441860465116277, + "high_school_government_and_politics": 0.7552083333333334, + "college_chemistry": 0.3838383838383838, + "logical_fallacies": 0.6296296296296297, + "high_school_geography": 0.6649746192893401, + "elementary_mathematics": 0.4270557029177719, + "human_aging": 0.5540540540540541, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.6819852941176471, + "formal_logic": 0.392, + "high_school_statistics": 0.37209302325581395, + "international_law": 0.5916666666666667, + "high_school_mathematics": 0.2899628252788104, + "high_school_computer_science": 0.5050505050505051, + "conceptual_physics": 0.405982905982906, + "miscellaneous": 0.7135549872122762, + "high_school_chemistry": 0.4306930693069307, + "marketing": 0.6437768240343348, + "professional_law": 0.3633398564905414, + "management": 0.5882352941176471, + "college_physics": 0.297029702970297, + "jurisprudence": 0.5046728971962616, + "world_religions": 0.6764705882352942, + "sociology": 0.69, + "us_foreign_policy": 0.7070707070707071, + "high_school_macroeconomics": 0.480719794344473, + "computer_security": 0.5858585858585859, + "moral_scenarios": 0.24161073825503357, + "moral_disputes": 0.47246376811594204, + "electrical_engineering": 0.4583333333333333, + "astronomy": 0.5298013245033113, + "college_biology": 0.4825174825174825 + } + }, + "prompt_4": { + "accuracy": 0.4863067572398999, + "category_acc": { + "high_school_european_history": 0.6402439024390244, + "business_ethics": 0.6161616161616161, + "clinical_knowledge": 0.4734848484848485, + "medical_genetics": 0.6262626262626263, + "high_school_us_history": 0.6403940886699507, + "high_school_physics": 0.29333333333333333, + "high_school_world_history": 0.6101694915254238, + "virology": 0.46060606060606063, + "high_school_microeconomics": 0.47257383966244726, + "econometrics": 0.2743362831858407, + "college_computer_science": 0.42424242424242425, + "high_school_biology": 0.5760517799352751, + "abstract_algebra": 0.31313131313131315, + "professional_accounting": 0.3487544483985765, + "philosophy": 0.5032258064516129, + "professional_medicine": 0.4870848708487085, + "nutrition": 0.5704918032786885, + "global_facts": 0.30303030303030304, + "machine_learning": 0.34234234234234234, + "security_studies": 0.46311475409836067, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.486088379705401, + "prehistory": 0.5727554179566563, + "anatomy": 0.417910447761194, + "human_sexuality": 0.5846153846153846, + "college_medicine": 0.47093023255813954, + "high_school_government_and_politics": 0.7604166666666666, + "college_chemistry": 0.36363636363636365, + "logical_fallacies": 0.6419753086419753, + "high_school_geography": 0.6700507614213198, + "elementary_mathematics": 0.41379310344827586, + "human_aging": 0.5225225225225225, + "college_mathematics": 0.30303030303030304, + "high_school_psychology": 0.7077205882352942, + "formal_logic": 0.408, + "high_school_statistics": 0.4, + "international_law": 0.6333333333333333, + "high_school_mathematics": 0.27137546468401486, + "high_school_computer_science": 0.48484848484848486, + "conceptual_physics": 0.3888888888888889, + "miscellaneous": 0.7225063938618926, + "high_school_chemistry": 0.400990099009901, + "marketing": 0.6566523605150214, + "professional_law": 0.3333333333333333, + "management": 0.6470588235294118, + "college_physics": 0.22772277227722773, + "jurisprudence": 0.5700934579439252, + "world_religions": 0.7235294117647059, + "sociology": 0.695, + "us_foreign_policy": 0.6565656565656566, + "high_school_macroeconomics": 0.480719794344473, + "computer_security": 0.6464646464646465, + "moral_scenarios": 0.22595078299776286, + "moral_disputes": 0.4753623188405797, + "electrical_engineering": 0.4583333333333333, + "astronomy": 0.5562913907284768, + "college_biology": 0.5454545454545454 + } + }, + "prompt_5": { + "accuracy": 0.4845906328208795, + "category_acc": { + "high_school_european_history": 0.6341463414634146, + "business_ethics": 0.5555555555555556, + "clinical_knowledge": 0.5075757575757576, + "medical_genetics": 0.6262626262626263, + "high_school_us_history": 0.6059113300492611, + "high_school_physics": 0.30666666666666664, + "high_school_world_history": 0.6228813559322034, + "virology": 0.48484848484848486, + "high_school_microeconomics": 0.5443037974683544, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.40404040404040403, + "high_school_biology": 0.5566343042071198, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.37722419928825623, + "philosophy": 0.49032258064516127, + "professional_medicine": 0.47601476014760147, + "nutrition": 0.5672131147540984, + "global_facts": 0.26262626262626265, + "machine_learning": 0.36036036036036034, + "security_studies": 0.4713114754098361, + "public_relations": 0.5321100917431193, + "professional_psychology": 0.486088379705401, + "prehistory": 0.5294117647058824, + "anatomy": 0.4701492537313433, + "human_sexuality": 0.6153846153846154, + "college_medicine": 0.45930232558139533, + "high_school_government_and_politics": 0.7135416666666666, + "college_chemistry": 0.3838383838383838, + "logical_fallacies": 0.6358024691358025, + "high_school_geography": 0.6548223350253807, + "elementary_mathematics": 0.4376657824933687, + "human_aging": 0.5225225225225225, + "college_mathematics": 0.29292929292929293, + "high_school_psychology": 0.6764705882352942, + "formal_logic": 0.336, + "high_school_statistics": 0.37209302325581395, + "international_law": 0.675, + "high_school_mathematics": 0.27137546468401486, + "high_school_computer_science": 0.5555555555555556, + "conceptual_physics": 0.4017094017094017, + "miscellaneous": 0.710997442455243, + "high_school_chemistry": 0.39603960396039606, + "marketing": 0.6909871244635193, + "professional_law": 0.34181343770384864, + "management": 0.5784313725490197, + "college_physics": 0.2376237623762376, + "jurisprudence": 0.5514018691588785, + "world_religions": 0.7058823529411765, + "sociology": 0.69, + "us_foreign_policy": 0.6565656565656566, + "high_school_macroeconomics": 0.4704370179948586, + "computer_security": 0.5959595959595959, + "moral_scenarios": 0.2539149888143177, + "moral_disputes": 0.47246376811594204, + "electrical_engineering": 0.4444444444444444, + "astronomy": 0.5099337748344371, + "college_biology": 0.5034965034965035 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5104011887072808 + }, + "prompt_2": { + "accuracy": 0.5156017830609212 + }, + "prompt_3": { + "accuracy": 0.5163447251114414 + }, + "prompt_4": { + "accuracy": 0.5118870728083209 + }, + "prompt_5": { + "accuracy": 0.5007429420505201 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5068493150684932, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.5833333333333334, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.40476190476190477, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.125, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.38095238095238093, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.5517241379310345, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.25, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.5833333333333334, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.8461538461538461, + "middle_school_physics": 0.7916666666666666, + "middle_school_chemistry": 0.72, + "veterinary_medicine": 0.35714285714285715, + "college_economics": 0.4, + "business_administration": 0.5, + "marxism": 0.75, + "mao_zedong_thought": 0.8275862068965517, + "education_science": 0.5882352941176471, + "teacher_qualification": 0.6938775510204082, + "high_school_politics": 0.7916666666666666, + "high_school_geography": 0.625, + "middle_school_politics": 0.6923076923076923, + "middle_school_geography": 0.5882352941176471, + "modern_chinese_history": 0.5, + "ideological_and_moral_cultivation": 0.75, + "logic": 0.4074074074074074, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.5357142857142857, + "art_studies": 0.631578947368421, + "professional_tour_guide": 0.7058823529411765, + "legal_professional": 0.5, + "high_school_chinese": 0.4583333333333333, + "high_school_history": 0.56, + "middle_school_history": 0.6666666666666666, + "civil_servant": 0.5192307692307693, + "sports_science": 0.5416666666666666, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.625, + "clinical_medicine": 0.5555555555555556, + "urban_and_rural_planner": 0.5098039215686274, + "accountant": 0.37037037037037035, + "fire_engineer": 0.3888888888888889, + "environmental_impact_assessment_engineer": 0.5555555555555556, + "tax_accountant": 0.5, + "physician": 0.5370370370370371 + } + }, + "prompt_2": { + "accuracy": 0.523038605230386, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.625, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.4523809523809524, + "college_physics": 0.375, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.6206896551724138, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.4166666666666667, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.5, + "middle_school_mathematics": 0.5, + "middle_school_biology": 0.7692307692307693, + "middle_school_physics": 0.8333333333333334, + "middle_school_chemistry": 0.6, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.45, + "business_administration": 0.5, + "marxism": 0.7916666666666666, + "mao_zedong_thought": 0.7931034482758621, + "education_science": 0.6470588235294118, + "teacher_qualification": 0.6938775510204082, + "high_school_politics": 0.875, + "high_school_geography": 0.6666666666666666, + "middle_school_politics": 0.6923076923076923, + "middle_school_geography": 0.5882352941176471, + "modern_chinese_history": 0.6071428571428571, + "ideological_and_moral_cultivation": 0.7916666666666666, + "logic": 0.4444444444444444, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.6071428571428571, + "art_studies": 0.6578947368421053, + "professional_tour_guide": 0.7352941176470589, + "legal_professional": 0.5357142857142857, + "high_school_chinese": 0.4583333333333333, + "high_school_history": 0.64, + "middle_school_history": 0.7037037037037037, + "civil_servant": 0.5, + "sports_science": 0.5416666666666666, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.5833333333333334, + "clinical_medicine": 0.5555555555555556, + "urban_and_rural_planner": 0.5294117647058824, + "accountant": 0.3333333333333333, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.5, + "tax_accountant": 0.42592592592592593, + "physician": 0.5 + } + }, + "prompt_3": { + "accuracy": 0.5180572851805728, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.5833333333333334, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.4523809523809524, + "college_physics": 0.4166666666666667, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.6551724137931034, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.4166666666666667, + "high_school_biology": 0.5, + "middle_school_mathematics": 0.5416666666666666, + "middle_school_biology": 0.7692307692307693, + "middle_school_physics": 0.875, + "middle_school_chemistry": 0.6, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.4166666666666667, + "business_administration": 0.4473684210526316, + "marxism": 0.7916666666666666, + "mao_zedong_thought": 0.8275862068965517, + "education_science": 0.5882352941176471, + "teacher_qualification": 0.7142857142857143, + "high_school_politics": 0.7916666666666666, + "high_school_geography": 0.7083333333333334, + "middle_school_politics": 0.6923076923076923, + "middle_school_geography": 0.6470588235294118, + "modern_chinese_history": 0.5357142857142857, + "ideological_and_moral_cultivation": 0.7916666666666666, + "logic": 0.5185185185185185, + "law": 0.41379310344827586, + "chinese_language_and_literature": 0.6071428571428571, + "art_studies": 0.6578947368421053, + "professional_tour_guide": 0.7647058823529411, + "legal_professional": 0.5, + "high_school_chinese": 0.375, + "high_school_history": 0.56, + "middle_school_history": 0.7777777777777778, + "civil_servant": 0.5576923076923077, + "sports_science": 0.5416666666666666, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.5833333333333334, + "clinical_medicine": 0.5185185185185185, + "urban_and_rural_planner": 0.49019607843137253, + "accountant": 0.37037037037037035, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.4722222222222222, + "tax_accountant": 0.4444444444444444, + "physician": 0.48148148148148145 + } + }, + "prompt_4": { + "accuracy": 0.5149439601494396, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.625, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.42857142857142855, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.125, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.38095238095238093, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.5517241379310345, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.375, + "high_school_chemistry": 0.4166666666666667, + "high_school_biology": 0.4583333333333333, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.8076923076923077, + "middle_school_physics": 0.7916666666666666, + "middle_school_chemistry": 0.64, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.48333333333333334, + "business_administration": 0.4473684210526316, + "marxism": 0.7083333333333334, + "mao_zedong_thought": 0.8275862068965517, + "education_science": 0.6470588235294118, + "teacher_qualification": 0.6938775510204082, + "high_school_politics": 0.75, + "high_school_geography": 0.625, + "middle_school_politics": 0.6923076923076923, + "middle_school_geography": 0.7058823529411765, + "modern_chinese_history": 0.5714285714285714, + "ideological_and_moral_cultivation": 0.7916666666666666, + "logic": 0.5925925925925926, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.5357142857142857, + "art_studies": 0.7105263157894737, + "professional_tour_guide": 0.7058823529411765, + "legal_professional": 0.5, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.56, + "middle_school_history": 0.7037037037037037, + "civil_servant": 0.5384615384615384, + "sports_science": 0.5416666666666666, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.7083333333333334, + "clinical_medicine": 0.5185185185185185, + "urban_and_rural_planner": 0.5294117647058824, + "accountant": 0.3888888888888889, + "fire_engineer": 0.3888888888888889, + "environmental_impact_assessment_engineer": 0.4722222222222222, + "tax_accountant": 0.42592592592592593, + "physician": 0.48148148148148145 + } + }, + "prompt_5": { + "accuracy": 0.4925280199252802, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.5833333333333334, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.38095238095238093, + "college_physics": 0.25, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.5517241379310345, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.375, + "high_school_chemistry": 0.375, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.5416666666666666, + "middle_school_biology": 0.8076923076923077, + "middle_school_physics": 0.7083333333333334, + "middle_school_chemistry": 0.68, + "veterinary_medicine": 0.35714285714285715, + "college_economics": 0.4, + "business_administration": 0.42105263157894735, + "marxism": 0.75, + "mao_zedong_thought": 0.7586206896551724, + "education_science": 0.5882352941176471, + "teacher_qualification": 0.7142857142857143, + "high_school_politics": 0.7083333333333334, + "high_school_geography": 0.5833333333333334, + "middle_school_politics": 0.7307692307692307, + "middle_school_geography": 0.5294117647058824, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.75, + "logic": 0.48148148148148145, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.6428571428571429, + "art_studies": 0.6578947368421053, + "professional_tour_guide": 0.7058823529411765, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.4166666666666667, + "high_school_history": 0.52, + "middle_school_history": 0.6666666666666666, + "civil_servant": 0.4807692307692308, + "sports_science": 0.4583333333333333, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.625, + "clinical_medicine": 0.48148148148148145, + "urban_and_rural_planner": 0.5490196078431373, + "accountant": 0.3148148148148148, + "fire_engineer": 0.4166666666666667, + "environmental_impact_assessment_engineer": 0.6111111111111112, + "tax_accountant": 0.3888888888888889, + "physician": 0.5370370370370371 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5376344086021505 + }, + "prompt_2": { + "accuracy": 0.5232974910394266 + }, + "prompt_3": { + "accuracy": 0.5197132616487455 + }, + "prompt_4": { + "accuracy": 0.5232974910394266 + }, + "prompt_5": { + "accuracy": 0.5519713261648745 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5223622863063374, + "category_acc": { + "agronomy": 0.46745562130177515, + "anatomy": 0.41216216216216217, + "ancient_chinese": 0.3475609756097561, + "arts": 0.8375, + "astronomy": 0.3575757575757576, + "business_ethics": 0.5406698564593302, + "chinese_civil_service_exam": 0.4125, + "chinese_driving_rule": 0.7404580152671756, + "chinese_food_culture": 0.6029411764705882, + "chinese_foreign_policy": 0.5420560747663551, + "chinese_history": 0.5944272445820433, + "chinese_literature": 0.5343137254901961, + "chinese_teacher_qualification": 0.7039106145251397, + "clinical_knowledge": 0.48523206751054854, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.6074766355140186, + "college_engineering_hydrology": 0.4339622641509434, + "college_law": 0.49074074074074076, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.4339622641509434, + "college_medicine": 0.4908424908424908, + "computer_science": 0.4852941176470588, + "computer_security": 0.6081871345029239, + "conceptual_physics": 0.46938775510204084, + "construction_project_management": 0.4244604316546763, + "economics": 0.4779874213836478, + "education": 0.5460122699386503, + "electrical_engineering": 0.42441860465116277, + "elementary_chinese": 0.5396825396825397, + "elementary_commonsense": 0.5353535353535354, + "elementary_information_and_technology": 0.6554621848739496, + "elementary_mathematics": 0.3782608695652174, + "ethnology": 0.5259259259259259, + "food_science": 0.5384615384615384, + "genetics": 0.42613636363636365, + "global_facts": 0.5503355704697986, + "high_school_biology": 0.40236686390532544, + "high_school_chemistry": 0.3409090909090909, + "high_school_geography": 0.5677966101694916, + "high_school_mathematics": 0.27439024390243905, + "high_school_physics": 0.4, + "high_school_politics": 0.5174825174825175, + "human_sexuality": 0.4523809523809524, + "international_law": 0.5351351351351351, + "journalism": 0.5523255813953488, + "jurisprudence": 0.5474452554744526, + "legal_and_moral_basis": 0.8738317757009346, + "logical": 0.4959349593495935, + "machine_learning": 0.4426229508196721, + "management": 0.6190476190476191, + "marketing": 0.6166666666666667, + "marxist_theory": 0.6613756613756614, + "modern_chinese": 0.35344827586206895, + "nutrition": 0.5448275862068965, + "philosophy": 0.6285714285714286, + "professional_accounting": 0.6, + "professional_law": 0.43601895734597157, + "professional_medicine": 0.4148936170212766, + "professional_psychology": 0.6120689655172413, + "public_relations": 0.5287356321839081, + "security_study": 0.6296296296296297, + "sociology": 0.584070796460177, + "sports_science": 0.49696969696969695, + "traditional_chinese_medicine": 0.5027027027027027, + "virology": 0.5798816568047337, + "world_history": 0.6024844720496895, + "world_religions": 0.625 + } + }, + "prompt_2": { + "accuracy": 0.5229666724227249, + "category_acc": { + "agronomy": 0.4911242603550296, + "anatomy": 0.42567567567567566, + "ancient_chinese": 0.3475609756097561, + "arts": 0.8375, + "astronomy": 0.37575757575757573, + "business_ethics": 0.5550239234449761, + "chinese_civil_service_exam": 0.38125, + "chinese_driving_rule": 0.6946564885496184, + "chinese_food_culture": 0.5808823529411765, + "chinese_foreign_policy": 0.514018691588785, + "chinese_history": 0.5851393188854489, + "chinese_literature": 0.5294117647058824, + "chinese_teacher_qualification": 0.7150837988826816, + "clinical_knowledge": 0.4810126582278481, + "college_actuarial_science": 0.3113207547169811, + "college_education": 0.6074766355140186, + "college_engineering_hydrology": 0.4811320754716981, + "college_law": 0.5, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.37735849056603776, + "college_medicine": 0.4908424908424908, + "computer_science": 0.4852941176470588, + "computer_security": 0.5847953216374269, + "conceptual_physics": 0.4557823129251701, + "construction_project_management": 0.48201438848920863, + "economics": 0.5220125786163522, + "education": 0.5644171779141104, + "electrical_engineering": 0.4883720930232558, + "elementary_chinese": 0.5079365079365079, + "elementary_commonsense": 0.5656565656565656, + "elementary_information_and_technology": 0.6764705882352942, + "elementary_mathematics": 0.3869565217391304, + "ethnology": 0.5777777777777777, + "food_science": 0.5524475524475524, + "genetics": 0.4090909090909091, + "global_facts": 0.5637583892617449, + "high_school_biology": 0.35502958579881655, + "high_school_chemistry": 0.3181818181818182, + "high_school_geography": 0.6016949152542372, + "high_school_mathematics": 0.2804878048780488, + "high_school_physics": 0.3181818181818182, + "high_school_politics": 0.5174825174825175, + "human_sexuality": 0.4126984126984127, + "international_law": 0.518918918918919, + "journalism": 0.5465116279069767, + "jurisprudence": 0.51338199513382, + "legal_and_moral_basis": 0.8785046728971962, + "logical": 0.4959349593495935, + "machine_learning": 0.45901639344262296, + "management": 0.6571428571428571, + "marketing": 0.6111111111111112, + "marxist_theory": 0.6613756613756614, + "modern_chinese": 0.3620689655172414, + "nutrition": 0.5448275862068965, + "philosophy": 0.6666666666666666, + "professional_accounting": 0.6114285714285714, + "professional_law": 0.41706161137440756, + "professional_medicine": 0.4148936170212766, + "professional_psychology": 0.6163793103448276, + "public_relations": 0.5459770114942529, + "security_study": 0.6148148148148148, + "sociology": 0.584070796460177, + "sports_science": 0.49696969696969695, + "traditional_chinese_medicine": 0.4972972972972973, + "virology": 0.5680473372781065, + "world_history": 0.5900621118012422, + "world_religions": 0.625 + } + }, + "prompt_3": { + "accuracy": 0.5259886030046624, + "category_acc": { + "agronomy": 0.4970414201183432, + "anatomy": 0.42567567567567566, + "ancient_chinese": 0.34146341463414637, + "arts": 0.8375, + "astronomy": 0.4, + "business_ethics": 0.5550239234449761, + "chinese_civil_service_exam": 0.40625, + "chinese_driving_rule": 0.6793893129770993, + "chinese_food_culture": 0.6102941176470589, + "chinese_foreign_policy": 0.514018691588785, + "chinese_history": 0.6068111455108359, + "chinese_literature": 0.5245098039215687, + "chinese_teacher_qualification": 0.7262569832402235, + "clinical_knowledge": 0.48523206751054854, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.5794392523364486, + "college_engineering_hydrology": 0.4811320754716981, + "college_law": 0.4537037037037037, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.3584905660377358, + "college_medicine": 0.4908424908424908, + "computer_science": 0.4852941176470588, + "computer_security": 0.5906432748538012, + "conceptual_physics": 0.46258503401360546, + "construction_project_management": 0.460431654676259, + "economics": 0.5094339622641509, + "education": 0.6012269938650306, + "electrical_engineering": 0.47093023255813954, + "elementary_chinese": 0.5079365079365079, + "elementary_commonsense": 0.5505050505050505, + "elementary_information_and_technology": 0.6890756302521008, + "elementary_mathematics": 0.3826086956521739, + "ethnology": 0.5555555555555556, + "food_science": 0.5384615384615384, + "genetics": 0.45454545454545453, + "global_facts": 0.5906040268456376, + "high_school_biology": 0.40236686390532544, + "high_school_chemistry": 0.3106060606060606, + "high_school_geography": 0.6101694915254238, + "high_school_mathematics": 0.2865853658536585, + "high_school_physics": 0.3, + "high_school_politics": 0.5104895104895105, + "human_sexuality": 0.4444444444444444, + "international_law": 0.5243243243243243, + "journalism": 0.5581395348837209, + "jurisprudence": 0.5352798053527981, + "legal_and_moral_basis": 0.8925233644859814, + "logical": 0.43089430894308944, + "machine_learning": 0.45081967213114754, + "management": 0.6333333333333333, + "marketing": 0.6111111111111112, + "marxist_theory": 0.671957671957672, + "modern_chinese": 0.3793103448275862, + "nutrition": 0.5517241379310345, + "philosophy": 0.6476190476190476, + "professional_accounting": 0.6, + "professional_law": 0.45023696682464454, + "professional_medicine": 0.4095744680851064, + "professional_psychology": 0.6293103448275862, + "public_relations": 0.5229885057471264, + "security_study": 0.6296296296296297, + "sociology": 0.5619469026548672, + "sports_science": 0.5212121212121212, + "traditional_chinese_medicine": 0.5027027027027027, + "virology": 0.5680473372781065, + "world_history": 0.6273291925465838, + "world_religions": 0.60625 + } + }, + "prompt_4": { + "accuracy": 0.5271110343636678, + "category_acc": { + "agronomy": 0.44970414201183434, + "anatomy": 0.43243243243243246, + "ancient_chinese": 0.36585365853658536, + "arts": 0.83125, + "astronomy": 0.4, + "business_ethics": 0.5454545454545454, + "chinese_civil_service_exam": 0.425, + "chinese_driving_rule": 0.7022900763358778, + "chinese_food_culture": 0.5955882352941176, + "chinese_foreign_policy": 0.5420560747663551, + "chinese_history": 0.5975232198142415, + "chinese_literature": 0.4950980392156863, + "chinese_teacher_qualification": 0.6983240223463687, + "clinical_knowledge": 0.4767932489451477, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.5981308411214953, + "college_engineering_hydrology": 0.46226415094339623, + "college_law": 0.5, + "college_mathematics": 0.3238095238095238, + "college_medical_statistics": 0.41509433962264153, + "college_medicine": 0.5054945054945055, + "computer_science": 0.4950980392156863, + "computer_security": 0.6081871345029239, + "conceptual_physics": 0.5034013605442177, + "construction_project_management": 0.4244604316546763, + "economics": 0.5283018867924528, + "education": 0.5705521472392638, + "electrical_engineering": 0.42441860465116277, + "elementary_chinese": 0.5277777777777778, + "elementary_commonsense": 0.5454545454545454, + "elementary_information_and_technology": 0.6764705882352942, + "elementary_mathematics": 0.3652173913043478, + "ethnology": 0.5037037037037037, + "food_science": 0.5454545454545454, + "genetics": 0.4375, + "global_facts": 0.5302013422818792, + "high_school_biology": 0.41420118343195267, + "high_school_chemistry": 0.3409090909090909, + "high_school_geography": 0.576271186440678, + "high_school_mathematics": 0.34146341463414637, + "high_school_physics": 0.39090909090909093, + "high_school_politics": 0.5034965034965035, + "human_sexuality": 0.5, + "international_law": 0.5405405405405406, + "journalism": 0.5581395348837209, + "jurisprudence": 0.5547445255474452, + "legal_and_moral_basis": 0.8738317757009346, + "logical": 0.4878048780487805, + "machine_learning": 0.45901639344262296, + "management": 0.6142857142857143, + "marketing": 0.6277777777777778, + "marxist_theory": 0.6931216931216931, + "modern_chinese": 0.3879310344827586, + "nutrition": 0.5517241379310345, + "philosophy": 0.6190476190476191, + "professional_accounting": 0.6114285714285714, + "professional_law": 0.4312796208530806, + "professional_medicine": 0.43617021276595747, + "professional_psychology": 0.6077586206896551, + "public_relations": 0.5402298850574713, + "security_study": 0.5777777777777777, + "sociology": 0.5486725663716814, + "sports_science": 0.47878787878787876, + "traditional_chinese_medicine": 0.4864864864864865, + "virology": 0.5857988165680473, + "world_history": 0.6024844720496895, + "world_religions": 0.63125 + } + }, + "prompt_5": { + "accuracy": 0.5289241927128302, + "category_acc": { + "agronomy": 0.47928994082840237, + "anatomy": 0.42567567567567566, + "ancient_chinese": 0.32926829268292684, + "arts": 0.84375, + "astronomy": 0.38181818181818183, + "business_ethics": 0.569377990430622, + "chinese_civil_service_exam": 0.4125, + "chinese_driving_rule": 0.7404580152671756, + "chinese_food_culture": 0.5955882352941176, + "chinese_foreign_policy": 0.5327102803738317, + "chinese_history": 0.6191950464396285, + "chinese_literature": 0.5441176470588235, + "chinese_teacher_qualification": 0.7206703910614525, + "clinical_knowledge": 0.4978902953586498, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.6822429906542056, + "college_engineering_hydrology": 0.36792452830188677, + "college_law": 0.5, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.41509433962264153, + "college_medicine": 0.4908424908424908, + "computer_science": 0.5, + "computer_security": 0.5964912280701754, + "conceptual_physics": 0.5306122448979592, + "construction_project_management": 0.4244604316546763, + "economics": 0.5157232704402516, + "education": 0.5828220858895705, + "electrical_engineering": 0.47093023255813954, + "elementary_chinese": 0.5158730158730159, + "elementary_commonsense": 0.5656565656565656, + "elementary_information_and_technology": 0.6470588235294118, + "elementary_mathematics": 0.391304347826087, + "ethnology": 0.5481481481481482, + "food_science": 0.5734265734265734, + "genetics": 0.4659090909090909, + "global_facts": 0.5570469798657718, + "high_school_biology": 0.3905325443786982, + "high_school_chemistry": 0.30303030303030304, + "high_school_geography": 0.6271186440677966, + "high_school_mathematics": 0.2682926829268293, + "high_school_physics": 0.34545454545454546, + "high_school_politics": 0.4755244755244755, + "human_sexuality": 0.48412698412698413, + "international_law": 0.5243243243243243, + "journalism": 0.5581395348837209, + "jurisprudence": 0.5255474452554745, + "legal_and_moral_basis": 0.8598130841121495, + "logical": 0.5040650406504065, + "machine_learning": 0.4262295081967213, + "management": 0.638095238095238, + "marketing": 0.6277777777777778, + "marxist_theory": 0.6455026455026455, + "modern_chinese": 0.35344827586206895, + "nutrition": 0.5448275862068965, + "philosophy": 0.6095238095238096, + "professional_accounting": 0.6514285714285715, + "professional_law": 0.4597156398104265, + "professional_medicine": 0.4095744680851064, + "professional_psychology": 0.6293103448275862, + "public_relations": 0.5057471264367817, + "security_study": 0.6148148148148148, + "sociology": 0.6017699115044248, + "sports_science": 0.509090909090909, + "traditional_chinese_medicine": 0.4810810810810811, + "virology": 0.5857988165680473, + "world_history": 0.6335403726708074, + "world_religions": 0.65 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24242424242424243 + }, + "prompt_2": { + "accuracy": 0.42424242424242425 + }, + "prompt_3": { + "accuracy": 0.45454545454545453 + }, + "prompt_4": { + "accuracy": 0.48484848484848486 + }, + "prompt_5": { + "accuracy": 0.3939393939393939 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.44545454545454544 + }, + "prompt_2": { + "accuracy": 0.49318181818181817 + }, + "prompt_3": { + "accuracy": 0.40454545454545454 + }, + "prompt_4": { + "accuracy": 0.5522727272727272 + }, + "prompt_5": { + "accuracy": 0.5295454545454545 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4301694915254237 + }, + "prompt_2": { + "accuracy": 0.43186440677966104 + }, + "prompt_3": { + "accuracy": 0.4288135593220339 + }, + "prompt_4": { + "accuracy": 0.43796610169491523 + }, + "prompt_5": { + "accuracy": 0.5288135593220339 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.799925205684368 + }, + "prompt_2": { + "accuracy": 0.7898279730740464 + }, + "prompt_3": { + "accuracy": 0.7976813762154076 + }, + "prompt_4": { + "accuracy": 0.8055347793567689 + }, + "prompt_5": { + "accuracy": 0.8051608077786089 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4747672709456149 + }, + "prompt_2": { + "accuracy": 0.5658990690837825 + }, + "prompt_3": { + "accuracy": 0.6011758941695248 + }, + "prompt_4": { + "accuracy": 0.49387555120039195 + }, + "prompt_5": { + "accuracy": 0.5649191572758452 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.29844548205960825, + "rouge2": 0.0972660276724041, + "rougeL": 0.22306765597234657, + "avg_rouge": 0.20625972190145295 + }, + "prompt_2": { + "rouge1": 0.32657441349804, + "rouge2": 0.10708387093032645, + "rougeL": 0.24469847164511646, + "avg_rouge": 0.22611891869116096 + }, + "prompt_3": { + "rouge1": 0.30968125850564776, + "rouge2": 0.09852896277242615, + "rougeL": 0.22946000587059337, + "avg_rouge": 0.2125567423828891 + }, + "prompt_4": { + "rouge1": 0.29150616910466404, + "rouge2": 0.09268360179811519, + "rougeL": 0.21474459856028766, + "avg_rouge": 0.1996447898210223 + }, + "prompt_5": { + "rouge1": 0.31492748075985477, + "rouge2": 0.09927036472495064, + "rougeL": 0.23428143349530645, + "avg_rouge": 0.2161597596600373 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.18663143789659645, + "rouge2": 0.05993789980500529, + "rougeL": 0.14450148330665782, + "avg_rouge": 0.1303569403360865 + }, + "prompt_2": { + "rouge1": 0.18932779899554045, + "rouge2": 0.05951838477075756, + "rougeL": 0.14334341382612942, + "avg_rouge": 0.13072986586414248 + }, + "prompt_3": { + "rouge1": 0.19072073765867745, + "rouge2": 0.06140465938148166, + "rougeL": 0.14756258636842978, + "avg_rouge": 0.13322932780286298 + }, + "prompt_4": { + "rouge1": 0.19253274395834405, + "rouge2": 0.0585499783676254, + "rougeL": 0.1460014084578028, + "avg_rouge": 0.13236137692792407 + }, + "prompt_5": { + "rouge1": 0.1517698802197454, + "rouge2": 0.02221816361643254, + "rougeL": 0.13566591832213248, + "avg_rouge": 0.10321798738610348 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8922018348623854 + }, + "prompt_2": { + "accuracy": 0.8910550458715596 + }, + "prompt_3": { + "accuracy": 0.8922018348623854 + }, + "prompt_4": { + "accuracy": 0.8853211009174312 + }, + "prompt_5": { + "accuracy": 0.6972477064220184 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7861936720997124 + }, + "prompt_2": { + "accuracy": 0.7267497603068073 + }, + "prompt_3": { + "accuracy": 0.7545541706615532 + }, + "prompt_4": { + "accuracy": 0.7603068072866731 + }, + "prompt_5": { + "accuracy": 0.7785234899328859 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.601 + }, + "prompt_2": { + "accuracy": 0.6405 + }, + "prompt_3": { + "accuracy": 0.556 + }, + "prompt_4": { + "accuracy": 0.5845 + }, + "prompt_5": { + "accuracy": 0.5755 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5265 + }, + "prompt_2": { + "accuracy": 0.499 + }, + "prompt_3": { + "accuracy": 0.526 + }, + "prompt_4": { + "accuracy": 0.5075 + }, + "prompt_5": { + "accuracy": 0.4955 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6195 + }, + "prompt_2": { + "accuracy": 0.641 + }, + "prompt_3": { + "accuracy": 0.6155 + }, + "prompt_4": { + "accuracy": 0.6245 + }, + "prompt_5": { + "accuracy": 0.562 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5915492957746479 + }, + "prompt_2": { + "accuracy": 0.5352112676056338 + }, + "prompt_3": { + "accuracy": 0.5211267605633803 + }, + "prompt_4": { + "accuracy": 0.43661971830985913 + }, + "prompt_5": { + "accuracy": 0.5211267605633803 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6534296028880866 + }, + "prompt_2": { + "accuracy": 0.6137184115523465 + }, + "prompt_3": { + "accuracy": 0.5523465703971119 + }, + "prompt_4": { + "accuracy": 0.6173285198555957 + }, + "prompt_5": { + "accuracy": 0.5740072202166066 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5563725490196079 + }, + "prompt_2": { + "accuracy": 0.6053921568627451 + }, + "prompt_3": { + "accuracy": 0.5049019607843137 + }, + "prompt_4": { + "accuracy": 0.5931372549019608 + }, + "prompt_5": { + "accuracy": 0.4877450980392157 + } } }, "five_shot": { @@ -7778,53 +66287,1733 @@ "model_link": "https://huggingface.co/baichuan-inc/Baichuan2-13B-Base", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.4771428571428572, + "language_acc": { + "Malay": 0.41333333333333333, + "English": 0.6133333333333333, + "Vietnamese": 0.38666666666666666, + "Spanish": 0.5133333333333333, + "Indonesian": 0.46, + "Filipino": 0.35333333333333333, + "Chinese": 0.6 + }, + "consistency_score_2": 0.4342857142857143, + "consistency_score_3": 0.23466666666666666, + "consistency_score_4": 0.14019047619047623, + "consistency_score_5": 0.08825396825396825, + "consistency_score_6": 0.056190476190476187, + "consistency_score_7": 0.03333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.41333333333333333, + "Malay,Vietnamese": 0.38, + "Malay,Spanish": 0.42, + "Malay,Indonesian": 0.56, + "Malay,Filipino": 0.46, + "Malay,Chinese": 0.44666666666666666, + "English,Vietnamese": 0.37333333333333335, + "English,Spanish": 0.5466666666666666, + "English,Indonesian": 0.47333333333333333, + "English,Filipino": 0.35333333333333333, + "English,Chinese": 0.6333333333333333, + "Vietnamese,Spanish": 0.38, + "Vietnamese,Indonesian": 0.42, + "Vietnamese,Filipino": 0.36, + "Vietnamese,Chinese": 0.4, + "Spanish,Indonesian": 0.4066666666666667, + "Spanish,Filipino": 0.32666666666666666, + "Spanish,Chinese": 0.52, + "Indonesian,Filipino": 0.4066666666666667, + "Indonesian,Chinese": 0.5133333333333333, + "Filipino,Chinese": 0.32666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.18666666666666668, + "Malay,English,Spanish": 0.26666666666666666, + "Malay,English,Indonesian": 0.2866666666666667, + "Malay,English,Filipino": 0.20666666666666667, + "Malay,English,Chinese": 0.3, + "Malay,Vietnamese,Spanish": 0.2, + "Malay,Vietnamese,Indonesian": 0.26666666666666666, + "Malay,Vietnamese,Filipino": 0.20666666666666667, + "Malay,Vietnamese,Chinese": 0.19333333333333333, + "Malay,Spanish,Indonesian": 0.25333333333333335, + "Malay,Spanish,Filipino": 0.20666666666666667, + "Malay,Spanish,Chinese": 0.26, + "Malay,Indonesian,Filipino": 0.28, + "Malay,Indonesian,Chinese": 0.32, + "Malay,Filipino,Chinese": 0.2, + "English,Vietnamese,Spanish": 0.22666666666666666, + "English,Vietnamese,Indonesian": 0.23333333333333334, + "English,Vietnamese,Filipino": 0.15333333333333332, + "English,Vietnamese,Chinese": 0.25333333333333335, + "English,Spanish,Indonesian": 0.26666666666666666, + "English,Spanish,Filipino": 0.20666666666666667, + "English,Spanish,Chinese": 0.4, + "English,Indonesian,Filipino": 0.22666666666666666, + "English,Indonesian,Chinese": 0.36666666666666664, + "English,Filipino,Chinese": 0.24, + "Vietnamese,Spanish,Indonesian": 0.19333333333333333, + "Vietnamese,Spanish,Filipino": 0.15333333333333332, + "Vietnamese,Spanish,Chinese": 0.22, + "Vietnamese,Indonesian,Filipino": 0.2, + "Vietnamese,Indonesian,Chinese": 0.25333333333333335, + "Vietnamese,Filipino,Chinese": 0.14666666666666667, + "Spanish,Indonesian,Filipino": 0.18, + "Spanish,Indonesian,Chinese": 0.2866666666666667, + "Spanish,Filipino,Chinese": 0.16666666666666666, + "Indonesian,Filipino,Chinese": 0.20666666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.14, + "Malay,English,Vietnamese,Indonesian": 0.14666666666666667, + "Malay,English,Vietnamese,Filipino": 0.08666666666666667, + "Malay,English,Vietnamese,Chinese": 0.14666666666666667, + "Malay,English,Spanish,Indonesian": 0.16666666666666666, + "Malay,English,Spanish,Filipino": 0.14, + "Malay,English,Spanish,Chinese": 0.20666666666666667, + "Malay,English,Indonesian,Filipino": 0.15333333333333332, + "Malay,English,Indonesian,Chinese": 0.22666666666666666, + "Malay,English,Filipino,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian": 0.14, + "Malay,Vietnamese,Spanish,Filipino": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.14, + "Malay,Vietnamese,Indonesian,Chinese": 0.16, + "Malay,Vietnamese,Filipino,Chinese": 0.09333333333333334, + "Malay,Spanish,Indonesian,Filipino": 0.14, + "Malay,Spanish,Indonesian,Chinese": 0.18, + "Malay,Spanish,Filipino,Chinese": 0.12, + "Malay,Indonesian,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.14, + "English,Vietnamese,Spanish,Filipino": 0.1, + "English,Vietnamese,Spanish,Chinese": 0.16, + "English,Vietnamese,Indonesian,Filipino": 0.10666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.18, + "English,Vietnamese,Filipino,Chinese": 0.1, + "English,Spanish,Indonesian,Filipino": 0.14, + "English,Spanish,Indonesian,Chinese": 0.22, + "English,Spanish,Filipino,Chinese": 0.14, + "English,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino": 0.09333333333333334, + "Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.08, + "Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.1, + "Malay,English,Vietnamese,Spanish,Filipino": 0.07333333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.06666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.1, + "Malay,English,Spanish,Indonesian,Chinese": 0.14, + "Malay,English,Spanish,Filipino,Chinese": 0.1, + "Malay,English,Indonesian,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.07333333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.06, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.1, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.05333333333333334, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.08, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 + } + }, + "AC3_2": 0.45470667258781766, + "AC3_3": 0.31460529832347667, + "AC3_4": 0.21670912854073235, + "AC3_5": 0.1489564449884937, + "AC3_6": 0.10054081630767919, + "AC3_7": 0.06231343282361398 + }, + "prompt_2": { + "overall_acc": 0.4771428571428572, + "language_acc": { + "Malay": 0.3933333333333333, + "English": 0.5733333333333334, + "Vietnamese": 0.4066666666666667, + "Spanish": 0.5333333333333333, + "Indonesian": 0.4266666666666667, + "Filipino": 0.4066666666666667, + "Chinese": 0.6 + }, + "consistency_score_2": 0.4374603174603175, + "consistency_score_3": 0.23238095238095233, + "consistency_score_4": 0.13047619047619047, + "consistency_score_5": 0.07333333333333335, + "consistency_score_6": 0.04, + "consistency_score_7": 0.02, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.3933333333333333, + "Malay,Vietnamese": 0.4866666666666667, + "Malay,Spanish": 0.4066666666666667, + "Malay,Indonesian": 0.54, + "Malay,Filipino": 0.42, + "Malay,Chinese": 0.4533333333333333, + "English,Vietnamese": 0.4, + "English,Spanish": 0.5333333333333333, + "English,Indonesian": 0.4266666666666667, + "English,Filipino": 0.4066666666666667, + "English,Chinese": 0.5133333333333333, + "Vietnamese,Spanish": 0.47333333333333333, + "Vietnamese,Indonesian": 0.44, + "Vietnamese,Filipino": 0.35333333333333333, + "Vietnamese,Chinese": 0.34, + "Spanish,Indonesian": 0.46, + "Spanish,Filipino": 0.4, + "Spanish,Chinese": 0.49333333333333335, + "Indonesian,Filipino": 0.42, + "Indonesian,Chinese": 0.4533333333333333, + "Filipino,Chinese": 0.37333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.22, + "Malay,English,Spanish": 0.24666666666666667, + "Malay,English,Indonesian": 0.25333333333333335, + "Malay,English,Filipino": 0.18, + "Malay,English,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Spanish": 0.26666666666666666, + "Malay,Vietnamese,Indonesian": 0.2733333333333333, + "Malay,Vietnamese,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Chinese": 0.24, + "Malay,Spanish,Indonesian": 0.28, + "Malay,Spanish,Filipino": 0.20666666666666667, + "Malay,Spanish,Chinese": 0.24, + "Malay,Indonesian,Filipino": 0.26, + "Malay,Indonesian,Chinese": 0.3, + "Malay,Filipino,Chinese": 0.2, + "English,Vietnamese,Spanish": 0.2733333333333333, + "English,Vietnamese,Indonesian": 0.20666666666666667, + "English,Vietnamese,Filipino": 0.18, + "English,Vietnamese,Chinese": 0.19333333333333333, + "English,Spanish,Indonesian": 0.29333333333333333, + "English,Spanish,Filipino": 0.23333333333333334, + "English,Spanish,Chinese": 0.3333333333333333, + "English,Indonesian,Filipino": 0.21333333333333335, + "English,Indonesian,Chinese": 0.25333333333333335, + "English,Filipino,Chinese": 0.19333333333333333, + "Vietnamese,Spanish,Indonesian": 0.28, + "Vietnamese,Spanish,Filipino": 0.18666666666666668, + "Vietnamese,Spanish,Chinese": 0.22, + "Vietnamese,Indonesian,Filipino": 0.20666666666666667, + "Vietnamese,Indonesian,Chinese": 0.21333333333333335, + "Vietnamese,Filipino,Chinese": 0.13333333333333333, + "Spanish,Indonesian,Filipino": 0.22, + "Spanish,Indonesian,Chinese": 0.28, + "Spanish,Filipino,Chinese": 0.2, + "Indonesian,Filipino,Chinese": 0.19333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.16666666666666666, + "Malay,English,Vietnamese,Indonesian": 0.12, + "Malay,English,Vietnamese,Filipino": 0.08666666666666667, + "Malay,English,Vietnamese,Chinese": 0.14666666666666667, + "Malay,English,Spanish,Indonesian": 0.17333333333333334, + "Malay,English,Spanish,Filipino": 0.10666666666666667, + "Malay,English,Spanish,Chinese": 0.16666666666666666, + "Malay,English,Indonesian,Filipino": 0.11333333333333333, + "Malay,English,Indonesian,Chinese": 0.16, + "Malay,English,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Indonesian": 0.18, + "Malay,Vietnamese,Spanish,Filipino": 0.12, + "Malay,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Indonesian,Filipino": 0.13333333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.16, + "Malay,Vietnamese,Filipino,Chinese": 0.09333333333333334, + "Malay,Spanish,Indonesian,Filipino": 0.13333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Malay,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.16666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.10666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.14, + "English,Vietnamese,Indonesian,Filipino": 0.10666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.1, + "English,Vietnamese,Filipino,Chinese": 0.07333333333333333, + "English,Spanish,Indonesian,Filipino": 0.14666666666666667, + "English,Spanish,Indonesian,Chinese": 0.19333333333333333, + "English,Spanish,Filipino,Chinese": 0.13333333333333333, + "English,Indonesian,Filipino,Chinese": 0.1, + "Vietnamese,Spanish,Indonesian,Filipino": 0.12666666666666668, + "Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Vietnamese,Spanish,Filipino,Chinese": 0.08, + "Vietnamese,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.12 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.10666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.06, + "Malay,English,Vietnamese,Spanish,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.04666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.04666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.07333333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.12, + "Malay,English,Spanish,Filipino,Chinese": 0.06, + "Malay,English,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.08, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.06666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.08, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.08, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.04666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.03333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.08, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.04, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.02666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.02, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.04, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02 + } + }, + "AC3_2": 0.4564407199381936, + "AC3_3": 0.31254458288979475, + "AC3_4": 0.2049171517799789, + "AC3_5": 0.12712802765856673, + "AC3_6": 0.07381215468185953, + "AC3_7": 0.038390804589978865 + }, + "prompt_3": { + "overall_acc": 0.4657142857142858, + "language_acc": { + "Malay": 0.36666666666666664, + "English": 0.58, + "Vietnamese": 0.36666666666666664, + "Spanish": 0.5733333333333334, + "Indonesian": 0.43333333333333335, + "Filipino": 0.3933333333333333, + "Chinese": 0.5466666666666666 + }, + "consistency_score_2": 0.46793650793650793, + "consistency_score_3": 0.2775238095238096, + "consistency_score_4": 0.1878095238095238, + "consistency_score_5": 0.1365079365079365, + "consistency_score_6": 0.10285714285714286, + "consistency_score_7": 0.08, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.3933333333333333, + "Malay,Vietnamese": 0.42, + "Malay,Spanish": 0.44666666666666666, + "Malay,Indonesian": 0.6, + "Malay,Filipino": 0.4533333333333333, + "Malay,Chinese": 0.44, + "English,Vietnamese": 0.3933333333333333, + "English,Spanish": 0.58, + "English,Indonesian": 0.43333333333333335, + "English,Filipino": 0.4266666666666667, + "English,Chinese": 0.5066666666666667, + "Vietnamese,Spanish": 0.5133333333333333, + "Vietnamese,Indonesian": 0.44, + "Vietnamese,Filipino": 0.4666666666666667, + "Vietnamese,Chinese": 0.41333333333333333, + "Spanish,Indonesian": 0.47333333333333333, + "Spanish,Filipino": 0.48, + "Spanish,Chinese": 0.5466666666666666, + "Indonesian,Filipino": 0.5066666666666667, + "Indonesian,Chinese": 0.4666666666666667, + "Filipino,Chinese": 0.4266666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.19333333333333333, + "Malay,English,Spanish": 0.28, + "Malay,English,Indonesian": 0.2866666666666667, + "Malay,English,Filipino": 0.23333333333333334, + "Malay,English,Chinese": 0.24, + "Malay,Vietnamese,Spanish": 0.26666666666666666, + "Malay,Vietnamese,Indonesian": 0.30666666666666664, + "Malay,Vietnamese,Filipino": 0.2733333333333333, + "Malay,Vietnamese,Chinese": 0.22666666666666666, + "Malay,Spanish,Indonesian": 0.3333333333333333, + "Malay,Spanish,Filipino": 0.2866666666666667, + "Malay,Spanish,Chinese": 0.29333333333333333, + "Malay,Indonesian,Filipino": 0.32666666666666666, + "Malay,Indonesian,Chinese": 0.30666666666666664, + "Malay,Filipino,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish": 0.31333333333333335, + "English,Vietnamese,Indonesian": 0.24, + "English,Vietnamese,Filipino": 0.24, + "English,Vietnamese,Chinese": 0.24, + "English,Spanish,Indonesian": 0.3, + "English,Spanish,Filipino": 0.3, + "English,Spanish,Chinese": 0.38, + "English,Indonesian,Filipino": 0.2733333333333333, + "English,Indonesian,Chinese": 0.2733333333333333, + "English,Filipino,Chinese": 0.26, + "Vietnamese,Spanish,Indonesian": 0.29333333333333333, + "Vietnamese,Spanish,Filipino": 0.3, + "Vietnamese,Spanish,Chinese": 0.29333333333333333, + "Vietnamese,Indonesian,Filipino": 0.2733333333333333, + "Vietnamese,Indonesian,Chinese": 0.24, + "Vietnamese,Filipino,Chinese": 0.25333333333333335, + "Spanish,Indonesian,Filipino": 0.3, + "Spanish,Indonesian,Chinese": 0.3, + "Spanish,Filipino,Chinese": 0.2866666666666667, + "Indonesian,Filipino,Chinese": 0.26666666666666666 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.17333333333333334, + "Malay,English,Vietnamese,Indonesian": 0.16666666666666666, + "Malay,English,Vietnamese,Filipino": 0.14666666666666667, + "Malay,English,Vietnamese,Chinese": 0.14, + "Malay,English,Spanish,Indonesian": 0.22666666666666666, + "Malay,English,Spanish,Filipino": 0.19333333333333333, + "Malay,English,Spanish,Chinese": 0.21333333333333335, + "Malay,English,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Indonesian,Chinese": 0.19333333333333333, + "Malay,English,Filipino,Chinese": 0.16, + "Malay,Vietnamese,Spanish,Indonesian": 0.22, + "Malay,Vietnamese,Spanish,Filipino": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.18, + "Malay,Vietnamese,Indonesian,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Indonesian,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Filipino,Chinese": 0.14666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,Spanish,Indonesian,Chinese": 0.22666666666666666, + "Malay,Spanish,Filipino,Chinese": 0.18666666666666668, + "Malay,Indonesian,Filipino,Chinese": 0.17333333333333334, + "English,Vietnamese,Spanish,Indonesian": 0.19333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.19333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.21333333333333335, + "English,Vietnamese,Indonesian,Filipino": 0.17333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.15333333333333332, + "English,Vietnamese,Filipino,Chinese": 0.16, + "English,Spanish,Indonesian,Filipino": 0.20666666666666667, + "English,Spanish,Indonesian,Chinese": 0.21333333333333335, + "English,Spanish,Filipino,Chinese": 0.21333333333333335, + "English,Indonesian,Filipino,Chinese": 0.18, + "Vietnamese,Spanish,Indonesian,Filipino": 0.2, + "Vietnamese,Spanish,Indonesian,Chinese": 0.18, + "Vietnamese,Spanish,Filipino,Chinese": 0.19333333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.14666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Chinese": 0.14, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.12, + "Malay,English,Vietnamese,Filipino,Chinese": 0.1, + "Malay,English,Spanish,Indonesian,Filipino": 0.16, + "Malay,English,Spanish,Indonesian,Chinese": 0.17333333333333334, + "Malay,English,Spanish,Filipino,Chinese": 0.14666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.14, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.10666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.12, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.08, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08 + } + }, + "AC3_2": 0.46682275217227653, + "AC3_3": 0.3477938053277389, + "AC3_4": 0.2676737310661739, + "AC3_5": 0.2111303561664626, + "AC3_6": 0.16849964103281953, + "AC3_7": 0.1365445025927798 + }, + "prompt_4": { + "overall_acc": 0.48857142857142855, + "language_acc": { + "Malay": 0.38, + "English": 0.6466666666666666, + "Vietnamese": 0.37333333333333335, + "Spanish": 0.54, + "Indonesian": 0.46, + "Filipino": 0.38666666666666666, + "Chinese": 0.6333333333333333 + }, + "consistency_score_2": 0.4825396825396825, + "consistency_score_3": 0.2866666666666667, + "consistency_score_4": 0.188, + "consistency_score_5": 0.1307936507936508, + "consistency_score_6": 0.09333333333333334, + "consistency_score_7": 0.06666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.4533333333333333, + "Malay,Vietnamese": 0.47333333333333333, + "Malay,Spanish": 0.3933333333333333, + "Malay,Indonesian": 0.6466666666666666, + "Malay,Filipino": 0.5666666666666667, + "Malay,Chinese": 0.41333333333333333, + "English,Vietnamese": 0.4, + "English,Spanish": 0.5333333333333333, + "English,Indonesian": 0.4666666666666667, + "English,Filipino": 0.4533333333333333, + "English,Chinese": 0.6466666666666666, + "Vietnamese,Spanish": 0.43333333333333335, + "Vietnamese,Indonesian": 0.52, + "Vietnamese,Filipino": 0.48, + "Vietnamese,Chinese": 0.4, + "Spanish,Indonesian": 0.43333333333333335, + "Spanish,Filipino": 0.4, + "Spanish,Chinese": 0.5666666666666667, + "Indonesian,Filipino": 0.5866666666666667, + "Indonesian,Chinese": 0.48, + "Filipino,Chinese": 0.38666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.26, + "Malay,English,Spanish": 0.25333333333333335, + "Malay,English,Indonesian": 0.3333333333333333, + "Malay,English,Filipino": 0.2866666666666667, + "Malay,English,Chinese": 0.30666666666666664, + "Malay,Vietnamese,Spanish": 0.22666666666666666, + "Malay,Vietnamese,Indonesian": 0.37333333333333335, + "Malay,Vietnamese,Filipino": 0.32, + "Malay,Vietnamese,Chinese": 0.22666666666666666, + "Malay,Spanish,Indonesian": 0.2733333333333333, + "Malay,Spanish,Filipino": 0.24, + "Malay,Spanish,Chinese": 0.24, + "Malay,Indonesian,Filipino": 0.43333333333333335, + "Malay,Indonesian,Chinese": 0.32, + "Malay,Filipino,Chinese": 0.24, + "English,Vietnamese,Spanish": 0.25333333333333335, + "English,Vietnamese,Indonesian": 0.28, + "English,Vietnamese,Filipino": 0.28, + "English,Vietnamese,Chinese": 0.26666666666666666, + "English,Spanish,Indonesian": 0.2866666666666667, + "English,Spanish,Filipino": 0.26666666666666666, + "English,Spanish,Chinese": 0.41333333333333333, + "English,Indonesian,Filipino": 0.30666666666666664, + "English,Indonesian,Chinese": 0.3466666666666667, + "English,Filipino,Chinese": 0.30666666666666664, + "Vietnamese,Spanish,Indonesian": 0.26, + "Vietnamese,Spanish,Filipino": 0.22, + "Vietnamese,Spanish,Chinese": 0.2733333333333333, + "Vietnamese,Indonesian,Filipino": 0.36, + "Vietnamese,Indonesian,Chinese": 0.28, + "Vietnamese,Filipino,Chinese": 0.22, + "Spanish,Indonesian,Filipino": 0.26, + "Spanish,Indonesian,Chinese": 0.3, + "Spanish,Filipino,Chinese": 0.24666666666666667, + "Indonesian,Filipino,Chinese": 0.2733333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian": 0.22, + "Malay,English,Vietnamese,Filipino": 0.2, + "Malay,English,Vietnamese,Chinese": 0.18, + "Malay,English,Spanish,Indonesian": 0.18666666666666668, + "Malay,English,Spanish,Filipino": 0.16, + "Malay,English,Spanish,Chinese": 0.18666666666666668, + "Malay,English,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Indonesian,Chinese": 0.24666666666666667, + "Malay,English,Filipino,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Indonesian": 0.18, + "Malay,Vietnamese,Spanish,Filipino": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.2733333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.16, + "Malay,Spanish,Indonesian,Filipino": 0.18, + "Malay,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Malay,Spanish,Filipino,Chinese": 0.14, + "Malay,Indonesian,Filipino,Chinese": 0.2, + "English,Vietnamese,Spanish,Indonesian": 0.17333333333333334, + "English,Vietnamese,Spanish,Filipino": 0.15333333333333332, + "English,Vietnamese,Spanish,Chinese": 0.18, + "English,Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.18, + "English,Spanish,Indonesian,Filipino": 0.18666666666666668, + "English,Spanish,Indonesian,Chinese": 0.22666666666666666, + "English,Spanish,Filipino,Chinese": 0.2, + "English,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Vietnamese,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Vietnamese,Spanish,Indonesian,Chinese": 0.2, + "Vietnamese,Spanish,Filipino,Chinese": 0.14, + "Vietnamese,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino": 0.10666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.18, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.16, + "Malay,English,Vietnamese,Filipino,Chinese": 0.13333333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.12666666666666668, + "Malay,English,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.14, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.13333333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.14, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.16, + "English,Spanish,Indonesian,Filipino,Chinese": 0.14, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.09333333333333334, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.12, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06666666666666667 + } + }, + "AC3_2": 0.4855368233750241, + "AC3_3": 0.3613267812801728, + "AC3_4": 0.27152027023013847, + "AC3_5": 0.20634692828183385, + "AC3_6": 0.15672667755080802, + "AC3_7": 0.11732418522758313 + }, + "prompt_5": { + "overall_acc": 0.47333333333333333, + "language_acc": { + "Malay": 0.36, + "English": 0.66, + "Vietnamese": 0.38, + "Spanish": 0.5533333333333333, + "Indonesian": 0.3933333333333333, + "Filipino": 0.38666666666666666, + "Chinese": 0.58 + }, + "consistency_score_2": 0.45682539682539686, + "consistency_score_3": 0.26095238095238094, + "consistency_score_4": 0.16285714285714287, + "consistency_score_5": 0.1053968253968254, + "consistency_score_6": 0.06761904761904762, + "consistency_score_7": 0.04, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.44666666666666666, + "Malay,Vietnamese": 0.48, + "Malay,Spanish": 0.4066666666666667, + "Malay,Indonesian": 0.6133333333333333, + "Malay,Filipino": 0.49333333333333335, + "Malay,Chinese": 0.3933333333333333, + "English,Vietnamese": 0.4, + "English,Spanish": 0.5866666666666667, + "English,Indonesian": 0.4666666666666667, + "English,Filipino": 0.43333333333333335, + "English,Chinese": 0.5733333333333334, + "Vietnamese,Spanish": 0.36, + "Vietnamese,Indonesian": 0.49333333333333335, + "Vietnamese,Filipino": 0.37333333333333335, + "Vietnamese,Chinese": 0.3933333333333333, + "Spanish,Indonesian": 0.46, + "Spanish,Filipino": 0.38, + "Spanish,Chinese": 0.5466666666666666, + "Indonesian,Filipino": 0.5133333333333333, + "Indonesian,Chinese": 0.4, + "Filipino,Chinese": 0.38 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.2733333333333333, + "Malay,English,Spanish": 0.29333333333333333, + "Malay,English,Indonesian": 0.3333333333333333, + "Malay,English,Filipino": 0.26666666666666666, + "Malay,English,Chinese": 0.3, + "Malay,Vietnamese,Spanish": 0.21333333333333335, + "Malay,Vietnamese,Indonesian": 0.36, + "Malay,Vietnamese,Filipino": 0.26, + "Malay,Vietnamese,Chinese": 0.21333333333333335, + "Malay,Spanish,Indonesian": 0.30666666666666664, + "Malay,Spanish,Filipino": 0.22, + "Malay,Spanish,Chinese": 0.25333333333333335, + "Malay,Indonesian,Filipino": 0.36, + "Malay,Indonesian,Chinese": 0.28, + "Malay,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish": 0.22666666666666666, + "English,Vietnamese,Indonesian": 0.26666666666666666, + "English,Vietnamese,Filipino": 0.20666666666666667, + "English,Vietnamese,Chinese": 0.25333333333333335, + "English,Spanish,Indonesian": 0.32, + "English,Spanish,Filipino": 0.2733333333333333, + "English,Spanish,Chinese": 0.4, + "English,Indonesian,Filipino": 0.26666666666666666, + "English,Indonesian,Chinese": 0.29333333333333333, + "English,Filipino,Chinese": 0.2866666666666667, + "Vietnamese,Spanish,Indonesian": 0.24666666666666667, + "Vietnamese,Spanish,Filipino": 0.16, + "Vietnamese,Spanish,Chinese": 0.21333333333333335, + "Vietnamese,Indonesian,Filipino": 0.25333333333333335, + "Vietnamese,Indonesian,Chinese": 0.22, + "Vietnamese,Filipino,Chinese": 0.16, + "Spanish,Indonesian,Filipino": 0.22666666666666666, + "Spanish,Indonesian,Chinese": 0.2866666666666667, + "Spanish,Filipino,Chinese": 0.23333333333333334, + "Indonesian,Filipino,Chinese": 0.2 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian": 0.22, + "Malay,English,Vietnamese,Filipino": 0.16, + "Malay,English,Vietnamese,Chinese": 0.17333333333333334, + "Malay,English,Spanish,Indonesian": 0.22, + "Malay,English,Spanish,Filipino": 0.17333333333333334, + "Malay,English,Spanish,Chinese": 0.22666666666666666, + "Malay,English,Indonesian,Filipino": 0.20666666666666667, + "Malay,English,Indonesian,Chinese": 0.22666666666666666, + "Malay,English,Filipino,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Indonesian": 0.18, + "Malay,Vietnamese,Spanish,Filipino": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Indonesian,Filipino": 0.20666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Filipino,Chinese": 0.10666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,Spanish,Indonesian,Chinese": 0.2, + "Malay,Spanish,Filipino,Chinese": 0.14, + "Malay,Indonesian,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish,Indonesian": 0.16666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.11333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.16, + "English,Vietnamese,Indonesian,Filipino": 0.14666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "English,Vietnamese,Filipino,Chinese": 0.12666666666666668, + "English,Spanish,Indonesian,Filipino": 0.16666666666666666, + "English,Spanish,Indonesian,Chinese": 0.22666666666666666, + "English,Spanish,Filipino,Chinese": 0.19333333333333333, + "English,Indonesian,Filipino,Chinese": 0.16, + "Vietnamese,Spanish,Indonesian,Filipino": 0.12, + "Vietnamese,Spanish,Indonesian,Chinese": 0.14, + "Vietnamese,Spanish,Filipino,Chinese": 0.08, + "Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino": 0.08666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.14, + "Malay,English,Vietnamese,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.12666666666666668, + "Malay,English,Spanish,Indonesian,Chinese": 0.17333333333333334, + "Malay,English,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,English,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.08666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.08666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.06666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.08666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.06 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04 + } + }, + "AC3_2": 0.464932878220778, + "AC3_3": 0.33642888019629263, + "AC3_4": 0.24233532930322574, + "AC3_5": 0.1724044614805408, + "AC3_6": 0.11833333331145833, + "AC3_7": 0.07376623375186372 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.40665584415584416, + "language_acc": { + "English": 0.4431818181818182, + "Vietnamese": 0.36363636363636365, + "Chinese": 0.5340909090909091, + "Indonesian": 0.3352272727272727, + "Filipino": 0.3352272727272727, + "Spanish": 0.42045454545454547, + "Malay": 0.4147727272727273 + }, + "consistency_score_2": 0.3933982683982684, + "consistency_score_3": 0.19724025974025972, + "consistency_score_4": 0.11590909090909093, + "consistency_score_5": 0.07656926406926408, + "consistency_score_6": 0.05438311688311689, + "consistency_score_7": 0.03977272727272727, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3181818181818182, + "English,Chinese": 0.5056818181818182, + "English,Indonesian": 0.35795454545454547, + "English,Filipino": 0.3409090909090909, + "English,Spanish": 0.4090909090909091, + "English,Malay": 0.4090909090909091, + "Vietnamese,Chinese": 0.26704545454545453, + "Vietnamese,Indonesian": 0.39204545454545453, + "Vietnamese,Filipino": 0.4090909090909091, + "Vietnamese,Spanish": 0.2727272727272727, + "Vietnamese,Malay": 0.4034090909090909, + "Chinese,Indonesian": 0.3693181818181818, + "Chinese,Filipino": 0.32954545454545453, + "Chinese,Spanish": 0.4375, + "Chinese,Malay": 0.4090909090909091, + "Indonesian,Filipino": 0.3806818181818182, + "Indonesian,Spanish": 0.4090909090909091, + "Indonesian,Malay": 0.5284090909090909, + "Filipino,Spanish": 0.3693181818181818, + "Filipino,Malay": 0.4772727272727273, + "Spanish,Malay": 0.4659090909090909 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.16477272727272727, + "English,Vietnamese,Indonesian": 0.16477272727272727, + "English,Vietnamese,Filipino": 0.16477272727272727, + "English,Vietnamese,Spanish": 0.11931818181818182, + "English,Vietnamese,Malay": 0.17613636363636365, + "English,Chinese,Indonesian": 0.2215909090909091, + "English,Chinese,Filipino": 0.17613636363636365, + "English,Chinese,Spanish": 0.24431818181818182, + "English,Chinese,Malay": 0.26136363636363635, + "English,Indonesian,Filipino": 0.14772727272727273, + "English,Indonesian,Spanish": 0.21022727272727273, + "English,Indonesian,Malay": 0.25, + "English,Filipino,Spanish": 0.16477272727272727, + "English,Filipino,Malay": 0.21022727272727273, + "English,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Indonesian": 0.1590909090909091, + "Vietnamese,Chinese,Filipino": 0.1534090909090909, + "Vietnamese,Chinese,Spanish": 0.13636363636363635, + "Vietnamese,Chinese,Malay": 0.18181818181818182, + "Vietnamese,Indonesian,Filipino": 0.19318181818181818, + "Vietnamese,Indonesian,Spanish": 0.1590909090909091, + "Vietnamese,Indonesian,Malay": 0.23295454545454544, + "Vietnamese,Filipino,Spanish": 0.14772727272727273, + "Vietnamese,Filipino,Malay": 0.22727272727272727, + "Vietnamese,Spanish,Malay": 0.18181818181818182, + "Chinese,Indonesian,Filipino": 0.17045454545454544, + "Chinese,Indonesian,Spanish": 0.21022727272727273, + "Chinese,Indonesian,Malay": 0.23295454545454544, + "Chinese,Filipino,Spanish": 0.1875, + "Chinese,Filipino,Malay": 0.23863636363636365, + "Chinese,Spanish,Malay": 0.24431818181818182, + "Indonesian,Filipino,Spanish": 0.17613636363636365, + "Indonesian,Filipino,Malay": 0.26136363636363635, + "Indonesian,Spanish,Malay": 0.26704545454545453, + "Filipino,Spanish,Malay": 0.23863636363636365 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.125, + "English,Vietnamese,Chinese,Filipino": 0.09659090909090909, + "English,Vietnamese,Chinese,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Malay": 0.11931818181818182, + "English,Vietnamese,Indonesian,Filipino": 0.09659090909090909, + "English,Vietnamese,Indonesian,Spanish": 0.09090909090909091, + "English,Vietnamese,Indonesian,Malay": 0.13068181818181818, + "English,Vietnamese,Filipino,Spanish": 0.06818181818181818, + "English,Vietnamese,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Filipino": 0.10227272727272728, + "English,Chinese,Indonesian,Spanish": 0.125, + "English,Chinese,Indonesian,Malay": 0.1590909090909091, + "English,Chinese,Filipino,Spanish": 0.10227272727272728, + "English,Chinese,Filipino,Malay": 0.14772727272727273, + "English,Chinese,Spanish,Malay": 0.14204545454545456, + "English,Indonesian,Filipino,Spanish": 0.08522727272727272, + "English,Indonesian,Filipino,Malay": 0.125, + "English,Indonesian,Spanish,Malay": 0.14772727272727273, + "English,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Spanish": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Malay": 0.125, + "Vietnamese,Chinese,Filipino,Spanish": 0.08522727272727272, + "Vietnamese,Chinese,Filipino,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino,Malay": 0.13636363636363635, + "Vietnamese,Indonesian,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Filipino,Spanish,Malay": 0.11363636363636363, + "Chinese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "Chinese,Indonesian,Filipino,Malay": 0.14772727272727273, + "Chinese,Indonesian,Spanish,Malay": 0.14204545454545456, + "Chinese,Filipino,Spanish,Malay": 0.14772727272727273, + "Indonesian,Filipino,Spanish,Malay": 0.13636363636363635 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.056818181818181816, + "English,Vietnamese,Chinese,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "English,Chinese,Indonesian,Filipino,Malay": 0.09659090909090909, + "English,Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + } + }, + "AC3_2": 0.3999172115851828, + "AC3_3": 0.2656380917029753, + "AC3_4": 0.18039905105418444, + "AC3_5": 0.12887301665429807, + "AC3_6": 0.09593641391736576, + "AC3_7": 0.07245867766971967 + }, + "prompt_2": { + "overall_acc": 0.37743506493506496, + "language_acc": { + "English": 0.36363636363636365, + "Vietnamese": 0.3125, + "Chinese": 0.5454545454545454, + "Indonesian": 0.3522727272727273, + "Filipino": 0.3522727272727273, + "Spanish": 0.3522727272727273, + "Malay": 0.36363636363636365 + }, + "consistency_score_2": 0.377435064935065, + "consistency_score_3": 0.16964285714285718, + "consistency_score_4": 0.08733766233766233, + "consistency_score_5": 0.05005411255411255, + "consistency_score_6": 0.03165584415584415, + "consistency_score_7": 0.022727272727272728, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.2840909090909091, + "English,Chinese": 0.4431818181818182, + "English,Indonesian": 0.3409090909090909, + "English,Filipino": 0.3465909090909091, + "English,Spanish": 0.39204545454545453, + "English,Malay": 0.3068181818181818, + "Vietnamese,Chinese": 0.3181818181818182, + "Vietnamese,Indonesian": 0.39204545454545453, + "Vietnamese,Filipino": 0.3522727272727273, + "Vietnamese,Spanish": 0.3465909090909091, + "Vietnamese,Malay": 0.42045454545454547, + "Chinese,Indonesian": 0.38636363636363635, + "Chinese,Filipino": 0.3522727272727273, + "Chinese,Spanish": 0.42045454545454547, + "Chinese,Malay": 0.4147727272727273, + "Indonesian,Filipino": 0.38636363636363635, + "Indonesian,Spanish": 0.39204545454545453, + "Indonesian,Malay": 0.5170454545454546, + "Filipino,Spanish": 0.3352272727272727, + "Filipino,Malay": 0.3977272727272727, + "Spanish,Malay": 0.3806818181818182 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.14204545454545456, + "English,Vietnamese,Indonesian": 0.14772727272727273, + "English,Vietnamese,Filipino": 0.13636363636363635, + "English,Vietnamese,Spanish": 0.14772727272727273, + "English,Vietnamese,Malay": 0.13068181818181818, + "English,Chinese,Indonesian": 0.17613636363636365, + "English,Chinese,Filipino": 0.1875, + "English,Chinese,Spanish": 0.19886363636363635, + "English,Chinese,Malay": 0.17045454545454544, + "English,Indonesian,Filipino": 0.14772727272727273, + "English,Indonesian,Spanish": 0.1590909090909091, + "English,Indonesian,Malay": 0.17045454545454544, + "English,Filipino,Spanish": 0.1534090909090909, + "English,Filipino,Malay": 0.1534090909090909, + "English,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian": 0.1590909090909091, + "Vietnamese,Chinese,Filipino": 0.125, + "Vietnamese,Chinese,Spanish": 0.13068181818181818, + "Vietnamese,Chinese,Malay": 0.1590909090909091, + "Vietnamese,Indonesian,Filipino": 0.14204545454545456, + "Vietnamese,Indonesian,Spanish": 0.17045454545454544, + "Vietnamese,Indonesian,Malay": 0.23863636363636365, + "Vietnamese,Filipino,Spanish": 0.13636363636363635, + "Vietnamese,Filipino,Malay": 0.1590909090909091, + "Vietnamese,Spanish,Malay": 0.17613636363636365, + "Chinese,Indonesian,Filipino": 0.17613636363636365, + "Chinese,Indonesian,Spanish": 0.19318181818181818, + "Chinese,Indonesian,Malay": 0.24431818181818182, + "Chinese,Filipino,Spanish": 0.17045454545454544, + "Chinese,Filipino,Malay": 0.19318181818181818, + "Chinese,Spanish,Malay": 0.18181818181818182, + "Indonesian,Filipino,Spanish": 0.16477272727272727, + "Indonesian,Filipino,Malay": 0.22727272727272727, + "Indonesian,Spanish,Malay": 0.25, + "Filipino,Spanish,Malay": 0.17613636363636365 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino": 0.06818181818181818, + "English,Vietnamese,Chinese,Spanish": 0.07386363636363637, + "English,Vietnamese,Chinese,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino": 0.0625, + "English,Vietnamese,Indonesian,Spanish": 0.08522727272727272, + "English,Vietnamese,Indonesian,Malay": 0.08522727272727272, + "English,Vietnamese,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Spanish,Malay": 0.06818181818181818, + "English,Chinese,Indonesian,Filipino": 0.09659090909090909, + "English,Chinese,Indonesian,Spanish": 0.10227272727272728, + "English,Chinese,Indonesian,Malay": 0.10795454545454546, + "English,Chinese,Filipino,Spanish": 0.09090909090909091, + "English,Chinese,Filipino,Malay": 0.10227272727272728, + "English,Chinese,Spanish,Malay": 0.09090909090909091, + "English,Indonesian,Filipino,Spanish": 0.08522727272727272, + "English,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Indonesian,Spanish,Malay": 0.09090909090909091, + "English,Filipino,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Filipino": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Spanish": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Filipino,Spanish": 0.0625, + "Vietnamese,Chinese,Filipino,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "Vietnamese,Indonesian,Filipino,Malay": 0.09659090909090909, + "Vietnamese,Indonesian,Spanish,Malay": 0.125, + "Vietnamese,Filipino,Spanish,Malay": 0.07386363636363637, + "Chinese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, + "Chinese,Indonesian,Spanish,Malay": 0.125, + "Chinese,Filipino,Spanish,Malay": 0.10795454545454546, + "Indonesian,Filipino,Spanish,Malay": 0.11363636363636363 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.03977272727272727, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.03977272727272727, + "English,Vietnamese,Chinese,Filipino,Malay": 0.045454545454545456, + "English,Vietnamese,Chinese,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "English,Chinese,Indonesian,Filipino,Malay": 0.0625, + "English,Chinese,Indonesian,Spanish,Malay": 0.0625, + "English,Chinese,Filipino,Spanish,Malay": 0.0625, + "English,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.05113636363636364, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.028409090909090908, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + } + }, + "AC3_2": 0.37743506488506506, + "AC3_3": 0.23407693933830692, + "AC3_4": 0.1418512503607228, + "AC3_5": 0.08838669240082676, + "AC3_6": 0.05841256955900522, + "AC3_7": 0.04287294854174785 + }, + "prompt_3": { + "overall_acc": 0.40340909090909094, + "language_acc": { + "English": 0.38636363636363635, + "Vietnamese": 0.3977272727272727, + "Chinese": 0.5170454545454546, + "Indonesian": 0.36363636363636365, + "Filipino": 0.35795454545454547, + "Spanish": 0.39204545454545453, + "Malay": 0.4090909090909091 + }, + "consistency_score_2": 0.39962121212121215, + "consistency_score_3": 0.20275974025974028, + "consistency_score_4": 0.1189935064935065, + "consistency_score_5": 0.07548701298701299, + "consistency_score_6": 0.05032467532467533, + "consistency_score_7": 0.03409090909090909, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.35795454545454547, + "English,Chinese": 0.4431818181818182, + "English,Indonesian": 0.3465909090909091, + "English,Filipino": 0.30113636363636365, + "English,Spanish": 0.42045454545454547, + "English,Malay": 0.4375, + "Vietnamese,Chinese": 0.3352272727272727, + "Vietnamese,Indonesian": 0.3693181818181818, + "Vietnamese,Filipino": 0.4090909090909091, + "Vietnamese,Spanish": 0.4090909090909091, + "Vietnamese,Malay": 0.4943181818181818, + "Chinese,Indonesian": 0.3068181818181818, + "Chinese,Filipino": 0.3125, + "Chinese,Spanish": 0.44886363636363635, + "Chinese,Malay": 0.38636363636363635, + "Indonesian,Filipino": 0.4090909090909091, + "Indonesian,Spanish": 0.38636363636363635, + "Indonesian,Malay": 0.5284090909090909, + "Filipino,Spanish": 0.3465909090909091, + "Filipino,Malay": 0.4431818181818182, + "Spanish,Malay": 0.5 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.17045454545454544, + "English,Vietnamese,Indonesian": 0.1534090909090909, + "English,Vietnamese,Filipino": 0.1590909090909091, + "English,Vietnamese,Spanish": 0.21022727272727273, + "English,Vietnamese,Malay": 0.24431818181818182, + "English,Chinese,Indonesian": 0.16477272727272727, + "English,Chinese,Filipino": 0.1590909090909091, + "English,Chinese,Spanish": 0.23295454545454544, + "English,Chinese,Malay": 0.23295454545454544, + "English,Indonesian,Filipino": 0.1534090909090909, + "English,Indonesian,Spanish": 0.1875, + "English,Indonesian,Malay": 0.23295454545454544, + "English,Filipino,Spanish": 0.14772727272727273, + "English,Filipino,Malay": 0.1875, + "English,Spanish,Malay": 0.25, + "Vietnamese,Chinese,Indonesian": 0.125, + "Vietnamese,Chinese,Filipino": 0.14772727272727273, + "Vietnamese,Chinese,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Malay": 0.20454545454545456, + "Vietnamese,Indonesian,Filipino": 0.20454545454545456, + "Vietnamese,Indonesian,Spanish": 0.1875, + "Vietnamese,Indonesian,Malay": 0.2784090909090909, + "Vietnamese,Filipino,Spanish": 0.17613636363636365, + "Vietnamese,Filipino,Malay": 0.2556818181818182, + "Vietnamese,Spanish,Malay": 0.3068181818181818, + "Chinese,Indonesian,Filipino": 0.13068181818181818, + "Chinese,Indonesian,Spanish": 0.18181818181818182, + "Chinese,Indonesian,Malay": 0.19318181818181818, + "Chinese,Filipino,Spanish": 0.1875, + "Chinese,Filipino,Malay": 0.20454545454545456, + "Chinese,Spanish,Malay": 0.26136363636363635, + "Indonesian,Filipino,Spanish": 0.17613636363636365, + "Indonesian,Filipino,Malay": 0.2727272727272727, + "Indonesian,Spanish,Malay": 0.29545454545454547, + "Filipino,Spanish,Malay": 0.22727272727272727 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino": 0.09659090909090909, + "English,Vietnamese,Chinese,Spanish": 0.11931818181818182, + "English,Vietnamese,Chinese,Malay": 0.14204545454545456, + "English,Vietnamese,Indonesian,Filipino": 0.07386363636363637, + "English,Vietnamese,Indonesian,Spanish": 0.11363636363636363, + "English,Vietnamese,Indonesian,Malay": 0.13636363636363635, + "English,Vietnamese,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Spanish,Malay": 0.17045454545454544, + "English,Chinese,Indonesian,Filipino": 0.08522727272727272, + "English,Chinese,Indonesian,Spanish": 0.11363636363636363, + "English,Chinese,Indonesian,Malay": 0.125, + "English,Chinese,Filipino,Spanish": 0.09659090909090909, + "English,Chinese,Filipino,Malay": 0.125, + "English,Chinese,Spanish,Malay": 0.1590909090909091, + "English,Indonesian,Filipino,Spanish": 0.09659090909090909, + "English,Indonesian,Filipino,Malay": 0.125, + "English,Indonesian,Spanish,Malay": 0.1534090909090909, + "English,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Spanish": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Filipino,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Filipino,Malay": 0.125, + "Vietnamese,Chinese,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "Vietnamese,Indonesian,Filipino,Malay": 0.16477272727272727, + "Vietnamese,Indonesian,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Filipino,Spanish,Malay": 0.14772727272727273, + "Chinese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "Chinese,Indonesian,Filipino,Malay": 0.10795454545454546, + "Chinese,Indonesian,Spanish,Malay": 0.14772727272727273, + "Chinese,Filipino,Spanish,Malay": 0.14204545454545456, + "Indonesian,Filipino,Spanish,Malay": 0.13068181818181818 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.06818181818181818, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.056818181818181816, + "English,Vietnamese,Chinese,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Spanish,Malay": 0.10227272727272728, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.10795454545454546, + "English,Vietnamese,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "English,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Chinese,Indonesian,Spanish,Malay": 0.09659090909090909, + "English,Chinese,Filipino,Spanish,Malay": 0.07954545454545454, + "English,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + } + }, + "AC3_2": 0.40150621778876616, + "AC3_3": 0.26987571208480604, + "AC3_4": 0.18377803827206962, + "AC3_5": 0.12717642524308945, + "AC3_6": 0.08948609528032664, + "AC3_7": 0.06286894921821555 + }, + "prompt_4": { + "overall_acc": 0.3969155844155844, + "language_acc": { + "English": 0.44886363636363635, + "Vietnamese": 0.3181818181818182, + "Chinese": 0.5397727272727273, + "Indonesian": 0.36363636363636365, + "Filipino": 0.30113636363636365, + "Spanish": 0.42045454545454547, + "Malay": 0.38636363636363635 + }, + "consistency_score_2": 0.4247835497835498, + "consistency_score_3": 0.23035714285714284, + "consistency_score_4": 0.1436688311688312, + "consistency_score_5": 0.09577922077922077, + "consistency_score_6": 0.06574675324675325, + "consistency_score_7": 0.045454545454545456, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3806818181818182, + "English,Chinese": 0.5681818181818182, + "English,Indonesian": 0.42045454545454547, + "English,Filipino": 0.32386363636363635, + "English,Spanish": 0.4943181818181818, + "English,Malay": 0.4772727272727273, + "Vietnamese,Chinese": 0.36363636363636365, + "Vietnamese,Indonesian": 0.3693181818181818, + "Vietnamese,Filipino": 0.39204545454545453, + "Vietnamese,Spanish": 0.3806818181818182, + "Vietnamese,Malay": 0.4034090909090909, + "Chinese,Indonesian": 0.4375, + "Chinese,Filipino": 0.3352272727272727, + "Chinese,Spanish": 0.5056818181818182, + "Chinese,Malay": 0.4431818181818182, + "Indonesian,Filipino": 0.42613636363636365, + "Indonesian,Spanish": 0.42045454545454547, + "Indonesian,Malay": 0.5284090909090909, + "Filipino,Spanish": 0.375, + "Filipino,Malay": 0.42613636363636365, + "Spanish,Malay": 0.44886363636363635 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.23295454545454544, + "English,Vietnamese,Indonesian": 0.1875, + "English,Vietnamese,Filipino": 0.1590909090909091, + "English,Vietnamese,Spanish": 0.23295454545454544, + "English,Vietnamese,Malay": 0.2215909090909091, + "English,Chinese,Indonesian": 0.30113636363636365, + "English,Chinese,Filipino": 0.19886363636363635, + "English,Chinese,Spanish": 0.3522727272727273, + "English,Chinese,Malay": 0.3181818181818182, + "English,Indonesian,Filipino": 0.18181818181818182, + "English,Indonesian,Spanish": 0.26704545454545453, + "English,Indonesian,Malay": 0.29545454545454547, + "English,Filipino,Spanish": 0.19886363636363635, + "English,Filipino,Malay": 0.20454545454545456, + "English,Spanish,Malay": 0.3125, + "Vietnamese,Chinese,Indonesian": 0.19318181818181818, + "Vietnamese,Chinese,Filipino": 0.17045454545454544, + "Vietnamese,Chinese,Spanish": 0.2215909090909091, + "Vietnamese,Chinese,Malay": 0.19886363636363635, + "Vietnamese,Indonesian,Filipino": 0.1875, + "Vietnamese,Indonesian,Spanish": 0.19318181818181818, + "Vietnamese,Indonesian,Malay": 0.2215909090909091, + "Vietnamese,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Filipino,Malay": 0.19318181818181818, + "Vietnamese,Spanish,Malay": 0.23295454545454544, + "Chinese,Indonesian,Filipino": 0.21022727272727273, + "Chinese,Indonesian,Spanish": 0.2784090909090909, + "Chinese,Indonesian,Malay": 0.2784090909090909, + "Chinese,Filipino,Spanish": 0.19318181818181818, + "Chinese,Filipino,Malay": 0.20454545454545456, + "Chinese,Spanish,Malay": 0.2897727272727273, + "Indonesian,Filipino,Spanish": 0.19886363636363635, + "Indonesian,Filipino,Malay": 0.26704545454545453, + "Indonesian,Spanish,Malay": 0.30113636363636365, + "Filipino,Spanish,Malay": 0.19886363636363635 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.14772727272727273, + "English,Vietnamese,Chinese,Filipino": 0.10795454545454546, + "English,Vietnamese,Chinese,Spanish": 0.17613636363636365, + "English,Vietnamese,Chinese,Malay": 0.14772727272727273, + "English,Vietnamese,Indonesian,Filipino": 0.09659090909090909, + "English,Vietnamese,Indonesian,Spanish": 0.14204545454545456, + "English,Vietnamese,Indonesian,Malay": 0.14204545454545456, + "English,Vietnamese,Filipino,Spanish": 0.10795454545454546, + "English,Vietnamese,Filipino,Malay": 0.09659090909090909, + "English,Vietnamese,Spanish,Malay": 0.16477272727272727, + "English,Chinese,Indonesian,Filipino": 0.14772727272727273, + "English,Chinese,Indonesian,Spanish": 0.2159090909090909, + "English,Chinese,Indonesian,Malay": 0.22727272727272727, + "English,Chinese,Filipino,Spanish": 0.14204545454545456, + "English,Chinese,Filipino,Malay": 0.14204545454545456, + "English,Chinese,Spanish,Malay": 0.23863636363636365, + "English,Indonesian,Filipino,Spanish": 0.13068181818181818, + "English,Indonesian,Filipino,Malay": 0.13068181818181818, + "English,Indonesian,Spanish,Malay": 0.2215909090909091, + "English,Filipino,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Filipino": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Spanish": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Filipino,Spanish": 0.10795454545454546, + "Vietnamese,Chinese,Filipino,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino,Malay": 0.11931818181818182, + "Vietnamese,Indonesian,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.10227272727272728, + "Chinese,Indonesian,Filipino,Spanish": 0.13068181818181818, + "Chinese,Indonesian,Filipino,Malay": 0.13636363636363635, + "Chinese,Indonesian,Spanish,Malay": 0.21022727272727273, + "Chinese,Filipino,Spanish,Malay": 0.13068181818181818, + "Indonesian,Filipino,Spanish,Malay": 0.13636363636363635 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.125, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino,Malay": 0.0625, + "English,Vietnamese,Chinese,Spanish,Malay": 0.13636363636363635, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.0625, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.11931818181818182, + "English,Vietnamese,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Chinese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "English,Chinese,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Spanish,Malay": 0.18181818181818182, + "English,Chinese,Filipino,Spanish,Malay": 0.10795454545454546, + "English,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456 + } + }, + "AC3_2": 0.41037699536162653, + "AC3_3": 0.2915234027929219, + "AC3_4": 0.2109731484341216, + "AC3_5": 0.1543197329539798, + "AC3_6": 0.11280758712531012, + "AC3_7": 0.08156797329265372 + }, + "prompt_5": { + "overall_acc": 0.41477272727272724, + "language_acc": { + "English": 0.5170454545454546, + "Vietnamese": 0.32954545454545453, + "Chinese": 0.5454545454545454, + "Indonesian": 0.3522727272727273, + "Filipino": 0.3352272727272727, + "Spanish": 0.4147727272727273, + "Malay": 0.4090909090909091 + }, + "consistency_score_2": 0.4228896103896105, + "consistency_score_3": 0.2215909090909091, + "consistency_score_4": 0.13571428571428568, + "consistency_score_5": 0.09253246753246754, + "consistency_score_6": 0.06737012987012987, + "consistency_score_7": 0.05113636363636364, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3522727272727273, + "English,Chinese": 0.6022727272727273, + "English,Indonesian": 0.4431818181818182, + "English,Filipino": 0.4090909090909091, + "English,Spanish": 0.45454545454545453, + "English,Malay": 0.4943181818181818, + "Vietnamese,Chinese": 0.32386363636363635, + "Vietnamese,Indonesian": 0.4090909090909091, + "Vietnamese,Filipino": 0.375, + "Vietnamese,Spanish": 0.3409090909090909, + "Vietnamese,Malay": 0.42045454545454547, + "Chinese,Indonesian": 0.4147727272727273, + "Chinese,Filipino": 0.39204545454545453, + "Chinese,Spanish": 0.4431818181818182, + "Chinese,Malay": 0.42045454545454547, + "Indonesian,Filipino": 0.4431818181818182, + "Indonesian,Spanish": 0.35795454545454547, + "Indonesian,Malay": 0.5568181818181818, + "Filipino,Spanish": 0.4034090909090909, + "Filipino,Malay": 0.42045454545454547, + "Spanish,Malay": 0.4034090909090909 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2159090909090909, + "English,Vietnamese,Indonesian": 0.19318181818181818, + "English,Vietnamese,Filipino": 0.16477272727272727, + "English,Vietnamese,Spanish": 0.1875, + "English,Vietnamese,Malay": 0.21022727272727273, + "English,Chinese,Indonesian": 0.29545454545454547, + "English,Chinese,Filipino": 0.26704545454545453, + "English,Chinese,Spanish": 0.3068181818181818, + "English,Chinese,Malay": 0.3125, + "English,Indonesian,Filipino": 0.23295454545454544, + "English,Indonesian,Spanish": 0.23295454545454544, + "English,Indonesian,Malay": 0.3068181818181818, + "English,Filipino,Spanish": 0.23295454545454544, + "English,Filipino,Malay": 0.2215909090909091, + "English,Spanish,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Indonesian": 0.17045454545454544, + "Vietnamese,Chinese,Filipino": 0.14772727272727273, + "Vietnamese,Chinese,Spanish": 0.18181818181818182, + "Vietnamese,Chinese,Malay": 0.17045454545454544, + "Vietnamese,Indonesian,Filipino": 0.21022727272727273, + "Vietnamese,Indonesian,Spanish": 0.17613636363636365, + "Vietnamese,Indonesian,Malay": 0.25, + "Vietnamese,Filipino,Spanish": 0.17045454545454544, + "Vietnamese,Filipino,Malay": 0.19318181818181818, + "Vietnamese,Spanish,Malay": 0.1875, + "Chinese,Indonesian,Filipino": 0.2215909090909091, + "Chinese,Indonesian,Spanish": 0.2215909090909091, + "Chinese,Indonesian,Malay": 0.24431818181818182, + "Chinese,Filipino,Spanish": 0.2159090909090909, + "Chinese,Filipino,Malay": 0.20454545454545456, + "Chinese,Spanish,Malay": 0.24431818181818182, + "Indonesian,Filipino,Spanish": 0.1875, + "Indonesian,Filipino,Malay": 0.2897727272727273, + "Indonesian,Spanish,Malay": 0.22727272727272727, + "Filipino,Spanish,Malay": 0.19886363636363635 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.13068181818181818, + "English,Vietnamese,Chinese,Filipino": 0.11931818181818182, + "English,Vietnamese,Chinese,Spanish": 0.14204545454545456, + "English,Vietnamese,Chinese,Malay": 0.13636363636363635, + "English,Vietnamese,Indonesian,Filipino": 0.10795454545454546, + "English,Vietnamese,Indonesian,Spanish": 0.10795454545454546, + "English,Vietnamese,Indonesian,Malay": 0.14772727272727273, + "English,Vietnamese,Filipino,Spanish": 0.11363636363636363, + "English,Vietnamese,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Spanish,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Filipino": 0.17045454545454544, + "English,Chinese,Indonesian,Spanish": 0.18181818181818182, + "English,Chinese,Indonesian,Malay": 0.21022727272727273, + "English,Chinese,Filipino,Spanish": 0.17045454545454544, + "English,Chinese,Filipino,Malay": 0.17045454545454544, + "English,Chinese,Spanish,Malay": 0.20454545454545456, + "English,Indonesian,Filipino,Spanish": 0.13636363636363635, + "English,Indonesian,Filipino,Malay": 0.16477272727272727, + "English,Indonesian,Spanish,Malay": 0.16477272727272727, + "English,Filipino,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian,Filipino": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Spanish": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Filipino,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Filipino,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino,Malay": 0.14772727272727273, + "Vietnamese,Indonesian,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Filipino,Spanish,Malay": 0.10795454545454546, + "Chinese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "Chinese,Indonesian,Filipino,Malay": 0.14772727272727273, + "Chinese,Indonesian,Spanish,Malay": 0.14772727272727273, + "Chinese,Filipino,Spanish,Malay": 0.13636363636363635, + "Indonesian,Filipino,Spanish,Malay": 0.14204545454545456 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.09090909090909091, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.09659090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "English,Chinese,Indonesian,Filipino,Malay": 0.13068181818181818, + "English,Chinese,Indonesian,Spanish,Malay": 0.13636363636363635, + "English,Chinese,Filipino,Spanish,Malay": 0.13068181818181818, + "English,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0625, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + } + }, + "AC3_2": 0.4187918427970801, + "AC3_3": 0.2888595778766857, + "AC3_4": 0.2045119433423829, + "AC3_5": 0.15130909087926486, + "AC3_6": 0.11591291702523948, + "AC3_7": 0.09104767182081282 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5242718446601942 + }, + "prompt_2": { + "accuracy": 0.47572815533980584 + }, + "prompt_3": { + "accuracy": 0.49514563106796117 + }, + "prompt_4": { + "accuracy": 0.4077669902912621 + }, + "prompt_5": { + "accuracy": 0.5242718446601942 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6190476190476191 + }, + "prompt_2": { + "accuracy": 0.49523809523809526 + }, + "prompt_3": { + "accuracy": 0.5428571428571428 + }, + "prompt_4": { + "accuracy": 0.6190476190476191 + }, + "prompt_5": { + "accuracy": 0.5904761904761905 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5981308411214953 + }, + "prompt_2": { + "accuracy": 0.5607476635514018 + }, + "prompt_3": { + "accuracy": 0.4953271028037383 + }, + "prompt_4": { + "accuracy": 0.35514018691588783 + }, + "prompt_5": { + "accuracy": 0.5514018691588785 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.46, + "category_acc": { + "brand": 0.4, + "demographics": 0.2, + "biology": 0.5, + "history": 0.26666666666666666, + "literature": 0.3, + "politics": 0.6, + "culture": 0.5, + "film": 0.6, + "law": 0.4, + "geography": 0.8 + } + }, + "prompt_2": { + "accuracy": 0.47, + "category_acc": { + "brand": 0.5, + "demographics": 0.0, + "biology": 0.4, + "history": 0.2, + "literature": 0.4, + "politics": 0.8, + "culture": 0.7, + "film": 0.5, + "law": 0.5, + "geography": 0.6 + } + }, + "prompt_3": { + "accuracy": 0.38, + "category_acc": { + "brand": 0.4, + "demographics": 0.2, + "biology": 0.3, + "history": 0.26666666666666666, + "literature": 0.3, + "politics": 0.6, + "culture": 0.4, + "film": 0.5, + "law": 0.3, + "geography": 0.5 + } + }, + "prompt_4": { + "accuracy": 0.37, + "category_acc": { + "brand": 0.5, + "demographics": 0.2, + "biology": 0.3, + "history": 0.3333333333333333, + "literature": 0.3, + "politics": 0.5, + "culture": 0.3, + "film": 0.5, + "law": 0.3, + "geography": 0.4 + } + }, + "prompt_5": { + "accuracy": 0.44, + "category_acc": { + "brand": 0.4, + "demographics": 0.0, + "biology": 0.5, + "history": 0.2, + "literature": 0.3, + "politics": 0.6, + "culture": 0.5, + "film": 0.6, + "law": 0.4, + "geography": 0.8 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.12384907248174179 + }, + "prompt_2": { + "bleu_score": 0.08587761153334315 + }, + "prompt_3": { + "bleu_score": 0.07465581134311214 + }, + "prompt_4": { + "bleu_score": 0.18667220090373415 + }, + "prompt_5": { + "bleu_score": 0.07035284588311369 + } }, "indommlu": { "prompt_1": -1, @@ -7834,179 +68023,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.11216789463956799 + }, + "prompt_2": { + "bleu_score": 0.08085353140690815 + }, + "prompt_3": { + "bleu_score": 0.13327260954636835 + }, + "prompt_4": { + "bleu_score": 0.15938432300849847 + }, + "prompt_5": { + "bleu_score": 0.20508520153812088 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.09234625583681672 + }, + "prompt_2": { + "bleu_score": 0.05870373842057371 + }, + "prompt_3": { + "bleu_score": 0.06525580580159475 + }, + "prompt_4": { + "bleu_score": 0.0884656346879959 + }, + "prompt_5": { + "bleu_score": 0.11007102387948767 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.10671712398293302 + }, + "prompt_2": { + "bleu_score": 0.093831004513928 + }, + "prompt_3": { + "bleu_score": 0.13043563671419856 + }, + "prompt_4": { + "bleu_score": 0.13370800750004935 + }, + "prompt_5": { + "bleu_score": 0.11677866851714523 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.11014243909156265 + }, + "prompt_2": { + "bleu_score": 0.07646619426422727 + }, + "prompt_3": { + "bleu_score": 0.15577227710040997 + }, + "prompt_4": { + "bleu_score": 0.17914746617177435 + }, + "prompt_5": { + "bleu_score": 0.2063270215892652 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5250875145857643 + }, + "prompt_2": { + "accuracy": 0.5169194865810969 + }, + "prompt_3": { + "accuracy": 0.5192532088681447 + }, + "prompt_4": { + "accuracy": 0.5472578763127188 + }, + "prompt_5": { + "accuracy": 0.5612602100350058 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5103324991061852, + "category_acc": { + "high_school_european_history": 0.6402439024390244, + "business_ethics": 0.5151515151515151, + "clinical_knowledge": 0.5037878787878788, + "medical_genetics": 0.6161616161616161, + "high_school_us_history": 0.6108374384236454, + "high_school_physics": 0.32666666666666666, + "high_school_world_history": 0.614406779661017, + "virology": 0.40606060606060607, + "high_school_microeconomics": 0.4936708860759494, + "econometrics": 0.2831858407079646, + "college_computer_science": 0.3434343434343434, + "high_school_biology": 0.6213592233009708, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.35587188612099646, + "philosophy": 0.5612903225806452, + "professional_medicine": 0.5867158671586716, + "nutrition": 0.5639344262295082, + "global_facts": 0.35353535353535354, + "machine_learning": 0.3333333333333333, + "security_studies": 0.6229508196721312, + "public_relations": 0.4954128440366973, + "professional_psychology": 0.546644844517185, + "prehistory": 0.6130030959752322, + "anatomy": 0.48507462686567165, + "human_sexuality": 0.6307692307692307, + "college_medicine": 0.5174418604651163, + "high_school_government_and_politics": 0.7239583333333334, + "college_chemistry": 0.36363636363636365, + "logical_fallacies": 0.654320987654321, + "high_school_geography": 0.6598984771573604, + "elementary_mathematics": 0.4854111405835544, + "human_aging": 0.5405405405405406, + "college_mathematics": 0.31313131313131315, + "high_school_psychology": 0.6856617647058824, + "formal_logic": 0.424, + "high_school_statistics": 0.3395348837209302, + "international_law": 0.625, + "high_school_mathematics": 0.32342007434944237, + "high_school_computer_science": 0.41414141414141414, + "conceptual_physics": 0.4188034188034188, + "miscellaneous": 0.7250639386189258, + "high_school_chemistry": 0.3712871287128713, + "marketing": 0.7553648068669528, + "professional_law": 0.37181996086105673, + "management": 0.6568627450980392, + "college_physics": 0.31683168316831684, + "jurisprudence": 0.5607476635514018, + "world_religions": 0.7941176470588235, + "sociology": 0.765, + "us_foreign_policy": 0.7575757575757576, + "high_school_macroeconomics": 0.4524421593830334, + "computer_security": 0.6464646464646465, + "moral_scenarios": 0.25727069351230425, + "moral_disputes": 0.5623188405797102, + "electrical_engineering": 0.4166666666666667, + "astronomy": 0.5894039735099338, + "college_biology": 0.6013986013986014 + } + }, + "prompt_2": { + "accuracy": 0.4875223453700393, + "category_acc": { + "high_school_european_history": 0.5304878048780488, + "business_ethics": 0.5555555555555556, + "clinical_knowledge": 0.5, + "medical_genetics": 0.5959595959595959, + "high_school_us_history": 0.4975369458128079, + "high_school_physics": 0.26, + "high_school_world_history": 0.5720338983050848, + "virology": 0.48484848484848486, + "high_school_microeconomics": 0.42616033755274263, + "econometrics": 0.3008849557522124, + "college_computer_science": 0.42424242424242425, + "high_school_biology": 0.6213592233009708, + "abstract_algebra": 0.25252525252525254, + "professional_accounting": 0.3416370106761566, + "philosophy": 0.535483870967742, + "professional_medicine": 0.5202952029520295, + "nutrition": 0.5770491803278689, + "global_facts": 0.2727272727272727, + "machine_learning": 0.36036036036036034, + "security_studies": 0.5122950819672131, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.486088379705401, + "prehistory": 0.5479876160990712, + "anatomy": 0.41044776119402987, + "human_sexuality": 0.6307692307692307, + "college_medicine": 0.47093023255813954, + "high_school_government_and_politics": 0.671875, + "college_chemistry": 0.3838383838383838, + "logical_fallacies": 0.6419753086419753, + "high_school_geography": 0.5989847715736041, + "elementary_mathematics": 0.48010610079575594, + "human_aging": 0.5675675675675675, + "college_mathematics": 0.3838383838383838, + "high_school_psychology": 0.6525735294117647, + "formal_logic": 0.352, + "high_school_statistics": 0.3813953488372093, + "international_law": 0.5583333333333333, + "high_school_mathematics": 0.22676579925650558, + "high_school_computer_science": 0.47474747474747475, + "conceptual_physics": 0.47435897435897434, + "miscellaneous": 0.6994884910485933, + "high_school_chemistry": 0.37623762376237624, + "marketing": 0.7339055793991416, + "professional_law": 0.345075016307893, + "management": 0.6176470588235294, + "college_physics": 0.32673267326732675, + "jurisprudence": 0.5514018691588785, + "world_religions": 0.7352941176470589, + "sociology": 0.73, + "us_foreign_policy": 0.5757575757575758, + "high_school_macroeconomics": 0.44473007712082263, + "computer_security": 0.6464646464646465, + "moral_scenarios": 0.27628635346756153, + "moral_disputes": 0.5478260869565217, + "electrical_engineering": 0.5, + "astronomy": 0.5496688741721855, + "college_biology": 0.5314685314685315 + } + }, + "prompt_3": { + "accuracy": 0.5222023596710762, + "category_acc": { + "high_school_european_history": 0.5792682926829268, + "business_ethics": 0.5555555555555556, + "clinical_knowledge": 0.5151515151515151, + "medical_genetics": 0.6363636363636364, + "high_school_us_history": 0.6354679802955665, + "high_school_physics": 0.3333333333333333, + "high_school_world_history": 0.6101694915254238, + "virology": 0.4484848484848485, + "high_school_microeconomics": 0.510548523206751, + "econometrics": 0.36283185840707965, + "college_computer_science": 0.41414141414141414, + "high_school_biology": 0.6634304207119741, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.3736654804270463, + "philosophy": 0.6096774193548387, + "professional_medicine": 0.5424354243542435, + "nutrition": 0.580327868852459, + "global_facts": 0.36363636363636365, + "machine_learning": 0.3783783783783784, + "security_studies": 0.6229508196721312, + "public_relations": 0.5963302752293578, + "professional_psychology": 0.5253682487725041, + "prehistory": 0.5913312693498453, + "anatomy": 0.5298507462686567, + "human_sexuality": 0.6384615384615384, + "college_medicine": 0.5232558139534884, + "high_school_government_and_politics": 0.71875, + "college_chemistry": 0.43434343434343436, + "logical_fallacies": 0.6296296296296297, + "high_school_geography": 0.6345177664974619, + "elementary_mathematics": 0.5225464190981433, + "human_aging": 0.5720720720720721, + "college_mathematics": 0.29292929292929293, + "high_school_psychology": 0.7040441176470589, + "formal_logic": 0.416, + "high_school_statistics": 0.3674418604651163, + "international_law": 0.5916666666666667, + "high_school_mathematics": 0.30855018587360594, + "high_school_computer_science": 0.48484848484848486, + "conceptual_physics": 0.42735042735042733, + "miscellaneous": 0.7340153452685422, + "high_school_chemistry": 0.38613861386138615, + "marketing": 0.7725321888412017, + "professional_law": 0.3913894324853229, + "management": 0.6372549019607843, + "college_physics": 0.297029702970297, + "jurisprudence": 0.5607476635514018, + "world_religions": 0.8058823529411765, + "sociology": 0.665, + "us_foreign_policy": 0.6666666666666666, + "high_school_macroeconomics": 0.493573264781491, + "computer_security": 0.6666666666666666, + "moral_scenarios": 0.2807606263982103, + "moral_disputes": 0.5739130434782609, + "electrical_engineering": 0.4930555555555556, + "astronomy": 0.5960264900662252, + "college_biology": 0.6153846153846154 + } + }, + "prompt_4": { + "accuracy": 0.5311405076868073, + "category_acc": { + "high_school_european_history": 0.6158536585365854, + "business_ethics": 0.5656565656565656, + "clinical_knowledge": 0.5492424242424242, + "medical_genetics": 0.6161616161616161, + "high_school_us_history": 0.6157635467980296, + "high_school_physics": 0.26666666666666666, + "high_school_world_history": 0.6991525423728814, + "virology": 0.47878787878787876, + "high_school_microeconomics": 0.4936708860759494, + "econometrics": 0.30973451327433627, + "college_computer_science": 0.3939393939393939, + "high_school_biology": 0.656957928802589, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.37722419928825623, + "philosophy": 0.6064516129032258, + "professional_medicine": 0.5756457564575646, + "nutrition": 0.5672131147540984, + "global_facts": 0.37373737373737376, + "machine_learning": 0.36036036036036034, + "security_studies": 0.6229508196721312, + "public_relations": 0.5504587155963303, + "professional_psychology": 0.5515548281505729, + "prehistory": 0.6006191950464397, + "anatomy": 0.5074626865671642, + "human_sexuality": 0.6846153846153846, + "college_medicine": 0.48255813953488375, + "high_school_government_and_politics": 0.7395833333333334, + "college_chemistry": 0.41414141414141414, + "logical_fallacies": 0.6851851851851852, + "high_school_geography": 0.6903553299492385, + "elementary_mathematics": 0.5119363395225465, + "human_aging": 0.5945945945945946, + "college_mathematics": 0.30303030303030304, + "high_school_psychology": 0.7242647058823529, + "formal_logic": 0.4, + "high_school_statistics": 0.386046511627907, + "international_law": 0.6083333333333333, + "high_school_mathematics": 0.2899628252788104, + "high_school_computer_science": 0.5454545454545454, + "conceptual_physics": 0.38461538461538464, + "miscellaneous": 0.7455242966751918, + "high_school_chemistry": 0.4405940594059406, + "marketing": 0.7725321888412017, + "professional_law": 0.410958904109589, + "management": 0.696078431372549, + "college_physics": 0.26732673267326734, + "jurisprudence": 0.5981308411214953, + "world_religions": 0.7588235294117647, + "sociology": 0.73, + "us_foreign_policy": 0.7676767676767676, + "high_school_macroeconomics": 0.4652956298200514, + "computer_security": 0.6868686868686869, + "moral_scenarios": 0.28187919463087246, + "moral_disputes": 0.5826086956521739, + "electrical_engineering": 0.5, + "astronomy": 0.6026490066225165, + "college_biology": 0.6013986013986014 + } + }, + "prompt_5": { + "accuracy": 0.5272792277440115, + "category_acc": { + "high_school_european_history": 0.6890243902439024, + "business_ethics": 0.5959595959595959, + "clinical_knowledge": 0.571969696969697, + "medical_genetics": 0.5757575757575758, + "high_school_us_history": 0.7339901477832512, + "high_school_physics": 0.29333333333333333, + "high_school_world_history": 0.7076271186440678, + "virology": 0.48484848484848486, + "high_school_microeconomics": 0.5147679324894515, + "econometrics": 0.2831858407079646, + "college_computer_science": 0.41414141414141414, + "high_school_biology": 0.6666666666666666, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.34519572953736655, + "philosophy": 0.5967741935483871, + "professional_medicine": 0.5239852398523985, + "nutrition": 0.5245901639344263, + "global_facts": 0.32323232323232326, + "machine_learning": 0.4144144144144144, + "security_studies": 0.6352459016393442, + "public_relations": 0.5596330275229358, + "professional_psychology": 0.5417348608837971, + "prehistory": 0.6099071207430341, + "anatomy": 0.4701492537313433, + "human_sexuality": 0.5923076923076923, + "college_medicine": 0.5, + "high_school_government_and_politics": 0.7708333333333334, + "college_chemistry": 0.36363636363636365, + "logical_fallacies": 0.6172839506172839, + "high_school_geography": 0.6751269035532995, + "elementary_mathematics": 0.4297082228116711, + "human_aging": 0.5990990990990991, + "college_mathematics": 0.3434343434343434, + "high_school_psychology": 0.7150735294117647, + "formal_logic": 0.352, + "high_school_statistics": 0.3488372093023256, + "international_law": 0.6416666666666667, + "high_school_mathematics": 0.2862453531598513, + "high_school_computer_science": 0.5353535353535354, + "conceptual_physics": 0.44017094017094016, + "miscellaneous": 0.7122762148337596, + "high_school_chemistry": 0.40594059405940597, + "marketing": 0.7510729613733905, + "professional_law": 0.41291585127201563, + "management": 0.6764705882352942, + "college_physics": 0.3465346534653465, + "jurisprudence": 0.616822429906542, + "world_religions": 0.7941176470588235, + "sociology": 0.785, + "us_foreign_policy": 0.7878787878787878, + "high_school_macroeconomics": 0.4832904884318766, + "computer_security": 0.6666666666666666, + "moral_scenarios": 0.2606263982102908, + "moral_disputes": 0.6, + "electrical_engineering": 0.5277777777777778, + "astronomy": 0.5629139072847682, + "college_biology": 0.6223776223776224 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5282317979197623 + }, + "prompt_2": { + "accuracy": 0.30683506686478457 + }, + "prompt_3": { + "accuracy": 0.4063893016344725 + }, + "prompt_4": { + "accuracy": 0.524517087667162 + }, + "prompt_5": { + "accuracy": 0.4450222882615156 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5379825653798257, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.5416666666666666, + "computer_architecture": 0.5384615384615384, + "college_programming": 0.40476190476190477, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.4482758620689655, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.6206896551724138, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.5, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.5833333333333334, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.8076923076923077, + "middle_school_physics": 0.625, + "middle_school_chemistry": 0.84, + "veterinary_medicine": 0.5, + "college_economics": 0.38333333333333336, + "business_administration": 0.5, + "marxism": 0.7083333333333334, + "mao_zedong_thought": 0.7586206896551724, + "education_science": 0.5882352941176471, + "teacher_qualification": 0.7346938775510204, + "high_school_politics": 0.7916666666666666, + "high_school_geography": 0.75, + "middle_school_politics": 0.6923076923076923, + "middle_school_geography": 0.6470588235294118, + "modern_chinese_history": 0.6428571428571429, + "ideological_and_moral_cultivation": 0.7916666666666666, + "logic": 0.6296296296296297, + "law": 0.4827586206896552, + "chinese_language_and_literature": 0.7142857142857143, + "art_studies": 0.631578947368421, + "professional_tour_guide": 0.6470588235294118, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.4166666666666667, + "high_school_history": 0.76, + "middle_school_history": 0.7037037037037037, + "civil_servant": 0.6538461538461539, + "sports_science": 0.4583333333333333, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.5, + "clinical_medicine": 0.5555555555555556, + "urban_and_rural_planner": 0.6470588235294118, + "accountant": 0.4444444444444444, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.5, + "tax_accountant": 0.42592592592592593, + "physician": 0.5740740740740741 + } + }, + "prompt_2": { + "accuracy": 0.30635118306351183, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.21428571428571427, + "college_physics": 0.08333333333333333, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.09523809523809523, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.2413793103448276, + "high_school_mathematics": 0.13043478260869565, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.5384615384615384, + "middle_school_physics": 0.25, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.23333333333333334, + "business_administration": 0.2894736842105263, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.2857142857142857, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.375, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.5294117647058824, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.3333333333333333, + "logic": 0.2962962962962963, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.2631578947368421, + "professional_tour_guide": 0.4117647058823529, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.08333333333333333, + "high_school_history": 0.32, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.34615384615384615, + "sports_science": 0.20833333333333334, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.25, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.37037037037037035, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.2037037037037037, + "physician": 0.37037037037037035 + } + }, + "prompt_3": { + "accuracy": 0.42092154420921546, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.5, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.2857142857142857, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.08695652173913043, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.375, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.5, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.7307692307692307, + "middle_school_physics": 0.5833333333333334, + "middle_school_chemistry": 0.52, + "veterinary_medicine": 0.4642857142857143, + "college_economics": 0.38333333333333336, + "business_administration": 0.5, + "marxism": 0.625, + "mao_zedong_thought": 0.6551724137931034, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.5510204081632653, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.7083333333333334, + "middle_school_politics": 0.46153846153846156, + "middle_school_geography": 0.5882352941176471, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.25925925925925924, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.39285714285714285, + "art_studies": 0.5263157894736842, + "professional_tour_guide": 0.47058823529411764, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.4166666666666667, + "high_school_history": 0.56, + "middle_school_history": 0.5555555555555556, + "civil_servant": 0.25, + "sports_science": 0.375, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.4444444444444444, + "urban_and_rural_planner": 0.5098039215686274, + "accountant": 0.2777777777777778, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.3888888888888889, + "tax_accountant": 0.37037037037037035, + "physician": 0.5555555555555556 + } + }, + "prompt_4": { + "accuracy": 0.5342465753424658, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.40476190476190477, + "college_physics": 0.375, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.6206896551724138, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.5, + "high_school_chemistry": 0.4166666666666667, + "high_school_biology": 0.4583333333333333, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.8076923076923077, + "middle_school_physics": 0.625, + "middle_school_chemistry": 0.76, + "veterinary_medicine": 0.5, + "college_economics": 0.4666666666666667, + "business_administration": 0.47368421052631576, + "marxism": 0.8333333333333334, + "mao_zedong_thought": 0.7931034482758621, + "education_science": 0.5588235294117647, + "teacher_qualification": 0.7959183673469388, + "high_school_politics": 0.75, + "high_school_geography": 0.7083333333333334, + "middle_school_politics": 0.6153846153846154, + "middle_school_geography": 0.7058823529411765, + "modern_chinese_history": 0.6785714285714286, + "ideological_and_moral_cultivation": 0.8333333333333334, + "logic": 0.5925925925925926, + "law": 0.4482758620689655, + "chinese_language_and_literature": 0.75, + "art_studies": 0.7368421052631579, + "professional_tour_guide": 0.6470588235294118, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.4166666666666667, + "high_school_history": 0.76, + "middle_school_history": 0.6666666666666666, + "civil_servant": 0.5384615384615384, + "sports_science": 0.5833333333333334, + "plant_protection": 0.5555555555555556, + "basic_medicine": 0.5833333333333334, + "clinical_medicine": 0.5185185185185185, + "urban_and_rural_planner": 0.7058823529411765, + "accountant": 0.4074074074074074, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.4722222222222222, + "tax_accountant": 0.35185185185185186, + "physician": 0.5740740740740741 + } + }, + "prompt_5": { + "accuracy": 0.42714819427148193, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.38095238095238093, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.5172413793103449, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.25, + "high_school_chemistry": 0.375, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.6153846153846154, + "middle_school_physics": 0.4583333333333333, + "middle_school_chemistry": 0.68, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.36666666666666664, + "business_administration": 0.2894736842105263, + "marxism": 0.625, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.4411764705882353, + "teacher_qualification": 0.6326530612244898, + "high_school_politics": 0.6666666666666666, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.6153846153846154, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.6296296296296297, + "law": 0.4827586206896552, + "chinese_language_and_literature": 0.5357142857142857, + "art_studies": 0.6052631578947368, + "professional_tour_guide": 0.5882352941176471, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.4166666666666667, + "high_school_history": 0.56, + "middle_school_history": 0.6296296296296297, + "civil_servant": 0.4807692307692308, + "sports_science": 0.3333333333333333, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.48148148148148145, + "urban_and_rural_planner": 0.6078431372549019, + "accountant": 0.3148148148148148, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.2222222222222222, + "physician": 0.5185185185185185 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5698924731182796 + }, + "prompt_2": { + "accuracy": 0.32974910394265233 + }, + "prompt_3": { + "accuracy": 0.5376344086021505 + }, + "prompt_4": { + "accuracy": 0.5806451612903226 + }, + "prompt_5": { + "accuracy": 0.4946236559139785 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5641512692108445, + "category_acc": { + "agronomy": 0.5384615384615384, + "anatomy": 0.42567567567567566, + "ancient_chinese": 0.3902439024390244, + "arts": 0.79375, + "astronomy": 0.36363636363636365, + "business_ethics": 0.5550239234449761, + "chinese_civil_service_exam": 0.4875, + "chinese_driving_rule": 0.7022900763358778, + "chinese_food_culture": 0.5588235294117647, + "chinese_foreign_policy": 0.6448598130841121, + "chinese_history": 0.7337461300309598, + "chinese_literature": 0.5441176470588235, + "chinese_teacher_qualification": 0.7374301675977654, + "clinical_knowledge": 0.540084388185654, + "college_actuarial_science": 0.32075471698113206, + "college_education": 0.6822429906542056, + "college_engineering_hydrology": 0.4339622641509434, + "college_law": 0.5370370370370371, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.46226415094339623, + "college_medicine": 0.575091575091575, + "computer_science": 0.5490196078431373, + "computer_security": 0.6257309941520468, + "conceptual_physics": 0.5850340136054422, + "construction_project_management": 0.45323741007194246, + "economics": 0.6163522012578616, + "education": 0.6625766871165644, + "electrical_engineering": 0.4883720930232558, + "elementary_chinese": 0.5714285714285714, + "elementary_commonsense": 0.5959595959595959, + "elementary_information_and_technology": 0.7394957983193278, + "elementary_mathematics": 0.3826086956521739, + "ethnology": 0.6148148148148148, + "food_science": 0.5594405594405595, + "genetics": 0.4772727272727273, + "global_facts": 0.6375838926174496, + "high_school_biology": 0.5029585798816568, + "high_school_chemistry": 0.4318181818181818, + "high_school_geography": 0.6101694915254238, + "high_school_mathematics": 0.3170731707317073, + "high_school_physics": 0.39090909090909093, + "high_school_politics": 0.6013986013986014, + "human_sexuality": 0.5793650793650794, + "international_law": 0.3945945945945946, + "journalism": 0.5465116279069767, + "jurisprudence": 0.5523114355231143, + "legal_and_moral_basis": 0.8271028037383178, + "logical": 0.5040650406504065, + "machine_learning": 0.45901639344262296, + "management": 0.6190476190476191, + "marketing": 0.5944444444444444, + "marxist_theory": 0.6666666666666666, + "modern_chinese": 0.41379310344827586, + "nutrition": 0.5586206896551724, + "philosophy": 0.5904761904761905, + "professional_accounting": 0.6285714285714286, + "professional_law": 0.46919431279620855, + "professional_medicine": 0.5026595744680851, + "professional_psychology": 0.6551724137931034, + "public_relations": 0.5632183908045977, + "security_study": 0.6444444444444445, + "sociology": 0.6017699115044248, + "sports_science": 0.593939393939394, + "traditional_chinese_medicine": 0.5459459459459459, + "virology": 0.650887573964497, + "world_history": 0.6956521739130435, + "world_religions": 0.7 + } + }, + "prompt_2": { + "accuracy": 0.3366430668278363, + "category_acc": { + "agronomy": 0.38461538461538464, + "anatomy": 0.3918918918918919, + "ancient_chinese": 0.2865853658536585, + "arts": 0.4625, + "astronomy": 0.24242424242424243, + "business_ethics": 0.32057416267942584, + "chinese_civil_service_exam": 0.25625, + "chinese_driving_rule": 0.31297709923664124, + "chinese_food_culture": 0.3602941176470588, + "chinese_foreign_policy": 0.3644859813084112, + "chinese_history": 0.3281733746130031, + "chinese_literature": 0.35784313725490197, + "chinese_teacher_qualification": 0.36312849162011174, + "clinical_knowledge": 0.2320675105485232, + "college_actuarial_science": 0.32075471698113206, + "college_education": 0.4766355140186916, + "college_engineering_hydrology": 0.29245283018867924, + "college_law": 0.2777777777777778, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.330188679245283, + "college_medicine": 0.326007326007326, + "computer_science": 0.3480392156862745, + "computer_security": 0.3508771929824561, + "conceptual_physics": 0.3129251700680272, + "construction_project_management": 0.2158273381294964, + "economics": 0.29559748427672955, + "education": 0.34355828220858897, + "electrical_engineering": 0.29651162790697677, + "elementary_chinese": 0.35714285714285715, + "elementary_commonsense": 0.35858585858585856, + "elementary_information_and_technology": 0.4327731092436975, + "elementary_mathematics": 0.3217391304347826, + "ethnology": 0.2740740740740741, + "food_science": 0.34965034965034963, + "genetics": 0.30113636363636365, + "global_facts": 0.30201342281879195, + "high_school_biology": 0.2958579881656805, + "high_school_chemistry": 0.23484848484848486, + "high_school_geography": 0.3050847457627119, + "high_school_mathematics": 0.2682926829268293, + "high_school_physics": 0.32727272727272727, + "high_school_politics": 0.34965034965034963, + "human_sexuality": 0.36507936507936506, + "international_law": 0.33513513513513515, + "journalism": 0.3488372093023256, + "jurisprudence": 0.35523114355231145, + "legal_and_moral_basis": 0.4953271028037383, + "logical": 0.35772357723577236, + "machine_learning": 0.28688524590163933, + "management": 0.35714285714285715, + "marketing": 0.37777777777777777, + "marxist_theory": 0.3439153439153439, + "modern_chinese": 0.25, + "nutrition": 0.3931034482758621, + "philosophy": 0.3523809523809524, + "professional_accounting": 0.2571428571428571, + "professional_law": 0.27014218009478674, + "professional_medicine": 0.3191489361702128, + "professional_psychology": 0.38362068965517243, + "public_relations": 0.3103448275862069, + "security_study": 0.3851851851851852, + "sociology": 0.42920353982300885, + "sports_science": 0.43636363636363634, + "traditional_chinese_medicine": 0.41621621621621624, + "virology": 0.33727810650887574, + "world_history": 0.2857142857142857, + "world_religions": 0.36875 + } + }, + "prompt_3": { + "accuracy": 0.467881194957693, + "category_acc": { + "agronomy": 0.514792899408284, + "anatomy": 0.4864864864864865, + "ancient_chinese": 0.2926829268292683, + "arts": 0.6625, + "astronomy": 0.3696969696969697, + "business_ethics": 0.49282296650717705, + "chinese_civil_service_exam": 0.375, + "chinese_driving_rule": 0.6335877862595419, + "chinese_food_culture": 0.36764705882352944, + "chinese_foreign_policy": 0.35514018691588783, + "chinese_history": 0.43962848297213625, + "chinese_literature": 0.45098039215686275, + "chinese_teacher_qualification": 0.553072625698324, + "clinical_knowledge": 0.4641350210970464, + "college_actuarial_science": 0.3113207547169811, + "college_education": 0.5887850467289719, + "college_engineering_hydrology": 0.4339622641509434, + "college_law": 0.3333333333333333, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.44339622641509435, + "college_medicine": 0.5164835164835165, + "computer_science": 0.4362745098039216, + "computer_security": 0.47953216374269003, + "conceptual_physics": 0.42857142857142855, + "construction_project_management": 0.31654676258992803, + "economics": 0.5849056603773585, + "education": 0.5030674846625767, + "electrical_engineering": 0.436046511627907, + "elementary_chinese": 0.47619047619047616, + "elementary_commonsense": 0.5454545454545454, + "elementary_information_and_technology": 0.6638655462184874, + "elementary_mathematics": 0.34782608695652173, + "ethnology": 0.5259259259259259, + "food_science": 0.5034965034965035, + "genetics": 0.4659090909090909, + "global_facts": 0.5033557046979866, + "high_school_biology": 0.39644970414201186, + "high_school_chemistry": 0.2878787878787879, + "high_school_geography": 0.4406779661016949, + "high_school_mathematics": 0.2682926829268293, + "high_school_physics": 0.35454545454545455, + "high_school_politics": 0.4195804195804196, + "human_sexuality": 0.5238095238095238, + "international_law": 0.42702702702702705, + "journalism": 0.46511627906976744, + "jurisprudence": 0.44525547445255476, + "legal_and_moral_basis": 0.6401869158878505, + "logical": 0.34146341463414637, + "machine_learning": 0.3524590163934426, + "management": 0.5380952380952381, + "marketing": 0.5722222222222222, + "marxist_theory": 0.5978835978835979, + "modern_chinese": 0.3706896551724138, + "nutrition": 0.47586206896551725, + "philosophy": 0.580952380952381, + "professional_accounting": 0.5314285714285715, + "professional_law": 0.35545023696682465, + "professional_medicine": 0.47074468085106386, + "professional_psychology": 0.47413793103448276, + "public_relations": 0.47701149425287354, + "security_study": 0.5555555555555556, + "sociology": 0.5221238938053098, + "sports_science": 0.43636363636363634, + "traditional_chinese_medicine": 0.4918918918918919, + "virology": 0.6035502958579881, + "world_history": 0.391304347826087, + "world_religions": 0.575 + } + }, + "prompt_4": { + "accuracy": 0.5605249525125194, + "category_acc": { + "agronomy": 0.5443786982248521, + "anatomy": 0.4527027027027027, + "ancient_chinese": 0.3780487804878049, + "arts": 0.8125, + "astronomy": 0.40606060606060607, + "business_ethics": 0.583732057416268, + "chinese_civil_service_exam": 0.475, + "chinese_driving_rule": 0.7099236641221374, + "chinese_food_culture": 0.5367647058823529, + "chinese_foreign_policy": 0.616822429906542, + "chinese_history": 0.7306501547987616, + "chinese_literature": 0.5343137254901961, + "chinese_teacher_qualification": 0.7150837988826816, + "clinical_knowledge": 0.5147679324894515, + "college_actuarial_science": 0.3113207547169811, + "college_education": 0.7289719626168224, + "college_engineering_hydrology": 0.5471698113207547, + "college_law": 0.5185185185185185, + "college_mathematics": 0.29523809523809524, + "college_medical_statistics": 0.4339622641509434, + "college_medicine": 0.5860805860805861, + "computer_science": 0.5490196078431373, + "computer_security": 0.5964912280701754, + "conceptual_physics": 0.5986394557823129, + "construction_project_management": 0.4028776978417266, + "economics": 0.5911949685534591, + "education": 0.6503067484662577, + "electrical_engineering": 0.5290697674418605, + "elementary_chinese": 0.5634920634920635, + "elementary_commonsense": 0.5959595959595959, + "elementary_information_and_technology": 0.6512605042016807, + "elementary_mathematics": 0.3826086956521739, + "ethnology": 0.6074074074074074, + "food_science": 0.5734265734265734, + "genetics": 0.48295454545454547, + "global_facts": 0.6174496644295302, + "high_school_biology": 0.4970414201183432, + "high_school_chemistry": 0.3787878787878788, + "high_school_geography": 0.5847457627118644, + "high_school_mathematics": 0.34146341463414637, + "high_school_physics": 0.42727272727272725, + "high_school_politics": 0.5804195804195804, + "human_sexuality": 0.5634920634920635, + "international_law": 0.42702702702702705, + "journalism": 0.5523255813953488, + "jurisprudence": 0.5766423357664233, + "legal_and_moral_basis": 0.8457943925233645, + "logical": 0.44715447154471544, + "machine_learning": 0.47540983606557374, + "management": 0.6523809523809524, + "marketing": 0.6, + "marxist_theory": 0.6666666666666666, + "modern_chinese": 0.3879310344827586, + "nutrition": 0.5172413793103449, + "philosophy": 0.6190476190476191, + "professional_accounting": 0.6171428571428571, + "professional_law": 0.46919431279620855, + "professional_medicine": 0.5053191489361702, + "professional_psychology": 0.6551724137931034, + "public_relations": 0.5057471264367817, + "security_study": 0.6222222222222222, + "sociology": 0.5796460176991151, + "sports_science": 0.5575757575757576, + "traditional_chinese_medicine": 0.518918918918919, + "virology": 0.6272189349112426, + "world_history": 0.7080745341614907, + "world_religions": 0.70625 + } + }, + "prompt_5": { + "accuracy": 0.4585563805905716, + "category_acc": { + "agronomy": 0.38461538461538464, + "anatomy": 0.41216216216216217, + "ancient_chinese": 0.3231707317073171, + "arts": 0.68125, + "astronomy": 0.3878787878787879, + "business_ethics": 0.4258373205741627, + "chinese_civil_service_exam": 0.41875, + "chinese_driving_rule": 0.6412213740458015, + "chinese_food_culture": 0.5147058823529411, + "chinese_foreign_policy": 0.4392523364485981, + "chinese_history": 0.541795665634675, + "chinese_literature": 0.4068627450980392, + "chinese_teacher_qualification": 0.6145251396648045, + "clinical_knowledge": 0.4936708860759494, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.5327102803738317, + "college_engineering_hydrology": 0.4056603773584906, + "college_law": 0.4166666666666667, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.41509433962264153, + "college_medicine": 0.4835164835164835, + "computer_science": 0.49019607843137253, + "computer_security": 0.543859649122807, + "conceptual_physics": 0.48299319727891155, + "construction_project_management": 0.35251798561151076, + "economics": 0.4088050314465409, + "education": 0.558282208588957, + "electrical_engineering": 0.4418604651162791, + "elementary_chinese": 0.5238095238095238, + "elementary_commonsense": 0.4898989898989899, + "elementary_information_and_technology": 0.634453781512605, + "elementary_mathematics": 0.3652173913043478, + "ethnology": 0.48148148148148145, + "food_science": 0.4755244755244755, + "genetics": 0.44886363636363635, + "global_facts": 0.4697986577181208, + "high_school_biology": 0.46153846153846156, + "high_school_chemistry": 0.4015151515151515, + "high_school_geography": 0.3644067796610169, + "high_school_mathematics": 0.2865853658536585, + "high_school_physics": 0.33636363636363636, + "high_school_politics": 0.4195804195804196, + "human_sexuality": 0.4365079365079365, + "international_law": 0.3783783783783784, + "journalism": 0.436046511627907, + "jurisprudence": 0.40145985401459855, + "legal_and_moral_basis": 0.5794392523364486, + "logical": 0.44715447154471544, + "machine_learning": 0.4098360655737705, + "management": 0.5238095238095238, + "marketing": 0.5277777777777778, + "marxist_theory": 0.47619047619047616, + "modern_chinese": 0.3275862068965517, + "nutrition": 0.43448275862068964, + "philosophy": 0.44761904761904764, + "professional_accounting": 0.4514285714285714, + "professional_law": 0.33649289099526064, + "professional_medicine": 0.38563829787234044, + "professional_psychology": 0.5474137931034483, + "public_relations": 0.41954022988505746, + "security_study": 0.4666666666666667, + "sociology": 0.5, + "sports_science": 0.46060606060606063, + "traditional_chinese_medicine": 0.4810810810810811, + "virology": 0.5502958579881657, + "world_history": 0.484472049689441, + "world_religions": 0.5625 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2727272727272727 + }, + "prompt_2": { + "accuracy": 0.18181818181818182 + }, + "prompt_3": { + "accuracy": 0.3939393939393939 + }, + "prompt_4": { + "accuracy": 0.24242424242424243 + }, + "prompt_5": { + "accuracy": 0.3333333333333333 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.25681818181818183 + }, + "prompt_2": { + "accuracy": 0.2636363636363636 + }, + "prompt_3": { + "accuracy": 0.1590909090909091 + }, + "prompt_4": { + "accuracy": 0.45454545454545453 + }, + "prompt_5": { + "accuracy": 0.3386363636363636 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3586440677966102 + }, + "prompt_2": { + "accuracy": 0.3586440677966102 + }, + "prompt_3": { + "accuracy": 0.3338983050847458 + }, + "prompt_4": { + "accuracy": 0.3501694915254237 + }, + "prompt_5": { + "accuracy": 0.3528813559322034 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8268511593118923 + }, + "prompt_2": { + "accuracy": 0.8171278982797308 + }, + "prompt_3": { + "accuracy": 0.805908750934929 + }, + "prompt_4": { + "accuracy": 0.8216155572176514 + }, + "prompt_5": { + "accuracy": 0.7378459237097981 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6286134247917687 + }, + "prompt_2": { + "accuracy": 0.7280744732974033 + }, + "prompt_3": { + "accuracy": 0.7594316511513963 + }, + "prompt_4": { + "accuracy": 0.6810387065164135 + }, + "prompt_5": { + "accuracy": 0.6408623223909848 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2447230860779559, + "rouge2": 0.08991768211374013, + "rougeL": 0.18374994878348822, + "avg_rouge": 0.17279690565839476 + }, + "prompt_2": { + "rouge1": 0.20909384131086023, + "rouge2": 0.0743162027862913, + "rougeL": 0.1515914962350016, + "avg_rouge": 0.14500051344405104 + }, + "prompt_3": { + "rouge1": 0.23902514549953788, + "rouge2": 0.08228941845235581, + "rougeL": 0.18040610814000127, + "avg_rouge": 0.16724022403063166 + }, + "prompt_4": { + "rouge1": 0.24586842516716625, + "rouge2": 0.08976234321614976, + "rougeL": 0.18613606729886192, + "avg_rouge": 0.17392227856072598 + }, + "prompt_5": { + "rouge1": 0.2432731500007881, + "rouge2": 0.08604339770852643, + "rougeL": 0.18115184348718974, + "avg_rouge": 0.17015613039883473 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2052743262891212, + "rouge2": 0.06203995326141301, + "rougeL": 0.1501559303457005, + "avg_rouge": 0.13915673663207825 + }, + "prompt_2": { + "rouge1": 0.2008063479665273, + "rouge2": 0.05899298806556013, + "rougeL": 0.14606807609453126, + "avg_rouge": 0.13528913737553958 + }, + "prompt_3": { + "rouge1": 0.18365921197277854, + "rouge2": 0.05688731369243091, + "rougeL": 0.13294583268732826, + "avg_rouge": 0.12449745278417923 + }, + "prompt_4": { + "rouge1": 0.21645895715152577, + "rouge2": 0.06319194822999684, + "rougeL": 0.15873792994084954, + "avg_rouge": 0.14612961177412406 + }, + "prompt_5": { + "rouge1": 0.1994949421207918, + "rouge2": 0.060184436908365495, + "rougeL": 0.14537280323595006, + "avg_rouge": 0.1350173940883691 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7924311926605505 + }, + "prompt_2": { + "accuracy": 0.8061926605504587 + }, + "prompt_3": { + "accuracy": 0.7958715596330275 + }, + "prompt_4": { + "accuracy": 0.8107798165137615 + }, + "prompt_5": { + "accuracy": 0.8142201834862385 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.46308724832214765 + }, + "prompt_2": { + "accuracy": 0.6011505273250239 + }, + "prompt_3": { + "accuracy": 0.6922339405560882 + }, + "prompt_4": { + "accuracy": 0.45445829338446786 + }, + "prompt_5": { + "accuracy": 0.663470757430489 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5575 + }, + "prompt_2": { + "accuracy": 0.5695 + }, + "prompt_3": { + "accuracy": 0.5685 + }, + "prompt_4": { + "accuracy": 0.5155 + }, + "prompt_5": { + "accuracy": 0.565 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4145 + }, + "prompt_2": { + "accuracy": 0.4365 + }, + "prompt_3": { + "accuracy": 0.4595 + }, + "prompt_4": { + "accuracy": 0.4175 + }, + "prompt_5": { + "accuracy": 0.4665 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5725 + }, + "prompt_2": { + "accuracy": 0.496 + }, + "prompt_3": { + "accuracy": 0.5955 + }, + "prompt_4": { + "accuracy": 0.5365 + }, + "prompt_5": { + "accuracy": 0.51 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4647887323943662 + }, + "prompt_2": { + "accuracy": 0.4647887323943662 + }, + "prompt_3": { + "accuracy": 0.4788732394366197 + }, + "prompt_4": { + "accuracy": 0.4507042253521127 + }, + "prompt_5": { + "accuracy": 0.5070422535211268 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5090252707581228 + }, + "prompt_2": { + "accuracy": 0.5090252707581228 + }, + "prompt_3": { + "accuracy": 0.6028880866425993 + }, + "prompt_4": { + "accuracy": 0.5234657039711191 + }, + "prompt_5": { + "accuracy": 0.5740072202166066 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.46078431372549017 + }, + "prompt_2": { + "accuracy": 0.4803921568627451 + }, + "prompt_3": { + "accuracy": 0.6004901960784313 + }, + "prompt_4": { + "accuracy": 0.49754901960784315 + }, + "prompt_5": { + "accuracy": 0.5171568627450981 + } } }, "five_shot": { @@ -8116,53 +69495,1733 @@ "model_link": "https://huggingface.co/baichuan-inc/Baichuan2-13B-Chat", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.5028571428571428, + "language_acc": { + "Malay": 0.3333333333333333, + "English": 0.6733333333333333, + "Vietnamese": 0.41333333333333333, + "Spanish": 0.58, + "Indonesian": 0.4, + "Filipino": 0.4533333333333333, + "Chinese": 0.6666666666666666 + }, + "consistency_score_2": 0.45079365079365086, + "consistency_score_3": 0.2624761904761905, + "consistency_score_4": 0.17447619047619048, + "consistency_score_5": 0.1273015873015873, + "consistency_score_6": 0.09904761904761904, + "consistency_score_7": 0.08, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.4066666666666667, + "Malay,Vietnamese": 0.37333333333333335, + "Malay,Spanish": 0.34, + "Malay,Indonesian": 0.5733333333333334, + "Malay,Filipino": 0.44666666666666666, + "Malay,Chinese": 0.36, + "English,Vietnamese": 0.44, + "English,Spanish": 0.6266666666666667, + "English,Indonesian": 0.43333333333333335, + "English,Filipino": 0.47333333333333333, + "English,Chinese": 0.6733333333333333, + "Vietnamese,Spanish": 0.4066666666666667, + "Vietnamese,Indonesian": 0.38, + "Vietnamese,Filipino": 0.3466666666666667, + "Vietnamese,Chinese": 0.41333333333333333, + "Spanish,Indonesian": 0.43333333333333335, + "Spanish,Filipino": 0.4, + "Spanish,Chinese": 0.5866666666666667, + "Indonesian,Filipino": 0.44666666666666666, + "Indonesian,Chinese": 0.44666666666666666, + "Filipino,Chinese": 0.46 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.22, + "Malay,English,Spanish": 0.25333333333333335, + "Malay,English,Indonesian": 0.2733333333333333, + "Malay,English,Filipino": 0.24666666666666667, + "Malay,English,Chinese": 0.2866666666666667, + "Malay,Vietnamese,Spanish": 0.2, + "Malay,Vietnamese,Indonesian": 0.26, + "Malay,Vietnamese,Filipino": 0.18666666666666668, + "Malay,Vietnamese,Chinese": 0.19333333333333333, + "Malay,Spanish,Indonesian": 0.25333333333333335, + "Malay,Spanish,Filipino": 0.2, + "Malay,Spanish,Chinese": 0.22, + "Malay,Indonesian,Filipino": 0.30666666666666664, + "Malay,Indonesian,Chinese": 0.25333333333333335, + "Malay,Filipino,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish": 0.3, + "English,Vietnamese,Indonesian": 0.20666666666666667, + "English,Vietnamese,Filipino": 0.23333333333333334, + "English,Vietnamese,Chinese": 0.34, + "English,Spanish,Indonesian": 0.3, + "English,Spanish,Filipino": 0.32, + "English,Spanish,Chinese": 0.5066666666666667, + "English,Indonesian,Filipino": 0.26, + "English,Indonesian,Chinese": 0.35333333333333333, + "English,Filipino,Chinese": 0.37333333333333335, + "Vietnamese,Spanish,Indonesian": 0.21333333333333335, + "Vietnamese,Spanish,Filipino": 0.19333333333333333, + "Vietnamese,Spanish,Chinese": 0.28, + "Vietnamese,Indonesian,Filipino": 0.19333333333333333, + "Vietnamese,Indonesian,Chinese": 0.21333333333333335, + "Vietnamese,Filipino,Chinese": 0.22, + "Spanish,Indonesian,Filipino": 0.22, + "Spanish,Indonesian,Chinese": 0.30666666666666664, + "Spanish,Filipino,Chinese": 0.30666666666666664, + "Indonesian,Filipino,Chinese": 0.26 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian": 0.15333333333333332, + "Malay,English,Vietnamese,Filipino": 0.14, + "Malay,English,Vietnamese,Chinese": 0.18, + "Malay,English,Spanish,Indonesian": 0.18666666666666668, + "Malay,English,Spanish,Filipino": 0.17333333333333334, + "Malay,English,Spanish,Chinese": 0.20666666666666667, + "Malay,English,Indonesian,Filipino": 0.18666666666666668, + "Malay,English,Indonesian,Chinese": 0.22, + "Malay,English,Filipino,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.14666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.14, + "Malay,Vietnamese,Filipino,Chinese": 0.13333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.16666666666666666, + "Malay,Spanish,Filipino,Chinese": 0.14666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.17333333333333334, + "English,Vietnamese,Spanish,Indonesian": 0.16, + "English,Vietnamese,Spanish,Filipino": 0.16666666666666666, + "English,Vietnamese,Spanish,Chinese": 0.25333333333333335, + "English,Vietnamese,Indonesian,Filipino": 0.14666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.19333333333333333, + "English,Vietnamese,Filipino,Chinese": 0.2, + "English,Spanish,Indonesian,Filipino": 0.19333333333333333, + "English,Spanish,Indonesian,Chinese": 0.2733333333333333, + "English,Spanish,Filipino,Chinese": 0.2866666666666667, + "English,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino": 0.11333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Filipino,Chinese": 0.15333333333333332, + "Vietnamese,Indonesian,Filipino,Chinese": 0.14, + "Spanish,Indonesian,Filipino,Chinese": 0.18 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.11333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.10666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.13333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.12, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.14, + "Malay,English,Vietnamese,Filipino,Chinese": 0.12666666666666668, + "Malay,English,Spanish,Indonesian,Filipino": 0.13333333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.16, + "Malay,English,Spanish,Filipino,Chinese": 0.14666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.10666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.14, + "English,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.08666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08 + } + }, + "AC3_2": 0.4754042228909898, + "AC3_3": 0.34491644737013555, + "AC3_4": 0.25906475972392484, + "AC3_5": 0.20316948539417135, + "AC3_6": 0.16549728749510836, + "AC3_7": 0.13803921566259134 + }, + "prompt_2": { + "overall_acc": 0.48857142857142855, + "language_acc": { + "Malay": 0.38, + "English": 0.7066666666666667, + "Vietnamese": 0.36666666666666664, + "Spanish": 0.6066666666666667, + "Indonesian": 0.38, + "Filipino": 0.37333333333333335, + "Chinese": 0.6066666666666667 + }, + "consistency_score_2": 0.4717460317460319, + "consistency_score_3": 0.28590476190476194, + "consistency_score_4": 0.19428571428571434, + "consistency_score_5": 0.1415873015873016, + "consistency_score_6": 0.10857142857142858, + "consistency_score_7": 0.08666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.4533333333333333, + "Malay,Vietnamese": 0.4533333333333333, + "Malay,Spanish": 0.43333333333333335, + "Malay,Indonesian": 0.56, + "Malay,Filipino": 0.4866666666666667, + "Malay,Chinese": 0.4066666666666667, + "English,Vietnamese": 0.4, + "English,Spanish": 0.6333333333333333, + "English,Indonesian": 0.46, + "English,Filipino": 0.5, + "English,Chinese": 0.6733333333333333, + "Vietnamese,Spanish": 0.44, + "Vietnamese,Indonesian": 0.41333333333333333, + "Vietnamese,Filipino": 0.3466666666666667, + "Vietnamese,Chinese": 0.38666666666666666, + "Spanish,Indonesian": 0.49333333333333335, + "Spanish,Filipino": 0.4, + "Spanish,Chinese": 0.6066666666666667, + "Indonesian,Filipino": 0.46, + "Indonesian,Chinese": 0.4533333333333333, + "Filipino,Chinese": 0.44666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.24, + "Malay,English,Spanish": 0.32666666666666666, + "Malay,English,Indonesian": 0.3, + "Malay,English,Filipino": 0.29333333333333333, + "Malay,English,Chinese": 0.3466666666666667, + "Malay,Vietnamese,Spanish": 0.26666666666666666, + "Malay,Vietnamese,Indonesian": 0.30666666666666664, + "Malay,Vietnamese,Filipino": 0.23333333333333334, + "Malay,Vietnamese,Chinese": 0.22666666666666666, + "Malay,Spanish,Indonesian": 0.3, + "Malay,Spanish,Filipino": 0.24, + "Malay,Spanish,Chinese": 0.30666666666666664, + "Malay,Indonesian,Filipino": 0.31333333333333335, + "Malay,Indonesian,Chinese": 0.2733333333333333, + "Malay,Filipino,Chinese": 0.24666666666666667, + "English,Vietnamese,Spanish": 0.3, + "English,Vietnamese,Indonesian": 0.22666666666666666, + "English,Vietnamese,Filipino": 0.22, + "English,Vietnamese,Chinese": 0.32, + "English,Spanish,Indonesian": 0.3466666666666667, + "English,Spanish,Filipino": 0.32666666666666666, + "English,Spanish,Chinese": 0.5066666666666667, + "English,Indonesian,Filipino": 0.28, + "English,Indonesian,Chinese": 0.36, + "English,Filipino,Chinese": 0.36666666666666664, + "Vietnamese,Spanish,Indonesian": 0.24666666666666667, + "Vietnamese,Spanish,Filipino": 0.20666666666666667, + "Vietnamese,Spanish,Chinese": 0.3, + "Vietnamese,Indonesian,Filipino": 0.21333333333333335, + "Vietnamese,Indonesian,Chinese": 0.22, + "Vietnamese,Filipino,Chinese": 0.2, + "Spanish,Indonesian,Filipino": 0.24666666666666667, + "Spanish,Indonesian,Chinese": 0.32666666666666666, + "Spanish,Filipino,Chinese": 0.30666666666666664, + "Indonesian,Filipino,Chinese": 0.26666666666666666 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.2, + "Malay,English,Vietnamese,Indonesian": 0.19333333333333333, + "Malay,English,Vietnamese,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Chinese": 0.2, + "Malay,English,Spanish,Indonesian": 0.24, + "Malay,English,Spanish,Filipino": 0.2, + "Malay,English,Spanish,Chinese": 0.2866666666666667, + "Malay,English,Indonesian,Filipino": 0.2, + "Malay,English,Indonesian,Chinese": 0.24, + "Malay,English,Filipino,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.2, + "Malay,Vietnamese,Spanish,Filipino": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Indonesian,Filipino": 0.18, + "Malay,Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Filipino,Chinese": 0.14, + "Malay,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,Spanish,Indonesian,Chinese": 0.22, + "Malay,Spanish,Filipino,Chinese": 0.18666666666666668, + "Malay,Indonesian,Filipino,Chinese": 0.17333333333333334, + "English,Vietnamese,Spanish,Indonesian": 0.18, + "English,Vietnamese,Spanish,Filipino": 0.17333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.25333333333333335, + "English,Vietnamese,Indonesian,Filipino": 0.14, + "English,Vietnamese,Indonesian,Chinese": 0.19333333333333333, + "English,Vietnamese,Filipino,Chinese": 0.17333333333333334, + "English,Spanish,Indonesian,Filipino": 0.20666666666666667, + "English,Spanish,Indonesian,Chinese": 0.29333333333333333, + "English,Spanish,Filipino,Chinese": 0.28, + "English,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Vietnamese,Spanish,Indonesian,Filipino": 0.13333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Filipino,Chinese": 0.16, + "Vietnamese,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.16666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Spanish,Chinese": 0.16666666666666666, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.16, + "Malay,English,Vietnamese,Filipino,Chinese": 0.12, + "Malay,English,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.21333333333333335, + "Malay,English,Spanish,Filipino,Chinese": 0.17333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.14, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.10666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.14, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.1, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + } + }, + "AC3_2": 0.48001133407044044, + "AC3_3": 0.3607209612351328, + "AC3_4": 0.27801554090369773, + "AC3_5": 0.21954947819473866, + "AC3_6": 0.17766233763258563, + "AC3_7": 0.14721854302076498 + }, + "prompt_3": { + "overall_acc": 0.4742857142857143, + "language_acc": { + "Malay": 0.3333333333333333, + "English": 0.66, + "Vietnamese": 0.38, + "Spanish": 0.5666666666666667, + "Indonesian": 0.4, + "Filipino": 0.36666666666666664, + "Chinese": 0.6133333333333333 + }, + "consistency_score_2": 0.4666666666666666, + "consistency_score_3": 0.2864761904761904, + "consistency_score_4": 0.20247619047619042, + "consistency_score_5": 0.15555555555555556, + "consistency_score_6": 0.12666666666666668, + "consistency_score_7": 0.10666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.43333333333333335, + "Malay,Vietnamese": 0.37333333333333335, + "Malay,Spanish": 0.43333333333333335, + "Malay,Indonesian": 0.54, + "Malay,Filipino": 0.46, + "Malay,Chinese": 0.38666666666666666, + "English,Vietnamese": 0.44, + "English,Spanish": 0.6533333333333333, + "English,Indonesian": 0.5, + "English,Filipino": 0.4533333333333333, + "English,Chinese": 0.68, + "Vietnamese,Spanish": 0.44, + "Vietnamese,Indonesian": 0.44, + "Vietnamese,Filipino": 0.36666666666666664, + "Vietnamese,Chinese": 0.42, + "Spanish,Indonesian": 0.5266666666666666, + "Spanish,Filipino": 0.38666666666666666, + "Spanish,Chinese": 0.62, + "Indonesian,Filipino": 0.42, + "Indonesian,Chinese": 0.4533333333333333, + "Filipino,Chinese": 0.37333333333333335 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.22, + "Malay,English,Spanish": 0.31333333333333335, + "Malay,English,Indonesian": 0.32, + "Malay,English,Filipino": 0.28, + "Malay,English,Chinese": 0.32, + "Malay,Vietnamese,Spanish": 0.24, + "Malay,Vietnamese,Indonesian": 0.2733333333333333, + "Malay,Vietnamese,Filipino": 0.20666666666666667, + "Malay,Vietnamese,Chinese": 0.22, + "Malay,Spanish,Indonesian": 0.32666666666666666, + "Malay,Spanish,Filipino": 0.24, + "Malay,Spanish,Chinese": 0.2866666666666667, + "Malay,Indonesian,Filipino": 0.30666666666666664, + "Malay,Indonesian,Chinese": 0.26666666666666666, + "Malay,Filipino,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish": 0.32, + "English,Vietnamese,Indonesian": 0.25333333333333335, + "English,Vietnamese,Filipino": 0.23333333333333334, + "English,Vietnamese,Chinese": 0.34, + "English,Spanish,Indonesian": 0.3933333333333333, + "English,Spanish,Filipino": 0.31333333333333335, + "English,Spanish,Chinese": 0.5133333333333333, + "English,Indonesian,Filipino": 0.3, + "English,Indonesian,Chinese": 0.38, + "English,Filipino,Chinese": 0.32666666666666666, + "Vietnamese,Spanish,Indonesian": 0.29333333333333333, + "Vietnamese,Spanish,Filipino": 0.22, + "Vietnamese,Spanish,Chinese": 0.3, + "Vietnamese,Indonesian,Filipino": 0.2, + "Vietnamese,Indonesian,Chinese": 0.24666666666666667, + "Vietnamese,Filipino,Chinese": 0.20666666666666667, + "Spanish,Indonesian,Filipino": 0.26, + "Spanish,Indonesian,Chinese": 0.36666666666666664, + "Spanish,Filipino,Chinese": 0.28, + "Indonesian,Filipino,Chinese": 0.22666666666666666 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.17333333333333334, + "Malay,English,Vietnamese,Indonesian": 0.18, + "Malay,English,Vietnamese,Filipino": 0.16, + "Malay,English,Vietnamese,Chinese": 0.19333333333333333, + "Malay,English,Spanish,Indonesian": 0.24666666666666667, + "Malay,English,Spanish,Filipino": 0.21333333333333335, + "Malay,English,Spanish,Chinese": 0.26, + "Malay,English,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Indonesian,Chinese": 0.24, + "Malay,English,Filipino,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Indonesian": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.14666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Filipino,Chinese": 0.15333333333333332, + "Malay,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.22666666666666666, + "Malay,Spanish,Filipino,Chinese": 0.2, + "Malay,Indonesian,Filipino,Chinese": 0.17333333333333334, + "English,Vietnamese,Spanish,Indonesian": 0.22, + "English,Vietnamese,Spanish,Filipino": 0.19333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.26, + "English,Vietnamese,Indonesian,Filipino": 0.16, + "English,Vietnamese,Indonesian,Chinese": 0.21333333333333335, + "English,Vietnamese,Filipino,Chinese": 0.2, + "English,Spanish,Indonesian,Filipino": 0.23333333333333334, + "English,Spanish,Indonesian,Chinese": 0.3333333333333333, + "English,Spanish,Filipino,Chinese": 0.26, + "English,Indonesian,Filipino,Chinese": 0.22, + "Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "Vietnamese,Spanish,Indonesian,Chinese": 0.21333333333333335, + "Vietnamese,Spanish,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Indonesian,Filipino,Chinese": 0.14, + "Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.15333333333333332, + "Malay,English,Vietnamese,Spanish,Filipino": 0.14, + "Malay,English,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.12666666666666668, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.15333333333333332, + "Malay,English,Vietnamese,Filipino,Chinese": 0.14666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.18, + "Malay,English,Spanish,Indonesian,Chinese": 0.21333333333333335, + "Malay,English,Spanish,Filipino,Chinese": 0.18666666666666668, + "Malay,English,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.18666666666666668, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.16666666666666666, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.14, + "English,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.12, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + } + }, + "AC3_2": 0.4704453440795579, + "AC3_3": 0.35719865507859266, + "AC3_4": 0.28379719347662113, + "AC3_5": 0.23427419351119136, + "AC3_6": 0.19993660852457473, + "AC3_7": 0.1741639343962505 + }, + "prompt_4": { + "overall_acc": 0.48857142857142855, + "language_acc": { + "Malay": 0.29333333333333333, + "English": 0.6933333333333334, + "Vietnamese": 0.38, + "Spanish": 0.62, + "Indonesian": 0.4066666666666667, + "Filipino": 0.4, + "Chinese": 0.6266666666666667 + }, + "consistency_score_2": 0.46761904761904766, + "consistency_score_3": 0.2822857142857143, + "consistency_score_4": 0.1958095238095238, + "consistency_score_5": 0.14952380952380956, + "consistency_score_6": 0.12285714285714286, + "consistency_score_7": 0.10666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.41333333333333333, + "Malay,Vietnamese": 0.38, + "Malay,Spanish": 0.4266666666666667, + "Malay,Indonesian": 0.5666666666666667, + "Malay,Filipino": 0.4666666666666667, + "Malay,Chinese": 0.36, + "English,Vietnamese": 0.4066666666666667, + "English,Spanish": 0.6666666666666666, + "English,Indonesian": 0.44, + "English,Filipino": 0.48, + "English,Chinese": 0.68, + "Vietnamese,Spanish": 0.44, + "Vietnamese,Indonesian": 0.4066666666666667, + "Vietnamese,Filipino": 0.3, + "Vietnamese,Chinese": 0.46, + "Spanish,Indonesian": 0.5066666666666667, + "Spanish,Filipino": 0.41333333333333333, + "Spanish,Chinese": 0.66, + "Indonesian,Filipino": 0.46, + "Indonesian,Chinese": 0.4866666666666667, + "Filipino,Chinese": 0.4 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.21333333333333335, + "Malay,English,Spanish": 0.30666666666666664, + "Malay,English,Indonesian": 0.2866666666666667, + "Malay,English,Filipino": 0.2733333333333333, + "Malay,English,Chinese": 0.3, + "Malay,Vietnamese,Spanish": 0.22666666666666666, + "Malay,Vietnamese,Indonesian": 0.26, + "Malay,Vietnamese,Filipino": 0.2, + "Malay,Vietnamese,Chinese": 0.22, + "Malay,Spanish,Indonesian": 0.30666666666666664, + "Malay,Spanish,Filipino": 0.25333333333333335, + "Malay,Spanish,Chinese": 0.26666666666666666, + "Malay,Indonesian,Filipino": 0.30666666666666664, + "Malay,Indonesian,Chinese": 0.28, + "Malay,Filipino,Chinese": 0.21333333333333335, + "English,Vietnamese,Spanish": 0.30666666666666664, + "English,Vietnamese,Indonesian": 0.22666666666666666, + "English,Vietnamese,Filipino": 0.20666666666666667, + "English,Vietnamese,Chinese": 0.34, + "English,Spanish,Indonesian": 0.34, + "English,Spanish,Filipino": 0.3466666666666667, + "English,Spanish,Chinese": 0.5466666666666666, + "English,Indonesian,Filipino": 0.26666666666666666, + "English,Indonesian,Chinese": 0.36666666666666664, + "English,Filipino,Chinese": 0.36, + "Vietnamese,Spanish,Indonesian": 0.26666666666666666, + "Vietnamese,Spanish,Filipino": 0.20666666666666667, + "Vietnamese,Spanish,Chinese": 0.3333333333333333, + "Vietnamese,Indonesian,Filipino": 0.2, + "Vietnamese,Indonesian,Chinese": 0.25333333333333335, + "Vietnamese,Filipino,Chinese": 0.19333333333333333, + "Spanish,Indonesian,Filipino": 0.2733333333333333, + "Spanish,Indonesian,Chinese": 0.37333333333333335, + "Spanish,Filipino,Chinese": 0.30666666666666664, + "Indonesian,Filipino,Chinese": 0.25333333333333335 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.17333333333333334, + "Malay,English,Vietnamese,Indonesian": 0.16666666666666666, + "Malay,English,Vietnamese,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Chinese": 0.18, + "Malay,English,Spanish,Indonesian": 0.22, + "Malay,English,Spanish,Filipino": 0.22, + "Malay,English,Spanish,Chinese": 0.24, + "Malay,English,Indonesian,Filipino": 0.19333333333333333, + "Malay,English,Indonesian,Chinese": 0.24, + "Malay,English,Filipino,Chinese": 0.2, + "Malay,Vietnamese,Spanish,Indonesian": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Filipino": 0.16, + "Malay,Vietnamese,Spanish,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.14666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Filipino,Chinese": 0.13333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.18666666666666668, + "Malay,Spanish,Indonesian,Chinese": 0.22, + "Malay,Spanish,Filipino,Chinese": 0.18, + "Malay,Indonesian,Filipino,Chinese": 0.17333333333333334, + "English,Vietnamese,Spanish,Indonesian": 0.2, + "English,Vietnamese,Spanish,Filipino": 0.17333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.28, + "English,Vietnamese,Indonesian,Filipino": 0.14666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.2, + "English,Vietnamese,Filipino,Chinese": 0.18, + "English,Spanish,Indonesian,Filipino": 0.22666666666666666, + "English,Spanish,Indonesian,Chinese": 0.3, + "English,Spanish,Filipino,Chinese": 0.29333333333333333, + "English,Indonesian,Filipino,Chinese": 0.23333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "Vietnamese,Spanish,Indonesian,Chinese": 0.22, + "Vietnamese,Spanish,Filipino,Chinese": 0.16666666666666666, + "Vietnamese,Indonesian,Filipino,Chinese": 0.14666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.21333333333333335 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.14666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.14, + "Malay,English,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.12, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.12666666666666668, + "Malay,English,Spanish,Indonesian,Filipino": 0.16666666666666666, + "Malay,English,Spanish,Indonesian,Chinese": 0.19333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.17333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.16, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.14, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.18, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.15333333333333332, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.13333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.2, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.12, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.12, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + } + }, + "AC3_2": 0.47786568008662045, + "AC3_3": 0.35782696172420436, + "AC3_4": 0.27957218396773775, + "AC3_5": 0.22897228141400972, + "AC3_6": 0.19634178901995744, + "AC3_7": 0.17510399997058254 + }, + "prompt_5": { + "overall_acc": 0.49619047619047624, + "language_acc": { + "Malay": 0.3466666666666667, + "English": 0.6933333333333334, + "Vietnamese": 0.3933333333333333, + "Spanish": 0.6266666666666667, + "Indonesian": 0.3933333333333333, + "Filipino": 0.3933333333333333, + "Chinese": 0.6266666666666667 + }, + "consistency_score_2": 0.45396825396825397, + "consistency_score_3": 0.2704761904761904, + "consistency_score_4": 0.18723809523809523, + "consistency_score_5": 0.14253968253968252, + "consistency_score_6": 0.1161904761904762, + "consistency_score_7": 0.1, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.42, + "Malay,Vietnamese": 0.36, + "Malay,Spanish": 0.42, + "Malay,Indonesian": 0.5666666666666667, + "Malay,Filipino": 0.47333333333333333, + "Malay,Chinese": 0.3933333333333333, + "English,Vietnamese": 0.3933333333333333, + "English,Spanish": 0.6933333333333334, + "English,Indonesian": 0.44, + "English,Filipino": 0.43333333333333335, + "English,Chinese": 0.6333333333333333, + "Vietnamese,Spanish": 0.4266666666666667, + "Vietnamese,Indonesian": 0.38666666666666666, + "Vietnamese,Filipino": 0.36, + "Vietnamese,Chinese": 0.3466666666666667, + "Spanish,Indonesian": 0.48, + "Spanish,Filipino": 0.4066666666666667, + "Spanish,Chinese": 0.6266666666666667, + "Indonesian,Filipino": 0.42, + "Indonesian,Chinese": 0.4533333333333333, + "Filipino,Chinese": 0.4 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.19333333333333333, + "Malay,English,Spanish": 0.32, + "Malay,English,Indonesian": 0.2866666666666667, + "Malay,English,Filipino": 0.24, + "Malay,English,Chinese": 0.32, + "Malay,Vietnamese,Spanish": 0.22666666666666666, + "Malay,Vietnamese,Indonesian": 0.26, + "Malay,Vietnamese,Filipino": 0.20666666666666667, + "Malay,Vietnamese,Chinese": 0.18666666666666668, + "Malay,Spanish,Indonesian": 0.3, + "Malay,Spanish,Filipino": 0.24, + "Malay,Spanish,Chinese": 0.32, + "Malay,Indonesian,Filipino": 0.3, + "Malay,Indonesian,Chinese": 0.2733333333333333, + "Malay,Filipino,Chinese": 0.22666666666666666, + "English,Vietnamese,Spanish": 0.32, + "English,Vietnamese,Indonesian": 0.20666666666666667, + "English,Vietnamese,Filipino": 0.22, + "English,Vietnamese,Chinese": 0.28, + "English,Spanish,Indonesian": 0.36666666666666664, + "English,Spanish,Filipino": 0.32, + "English,Spanish,Chinese": 0.5333333333333333, + "English,Indonesian,Filipino": 0.24, + "English,Indonesian,Chinese": 0.3466666666666667, + "English,Filipino,Chinese": 0.30666666666666664, + "Vietnamese,Spanish,Indonesian": 0.25333333333333335, + "Vietnamese,Spanish,Filipino": 0.21333333333333335, + "Vietnamese,Spanish,Chinese": 0.28, + "Vietnamese,Indonesian,Filipino": 0.18666666666666668, + "Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "Vietnamese,Filipino,Chinese": 0.17333333333333334, + "Spanish,Indonesian,Filipino": 0.24, + "Spanish,Indonesian,Chinese": 0.35333333333333333, + "Spanish,Filipino,Chinese": 0.29333333333333333, + "Indonesian,Filipino,Chinese": 0.22666666666666666 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.17333333333333334, + "Malay,English,Vietnamese,Indonesian": 0.16, + "Malay,English,Vietnamese,Filipino": 0.14666666666666667, + "Malay,English,Vietnamese,Chinese": 0.16, + "Malay,English,Spanish,Indonesian": 0.24, + "Malay,English,Spanish,Filipino": 0.2, + "Malay,English,Spanish,Chinese": 0.2733333333333333, + "Malay,English,Indonesian,Filipino": 0.17333333333333334, + "Malay,English,Indonesian,Chinese": 0.23333333333333334, + "Malay,English,Filipino,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Indonesian": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Chinese": 0.18, + "Malay,Vietnamese,Indonesian,Filipino": 0.16, + "Malay,Vietnamese,Indonesian,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Filipino,Chinese": 0.13333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.18, + "Malay,Spanish,Indonesian,Chinese": 0.22666666666666666, + "Malay,Spanish,Filipino,Chinese": 0.19333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish,Indonesian": 0.20666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.18666666666666668, + "English,Vietnamese,Spanish,Chinese": 0.25333333333333335, + "English,Vietnamese,Indonesian,Filipino": 0.13333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.18, + "English,Vietnamese,Filipino,Chinese": 0.16, + "English,Spanish,Indonesian,Filipino": 0.21333333333333335, + "English,Spanish,Indonesian,Chinese": 0.31333333333333335, + "English,Spanish,Filipino,Chinese": 0.25333333333333335, + "English,Indonesian,Filipino,Chinese": 0.18, + "Vietnamese,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.19333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.16, + "Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.18 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.16, + "Malay,English,Vietnamese,Spanish,Filipino": 0.13333333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.12, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.14, + "Malay,English,Vietnamese,Filipino,Chinese": 0.12, + "Malay,English,Spanish,Indonesian,Filipino": 0.16, + "Malay,English,Spanish,Indonesian,Chinese": 0.20666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.16666666666666666, + "Malay,English,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.14, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.13333333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.18, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.12, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.14, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.1, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1 + } + }, + "AC3_2": 0.47414125016886316, + "AC3_3": 0.35010706886885623, + "AC3_4": 0.271881403580752, + "AC3_5": 0.22146075921932773, + "AC3_6": 0.188290009596743, + "AC3_7": 0.1664536740934862 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.4342532467532468, + "language_acc": { + "English": 0.5625, + "Vietnamese": 0.4034090909090909, + "Chinese": 0.5284090909090909, + "Indonesian": 0.3806818181818182, + "Filipino": 0.29545454545454547, + "Spanish": 0.4602272727272727, + "Malay": 0.4090909090909091 + }, + "consistency_score_2": 0.4724025974025975, + "consistency_score_3": 0.27386363636363636, + "consistency_score_4": 0.17467532467532465, + "consistency_score_5": 0.11688311688311687, + "consistency_score_6": 0.08035714285714286, + "consistency_score_7": 0.056818181818181816, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4715909090909091, + "English,Chinese": 0.6022727272727273, + "English,Indonesian": 0.5227272727272727, + "English,Filipino": 0.35795454545454547, + "English,Spanish": 0.6761363636363636, + "English,Malay": 0.4375, + "Vietnamese,Chinese": 0.4090909090909091, + "Vietnamese,Indonesian": 0.4772727272727273, + "Vietnamese,Filipino": 0.4090909090909091, + "Vietnamese,Spanish": 0.4375, + "Vietnamese,Malay": 0.4375, + "Chinese,Indonesian": 0.4602272727272727, + "Chinese,Filipino": 0.3352272727272727, + "Chinese,Spanish": 0.5397727272727273, + "Chinese,Malay": 0.4772727272727273, + "Indonesian,Filipino": 0.3977272727272727, + "Indonesian,Spanish": 0.5625, + "Indonesian,Malay": 0.5681818181818182, + "Filipino,Spanish": 0.4147727272727273, + "Filipino,Malay": 0.4034090909090909, + "Spanish,Malay": 0.5227272727272727 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.30113636363636365, + "English,Vietnamese,Indonesian": 0.30113636363636365, + "English,Vietnamese,Filipino": 0.19886363636363635, + "English,Vietnamese,Spanish": 0.3409090909090909, + "English,Vietnamese,Malay": 0.23295454545454544, + "English,Chinese,Indonesian": 0.3352272727272727, + "English,Chinese,Filipino": 0.22727272727272727, + "English,Chinese,Spanish": 0.45454545454545453, + "English,Chinese,Malay": 0.3125, + "English,Indonesian,Filipino": 0.22727272727272727, + "English,Indonesian,Spanish": 0.4147727272727273, + "English,Indonesian,Malay": 0.3125, + "English,Filipino,Spanish": 0.2727272727272727, + "English,Filipino,Malay": 0.19318181818181818, + "English,Spanish,Malay": 0.3465909090909091, + "Vietnamese,Chinese,Indonesian": 0.23863636363636365, + "Vietnamese,Chinese,Filipino": 0.18181818181818182, + "Vietnamese,Chinese,Spanish": 0.2727272727272727, + "Vietnamese,Chinese,Malay": 0.23295454545454544, + "Vietnamese,Indonesian,Filipino": 0.2215909090909091, + "Vietnamese,Indonesian,Spanish": 0.29545454545454547, + "Vietnamese,Indonesian,Malay": 0.29545454545454547, + "Vietnamese,Filipino,Spanish": 0.19318181818181818, + "Vietnamese,Filipino,Malay": 0.2159090909090909, + "Vietnamese,Spanish,Malay": 0.26136363636363635, + "Chinese,Indonesian,Filipino": 0.1875, + "Chinese,Indonesian,Spanish": 0.3409090909090909, + "Chinese,Indonesian,Malay": 0.3125, + "Chinese,Filipino,Spanish": 0.2159090909090909, + "Chinese,Filipino,Malay": 0.19886363636363635, + "Chinese,Spanish,Malay": 0.32386363636363635, + "Indonesian,Filipino,Spanish": 0.2556818181818182, + "Indonesian,Filipino,Malay": 0.2556818181818182, + "Indonesian,Spanish,Malay": 0.3693181818181818, + "Filipino,Spanish,Malay": 0.24431818181818182 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.21022727272727273, + "English,Vietnamese,Chinese,Filipino": 0.13068181818181818, + "English,Vietnamese,Chinese,Spanish": 0.24431818181818182, + "English,Vietnamese,Chinese,Malay": 0.17045454545454544, + "English,Vietnamese,Indonesian,Filipino": 0.14204545454545456, + "English,Vietnamese,Indonesian,Spanish": 0.24431818181818182, + "English,Vietnamese,Indonesian,Malay": 0.17613636363636365, + "English,Vietnamese,Filipino,Spanish": 0.1590909090909091, + "English,Vietnamese,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Spanish,Malay": 0.1875, + "English,Chinese,Indonesian,Filipino": 0.16477272727272727, + "English,Chinese,Indonesian,Spanish": 0.29545454545454547, + "English,Chinese,Indonesian,Malay": 0.22727272727272727, + "English,Chinese,Filipino,Spanish": 0.1875, + "English,Chinese,Filipino,Malay": 0.13068181818181818, + "English,Chinese,Spanish,Malay": 0.26136363636363635, + "English,Indonesian,Filipino,Spanish": 0.19886363636363635, + "English,Indonesian,Filipino,Malay": 0.14772727272727273, + "English,Indonesian,Spanish,Malay": 0.2556818181818182, + "English,Filipino,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Spanish": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Filipino,Spanish": 0.125, + "Vietnamese,Chinese,Filipino,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "Vietnamese,Indonesian,Filipino,Malay": 0.14204545454545456, + "Vietnamese,Indonesian,Spanish,Malay": 0.18181818181818182, + "Vietnamese,Filipino,Spanish,Malay": 0.11931818181818182, + "Chinese,Indonesian,Filipino,Spanish": 0.1590909090909091, + "Chinese,Indonesian,Filipino,Malay": 0.125, + "Chinese,Indonesian,Spanish,Malay": 0.23863636363636365, + "Chinese,Filipino,Spanish,Malay": 0.13068181818181818, + "Indonesian,Filipino,Spanish,Malay": 0.18181818181818182 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.10795454545454546, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.1875, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.13636363636363635, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.10795454545454546, + "English,Vietnamese,Chinese,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Spanish,Malay": 0.14204545454545456, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.125, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.13636363636363635, + "English,Vietnamese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Filipino,Spanish": 0.14772727272727273, + "English,Chinese,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Spanish,Malay": 0.19886363636363635, + "English,Chinese,Filipino,Spanish,Malay": 0.10795454545454546, + "English,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.125, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.11363636363636363, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816 + } + }, + "AC3_2": 0.45252531706806454, + "AC3_3": 0.3358941894402897, + "AC3_4": 0.24913702662210568, + "AC3_5": 0.18418988960565957, + "AC3_6": 0.1356185218303386, + "AC3_7": 0.10048835460012294 + }, + "prompt_2": { + "overall_acc": 0.4358766233766234, + "language_acc": { + "English": 0.5625, + "Vietnamese": 0.36363636363636365, + "Chinese": 0.5625, + "Indonesian": 0.375, + "Filipino": 0.32386363636363635, + "Spanish": 0.48863636363636365, + "Malay": 0.375 + }, + "consistency_score_2": 0.4756493506493507, + "consistency_score_3": 0.2798701298701299, + "consistency_score_4": 0.18392857142857144, + "consistency_score_5": 0.12987012987012983, + "consistency_score_6": 0.09659090909090909, + "consistency_score_7": 0.07386363636363637, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.42045454545454547, + "English,Chinese": 0.5681818181818182, + "English,Indonesian": 0.48295454545454547, + "English,Filipino": 0.3977272727272727, + "English,Spanish": 0.6363636363636364, + "English,Malay": 0.4318181818181818, + "Vietnamese,Chinese": 0.42613636363636365, + "Vietnamese,Indonesian": 0.4772727272727273, + "Vietnamese,Filipino": 0.4090909090909091, + "Vietnamese,Spanish": 0.5284090909090909, + "Vietnamese,Malay": 0.4772727272727273, + "Chinese,Indonesian": 0.4715909090909091, + "Chinese,Filipino": 0.42045454545454547, + "Chinese,Spanish": 0.5511363636363636, + "Chinese,Malay": 0.42045454545454547, + "Indonesian,Filipino": 0.42045454545454547, + "Indonesian,Spanish": 0.5397727272727273, + "Indonesian,Malay": 0.5454545454545454, + "Filipino,Spanish": 0.4659090909090909, + "Filipino,Malay": 0.375, + "Spanish,Malay": 0.5227272727272727 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2784090909090909, + "English,Vietnamese,Indonesian": 0.2727272727272727, + "English,Vietnamese,Filipino": 0.20454545454545456, + "English,Vietnamese,Spanish": 0.3465909090909091, + "English,Vietnamese,Malay": 0.23295454545454544, + "English,Chinese,Indonesian": 0.3125, + "English,Chinese,Filipino": 0.2727272727272727, + "English,Chinese,Spanish": 0.42613636363636365, + "English,Chinese,Malay": 0.26704545454545453, + "English,Indonesian,Filipino": 0.25, + "English,Indonesian,Spanish": 0.3693181818181818, + "English,Indonesian,Malay": 0.29545454545454547, + "English,Filipino,Spanish": 0.3068181818181818, + "English,Filipino,Malay": 0.17613636363636365, + "English,Spanish,Malay": 0.3465909090909091, + "Vietnamese,Chinese,Indonesian": 0.26136363636363635, + "Vietnamese,Chinese,Filipino": 0.21022727272727273, + "Vietnamese,Chinese,Spanish": 0.3181818181818182, + "Vietnamese,Chinese,Malay": 0.23295454545454544, + "Vietnamese,Indonesian,Filipino": 0.24431818181818182, + "Vietnamese,Indonesian,Spanish": 0.32954545454545453, + "Vietnamese,Indonesian,Malay": 0.3181818181818182, + "Vietnamese,Filipino,Spanish": 0.2556818181818182, + "Vietnamese,Filipino,Malay": 0.2215909090909091, + "Vietnamese,Spanish,Malay": 0.32386363636363635, + "Chinese,Indonesian,Filipino": 0.23863636363636365, + "Chinese,Indonesian,Spanish": 0.32954545454545453, + "Chinese,Indonesian,Malay": 0.26136363636363635, + "Chinese,Filipino,Spanish": 0.2784090909090909, + "Chinese,Filipino,Malay": 0.19318181818181818, + "Chinese,Spanish,Malay": 0.32386363636363635, + "Indonesian,Filipino,Spanish": 0.2784090909090909, + "Indonesian,Filipino,Malay": 0.23295454545454544, + "Indonesian,Spanish,Malay": 0.3465909090909091, + "Filipino,Spanish,Malay": 0.23863636363636365 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.19886363636363635, + "English,Vietnamese,Chinese,Filipino": 0.14772727272727273, + "English,Vietnamese,Chinese,Spanish": 0.25, + "English,Vietnamese,Chinese,Malay": 0.16477272727272727, + "English,Vietnamese,Indonesian,Filipino": 0.1590909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.23863636363636365, + "English,Vietnamese,Indonesian,Malay": 0.17613636363636365, + "English,Vietnamese,Filipino,Spanish": 0.17045454545454544, + "English,Vietnamese,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Spanish,Malay": 0.21022727272727273, + "English,Chinese,Indonesian,Filipino": 0.18181818181818182, + "English,Chinese,Indonesian,Spanish": 0.2727272727272727, + "English,Chinese,Indonesian,Malay": 0.1875, + "English,Chinese,Filipino,Spanish": 0.22727272727272727, + "English,Chinese,Filipino,Malay": 0.13068181818181818, + "English,Chinese,Spanish,Malay": 0.24431818181818182, + "English,Indonesian,Filipino,Spanish": 0.2215909090909091, + "English,Indonesian,Filipino,Malay": 0.1534090909090909, + "English,Indonesian,Spanish,Malay": 0.23863636363636365, + "English,Filipino,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2215909090909091, + "Vietnamese,Chinese,Indonesian,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Filipino,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Spanish,Malay": 0.19318181818181818, + "Vietnamese,Indonesian,Filipino,Spanish": 0.17613636363636365, + "Vietnamese,Indonesian,Filipino,Malay": 0.1590909090909091, + "Vietnamese,Indonesian,Spanish,Malay": 0.2215909090909091, + "Vietnamese,Filipino,Spanish,Malay": 0.1534090909090909, + "Chinese,Indonesian,Filipino,Spanish": 0.19318181818181818, + "Chinese,Indonesian,Filipino,Malay": 0.125, + "Chinese,Indonesian,Spanish,Malay": 0.20454545454545456, + "Chinese,Filipino,Spanish,Malay": 0.1590909090909091, + "Indonesian,Filipino,Spanish,Malay": 0.17613636363636365 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.125, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.1875, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.13068181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.13068181818181818, + "English,Vietnamese,Chinese,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Spanish,Malay": 0.1534090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.1534090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino,Spanish": 0.17045454545454544, + "English,Chinese,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Spanish,Malay": 0.17045454545454544, + "English,Chinese,Filipino,Spanish,Malay": 0.125, + "English,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13636363636363635, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.07954545454545454, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637 + } + }, + "AC3_2": 0.4548952827616737, + "AC3_3": 0.3408715348588389, + "AC3_4": 0.25869471671348165, + "AC3_5": 0.2001155228922026, + "AC3_6": 0.15813816515877094, + "AC3_7": 0.12632093222767948 + }, + "prompt_3": { + "overall_acc": 0.42613636363636365, + "language_acc": { + "English": 0.5681818181818182, + "Vietnamese": 0.3522727272727273, + "Chinese": 0.5113636363636364, + "Indonesian": 0.36363636363636365, + "Filipino": 0.3068181818181818, + "Spanish": 0.48863636363636365, + "Malay": 0.39204545454545453 + }, + "consistency_score_2": 0.48241341991341985, + "consistency_score_3": 0.2883116883116883, + "consistency_score_4": 0.18912337662337667, + "consistency_score_5": 0.13230519480519481, + "consistency_score_6": 0.09902597402597405, + "consistency_score_7": 0.07954545454545454, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4147727272727273, + "English,Chinese": 0.6079545454545454, + "English,Indonesian": 0.5, + "English,Filipino": 0.4147727272727273, + "English,Spanish": 0.6761363636363636, + "English,Malay": 0.4772727272727273, + "Vietnamese,Chinese": 0.375, + "Vietnamese,Indonesian": 0.4602272727272727, + "Vietnamese,Filipino": 0.4090909090909091, + "Vietnamese,Spanish": 0.4659090909090909, + "Vietnamese,Malay": 0.45454545454545453, + "Chinese,Indonesian": 0.4715909090909091, + "Chinese,Filipino": 0.42613636363636365, + "Chinese,Spanish": 0.5909090909090909, + "Chinese,Malay": 0.4772727272727273, + "Indonesian,Filipino": 0.38636363636363635, + "Indonesian,Spanish": 0.5340909090909091, + "Indonesian,Malay": 0.5852272727272727, + "Filipino,Spanish": 0.42045454545454547, + "Filipino,Malay": 0.44886363636363635, + "Spanish,Malay": 0.5340909090909091 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.26136363636363635, + "English,Vietnamese,Indonesian": 0.24431818181818182, + "English,Vietnamese,Filipino": 0.20454545454545456, + "English,Vietnamese,Spanish": 0.3181818181818182, + "English,Vietnamese,Malay": 0.23295454545454544, + "English,Chinese,Indonesian": 0.3409090909090909, + "English,Chinese,Filipino": 0.2897727272727273, + "English,Chinese,Spanish": 0.48295454545454547, + "English,Chinese,Malay": 0.3352272727272727, + "English,Indonesian,Filipino": 0.22727272727272727, + "English,Indonesian,Spanish": 0.39204545454545453, + "English,Indonesian,Malay": 0.3465909090909091, + "English,Filipino,Spanish": 0.3125, + "English,Filipino,Malay": 0.24431818181818182, + "English,Spanish,Malay": 0.39204545454545453, + "Vietnamese,Chinese,Indonesian": 0.22727272727272727, + "Vietnamese,Chinese,Filipino": 0.20454545454545456, + "Vietnamese,Chinese,Spanish": 0.29545454545454547, + "Vietnamese,Chinese,Malay": 0.23295454545454544, + "Vietnamese,Indonesian,Filipino": 0.23295454545454544, + "Vietnamese,Indonesian,Spanish": 0.29545454545454547, + "Vietnamese,Indonesian,Malay": 0.30113636363636365, + "Vietnamese,Filipino,Spanish": 0.20454545454545456, + "Vietnamese,Filipino,Malay": 0.24431818181818182, + "Vietnamese,Spanish,Malay": 0.2897727272727273, + "Chinese,Indonesian,Filipino": 0.2215909090909091, + "Chinese,Indonesian,Spanish": 0.3522727272727273, + "Chinese,Indonesian,Malay": 0.3181818181818182, + "Chinese,Filipino,Spanish": 0.30113636363636365, + "Chinese,Filipino,Malay": 0.23863636363636365, + "Chinese,Spanish,Malay": 0.35795454545454547, + "Indonesian,Filipino,Spanish": 0.23863636363636365, + "Indonesian,Filipino,Malay": 0.2784090909090909, + "Indonesian,Spanish,Malay": 0.375, + "Filipino,Spanish,Malay": 0.2556818181818182 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.17613636363636365, + "English,Vietnamese,Chinese,Filipino": 0.13636363636363635, + "English,Vietnamese,Chinese,Spanish": 0.23863636363636365, + "English,Vietnamese,Chinese,Malay": 0.17613636363636365, + "English,Vietnamese,Indonesian,Filipino": 0.125, + "English,Vietnamese,Indonesian,Spanish": 0.20454545454545456, + "English,Vietnamese,Indonesian,Malay": 0.16477272727272727, + "English,Vietnamese,Filipino,Spanish": 0.1534090909090909, + "English,Vietnamese,Filipino,Malay": 0.13636363636363635, + "English,Vietnamese,Spanish,Malay": 0.19886363636363635, + "English,Chinese,Indonesian,Filipino": 0.17613636363636365, + "English,Chinese,Indonesian,Spanish": 0.29545454545454547, + "English,Chinese,Indonesian,Malay": 0.25, + "English,Chinese,Filipino,Spanish": 0.25, + "English,Chinese,Filipino,Malay": 0.17045454545454544, + "English,Chinese,Spanish,Malay": 0.29545454545454547, + "English,Indonesian,Filipino,Spanish": 0.19886363636363635, + "English,Indonesian,Filipino,Malay": 0.18181818181818182, + "English,Indonesian,Spanish,Malay": 0.29545454545454547, + "English,Filipino,Spanish,Malay": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian,Filipino": 0.125, + "Vietnamese,Chinese,Indonesian,Spanish": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Filipino,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Filipino,Malay": 0.125, + "Vietnamese,Chinese,Spanish,Malay": 0.19886363636363635, + "Vietnamese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "Vietnamese,Indonesian,Filipino,Malay": 0.16477272727272727, + "Vietnamese,Indonesian,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Filipino,Spanish,Malay": 0.13636363636363635, + "Chinese,Indonesian,Filipino,Spanish": 0.19886363636363635, + "Chinese,Indonesian,Filipino,Malay": 0.1590909090909091, + "Chinese,Indonesian,Spanish,Malay": 0.26136363636363635, + "Chinese,Filipino,Spanish,Malay": 0.18181818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.18181818181818182 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.09659090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.16477272727272727, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.13068181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.125, + "English,Vietnamese,Chinese,Filipino,Malay": 0.09659090909090909, + "English,Vietnamese,Chinese,Spanish,Malay": 0.16477272727272727, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.14772727272727273, + "English,Vietnamese,Filipino,Spanish,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Filipino,Spanish": 0.17045454545454544, + "English,Chinese,Indonesian,Filipino,Malay": 0.13636363636363635, + "English,Chinese,Indonesian,Spanish,Malay": 0.22727272727272727, + "English,Chinese,Filipino,Spanish,Malay": 0.1534090909090909, + "English,Indonesian,Filipino,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.125, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454 + } + }, + "AC3_2": 0.4525319453722504, + "AC3_3": 0.34393009848318623, + "AC3_4": 0.26197829211381235, + "AC3_5": 0.2019192652915379, + "AC3_6": 0.1607067584350443, + "AC3_7": 0.13406537280290598 + }, + "prompt_4": { + "overall_acc": 0.4342532467532468, + "language_acc": { + "English": 0.5909090909090909, + "Vietnamese": 0.39204545454545453, + "Chinese": 0.5227272727272727, + "Indonesian": 0.35795454545454547, + "Filipino": 0.32386363636363635, + "Spanish": 0.45454545454545453, + "Malay": 0.3977272727272727 + }, + "consistency_score_2": 0.46969696969696967, + "consistency_score_3": 0.2728896103896104, + "consistency_score_4": 0.17646103896103904, + "consistency_score_5": 0.12121212121212123, + "consistency_score_6": 0.08766233766233765, + "consistency_score_7": 0.06818181818181818, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4318181818181818, + "English,Chinese": 0.6193181818181818, + "English,Indonesian": 0.5056818181818182, + "English,Filipino": 0.3522727272727273, + "English,Spanish": 0.6761363636363636, + "English,Malay": 0.48295454545454547, + "Vietnamese,Chinese": 0.38636363636363635, + "Vietnamese,Indonesian": 0.42613636363636365, + "Vietnamese,Filipino": 0.4318181818181818, + "Vietnamese,Spanish": 0.4602272727272727, + "Vietnamese,Malay": 0.42613636363636365, + "Chinese,Indonesian": 0.4602272727272727, + "Chinese,Filipino": 0.3068181818181818, + "Chinese,Spanish": 0.5625, + "Chinese,Malay": 0.4772727272727273, + "Indonesian,Filipino": 0.4318181818181818, + "Indonesian,Spanish": 0.5397727272727273, + "Indonesian,Malay": 0.5852272727272727, + "Filipino,Spanish": 0.3806818181818182, + "Filipino,Malay": 0.38636363636363635, + "Spanish,Malay": 0.5340909090909091 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.2784090909090909, + "English,Vietnamese,Indonesian": 0.2556818181818182, + "English,Vietnamese,Filipino": 0.1875, + "English,Vietnamese,Spanish": 0.32386363636363635, + "English,Vietnamese,Malay": 0.2215909090909091, + "English,Chinese,Indonesian": 0.3409090909090909, + "English,Chinese,Filipino": 0.22727272727272727, + "English,Chinese,Spanish": 0.4715909090909091, + "English,Chinese,Malay": 0.3352272727272727, + "English,Indonesian,Filipino": 0.23863636363636365, + "English,Indonesian,Spanish": 0.39204545454545453, + "English,Indonesian,Malay": 0.3409090909090909, + "English,Filipino,Spanish": 0.26704545454545453, + "English,Filipino,Malay": 0.20454545454545456, + "English,Spanish,Malay": 0.38636363636363635, + "Vietnamese,Chinese,Indonesian": 0.2215909090909091, + "Vietnamese,Chinese,Filipino": 0.18181818181818182, + "Vietnamese,Chinese,Spanish": 0.2897727272727273, + "Vietnamese,Chinese,Malay": 0.22727272727272727, + "Vietnamese,Indonesian,Filipino": 0.23295454545454544, + "Vietnamese,Indonesian,Spanish": 0.2784090909090909, + "Vietnamese,Indonesian,Malay": 0.26704545454545453, + "Vietnamese,Filipino,Spanish": 0.19886363636363635, + "Vietnamese,Filipino,Malay": 0.21022727272727273, + "Vietnamese,Spanish,Malay": 0.2784090909090909, + "Chinese,Indonesian,Filipino": 0.19886363636363635, + "Chinese,Indonesian,Spanish": 0.3409090909090909, + "Chinese,Indonesian,Malay": 0.3181818181818182, + "Chinese,Filipino,Spanish": 0.2159090909090909, + "Chinese,Filipino,Malay": 0.17045454545454544, + "Chinese,Spanish,Malay": 0.3522727272727273, + "Indonesian,Filipino,Spanish": 0.2556818181818182, + "Indonesian,Filipino,Malay": 0.25, + "Indonesian,Spanish,Malay": 0.3806818181818182, + "Filipino,Spanish,Malay": 0.21022727272727273 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.1875, + "English,Vietnamese,Chinese,Filipino": 0.14772727272727273, + "English,Vietnamese,Chinese,Spanish": 0.24431818181818182, + "English,Vietnamese,Chinese,Malay": 0.16477272727272727, + "English,Vietnamese,Indonesian,Filipino": 0.13636363636363635, + "English,Vietnamese,Indonesian,Spanish": 0.21022727272727273, + "English,Vietnamese,Indonesian,Malay": 0.16477272727272727, + "English,Vietnamese,Filipino,Spanish": 0.14204545454545456, + "English,Vietnamese,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Spanish,Malay": 0.19318181818181818, + "English,Chinese,Indonesian,Filipino": 0.17613636363636365, + "English,Chinese,Indonesian,Spanish": 0.2840909090909091, + "English,Chinese,Indonesian,Malay": 0.23863636363636365, + "English,Chinese,Filipino,Spanish": 0.19318181818181818, + "English,Chinese,Filipino,Malay": 0.13068181818181818, + "English,Chinese,Spanish,Malay": 0.2897727272727273, + "English,Indonesian,Filipino,Spanish": 0.21022727272727273, + "English,Indonesian,Filipino,Malay": 0.17045454545454544, + "English,Indonesian,Spanish,Malay": 0.2784090909090909, + "English,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino": 0.125, + "Vietnamese,Chinese,Indonesian,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Filipino,Spanish": 0.13636363636363635, + "Vietnamese,Chinese,Filipino,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Spanish,Malay": 0.19318181818181818, + "Vietnamese,Indonesian,Filipino,Spanish": 0.14772727272727273, + "Vietnamese,Indonesian,Filipino,Malay": 0.13636363636363635, + "Vietnamese,Indonesian,Spanish,Malay": 0.19318181818181818, + "Vietnamese,Filipino,Spanish,Malay": 0.11931818181818182, + "Chinese,Indonesian,Filipino,Spanish": 0.17045454545454544, + "Chinese,Indonesian,Filipino,Malay": 0.125, + "Chinese,Indonesian,Spanish,Malay": 0.24431818181818182, + "Chinese,Filipino,Spanish,Malay": 0.13636363636363635, + "Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.11931818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.17045454545454544, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.125, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Spanish,Malay": 0.1534090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.14204545454545456, + "English,Vietnamese,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Indonesian,Filipino,Spanish": 0.1590909090909091, + "English,Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Spanish,Malay": 0.20454545454545456, + "English,Chinese,Filipino,Spanish,Malay": 0.11931818181818182, + "English,Indonesian,Filipino,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.07954545454545454, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.11363636363636363, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818 + } + }, + "AC3_2": 0.451280237222532, + "AC3_3": 0.33516056367721686, + "AC3_4": 0.25094804845480456, + "AC3_5": 0.18952309255765287, + "AC3_6": 0.1458766738425028, + "AC3_7": 0.1178587163813009 + }, + "prompt_5": { + "overall_acc": 0.4415584415584415, + "language_acc": { + "English": 0.5795454545454546, + "Vietnamese": 0.38636363636363635, + "Chinese": 0.5454545454545454, + "Indonesian": 0.375, + "Filipino": 0.3068181818181818, + "Spanish": 0.4772727272727273, + "Malay": 0.42045454545454547 + }, + "consistency_score_2": 0.48971861471861466, + "consistency_score_3": 0.3011363636363637, + "consistency_score_4": 0.20957792207792209, + "consistency_score_5": 0.15773809523809523, + "consistency_score_6": 0.125, + "consistency_score_7": 0.10227272727272728, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4318181818181818, + "English,Chinese": 0.625, + "English,Indonesian": 0.48295454545454547, + "English,Filipino": 0.4147727272727273, + "English,Spanish": 0.7045454545454546, + "English,Malay": 0.4943181818181818, + "Vietnamese,Chinese": 0.4431818181818182, + "Vietnamese,Indonesian": 0.4375, + "Vietnamese,Filipino": 0.42045454545454547, + "Vietnamese,Spanish": 0.4602272727272727, + "Vietnamese,Malay": 0.44886363636363635, + "Chinese,Indonesian": 0.45454545454545453, + "Chinese,Filipino": 0.4147727272727273, + "Chinese,Spanish": 0.5965909090909091, + "Chinese,Malay": 0.48863636363636365, + "Indonesian,Filipino": 0.39204545454545453, + "Indonesian,Spanish": 0.5113636363636364, + "Indonesian,Malay": 0.6306818181818182, + "Filipino,Spanish": 0.44886363636363635, + "Filipino,Malay": 0.4318181818181818, + "Spanish,Malay": 0.5511363636363636 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.3068181818181818, + "English,Vietnamese,Indonesian": 0.26136363636363635, + "English,Vietnamese,Filipino": 0.22727272727272727, + "English,Vietnamese,Spanish": 0.3352272727272727, + "English,Vietnamese,Malay": 0.25, + "English,Chinese,Indonesian": 0.3465909090909091, + "English,Chinese,Filipino": 0.29545454545454547, + "English,Chinese,Spanish": 0.5056818181818182, + "English,Chinese,Malay": 0.3522727272727273, + "English,Indonesian,Filipino": 0.23295454545454544, + "English,Indonesian,Spanish": 0.38636363636363635, + "English,Indonesian,Malay": 0.35795454545454547, + "English,Filipino,Spanish": 0.3352272727272727, + "English,Filipino,Malay": 0.2556818181818182, + "English,Spanish,Malay": 0.4090909090909091, + "Vietnamese,Chinese,Indonesian": 0.24431818181818182, + "Vietnamese,Chinese,Filipino": 0.25, + "Vietnamese,Chinese,Spanish": 0.3125, + "Vietnamese,Chinese,Malay": 0.26704545454545453, + "Vietnamese,Indonesian,Filipino": 0.2159090909090909, + "Vietnamese,Indonesian,Spanish": 0.2897727272727273, + "Vietnamese,Indonesian,Malay": 0.32386363636363635, + "Vietnamese,Filipino,Spanish": 0.23295454545454544, + "Vietnamese,Filipino,Malay": 0.22727272727272727, + "Vietnamese,Spanish,Malay": 0.2840909090909091, + "Chinese,Indonesian,Filipino": 0.22727272727272727, + "Chinese,Indonesian,Spanish": 0.3465909090909091, + "Chinese,Indonesian,Malay": 0.3409090909090909, + "Chinese,Filipino,Spanish": 0.30113636363636365, + "Chinese,Filipino,Malay": 0.24431818181818182, + "Chinese,Spanish,Malay": 0.375, + "Indonesian,Filipino,Spanish": 0.26136363636363635, + "Indonesian,Filipino,Malay": 0.2727272727272727, + "Indonesian,Spanish,Malay": 0.38636363636363635, + "Filipino,Spanish,Malay": 0.2784090909090909 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.20454545454545456, + "English,Vietnamese,Chinese,Filipino": 0.19318181818181818, + "English,Vietnamese,Chinese,Spanish": 0.26704545454545453, + "English,Vietnamese,Chinese,Malay": 0.19886363636363635, + "English,Vietnamese,Indonesian,Filipino": 0.14772727272727273, + "English,Vietnamese,Indonesian,Spanish": 0.23295454545454544, + "English,Vietnamese,Indonesian,Malay": 0.20454545454545456, + "English,Vietnamese,Filipino,Spanish": 0.19886363636363635, + "English,Vietnamese,Filipino,Malay": 0.14772727272727273, + "English,Vietnamese,Spanish,Malay": 0.21022727272727273, + "English,Chinese,Indonesian,Filipino": 0.19886363636363635, + "English,Chinese,Indonesian,Spanish": 0.3125, + "English,Chinese,Indonesian,Malay": 0.2556818181818182, + "English,Chinese,Filipino,Spanish": 0.2727272727272727, + "English,Chinese,Filipino,Malay": 0.20454545454545456, + "English,Chinese,Spanish,Malay": 0.3181818181818182, + "English,Indonesian,Filipino,Spanish": 0.2215909090909091, + "English,Indonesian,Filipino,Malay": 0.17613636363636365, + "English,Indonesian,Spanish,Malay": 0.30113636363636365, + "English,Filipino,Spanish,Malay": 0.23295454545454544, + "Vietnamese,Chinese,Indonesian,Filipino": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Malay": 0.19318181818181818, + "Vietnamese,Chinese,Filipino,Spanish": 0.1875, + "Vietnamese,Chinese,Filipino,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Indonesian,Filipino,Malay": 0.1590909090909091, + "Vietnamese,Indonesian,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Filipino,Spanish,Malay": 0.14772727272727273, + "Chinese,Indonesian,Filipino,Spanish": 0.21022727272727273, + "Chinese,Indonesian,Filipino,Malay": 0.16477272727272727, + "Chinese,Indonesian,Spanish,Malay": 0.2556818181818182, + "Chinese,Filipino,Spanish,Malay": 0.20454545454545456, + "Indonesian,Filipino,Spanish,Malay": 0.19886363636363635 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.13636363636363635, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.19318181818181818, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.1590909090909091, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.17613636363636365, + "English,Vietnamese,Chinese,Filipino,Malay": 0.13636363636363635, + "English,Vietnamese,Chinese,Spanish,Malay": 0.17613636363636365, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.14772727272727273, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.18181818181818182, + "English,Vietnamese,Filipino,Spanish,Malay": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino,Spanish": 0.19886363636363635, + "English,Chinese,Indonesian,Filipino,Malay": 0.14772727272727273, + "English,Chinese,Indonesian,Spanish,Malay": 0.23295454545454544, + "English,Chinese,Filipino,Spanish,Malay": 0.19318181818181818, + "English,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.125, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.14772727272727273, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.125, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728 + } + }, + "AC3_2": 0.4643932476089246, + "AC3_3": 0.3580725285161192, + "AC3_4": 0.2842443021512844, + "AC3_5": 0.23244114797798307, + "AC3_6": 0.1948424068424003, + "AC3_7": 0.16607869739144235 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5728155339805825 + }, + "prompt_2": { + "accuracy": 0.6019417475728155 + }, + "prompt_3": { + "accuracy": 0.6310679611650486 + }, + "prompt_4": { + "accuracy": 0.6116504854368932 + }, + "prompt_5": { + "accuracy": 0.6213592233009708 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5904761904761905 + }, + "prompt_2": { + "accuracy": 0.6 + }, + "prompt_3": { + "accuracy": 0.6 + }, + "prompt_4": { + "accuracy": 0.580952380952381 + }, + "prompt_5": { + "accuracy": 0.580952380952381 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6635514018691588 + }, + "prompt_2": { + "accuracy": 0.6728971962616822 + }, + "prompt_3": { + "accuracy": 0.6448598130841121 + }, + "prompt_4": { + "accuracy": 0.6915887850467289 + }, + "prompt_5": { + "accuracy": 0.7102803738317757 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.38, + "category_acc": { + "brand": 0.4, + "demographics": 0.0, + "biology": 0.5, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.4, + "culture": 0.4, + "film": 0.4, + "law": 0.4, + "geography": 0.6 + } + }, + "prompt_2": { + "accuracy": 0.39, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.6, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.5, + "culture": 0.4, + "film": 0.4, + "law": 0.4, + "geography": 0.6 + } + }, + "prompt_3": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.5, + "history": 0.3333333333333333, + "literature": 0.1, + "politics": 0.4, + "culture": 0.4, + "film": 0.4, + "law": 0.2, + "geography": 0.3 + } + }, + "prompt_4": { + "accuracy": 0.34, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.5, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.4, + "culture": 0.4, + "film": 0.4, + "law": 0.4, + "geography": 0.4 + } + }, + "prompt_5": { + "accuracy": 0.33, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.5, + "history": 0.26666666666666666, + "literature": 0.1, + "politics": 0.4, + "culture": 0.4, + "film": 0.4, + "law": 0.4, + "geography": 0.4 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.21881669611787624 + }, + "prompt_2": { + "bleu_score": 0.20529564356985583 + }, + "prompt_3": { + "bleu_score": 0.19627288323493136 + }, + "prompt_4": { + "bleu_score": 0.21585573309769215 + }, + "prompt_5": { + "bleu_score": 0.18317884015596073 + } }, "indommlu": { "prompt_1": -1, @@ -8172,179 +71231,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.27313428319629635 + }, + "prompt_2": { + "bleu_score": 0.2766231538171056 + }, + "prompt_3": { + "bleu_score": 0.2803007248521438 + }, + "prompt_4": { + "bleu_score": 0.27909095455809874 + }, + "prompt_5": { + "bleu_score": 0.2685729817436869 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.17660789332654386 + }, + "prompt_2": { + "bleu_score": 0.18492133685549905 + }, + "prompt_3": { + "bleu_score": 0.18557622251342762 + }, + "prompt_4": { + "bleu_score": 0.18142082676727472 + }, + "prompt_5": { + "bleu_score": 0.17377284943890234 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.22107047120553416 + }, + "prompt_2": { + "bleu_score": 0.22101762381742626 + }, + "prompt_3": { + "bleu_score": 0.2244390439745376 + }, + "prompt_4": { + "bleu_score": 0.21924887399336457 + }, + "prompt_5": { + "bleu_score": 0.21930170427900322 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.25561706687285723 + }, + "prompt_2": { + "bleu_score": 0.2572254155770503 + }, + "prompt_3": { + "bleu_score": 0.2580177718074157 + }, + "prompt_4": { + "bleu_score": 0.2566099046552893 + }, + "prompt_5": { + "bleu_score": 0.25007355768765394 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5507584597432905 + }, + "prompt_2": { + "accuracy": 0.5717619603267211 + }, + "prompt_3": { + "accuracy": 0.5892648774795799 + }, + "prompt_4": { + "accuracy": 0.5600933488914819 + }, + "prompt_5": { + "accuracy": 0.558926487747958 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5383625312835181, + "category_acc": { + "high_school_european_history": 0.7012195121951219, + "business_ethics": 0.5656565656565656, + "clinical_knowledge": 0.5568181818181818, + "medical_genetics": 0.6060606060606061, + "high_school_us_history": 0.729064039408867, + "high_school_physics": 0.21333333333333335, + "high_school_world_history": 0.75, + "virology": 0.44242424242424244, + "high_school_microeconomics": 0.5569620253164557, + "econometrics": 0.2831858407079646, + "college_computer_science": 0.36363636363636365, + "high_school_biology": 0.686084142394822, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.3701067615658363, + "philosophy": 0.5483870967741935, + "professional_medicine": 0.6125461254612546, + "nutrition": 0.6163934426229508, + "global_facts": 0.3939393939393939, + "machine_learning": 0.32432432432432434, + "security_studies": 0.6147540983606558, + "public_relations": 0.5229357798165137, + "professional_psychology": 0.5155482815057283, + "prehistory": 0.5851393188854489, + "anatomy": 0.48507462686567165, + "human_sexuality": 0.6230769230769231, + "college_medicine": 0.5174418604651163, + "high_school_government_and_politics": 0.7916666666666666, + "college_chemistry": 0.35353535353535354, + "logical_fallacies": 0.6728395061728395, + "high_school_geography": 0.7309644670050761, + "elementary_mathematics": 0.5649867374005305, + "human_aging": 0.5900900900900901, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.7555147058823529, + "formal_logic": 0.456, + "high_school_statistics": 0.39069767441860465, + "international_law": 0.65, + "high_school_mathematics": 0.3382899628252788, + "high_school_computer_science": 0.5050505050505051, + "conceptual_physics": 0.47863247863247865, + "miscellaneous": 0.7723785166240409, + "high_school_chemistry": 0.43564356435643564, + "marketing": 0.7510729613733905, + "professional_law": 0.39856490541422046, + "management": 0.7352941176470589, + "college_physics": 0.33663366336633666, + "jurisprudence": 0.6074766355140186, + "world_religions": 0.7764705882352941, + "sociology": 0.75, + "us_foreign_policy": 0.7373737373737373, + "high_school_macroeconomics": 0.493573264781491, + "computer_security": 0.696969696969697, + "moral_scenarios": 0.2539149888143177, + "moral_disputes": 0.5217391304347826, + "electrical_engineering": 0.5277777777777778, + "astronomy": 0.6291390728476821, + "college_biology": 0.6573426573426573 + } + }, + "prompt_2": { + "accuracy": 0.5468001430103683, + "category_acc": { + "high_school_european_history": 0.7195121951219512, + "business_ethics": 0.6161616161616161, + "clinical_knowledge": 0.5454545454545454, + "medical_genetics": 0.5858585858585859, + "high_school_us_history": 0.7586206896551724, + "high_school_physics": 0.34, + "high_school_world_history": 0.7457627118644068, + "virology": 0.42424242424242425, + "high_school_microeconomics": 0.5907172995780591, + "econometrics": 0.30973451327433627, + "college_computer_science": 0.45454545454545453, + "high_school_biology": 0.6893203883495146, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.398576512455516, + "philosophy": 0.5774193548387097, + "professional_medicine": 0.5830258302583026, + "nutrition": 0.6131147540983607, + "global_facts": 0.2222222222222222, + "machine_learning": 0.2972972972972973, + "security_studies": 0.6229508196721312, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.5351882160392799, + "prehistory": 0.5882352941176471, + "anatomy": 0.4925373134328358, + "human_sexuality": 0.676923076923077, + "college_medicine": 0.563953488372093, + "high_school_government_and_politics": 0.8020833333333334, + "college_chemistry": 0.42424242424242425, + "logical_fallacies": 0.6728395061728395, + "high_school_geography": 0.7563451776649747, + "elementary_mathematics": 0.48010610079575594, + "human_aging": 0.6081081081081081, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.7794117647058824, + "formal_logic": 0.488, + "high_school_statistics": 0.4418604651162791, + "international_law": 0.6833333333333333, + "high_school_mathematics": 0.2862453531598513, + "high_school_computer_science": 0.46464646464646464, + "conceptual_physics": 0.44871794871794873, + "miscellaneous": 0.7583120204603581, + "high_school_chemistry": 0.45544554455445546, + "marketing": 0.7682403433476395, + "professional_law": 0.4050880626223092, + "management": 0.7254901960784313, + "college_physics": 0.37623762376237624, + "jurisprudence": 0.6355140186915887, + "world_religions": 0.7764705882352941, + "sociology": 0.74, + "us_foreign_policy": 0.8080808080808081, + "high_school_macroeconomics": 0.5244215938303342, + "computer_security": 0.6565656565656566, + "moral_scenarios": 0.2483221476510067, + "moral_disputes": 0.5942028985507246, + "electrical_engineering": 0.5347222222222222, + "astronomy": 0.6754966887417219, + "college_biology": 0.6713286713286714 + } + }, + "prompt_3": { + "accuracy": 0.552592062924562, + "category_acc": { + "high_school_european_history": 0.7134146341463414, + "business_ethics": 0.5858585858585859, + "clinical_knowledge": 0.5757575757575758, + "medical_genetics": 0.5858585858585859, + "high_school_us_history": 0.7586206896551724, + "high_school_physics": 0.3, + "high_school_world_history": 0.7415254237288136, + "virology": 0.46060606060606063, + "high_school_microeconomics": 0.5864978902953587, + "econometrics": 0.30973451327433627, + "college_computer_science": 0.45454545454545453, + "high_school_biology": 0.6990291262135923, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.40213523131672596, + "philosophy": 0.5870967741935483, + "professional_medicine": 0.6014760147601476, + "nutrition": 0.6295081967213115, + "global_facts": 0.30303030303030304, + "machine_learning": 0.24324324324324326, + "security_studies": 0.6475409836065574, + "public_relations": 0.5963302752293578, + "professional_psychology": 0.5400981996726678, + "prehistory": 0.6130030959752322, + "anatomy": 0.5149253731343284, + "human_sexuality": 0.6307692307692307, + "college_medicine": 0.5406976744186046, + "high_school_government_and_politics": 0.796875, + "college_chemistry": 0.40404040404040403, + "logical_fallacies": 0.6419753086419753, + "high_school_geography": 0.7614213197969543, + "elementary_mathematics": 0.47480106100795755, + "human_aging": 0.6081081081081081, + "college_mathematics": 0.26262626262626265, + "high_school_psychology": 0.78125, + "formal_logic": 0.48, + "high_school_statistics": 0.4046511627906977, + "international_law": 0.6666666666666666, + "high_school_mathematics": 0.3271375464684015, + "high_school_computer_science": 0.494949494949495, + "conceptual_physics": 0.46153846153846156, + "miscellaneous": 0.7634271099744245, + "high_school_chemistry": 0.4603960396039604, + "marketing": 0.8068669527896996, + "professional_law": 0.4181343770384866, + "management": 0.7352941176470589, + "college_physics": 0.43564356435643564, + "jurisprudence": 0.6542056074766355, + "world_religions": 0.7588235294117647, + "sociology": 0.735, + "us_foreign_policy": 0.797979797979798, + "high_school_macroeconomics": 0.5475578406169666, + "computer_security": 0.6666666666666666, + "moral_scenarios": 0.24720357941834453, + "moral_disputes": 0.6028985507246377, + "electrical_engineering": 0.5833333333333334, + "astronomy": 0.6158940397350994, + "college_biology": 0.6713286713286714 + } + }, + "prompt_4": { + "accuracy": 0.5480872363246335, + "category_acc": { + "high_school_european_history": 0.7195121951219512, + "business_ethics": 0.6262626262626263, + "clinical_knowledge": 0.5606060606060606, + "medical_genetics": 0.5656565656565656, + "high_school_us_history": 0.7536945812807881, + "high_school_physics": 0.32, + "high_school_world_history": 0.7457627118644068, + "virology": 0.4484848484848485, + "high_school_microeconomics": 0.5991561181434599, + "econometrics": 0.2831858407079646, + "college_computer_science": 0.48484848484848486, + "high_school_biology": 0.6666666666666666, + "abstract_algebra": 0.29292929292929293, + "professional_accounting": 0.4128113879003559, + "philosophy": 0.5774193548387097, + "professional_medicine": 0.5867158671586716, + "nutrition": 0.6229508196721312, + "global_facts": 0.31313131313131315, + "machine_learning": 0.3063063063063063, + "security_studies": 0.6024590163934426, + "public_relations": 0.5596330275229358, + "professional_psychology": 0.5204582651391162, + "prehistory": 0.5727554179566563, + "anatomy": 0.4701492537313433, + "human_sexuality": 0.6615384615384615, + "college_medicine": 0.5348837209302325, + "high_school_government_and_politics": 0.796875, + "college_chemistry": 0.4444444444444444, + "logical_fallacies": 0.654320987654321, + "high_school_geography": 0.7106598984771574, + "elementary_mathematics": 0.506631299734748, + "human_aging": 0.6126126126126126, + "college_mathematics": 0.23232323232323232, + "high_school_psychology": 0.7702205882352942, + "formal_logic": 0.424, + "high_school_statistics": 0.4558139534883721, + "international_law": 0.6583333333333333, + "high_school_mathematics": 0.3382899628252788, + "high_school_computer_science": 0.5252525252525253, + "conceptual_physics": 0.43162393162393164, + "miscellaneous": 0.768542199488491, + "high_school_chemistry": 0.45544554455445546, + "marketing": 0.7467811158798283, + "professional_law": 0.4272667971298108, + "management": 0.7450980392156863, + "college_physics": 0.3465346534653465, + "jurisprudence": 0.6542056074766355, + "world_religions": 0.7588235294117647, + "sociology": 0.765, + "us_foreign_policy": 0.797979797979798, + "high_school_macroeconomics": 0.532133676092545, + "computer_security": 0.696969696969697, + "moral_scenarios": 0.24720357941834453, + "moral_disputes": 0.5681159420289855, + "electrical_engineering": 0.5208333333333334, + "astronomy": 0.6225165562913907, + "college_biology": 0.6993006993006993 + } + }, + "prompt_5": { + "accuracy": 0.554022166607079, + "category_acc": { + "high_school_european_history": 0.7134146341463414, + "business_ethics": 0.6363636363636364, + "clinical_knowledge": 0.5946969696969697, + "medical_genetics": 0.6565656565656566, + "high_school_us_history": 0.7438423645320197, + "high_school_physics": 0.3, + "high_school_world_history": 0.7584745762711864, + "virology": 0.47878787878787876, + "high_school_microeconomics": 0.5949367088607594, + "econometrics": 0.2920353982300885, + "college_computer_science": 0.3838383838383838, + "high_school_biology": 0.6731391585760518, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.3914590747330961, + "philosophy": 0.5870967741935483, + "professional_medicine": 0.6236162361623616, + "nutrition": 0.6163934426229508, + "global_facts": 0.26262626262626265, + "machine_learning": 0.3153153153153153, + "security_studies": 0.6434426229508197, + "public_relations": 0.5321100917431193, + "professional_psychology": 0.5368248772504092, + "prehistory": 0.5851393188854489, + "anatomy": 0.5149253731343284, + "human_sexuality": 0.6461538461538462, + "college_medicine": 0.5755813953488372, + "high_school_government_and_politics": 0.8020833333333334, + "college_chemistry": 0.40404040404040403, + "logical_fallacies": 0.654320987654321, + "high_school_geography": 0.7106598984771574, + "elementary_mathematics": 0.5623342175066313, + "human_aging": 0.5990990990990991, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.7738970588235294, + "formal_logic": 0.44, + "high_school_statistics": 0.4372093023255814, + "international_law": 0.6666666666666666, + "high_school_mathematics": 0.3345724907063197, + "high_school_computer_science": 0.47474747474747475, + "conceptual_physics": 0.4444444444444444, + "miscellaneous": 0.7736572890025576, + "high_school_chemistry": 0.42574257425742573, + "marketing": 0.7854077253218884, + "professional_law": 0.4207436399217221, + "management": 0.7254901960784313, + "college_physics": 0.43564356435643564, + "jurisprudence": 0.6448598130841121, + "world_religions": 0.7352941176470589, + "sociology": 0.76, + "us_foreign_policy": 0.797979797979798, + "high_school_macroeconomics": 0.5449871465295629, + "computer_security": 0.6767676767676768, + "moral_scenarios": 0.24720357941834453, + "moral_disputes": 0.6028985507246377, + "electrical_engineering": 0.5763888888888888, + "astronomy": 0.6622516556291391, + "college_biology": 0.6713286713286714 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5646359583952452 + }, + "prompt_2": { + "accuracy": 0.5564635958395245 + }, + "prompt_3": { + "accuracy": 0.5601783060921248 + }, + "prompt_4": { + "accuracy": 0.5364041604754829 + }, + "prompt_5": { + "accuracy": 0.5423476968796433 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5579078455790785, + "category_acc": { + "computer_network": 0.5, + "operating_system": 0.5416666666666666, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.40476190476190477, + "college_physics": 0.25, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.42857142857142855, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.7586206896551724, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.5416666666666666, + "high_school_chemistry": 0.4583333333333333, + "high_school_biology": 0.4583333333333333, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.9615384615384616, + "middle_school_physics": 0.6666666666666666, + "middle_school_chemistry": 0.84, + "veterinary_medicine": 0.5357142857142857, + "college_economics": 0.5, + "business_administration": 0.5526315789473685, + "marxism": 0.7916666666666666, + "mao_zedong_thought": 0.7586206896551724, + "education_science": 0.6470588235294118, + "teacher_qualification": 0.8367346938775511, + "high_school_politics": 0.7916666666666666, + "high_school_geography": 0.75, + "middle_school_politics": 0.9230769230769231, + "middle_school_geography": 0.6470588235294118, + "modern_chinese_history": 0.6785714285714286, + "ideological_and_moral_cultivation": 0.8333333333333334, + "logic": 0.5555555555555556, + "law": 0.5517241379310345, + "chinese_language_and_literature": 0.7857142857142857, + "art_studies": 0.6578947368421053, + "professional_tour_guide": 0.5882352941176471, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.72, + "middle_school_history": 0.8518518518518519, + "civil_servant": 0.5961538461538461, + "sports_science": 0.5, + "plant_protection": 0.5555555555555556, + "basic_medicine": 0.5833333333333334, + "clinical_medicine": 0.5555555555555556, + "urban_and_rural_planner": 0.5882352941176471, + "accountant": 0.4444444444444444, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.4722222222222222, + "tax_accountant": 0.3888888888888889, + "physician": 0.5370370370370371 + } + }, + "prompt_2": { + "accuracy": 0.5691158156911582, + "category_acc": { + "computer_network": 0.5416666666666666, + "operating_system": 0.5416666666666666, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.40476190476190477, + "college_physics": 0.25, + "college_chemistry": 0.4827586206896552, + "advanced_mathematics": 0.5416666666666666, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.47619047619047616, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.8275862068965517, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.5, + "high_school_chemistry": 0.4166666666666667, + "high_school_biology": 0.5, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.9230769230769231, + "middle_school_physics": 0.7083333333333334, + "middle_school_chemistry": 0.92, + "veterinary_medicine": 0.5, + "college_economics": 0.43333333333333335, + "business_administration": 0.5, + "marxism": 0.8333333333333334, + "mao_zedong_thought": 0.7586206896551724, + "education_science": 0.6764705882352942, + "teacher_qualification": 0.8163265306122449, + "high_school_politics": 0.7916666666666666, + "high_school_geography": 0.75, + "middle_school_politics": 0.9230769230769231, + "middle_school_geography": 0.6470588235294118, + "modern_chinese_history": 0.7142857142857143, + "ideological_and_moral_cultivation": 0.9166666666666666, + "logic": 0.5185185185185185, + "law": 0.5172413793103449, + "chinese_language_and_literature": 0.7857142857142857, + "art_studies": 0.631578947368421, + "professional_tour_guide": 0.6764705882352942, + "legal_professional": 0.4642857142857143, + "high_school_chinese": 0.4166666666666667, + "high_school_history": 0.6, + "middle_school_history": 0.8518518518518519, + "civil_servant": 0.5961538461538461, + "sports_science": 0.5416666666666666, + "plant_protection": 0.6296296296296297, + "basic_medicine": 0.5833333333333334, + "clinical_medicine": 0.5185185185185185, + "urban_and_rural_planner": 0.5882352941176471, + "accountant": 0.42592592592592593, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.4444444444444444, + "tax_accountant": 0.4074074074074074, + "physician": 0.5370370370370371 + } + }, + "prompt_3": { + "accuracy": 0.564134495641345, + "category_acc": { + "computer_network": 0.5416666666666666, + "operating_system": 0.5, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.47619047619047616, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.42857142857142855, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.8275862068965517, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.5, + "high_school_chemistry": 0.5, + "high_school_biology": 0.4583333333333333, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.9230769230769231, + "middle_school_physics": 0.7083333333333334, + "middle_school_chemistry": 0.88, + "veterinary_medicine": 0.4642857142857143, + "college_economics": 0.45, + "business_administration": 0.47368421052631576, + "marxism": 0.7916666666666666, + "mao_zedong_thought": 0.7586206896551724, + "education_science": 0.6764705882352942, + "teacher_qualification": 0.8775510204081632, + "high_school_politics": 0.7916666666666666, + "high_school_geography": 0.75, + "middle_school_politics": 0.9230769230769231, + "middle_school_geography": 0.6470588235294118, + "modern_chinese_history": 0.7142857142857143, + "ideological_and_moral_cultivation": 0.875, + "logic": 0.6666666666666666, + "law": 0.5862068965517241, + "chinese_language_and_literature": 0.8214285714285714, + "art_studies": 0.631578947368421, + "professional_tour_guide": 0.5882352941176471, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.64, + "middle_school_history": 0.8518518518518519, + "civil_servant": 0.5384615384615384, + "sports_science": 0.5416666666666666, + "plant_protection": 0.5925925925925926, + "basic_medicine": 0.625, + "clinical_medicine": 0.48148148148148145, + "urban_and_rural_planner": 0.6078431372549019, + "accountant": 0.48148148148148145, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.5, + "tax_accountant": 0.4074074074074074, + "physician": 0.5740740740740741 + } + }, + "prompt_4": { + "accuracy": 0.5554171855541719, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.5416666666666666, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.4523809523809524, + "college_physics": 0.25, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.7241379310344828, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.4583333333333333, + "high_school_chemistry": 0.4583333333333333, + "high_school_biology": 0.5833333333333334, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.8461538461538461, + "middle_school_physics": 0.75, + "middle_school_chemistry": 0.84, + "veterinary_medicine": 0.4642857142857143, + "college_economics": 0.4166666666666667, + "business_administration": 0.5, + "marxism": 0.8333333333333334, + "mao_zedong_thought": 0.6896551724137931, + "education_science": 0.6176470588235294, + "teacher_qualification": 0.8775510204081632, + "high_school_politics": 0.7916666666666666, + "high_school_geography": 0.7083333333333334, + "middle_school_politics": 0.8846153846153846, + "middle_school_geography": 0.6470588235294118, + "modern_chinese_history": 0.6785714285714286, + "ideological_and_moral_cultivation": 0.75, + "logic": 0.5555555555555556, + "law": 0.5517241379310345, + "chinese_language_and_literature": 0.7857142857142857, + "art_studies": 0.6578947368421053, + "professional_tour_guide": 0.5588235294117647, + "legal_professional": 0.4642857142857143, + "high_school_chinese": 0.4166666666666667, + "high_school_history": 0.64, + "middle_school_history": 0.8518518518518519, + "civil_servant": 0.5769230769230769, + "sports_science": 0.5833333333333334, + "plant_protection": 0.5925925925925926, + "basic_medicine": 0.5416666666666666, + "clinical_medicine": 0.5555555555555556, + "urban_and_rural_planner": 0.5882352941176471, + "accountant": 0.46296296296296297, + "fire_engineer": 0.4444444444444444, + "environmental_impact_assessment_engineer": 0.4722222222222222, + "tax_accountant": 0.42592592592592593, + "physician": 0.5370370370370371 + } + }, + "prompt_5": { + "accuracy": 0.5112079701120797, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.47619047619047616, + "college_physics": 0.375, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.6551724137931034, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.25, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.8076923076923077, + "middle_school_physics": 0.6666666666666666, + "middle_school_chemistry": 0.8, + "veterinary_medicine": 0.5, + "college_economics": 0.4666666666666667, + "business_administration": 0.47368421052631576, + "marxism": 0.7083333333333334, + "mao_zedong_thought": 0.3793103448275862, + "education_science": 0.6470588235294118, + "teacher_qualification": 0.7551020408163265, + "high_school_politics": 0.7083333333333334, + "high_school_geography": 0.6666666666666666, + "middle_school_politics": 0.8076923076923077, + "middle_school_geography": 0.5294117647058824, + "modern_chinese_history": 0.6428571428571429, + "ideological_and_moral_cultivation": 0.6666666666666666, + "logic": 0.5925925925925926, + "law": 0.41379310344827586, + "chinese_language_and_literature": 0.75, + "art_studies": 0.6052631578947368, + "professional_tour_guide": 0.6764705882352942, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.4166666666666667, + "high_school_history": 0.52, + "middle_school_history": 0.8888888888888888, + "civil_servant": 0.5769230769230769, + "sports_science": 0.5, + "plant_protection": 0.5185185185185185, + "basic_medicine": 0.5416666666666666, + "clinical_medicine": 0.5925925925925926, + "urban_and_rural_planner": 0.5882352941176471, + "accountant": 0.46296296296296297, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.3888888888888889, + "tax_accountant": 0.37037037037037035, + "physician": 0.5370370370370371 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5591397849462365 + }, + "prompt_2": { + "accuracy": 0.5913978494623656 + }, + "prompt_3": { + "accuracy": 0.5698924731182796 + }, + "prompt_4": { + "accuracy": 0.5734767025089605 + }, + "prompt_5": { + "accuracy": 0.5698924731182796 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5827145570713176, + "category_acc": { + "agronomy": 0.5562130177514792, + "anatomy": 0.47297297297297297, + "ancient_chinese": 0.3719512195121951, + "arts": 0.81875, + "astronomy": 0.4121212121212121, + "business_ethics": 0.5502392344497608, + "chinese_civil_service_exam": 0.53125, + "chinese_driving_rule": 0.7786259541984732, + "chinese_food_culture": 0.5735294117647058, + "chinese_foreign_policy": 0.6542056074766355, + "chinese_history": 0.6996904024767802, + "chinese_literature": 0.5637254901960784, + "chinese_teacher_qualification": 0.7206703910614525, + "clinical_knowledge": 0.5232067510548524, + "college_actuarial_science": 0.33962264150943394, + "college_education": 0.7383177570093458, + "college_engineering_hydrology": 0.5566037735849056, + "college_law": 0.5462962962962963, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.44339622641509435, + "college_medicine": 0.5641025641025641, + "computer_science": 0.5588235294117647, + "computer_security": 0.6432748538011696, + "conceptual_physics": 0.5918367346938775, + "construction_project_management": 0.43884892086330934, + "economics": 0.6226415094339622, + "education": 0.6441717791411042, + "electrical_engineering": 0.5348837209302325, + "elementary_chinese": 0.5833333333333334, + "elementary_commonsense": 0.6212121212121212, + "elementary_information_and_technology": 0.7521008403361344, + "elementary_mathematics": 0.39565217391304347, + "ethnology": 0.6222222222222222, + "food_science": 0.5874125874125874, + "genetics": 0.48295454545454547, + "global_facts": 0.6644295302013423, + "high_school_biology": 0.5088757396449705, + "high_school_chemistry": 0.4393939393939394, + "high_school_geography": 0.6610169491525424, + "high_school_mathematics": 0.36585365853658536, + "high_school_physics": 0.4727272727272727, + "high_school_politics": 0.7342657342657343, + "human_sexuality": 0.5952380952380952, + "international_law": 0.4864864864864865, + "journalism": 0.5872093023255814, + "jurisprudence": 0.6009732360097324, + "legal_and_moral_basis": 0.9018691588785047, + "logical": 0.5284552845528455, + "machine_learning": 0.5081967213114754, + "management": 0.6428571428571429, + "marketing": 0.6166666666666667, + "marxist_theory": 0.6984126984126984, + "modern_chinese": 0.4827586206896552, + "nutrition": 0.6, + "philosophy": 0.6571428571428571, + "professional_accounting": 0.6514285714285715, + "professional_law": 0.5071090047393365, + "professional_medicine": 0.4867021276595745, + "professional_psychology": 0.6767241379310345, + "public_relations": 0.5919540229885057, + "security_study": 0.7037037037037037, + "sociology": 0.6194690265486725, + "sports_science": 0.5878787878787879, + "traditional_chinese_medicine": 0.4702702702702703, + "virology": 0.5857988165680473, + "world_history": 0.6521739130434783, + "world_religions": 0.70625 + } + }, + "prompt_2": { + "accuracy": 0.589621826972889, + "category_acc": { + "agronomy": 0.5502958579881657, + "anatomy": 0.4797297297297297, + "ancient_chinese": 0.35365853658536583, + "arts": 0.84375, + "astronomy": 0.40606060606060607, + "business_ethics": 0.5741626794258373, + "chinese_civil_service_exam": 0.5375, + "chinese_driving_rule": 0.7557251908396947, + "chinese_food_culture": 0.5882352941176471, + "chinese_foreign_policy": 0.6635514018691588, + "chinese_history": 0.7151702786377709, + "chinese_literature": 0.553921568627451, + "chinese_teacher_qualification": 0.7262569832402235, + "clinical_knowledge": 0.5485232067510548, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.7476635514018691, + "college_engineering_hydrology": 0.5660377358490566, + "college_law": 0.5370370370370371, + "college_mathematics": 0.3142857142857143, + "college_medical_statistics": 0.46226415094339623, + "college_medicine": 0.5714285714285714, + "computer_science": 0.6078431372549019, + "computer_security": 0.6491228070175439, + "conceptual_physics": 0.5986394557823129, + "construction_project_management": 0.460431654676259, + "economics": 0.6226415094339622, + "education": 0.6503067484662577, + "electrical_engineering": 0.5581395348837209, + "elementary_chinese": 0.5873015873015873, + "elementary_commonsense": 0.6666666666666666, + "elementary_information_and_technology": 0.7563025210084033, + "elementary_mathematics": 0.3652173913043478, + "ethnology": 0.6666666666666666, + "food_science": 0.5944055944055944, + "genetics": 0.48863636363636365, + "global_facts": 0.697986577181208, + "high_school_biology": 0.5325443786982249, + "high_school_chemistry": 0.4015151515151515, + "high_school_geography": 0.6779661016949152, + "high_school_mathematics": 0.32926829268292684, + "high_school_physics": 0.5, + "high_school_politics": 0.7482517482517482, + "human_sexuality": 0.5793650793650794, + "international_law": 0.4864864864864865, + "journalism": 0.6046511627906976, + "jurisprudence": 0.610705596107056, + "legal_and_moral_basis": 0.9158878504672897, + "logical": 0.5528455284552846, + "machine_learning": 0.4918032786885246, + "management": 0.6619047619047619, + "marketing": 0.6055555555555555, + "marxist_theory": 0.6984126984126984, + "modern_chinese": 0.49137931034482757, + "nutrition": 0.593103448275862, + "philosophy": 0.6571428571428571, + "professional_accounting": 0.6628571428571428, + "professional_law": 0.4881516587677725, + "professional_medicine": 0.48404255319148937, + "professional_psychology": 0.6724137931034483, + "public_relations": 0.603448275862069, + "security_study": 0.6962962962962963, + "sociology": 0.6415929203539823, + "sports_science": 0.5818181818181818, + "traditional_chinese_medicine": 0.4864864864864865, + "virology": 0.6035502958579881, + "world_history": 0.6459627329192547, + "world_religions": 0.725 + } + }, + "prompt_3": { + "accuracy": 0.5837506475565533, + "category_acc": { + "agronomy": 0.5680473372781065, + "anatomy": 0.4391891891891892, + "ancient_chinese": 0.3597560975609756, + "arts": 0.825, + "astronomy": 0.4727272727272727, + "business_ethics": 0.569377990430622, + "chinese_civil_service_exam": 0.55625, + "chinese_driving_rule": 0.7862595419847328, + "chinese_food_culture": 0.5955882352941176, + "chinese_foreign_policy": 0.6728971962616822, + "chinese_history": 0.7213622291021672, + "chinese_literature": 0.5833333333333334, + "chinese_teacher_qualification": 0.7150837988826816, + "clinical_knowledge": 0.5316455696202531, + "college_actuarial_science": 0.3490566037735849, + "college_education": 0.7476635514018691, + "college_engineering_hydrology": 0.5471698113207547, + "college_law": 0.5185185185185185, + "college_mathematics": 0.29523809523809524, + "college_medical_statistics": 0.44339622641509435, + "college_medicine": 0.5604395604395604, + "computer_science": 0.5833333333333334, + "computer_security": 0.6608187134502924, + "conceptual_physics": 0.5986394557823129, + "construction_project_management": 0.4676258992805755, + "economics": 0.5849056603773585, + "education": 0.6503067484662577, + "electrical_engineering": 0.5755813953488372, + "elementary_chinese": 0.5793650793650794, + "elementary_commonsense": 0.6464646464646465, + "elementary_information_and_technology": 0.7647058823529411, + "elementary_mathematics": 0.34782608695652173, + "ethnology": 0.6074074074074074, + "food_science": 0.5804195804195804, + "genetics": 0.4431818181818182, + "global_facts": 0.697986577181208, + "high_school_biology": 0.4911242603550296, + "high_school_chemistry": 0.4318181818181818, + "high_school_geography": 0.6779661016949152, + "high_school_mathematics": 0.3048780487804878, + "high_school_physics": 0.5, + "high_school_politics": 0.6783216783216783, + "human_sexuality": 0.5714285714285714, + "international_law": 0.4918918918918919, + "journalism": 0.563953488372093, + "jurisprudence": 0.5936739659367397, + "legal_and_moral_basis": 0.9112149532710281, + "logical": 0.5284552845528455, + "machine_learning": 0.4672131147540984, + "management": 0.6476190476190476, + "marketing": 0.6111111111111112, + "marxist_theory": 0.7142857142857143, + "modern_chinese": 0.5086206896551724, + "nutrition": 0.5793103448275863, + "philosophy": 0.6761904761904762, + "professional_accounting": 0.6571428571428571, + "professional_law": 0.4786729857819905, + "professional_medicine": 0.47606382978723405, + "professional_psychology": 0.6681034482758621, + "public_relations": 0.603448275862069, + "security_study": 0.7037037037037037, + "sociology": 0.6371681415929203, + "sports_science": 0.6060606060606061, + "traditional_chinese_medicine": 0.4702702702702703, + "virology": 0.5680473372781065, + "world_history": 0.6459627329192547, + "world_religions": 0.7125 + } + }, + "prompt_4": { + "accuracy": 0.5778794681402176, + "category_acc": { + "agronomy": 0.5680473372781065, + "anatomy": 0.4527027027027027, + "ancient_chinese": 0.3475609756097561, + "arts": 0.8125, + "astronomy": 0.4484848484848485, + "business_ethics": 0.5598086124401914, + "chinese_civil_service_exam": 0.5625, + "chinese_driving_rule": 0.7709923664122137, + "chinese_food_culture": 0.5808823529411765, + "chinese_foreign_policy": 0.6448598130841121, + "chinese_history": 0.7089783281733746, + "chinese_literature": 0.5784313725490197, + "chinese_teacher_qualification": 0.7318435754189944, + "clinical_knowledge": 0.5063291139240507, + "college_actuarial_science": 0.29245283018867924, + "college_education": 0.7476635514018691, + "college_engineering_hydrology": 0.5377358490566038, + "college_law": 0.5277777777777778, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.4716981132075472, + "college_medicine": 0.5677655677655677, + "computer_science": 0.553921568627451, + "computer_security": 0.631578947368421, + "conceptual_physics": 0.6122448979591837, + "construction_project_management": 0.49640287769784175, + "economics": 0.610062893081761, + "education": 0.6196319018404908, + "electrical_engineering": 0.5406976744186046, + "elementary_chinese": 0.5634920634920635, + "elementary_commonsense": 0.6464646464646465, + "elementary_information_and_technology": 0.7647058823529411, + "elementary_mathematics": 0.4652173913043478, + "ethnology": 0.5851851851851851, + "food_science": 0.5384615384615384, + "genetics": 0.45454545454545453, + "global_facts": 0.6577181208053692, + "high_school_biology": 0.48520710059171596, + "high_school_chemistry": 0.42424242424242425, + "high_school_geography": 0.635593220338983, + "high_school_mathematics": 0.3048780487804878, + "high_school_physics": 0.45454545454545453, + "high_school_politics": 0.6923076923076923, + "human_sexuality": 0.5793650793650794, + "international_law": 0.5135135135135135, + "journalism": 0.5581395348837209, + "jurisprudence": 0.5961070559610706, + "legal_and_moral_basis": 0.9065420560747663, + "logical": 0.5691056910569106, + "machine_learning": 0.45081967213114754, + "management": 0.6476190476190476, + "marketing": 0.6, + "marxist_theory": 0.6825396825396826, + "modern_chinese": 0.47413793103448276, + "nutrition": 0.5793103448275863, + "philosophy": 0.6571428571428571, + "professional_accounting": 0.6114285714285714, + "professional_law": 0.4928909952606635, + "professional_medicine": 0.48936170212765956, + "professional_psychology": 0.6551724137931034, + "public_relations": 0.5804597701149425, + "security_study": 0.6962962962962963, + "sociology": 0.6061946902654868, + "sports_science": 0.5515151515151515, + "traditional_chinese_medicine": 0.4702702702702703, + "virology": 0.5798816568047337, + "world_history": 0.6335403726708074, + "world_religions": 0.70625 + } + }, + "prompt_5": { + "accuracy": 0.5600069072699015, + "category_acc": { + "agronomy": 0.5266272189349113, + "anatomy": 0.4391891891891892, + "ancient_chinese": 0.36585365853658536, + "arts": 0.8, + "astronomy": 0.38181818181818183, + "business_ethics": 0.5789473684210527, + "chinese_civil_service_exam": 0.53125, + "chinese_driving_rule": 0.732824427480916, + "chinese_food_culture": 0.5661764705882353, + "chinese_foreign_policy": 0.6355140186915887, + "chinese_history": 0.6749226006191951, + "chinese_literature": 0.5294117647058824, + "chinese_teacher_qualification": 0.6927374301675978, + "clinical_knowledge": 0.5189873417721519, + "college_actuarial_science": 0.20754716981132076, + "college_education": 0.6915887850467289, + "college_engineering_hydrology": 0.5471698113207547, + "college_law": 0.5277777777777778, + "college_mathematics": 0.2, + "college_medical_statistics": 0.46226415094339623, + "college_medicine": 0.5347985347985348, + "computer_science": 0.5882352941176471, + "computer_security": 0.6432748538011696, + "conceptual_physics": 0.5918367346938775, + "construction_project_management": 0.4244604316546763, + "economics": 0.5849056603773585, + "education": 0.6073619631901841, + "electrical_engineering": 0.5290697674418605, + "elementary_chinese": 0.5714285714285714, + "elementary_commonsense": 0.6313131313131313, + "elementary_information_and_technology": 0.7436974789915967, + "elementary_mathematics": 0.3826086956521739, + "ethnology": 0.6074074074074074, + "food_science": 0.5664335664335665, + "genetics": 0.4659090909090909, + "global_facts": 0.6644295302013423, + "high_school_biology": 0.5088757396449705, + "high_school_chemistry": 0.3560606060606061, + "high_school_geography": 0.635593220338983, + "high_school_mathematics": 0.35365853658536583, + "high_school_physics": 0.41818181818181815, + "high_school_politics": 0.6013986013986014, + "human_sexuality": 0.5158730158730159, + "international_law": 0.4648648648648649, + "journalism": 0.5465116279069767, + "jurisprudence": 0.5693430656934306, + "legal_and_moral_basis": 0.822429906542056, + "logical": 0.5284552845528455, + "machine_learning": 0.47540983606557374, + "management": 0.6571428571428571, + "marketing": 0.5944444444444444, + "marxist_theory": 0.6190476190476191, + "modern_chinese": 0.45689655172413796, + "nutrition": 0.5517241379310345, + "philosophy": 0.6666666666666666, + "professional_accounting": 0.6514285714285715, + "professional_law": 0.4549763033175355, + "professional_medicine": 0.4920212765957447, + "professional_psychology": 0.6206896551724138, + "public_relations": 0.5689655172413793, + "security_study": 0.6888888888888889, + "sociology": 0.5796460176991151, + "sports_science": 0.593939393939394, + "traditional_chinese_medicine": 0.4702702702702703, + "virology": 0.5976331360946746, + "world_history": 0.5962732919254659, + "world_religions": 0.7 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2727272727272727 + }, + "prompt_2": { + "accuracy": 0.30303030303030304 + }, + "prompt_3": { + "accuracy": 0.2727272727272727 + }, + "prompt_4": { + "accuracy": 0.30303030303030304 + }, + "prompt_5": { + "accuracy": 0.42424242424242425 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4431818181818182 + }, + "prompt_2": { + "accuracy": 0.38636363636363635 + }, + "prompt_3": { + "accuracy": 0.4090909090909091 + }, + "prompt_4": { + "accuracy": 0.5022727272727273 + }, + "prompt_5": { + "accuracy": 0.4818181818181818 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4271186440677966 + }, + "prompt_2": { + "accuracy": 0.44169491525423726 + }, + "prompt_3": { + "accuracy": 0.3833898305084746 + }, + "prompt_4": { + "accuracy": 0.4511864406779661 + }, + "prompt_5": { + "accuracy": 0.424406779661017 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8522812266267764 + }, + "prompt_2": { + "accuracy": 0.8496634255796559 + }, + "prompt_3": { + "accuracy": 0.8515332834704562 + }, + "prompt_4": { + "accuracy": 0.8504113687359761 + }, + "prompt_5": { + "accuracy": 0.8418100224382947 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8388045075943165 + }, + "prompt_2": { + "accuracy": 0.8657520823125918 + }, + "prompt_3": { + "accuracy": 0.8633023027927487 + }, + "prompt_4": { + "accuracy": 0.787359137677609 + }, + "prompt_5": { + "accuracy": 0.8397844194022538 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.3372010450108503, + "rouge2": 0.1194335112538586, + "rougeL": 0.25332300265728314, + "avg_rouge": 0.236652519640664 + }, + "prompt_2": { + "rouge1": 0.35952407565794425, + "rouge2": 0.13151925460671704, + "rougeL": 0.2720873236520973, + "avg_rouge": 0.25437688463891955 + }, + "prompt_3": { + "rouge1": 0.36123982204533983, + "rouge2": 0.1301510273959024, + "rougeL": 0.2749833523953474, + "avg_rouge": 0.25545806727886317 + }, + "prompt_4": { + "rouge1": 0.33626181404747296, + "rouge2": 0.11939161991492668, + "rougeL": 0.2542277073436538, + "avg_rouge": 0.23662704710201785 + }, + "prompt_5": { + "rouge1": 0.3595118835731163, + "rouge2": 0.1268612468245694, + "rougeL": 0.27225013621628763, + "avg_rouge": 0.2528744222046578 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.18667044293911464, + "rouge2": 0.059662664643230535, + "rougeL": 0.1410174679052223, + "avg_rouge": 0.12911685849585583 + }, + "prompt_2": { + "rouge1": 0.18992517810083062, + "rouge2": 0.05824462651521212, + "rougeL": 0.14435143249511387, + "avg_rouge": 0.13084041237038554 + }, + "prompt_3": { + "rouge1": 0.19131185049530708, + "rouge2": 0.05801106210210166, + "rougeL": 0.14474094098188986, + "avg_rouge": 0.1313546178597662 + }, + "prompt_4": { + "rouge1": 0.18764029498183024, + "rouge2": 0.06065572149534456, + "rougeL": 0.14220543264443083, + "avg_rouge": 0.1301671497072019 + }, + "prompt_5": { + "rouge1": 0.18983968786802372, + "rouge2": 0.054284682864904954, + "rougeL": 0.14580715235369868, + "avg_rouge": 0.12997717436220912 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8967889908256881 + }, + "prompt_2": { + "accuracy": 0.8956422018348624 + }, + "prompt_3": { + "accuracy": 0.8853211009174312 + }, + "prompt_4": { + "accuracy": 0.8577981651376146 + }, + "prompt_5": { + "accuracy": 0.875 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7056567593480345 + }, + "prompt_2": { + "accuracy": 0.7497603068072867 + }, + "prompt_3": { + "accuracy": 0.725790987535954 + }, + "prompt_4": { + "accuracy": 0.7363374880153404 + }, + "prompt_5": { + "accuracy": 0.7794822627037392 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.722 + }, + "prompt_2": { + "accuracy": 0.7405 + }, + "prompt_3": { + "accuracy": 0.742 + }, + "prompt_4": { + "accuracy": 0.659 + }, + "prompt_5": { + "accuracy": 0.661 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.697 + }, + "prompt_2": { + "accuracy": 0.643 + }, + "prompt_3": { + "accuracy": 0.707 + }, + "prompt_4": { + "accuracy": 0.6735 + }, + "prompt_5": { + "accuracy": 0.6315 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.664 + }, + "prompt_2": { + "accuracy": 0.608 + }, + "prompt_3": { + "accuracy": 0.676 + }, + "prompt_4": { + "accuracy": 0.562 + }, + "prompt_5": { + "accuracy": 0.6465 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5070422535211268 + }, + "prompt_2": { + "accuracy": 0.4647887323943662 + }, + "prompt_3": { + "accuracy": 0.4647887323943662 + }, + "prompt_4": { + "accuracy": 0.4788732394366197 + }, + "prompt_5": { + "accuracy": 0.6056338028169014 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7689530685920578 + }, + "prompt_2": { + "accuracy": 0.7436823104693141 + }, + "prompt_3": { + "accuracy": 0.7256317689530686 + }, + "prompt_4": { + "accuracy": 0.6859205776173285 + }, + "prompt_5": { + "accuracy": 0.779783393501805 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7034313725490197 + }, + "prompt_2": { + "accuracy": 0.6691176470588235 + }, + "prompt_3": { + "accuracy": 0.7156862745098039 + }, + "prompt_4": { + "accuracy": 0.6764705882352942 + }, + "prompt_5": { + "accuracy": 0.7009803921568627 + } } }, "five_shot": { @@ -8454,53 +72703,1733 @@ "model_link": "https://huggingface.co/lmsys/vicuna-7b-v1.5", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.44476190476190475, + "language_acc": { + "Malay": 0.4066666666666667, + "English": 0.5333333333333333, + "Vietnamese": 0.46, + "Spanish": 0.5133333333333333, + "Indonesian": 0.4666666666666667, + "Filipino": 0.32666666666666666, + "Chinese": 0.4066666666666667 + }, + "consistency_score_2": 0.566984126984127, + "consistency_score_3": 0.3998095238095239, + "consistency_score_4": 0.30628571428571433, + "consistency_score_5": 0.24380952380952386, + "consistency_score_6": 0.199047619047619, + "consistency_score_7": 0.16666666666666666, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.56, + "Malay,Vietnamese": 0.6133333333333333, + "Malay,Spanish": 0.54, + "Malay,Indonesian": 0.6333333333333333, + "Malay,Filipino": 0.5066666666666667, + "Malay,Chinese": 0.46, + "English,Vietnamese": 0.6266666666666667, + "English,Spanish": 0.6666666666666666, + "English,Indonesian": 0.62, + "English,Filipino": 0.6, + "English,Chinese": 0.58, + "Vietnamese,Spanish": 0.6133333333333333, + "Vietnamese,Indonesian": 0.6266666666666667, + "Vietnamese,Filipino": 0.5666666666666667, + "Vietnamese,Chinese": 0.58, + "Spanish,Indonesian": 0.6133333333333333, + "Spanish,Filipino": 0.5133333333333333, + "Spanish,Chinese": 0.5333333333333333, + "Indonesian,Filipino": 0.5066666666666667, + "Indonesian,Chinese": 0.48, + "Filipino,Chinese": 0.4666666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.44666666666666666, + "Malay,English,Spanish": 0.42, + "Malay,English,Indonesian": 0.4533333333333333, + "Malay,English,Filipino": 0.38, + "Malay,English,Chinese": 0.37333333333333335, + "Malay,Vietnamese,Spanish": 0.43333333333333335, + "Malay,Vietnamese,Indonesian": 0.46, + "Malay,Vietnamese,Filipino": 0.38666666666666666, + "Malay,Vietnamese,Chinese": 0.38, + "Malay,Spanish,Indonesian": 0.44666666666666666, + "Malay,Spanish,Filipino": 0.34, + "Malay,Spanish,Chinese": 0.32666666666666666, + "Malay,Indonesian,Filipino": 0.37333333333333335, + "Malay,Indonesian,Chinese": 0.36, + "Malay,Filipino,Chinese": 0.3, + "English,Vietnamese,Spanish": 0.48, + "English,Vietnamese,Indonesian": 0.48, + "English,Vietnamese,Filipino": 0.43333333333333335, + "English,Vietnamese,Chinese": 0.44666666666666666, + "English,Spanish,Indonesian": 0.47333333333333333, + "English,Spanish,Filipino": 0.43333333333333335, + "English,Spanish,Chinese": 0.4266666666666667, + "English,Indonesian,Filipino": 0.4066666666666667, + "English,Indonesian,Chinese": 0.4, + "English,Filipino,Chinese": 0.38, + "Vietnamese,Spanish,Indonesian": 0.4666666666666667, + "Vietnamese,Spanish,Filipino": 0.4, + "Vietnamese,Spanish,Chinese": 0.41333333333333333, + "Vietnamese,Indonesian,Filipino": 0.3933333333333333, + "Vietnamese,Indonesian,Chinese": 0.4, + "Vietnamese,Filipino,Chinese": 0.35333333333333333, + "Spanish,Indonesian,Filipino": 0.36666666666666664, + "Spanish,Indonesian,Chinese": 0.36666666666666664, + "Spanish,Filipino,Chinese": 0.3, + "Indonesian,Filipino,Chinese": 0.29333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.36, + "Malay,English,Vietnamese,Indonesian": 0.36, + "Malay,English,Vietnamese,Filipino": 0.31333333333333335, + "Malay,English,Vietnamese,Chinese": 0.31333333333333335, + "Malay,English,Spanish,Indonesian": 0.36666666666666664, + "Malay,English,Spanish,Filipino": 0.29333333333333333, + "Malay,English,Spanish,Chinese": 0.29333333333333333, + "Malay,English,Indonesian,Filipino": 0.31333333333333335, + "Malay,English,Indonesian,Chinese": 0.31333333333333335, + "Malay,English,Filipino,Chinese": 0.26, + "Malay,Vietnamese,Spanish,Indonesian": 0.36666666666666664, + "Malay,Vietnamese,Spanish,Filipino": 0.29333333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.29333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.30666666666666664, + "Malay,Vietnamese,Indonesian,Chinese": 0.30666666666666664, + "Malay,Vietnamese,Filipino,Chinese": 0.24666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.29333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.29333333333333333, + "Malay,Spanish,Filipino,Chinese": 0.21333333333333335, + "Malay,Indonesian,Filipino,Chinese": 0.24666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.38666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.35333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.35333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.34, + "English,Vietnamese,Indonesian,Chinese": 0.34, + "English,Vietnamese,Filipino,Chinese": 0.3, + "English,Spanish,Indonesian,Filipino": 0.32, + "English,Spanish,Indonesian,Chinese": 0.32, + "English,Spanish,Filipino,Chinese": 0.2866666666666667, + "English,Indonesian,Filipino,Chinese": 0.2733333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.32, + "Vietnamese,Spanish,Indonesian,Chinese": 0.32666666666666666, + "Vietnamese,Spanish,Filipino,Chinese": 0.26, + "Vietnamese,Indonesian,Filipino,Chinese": 0.25333333333333335, + "Spanish,Indonesian,Filipino,Chinese": 0.24 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.30666666666666664, + "Malay,English,Vietnamese,Spanish,Filipino": 0.26, + "Malay,English,Vietnamese,Spanish,Chinese": 0.26, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.26, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.26, + "Malay,English,Vietnamese,Filipino,Chinese": 0.21333333333333335, + "Malay,English,Spanish,Indonesian,Filipino": 0.25333333333333335, + "Malay,English,Spanish,Indonesian,Chinese": 0.26666666666666666, + "Malay,English,Spanish,Filipino,Chinese": 0.2, + "Malay,English,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.26, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.20666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.2, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.2866666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.2866666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.24666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.23333333333333334, + "English,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.22, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.23333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.18, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.18, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666 + } + }, + "AC3_2": 0.49849059418026337, + "AC3_3": 0.4210894172235914, + "AC3_4": 0.3627578710433269, + "AC3_5": 0.31496278728219107, + "AC3_6": 0.2750154972804727, + "AC3_7": 0.24247144336636628 + }, + "prompt_2": { + "overall_acc": 0.4523809523809524, + "language_acc": { + "Malay": 0.41333333333333333, + "English": 0.5533333333333333, + "Vietnamese": 0.44, + "Spanish": 0.5133333333333333, + "Indonesian": 0.4666666666666667, + "Filipino": 0.32666666666666666, + "Chinese": 0.4533333333333333 + }, + "consistency_score_2": 0.5666666666666668, + "consistency_score_3": 0.39790476190476204, + "consistency_score_4": 0.3041904761904762, + "consistency_score_5": 0.24349206349206348, + "consistency_score_6": 0.2019047619047619, + "consistency_score_7": 0.17333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.49333333333333335, + "Malay,Vietnamese": 0.5933333333333334, + "Malay,Spanish": 0.5066666666666667, + "Malay,Indonesian": 0.6133333333333333, + "Malay,Filipino": 0.5133333333333333, + "Malay,Chinese": 0.48, + "English,Vietnamese": 0.62, + "English,Spanish": 0.7066666666666667, + "English,Indonesian": 0.6466666666666666, + "English,Filipino": 0.5866666666666667, + "English,Chinese": 0.6333333333333333, + "Vietnamese,Spanish": 0.5866666666666667, + "Vietnamese,Indonesian": 0.6066666666666667, + "Vietnamese,Filipino": 0.5733333333333334, + "Vietnamese,Chinese": 0.58, + "Spanish,Indonesian": 0.6733333333333333, + "Spanish,Filipino": 0.47333333333333333, + "Spanish,Chinese": 0.5733333333333334, + "Indonesian,Filipino": 0.48, + "Indonesian,Chinese": 0.49333333333333335, + "Filipino,Chinese": 0.4666666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.4, + "Malay,English,Spanish": 0.38, + "Malay,English,Indonesian": 0.42, + "Malay,English,Filipino": 0.34, + "Malay,English,Chinese": 0.36666666666666664, + "Malay,Vietnamese,Spanish": 0.3933333333333333, + "Malay,Vietnamese,Indonesian": 0.43333333333333335, + "Malay,Vietnamese,Filipino": 0.38, + "Malay,Vietnamese,Chinese": 0.38666666666666666, + "Malay,Spanish,Indonesian": 0.44, + "Malay,Spanish,Filipino": 0.3, + "Malay,Spanish,Chinese": 0.3466666666666667, + "Malay,Indonesian,Filipino": 0.3466666666666667, + "Malay,Indonesian,Chinese": 0.36, + "Malay,Filipino,Chinese": 0.32, + "English,Vietnamese,Spanish": 0.49333333333333335, + "English,Vietnamese,Indonesian": 0.47333333333333333, + "English,Vietnamese,Filipino": 0.43333333333333335, + "English,Vietnamese,Chinese": 0.46, + "English,Spanish,Indonesian": 0.5333333333333333, + "English,Spanish,Filipino": 0.41333333333333333, + "English,Spanish,Chinese": 0.4866666666666667, + "English,Indonesian,Filipino": 0.4, + "English,Indonesian,Chinese": 0.4266666666666667, + "English,Filipino,Chinese": 0.38, + "Vietnamese,Spanish,Indonesian": 0.48, + "Vietnamese,Spanish,Filipino": 0.38, + "Vietnamese,Spanish,Chinese": 0.41333333333333333, + "Vietnamese,Indonesian,Filipino": 0.38, + "Vietnamese,Indonesian,Chinese": 0.38666666666666666, + "Vietnamese,Filipino,Chinese": 0.37333333333333335, + "Spanish,Indonesian,Filipino": 0.36666666666666664, + "Spanish,Indonesian,Chinese": 0.43333333333333335, + "Spanish,Filipino,Chinese": 0.30666666666666664, + "Indonesian,Filipino,Chinese": 0.29333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.32666666666666666, + "Malay,English,Vietnamese,Indonesian": 0.34, + "Malay,English,Vietnamese,Filipino": 0.28, + "Malay,English,Vietnamese,Chinese": 0.31333333333333335, + "Malay,English,Spanish,Indonesian": 0.35333333333333333, + "Malay,English,Spanish,Filipino": 0.25333333333333335, + "Malay,English,Spanish,Chinese": 0.3, + "Malay,English,Indonesian,Filipino": 0.28, + "Malay,English,Indonesian,Chinese": 0.31333333333333335, + "Malay,English,Filipino,Chinese": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Indonesian": 0.36, + "Malay,Vietnamese,Spanish,Filipino": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Chinese": 0.29333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.28, + "Malay,Vietnamese,Indonesian,Chinese": 0.29333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.26666666666666666, + "Malay,Spanish,Indonesian,Filipino": 0.2733333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.32, + "Malay,Spanish,Filipino,Chinese": 0.22, + "Malay,Indonesian,Filipino,Chinese": 0.24, + "English,Vietnamese,Spanish,Indonesian": 0.4066666666666667, + "English,Vietnamese,Spanish,Filipino": 0.34, + "English,Vietnamese,Spanish,Chinese": 0.38, + "English,Vietnamese,Indonesian,Filipino": 0.32666666666666666, + "English,Vietnamese,Indonesian,Chinese": 0.35333333333333333, + "English,Vietnamese,Filipino,Chinese": 0.31333333333333335, + "English,Spanish,Indonesian,Filipino": 0.32666666666666666, + "English,Spanish,Indonesian,Chinese": 0.38, + "English,Spanish,Filipino,Chinese": 0.29333333333333333, + "English,Indonesian,Filipino,Chinese": 0.28, + "Vietnamese,Spanish,Indonesian,Filipino": 0.30666666666666664, + "Vietnamese,Spanish,Indonesian,Chinese": 0.35333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.26666666666666666, + "Vietnamese,Indonesian,Filipino,Chinese": 0.24666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.3, + "Malay,English,Vietnamese,Spanish,Filipino": 0.22, + "Malay,English,Vietnamese,Spanish,Chinese": 0.26666666666666666, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.26666666666666666, + "Malay,English,Vietnamese,Filipino,Chinese": 0.22666666666666666, + "Malay,English,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.28, + "Malay,English,Spanish,Filipino,Chinese": 0.20666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.2733333333333333, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.2, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.2, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.2733333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.32, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.26, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.24, + "English,Spanish,Indonesian,Filipino,Chinese": 0.24, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.2, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.24666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.18666666666666668, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.18, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.21333333333333335 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334 + } + }, + "AC3_2": 0.5031152647481366, + "AC3_3": 0.4233977640737759, + "AC3_4": 0.36377259861877675, + "AC3_5": 0.3165841153524895, + "AC3_6": 0.27919872457089945, + "AC3_7": 0.25063419579961965 + }, + "prompt_3": { + "overall_acc": 0.4428571428571429, + "language_acc": { + "Malay": 0.4066666666666667, + "English": 0.5466666666666666, + "Vietnamese": 0.43333333333333335, + "Spanish": 0.5266666666666666, + "Indonesian": 0.44, + "Filipino": 0.30666666666666664, + "Chinese": 0.44 + }, + "consistency_score_2": 0.559047619047619, + "consistency_score_3": 0.38952380952380966, + "consistency_score_4": 0.29657142857142865, + "consistency_score_5": 0.23746031746031748, + "consistency_score_6": 0.19619047619047622, + "consistency_score_7": 0.16666666666666666, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.49333333333333335, + "Malay,Vietnamese": 0.5933333333333334, + "Malay,Spanish": 0.5133333333333333, + "Malay,Indonesian": 0.6066666666666667, + "Malay,Filipino": 0.5333333333333333, + "Malay,Chinese": 0.48, + "English,Vietnamese": 0.62, + "English,Spanish": 0.6933333333333334, + "English,Indonesian": 0.6266666666666667, + "English,Filipino": 0.5333333333333333, + "English,Chinese": 0.58, + "Vietnamese,Spanish": 0.6066666666666667, + "Vietnamese,Indonesian": 0.5933333333333334, + "Vietnamese,Filipino": 0.5666666666666667, + "Vietnamese,Chinese": 0.5466666666666666, + "Spanish,Indonesian": 0.6333333333333333, + "Spanish,Filipino": 0.5, + "Spanish,Chinese": 0.5866666666666667, + "Indonesian,Filipino": 0.4866666666666667, + "Indonesian,Chinese": 0.46, + "Filipino,Chinese": 0.4866666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.3933333333333333, + "Malay,English,Spanish": 0.3933333333333333, + "Malay,English,Indonesian": 0.4066666666666667, + "Malay,English,Filipino": 0.32666666666666666, + "Malay,English,Chinese": 0.35333333333333333, + "Malay,Vietnamese,Spanish": 0.4066666666666667, + "Malay,Vietnamese,Indonesian": 0.42, + "Malay,Vietnamese,Filipino": 0.38666666666666666, + "Malay,Vietnamese,Chinese": 0.36666666666666664, + "Malay,Spanish,Indonesian": 0.42, + "Malay,Spanish,Filipino": 0.32, + "Malay,Spanish,Chinese": 0.36666666666666664, + "Malay,Indonesian,Filipino": 0.36, + "Malay,Indonesian,Chinese": 0.34, + "Malay,Filipino,Chinese": 0.32, + "English,Vietnamese,Spanish": 0.5, + "English,Vietnamese,Indonesian": 0.4666666666666667, + "English,Vietnamese,Filipino": 0.4, + "English,Vietnamese,Chinese": 0.42, + "English,Spanish,Indonesian": 0.5, + "English,Spanish,Filipino": 0.4066666666666667, + "English,Spanish,Chinese": 0.4533333333333333, + "English,Indonesian,Filipino": 0.38, + "English,Indonesian,Chinese": 0.3933333333333333, + "English,Filipino,Chinese": 0.36666666666666664, + "Vietnamese,Spanish,Indonesian": 0.46, + "Vietnamese,Spanish,Filipino": 0.38666666666666666, + "Vietnamese,Spanish,Chinese": 0.41333333333333333, + "Vietnamese,Indonesian,Filipino": 0.38, + "Vietnamese,Indonesian,Chinese": 0.35333333333333333, + "Vietnamese,Filipino,Chinese": 0.36666666666666664, + "Spanish,Indonesian,Filipino": 0.37333333333333335, + "Spanish,Indonesian,Chinese": 0.4, + "Spanish,Filipino,Chinese": 0.3466666666666667, + "Indonesian,Filipino,Chinese": 0.2866666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.34, + "Malay,English,Vietnamese,Indonesian": 0.32666666666666666, + "Malay,English,Vietnamese,Filipino": 0.26666666666666666, + "Malay,English,Vietnamese,Chinese": 0.3, + "Malay,English,Spanish,Indonesian": 0.34, + "Malay,English,Spanish,Filipino": 0.25333333333333335, + "Malay,English,Spanish,Chinese": 0.30666666666666664, + "Malay,English,Indonesian,Filipino": 0.28, + "Malay,English,Indonesian,Chinese": 0.3, + "Malay,English,Filipino,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.34, + "Malay,Vietnamese,Spanish,Filipino": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Chinese": 0.3, + "Malay,Vietnamese,Indonesian,Filipino": 0.2866666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.2733333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.25333333333333335, + "Malay,Spanish,Indonesian,Filipino": 0.28, + "Malay,Spanish,Indonesian,Chinese": 0.30666666666666664, + "Malay,Spanish,Filipino,Chinese": 0.24, + "Malay,Indonesian,Filipino,Chinese": 0.22666666666666666, + "English,Vietnamese,Spanish,Indonesian": 0.38666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.32666666666666666, + "English,Vietnamese,Spanish,Chinese": 0.36666666666666664, + "English,Vietnamese,Indonesian,Filipino": 0.31333333333333335, + "English,Vietnamese,Indonesian,Chinese": 0.32, + "English,Vietnamese,Filipino,Chinese": 0.29333333333333333, + "English,Spanish,Indonesian,Filipino": 0.32, + "English,Spanish,Indonesian,Chinese": 0.3333333333333333, + "English,Spanish,Filipino,Chinese": 0.3, + "English,Indonesian,Filipino,Chinese": 0.26666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino": 0.31333333333333335, + "Vietnamese,Spanish,Indonesian,Chinese": 0.32, + "Vietnamese,Spanish,Filipino,Chinese": 0.2866666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.24666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.25333333333333335 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.2866666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.22, + "Malay,English,Vietnamese,Spanish,Chinese": 0.2733333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.25333333333333335, + "Malay,English,Vietnamese,Filipino,Chinese": 0.20666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Spanish,Indonesian,Chinese": 0.26666666666666666, + "Malay,English,Spanish,Filipino,Chinese": 0.20666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.24, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.2, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.26666666666666666, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.2866666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.26, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.22666666666666666, + "English,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.2, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.23333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.18666666666666668, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.18, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.18, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16666666666666666 + } + }, + "AC3_2": 0.49421510043953726, + "AC3_3": 0.4144818567661583, + "AC3_4": 0.3552439831712107, + "AC3_5": 0.30915272310634745, + "AC3_6": 0.271918245646187, + "AC3_7": 0.2421874999602661 + }, + "prompt_4": { + "overall_acc": 0.45047619047619053, + "language_acc": { + "Malay": 0.4066666666666667, + "English": 0.56, + "Vietnamese": 0.44666666666666666, + "Spanish": 0.5066666666666667, + "Indonesian": 0.4666666666666667, + "Filipino": 0.34, + "Chinese": 0.4266666666666667 + }, + "consistency_score_2": 0.5838095238095239, + "consistency_score_3": 0.4213333333333334, + "consistency_score_4": 0.3314285714285714, + "consistency_score_5": 0.2739682539682539, + "consistency_score_6": 0.23428571428571426, + "consistency_score_7": 0.20666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5333333333333333, + "Malay,Vietnamese": 0.5866666666666667, + "Malay,Spanish": 0.5266666666666666, + "Malay,Indonesian": 0.64, + "Malay,Filipino": 0.5933333333333334, + "Malay,Chinese": 0.5, + "English,Vietnamese": 0.6266666666666667, + "English,Spanish": 0.72, + "English,Indonesian": 0.66, + "English,Filipino": 0.58, + "English,Chinese": 0.6266666666666667, + "Vietnamese,Spanish": 0.56, + "Vietnamese,Indonesian": 0.6066666666666667, + "Vietnamese,Filipino": 0.5733333333333334, + "Vietnamese,Chinese": 0.6066666666666667, + "Spanish,Indonesian": 0.66, + "Spanish,Filipino": 0.5266666666666666, + "Spanish,Chinese": 0.5733333333333334, + "Indonesian,Filipino": 0.5066666666666667, + "Indonesian,Chinese": 0.5066666666666667, + "Filipino,Chinese": 0.5466666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.42, + "Malay,English,Spanish": 0.41333333333333333, + "Malay,English,Indonesian": 0.4666666666666667, + "Malay,English,Filipino": 0.38666666666666666, + "Malay,English,Chinese": 0.38666666666666666, + "Malay,Vietnamese,Spanish": 0.3933333333333333, + "Malay,Vietnamese,Indonesian": 0.44, + "Malay,Vietnamese,Filipino": 0.42, + "Malay,Vietnamese,Chinese": 0.4066666666666667, + "Malay,Spanish,Indonesian": 0.47333333333333333, + "Malay,Spanish,Filipino": 0.36666666666666664, + "Malay,Spanish,Chinese": 0.36, + "Malay,Indonesian,Filipino": 0.41333333333333333, + "Malay,Indonesian,Chinese": 0.38666666666666666, + "Malay,Filipino,Chinese": 0.37333333333333335, + "English,Vietnamese,Spanish": 0.4866666666666667, + "English,Vietnamese,Indonesian": 0.48, + "English,Vietnamese,Filipino": 0.43333333333333335, + "English,Vietnamese,Chinese": 0.47333333333333333, + "English,Spanish,Indonesian": 0.54, + "English,Spanish,Filipino": 0.44666666666666666, + "English,Spanish,Chinese": 0.49333333333333335, + "English,Indonesian,Filipino": 0.41333333333333333, + "English,Indonesian,Chinese": 0.44, + "English,Filipino,Chinese": 0.4266666666666667, + "Vietnamese,Spanish,Indonesian": 0.46, + "Vietnamese,Spanish,Filipino": 0.38666666666666666, + "Vietnamese,Spanish,Chinese": 0.42, + "Vietnamese,Indonesian,Filipino": 0.38666666666666666, + "Vietnamese,Indonesian,Chinese": 0.4066666666666667, + "Vietnamese,Filipino,Chinese": 0.42, + "Spanish,Indonesian,Filipino": 0.3933333333333333, + "Spanish,Indonesian,Chinese": 0.41333333333333333, + "Spanish,Filipino,Chinese": 0.36666666666666664, + "Indonesian,Filipino,Chinese": 0.35333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.3466666666666667, + "Malay,English,Vietnamese,Indonesian": 0.36666666666666664, + "Malay,English,Vietnamese,Filipino": 0.31333333333333335, + "Malay,English,Vietnamese,Chinese": 0.32666666666666666, + "Malay,English,Spanish,Indonesian": 0.38666666666666666, + "Malay,English,Spanish,Filipino": 0.31333333333333335, + "Malay,English,Spanish,Chinese": 0.32666666666666666, + "Malay,English,Indonesian,Filipino": 0.3333333333333333, + "Malay,English,Indonesian,Chinese": 0.34, + "Malay,English,Filipino,Chinese": 0.29333333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.36, + "Malay,Vietnamese,Spanish,Filipino": 0.29333333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.30666666666666664, + "Malay,Vietnamese,Indonesian,Filipino": 0.31333333333333335, + "Malay,Vietnamese,Indonesian,Chinese": 0.31333333333333335, + "Malay,Vietnamese,Filipino,Chinese": 0.30666666666666664, + "Malay,Spanish,Indonesian,Filipino": 0.3333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.3333333333333333, + "Malay,Spanish,Filipino,Chinese": 0.28, + "Malay,Indonesian,Filipino,Chinese": 0.2866666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.4066666666666667, + "English,Vietnamese,Spanish,Filipino": 0.35333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.38666666666666666, + "English,Vietnamese,Indonesian,Filipino": 0.3333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.36, + "English,Vietnamese,Filipino,Chinese": 0.34, + "English,Spanish,Indonesian,Filipino": 0.3466666666666667, + "English,Spanish,Indonesian,Chinese": 0.38, + "English,Spanish,Filipino,Chinese": 0.34, + "English,Indonesian,Filipino,Chinese": 0.31333333333333335, + "Vietnamese,Spanish,Indonesian,Filipino": 0.32, + "Vietnamese,Spanish,Indonesian,Chinese": 0.35333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.30666666666666664, + "Vietnamese,Indonesian,Filipino,Chinese": 0.29333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.29333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.32, + "Malay,English,Vietnamese,Spanish,Filipino": 0.26666666666666666, + "Malay,English,Vietnamese,Spanish,Chinese": 0.2866666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.28, + "Malay,English,Vietnamese,Filipino,Chinese": 0.24666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.2866666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.30666666666666664, + "Malay,English,Spanish,Filipino,Chinese": 0.25333333333333335, + "Malay,English,Indonesian,Filipino,Chinese": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.2866666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.24, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.23333333333333334, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.25333333333333335, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.29333333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.32666666666666666, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.29333333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.26, + "English,Spanish,Indonesian,Filipino,Chinese": 0.2733333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.26 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.24, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.26666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.22666666666666666, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.20666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667 + } + }, + "AC3_2": 0.5085486275049833, + "AC3_3": 0.43541766800045284, + "AC3_4": 0.3818896815241537, + "AC3_5": 0.3407195024779045, + "AC3_6": 0.3082535266786079, + "AC3_7": 0.28334299512596467 + }, + "prompt_5": { + "overall_acc": 0.440952380952381, + "language_acc": { + "Malay": 0.37333333333333335, + "English": 0.5333333333333333, + "Vietnamese": 0.43333333333333335, + "Spanish": 0.52, + "Indonesian": 0.47333333333333333, + "Filipino": 0.34, + "Chinese": 0.41333333333333333 + }, + "consistency_score_2": 0.5692063492063493, + "consistency_score_3": 0.40228571428571425, + "consistency_score_4": 0.3127619047619048, + "consistency_score_5": 0.25619047619047625, + "consistency_score_6": 0.2161904761904762, + "consistency_score_7": 0.18666666666666668, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5066666666666667, + "Malay,Vietnamese": 0.58, + "Malay,Spanish": 0.5, + "Malay,Indonesian": 0.6266666666666667, + "Malay,Filipino": 0.5466666666666666, + "Malay,Chinese": 0.49333333333333335, + "English,Vietnamese": 0.6, + "English,Spanish": 0.74, + "English,Indonesian": 0.6133333333333333, + "English,Filipino": 0.56, + "English,Chinese": 0.62, + "Vietnamese,Spanish": 0.5733333333333334, + "Vietnamese,Indonesian": 0.5866666666666667, + "Vietnamese,Filipino": 0.5733333333333334, + "Vietnamese,Chinese": 0.5466666666666666, + "Spanish,Indonesian": 0.64, + "Spanish,Filipino": 0.5133333333333333, + "Spanish,Chinese": 0.5666666666666667, + "Indonesian,Filipino": 0.5266666666666666, + "Indonesian,Chinese": 0.5133333333333333, + "Filipino,Chinese": 0.5266666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.38, + "Malay,English,Spanish": 0.4066666666666667, + "Malay,English,Indonesian": 0.4266666666666667, + "Malay,English,Filipino": 0.3466666666666667, + "Malay,English,Chinese": 0.38, + "Malay,Vietnamese,Spanish": 0.38, + "Malay,Vietnamese,Indonesian": 0.43333333333333335, + "Malay,Vietnamese,Filipino": 0.38, + "Malay,Vietnamese,Chinese": 0.36, + "Malay,Spanish,Indonesian": 0.44, + "Malay,Spanish,Filipino": 0.34, + "Malay,Spanish,Chinese": 0.36666666666666664, + "Malay,Indonesian,Filipino": 0.38666666666666666, + "Malay,Indonesian,Chinese": 0.38, + "Malay,Filipino,Chinese": 0.36, + "English,Vietnamese,Spanish": 0.4866666666666667, + "English,Vietnamese,Indonesian": 0.44, + "English,Vietnamese,Filipino": 0.4066666666666667, + "English,Vietnamese,Chinese": 0.4266666666666667, + "English,Spanish,Indonesian": 0.52, + "English,Spanish,Filipino": 0.44, + "English,Spanish,Chinese": 0.5066666666666667, + "English,Indonesian,Filipino": 0.38666666666666666, + "English,Indonesian,Chinese": 0.4266666666666667, + "English,Filipino,Chinese": 0.4066666666666667, + "Vietnamese,Spanish,Indonesian": 0.44666666666666666, + "Vietnamese,Spanish,Filipino": 0.38666666666666666, + "Vietnamese,Spanish,Chinese": 0.38666666666666666, + "Vietnamese,Indonesian,Filipino": 0.38, + "Vietnamese,Indonesian,Chinese": 0.38, + "Vietnamese,Filipino,Chinese": 0.38666666666666666, + "Spanish,Indonesian,Filipino": 0.3933333333333333, + "Spanish,Indonesian,Chinese": 0.4066666666666667, + "Spanish,Filipino,Chinese": 0.35333333333333333, + "Indonesian,Filipino,Chinese": 0.3466666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.32666666666666666, + "Malay,English,Vietnamese,Indonesian": 0.32666666666666666, + "Malay,English,Vietnamese,Filipino": 0.26666666666666666, + "Malay,English,Vietnamese,Chinese": 0.3, + "Malay,English,Spanish,Indonesian": 0.36666666666666664, + "Malay,English,Spanish,Filipino": 0.2866666666666667, + "Malay,English,Spanish,Chinese": 0.3333333333333333, + "Malay,English,Indonesian,Filipino": 0.29333333333333333, + "Malay,English,Indonesian,Chinese": 0.32666666666666666, + "Malay,English,Filipino,Chinese": 0.2866666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.3466666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.2733333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.2866666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.29333333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.2866666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.26666666666666666, + "Malay,Spanish,Indonesian,Filipino": 0.30666666666666664, + "Malay,Spanish,Indonesian,Chinese": 0.32666666666666666, + "Malay,Spanish,Filipino,Chinese": 0.2733333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.2733333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.38, + "English,Vietnamese,Spanish,Filipino": 0.34, + "English,Vietnamese,Spanish,Chinese": 0.36666666666666664, + "English,Vietnamese,Indonesian,Filipino": 0.31333333333333335, + "English,Vietnamese,Indonesian,Chinese": 0.34, + "English,Vietnamese,Filipino,Chinese": 0.32, + "English,Spanish,Indonesian,Filipino": 0.34, + "English,Spanish,Indonesian,Chinese": 0.38, + "English,Spanish,Filipino,Chinese": 0.32666666666666666, + "English,Indonesian,Filipino,Chinese": 0.3, + "Vietnamese,Spanish,Indonesian,Filipino": 0.32, + "Vietnamese,Spanish,Indonesian,Chinese": 0.32, + "Vietnamese,Spanish,Filipino,Chinese": 0.2866666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.28, + "Spanish,Indonesian,Filipino,Chinese": 0.2866666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.29333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.23333333333333334, + "Malay,English,Vietnamese,Spanish,Chinese": 0.2733333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.26, + "Malay,English,Vietnamese,Filipino,Chinese": 0.22666666666666666, + "Malay,English,Spanish,Indonesian,Filipino": 0.26, + "Malay,English,Spanish,Indonesian,Chinese": 0.3, + "Malay,English,Spanish,Filipino,Chinese": 0.24666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.24, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.26, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.22, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.24, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.28, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.30666666666666664, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.2733333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.25333333333333335, + "English,Spanish,Indonesian,Filipino,Chinese": 0.26666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.24666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.20666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.22, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.2, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.18666666666666668 + } + }, + "AC3_2": 0.4969375354929927, + "AC3_3": 0.42073251794035976, + "AC3_4": 0.36595593097182016, + "AC3_5": 0.324087952074251, + "AC3_6": 0.29013388539408247, + "AC3_7": 0.2622964086582583 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.41883116883116883, + "language_acc": { + "English": 0.5397727272727273, + "Vietnamese": 0.4147727272727273, + "Chinese": 0.4147727272727273, + "Indonesian": 0.4375, + "Filipino": 0.32386363636363635, + "Spanish": 0.4318181818181818, + "Malay": 0.3693181818181818 + }, + "consistency_score_2": 0.5616883116883116, + "consistency_score_3": 0.39090909090909093, + "consistency_score_4": 0.29967532467532465, + "consistency_score_5": 0.24215367965367968, + "consistency_score_6": 0.20129870129870128, + "consistency_score_7": 0.17045454545454544, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5852272727272727, + "English,Chinese": 0.5568181818181818, + "English,Indonesian": 0.5454545454545454, + "English,Filipino": 0.4715909090909091, + "English,Spanish": 0.6079545454545454, + "English,Malay": 0.48295454545454547, + "Vietnamese,Chinese": 0.5397727272727273, + "Vietnamese,Indonesian": 0.6079545454545454, + "Vietnamese,Filipino": 0.5, + "Vietnamese,Spanish": 0.6988636363636364, + "Vietnamese,Malay": 0.6420454545454546, + "Chinese,Indonesian": 0.5340909090909091, + "Chinese,Filipino": 0.4715909090909091, + "Chinese,Spanish": 0.5568181818181818, + "Chinese,Malay": 0.5454545454545454, + "Indonesian,Filipino": 0.5397727272727273, + "Indonesian,Spanish": 0.6022727272727273, + "Indonesian,Malay": 0.6647727272727273, + "Filipino,Spanish": 0.4772727272727273, + "Filipino,Malay": 0.6079545454545454, + "Spanish,Malay": 0.5568181818181818 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.39204545454545453, + "English,Vietnamese,Indonesian": 0.42613636363636365, + "English,Vietnamese,Filipino": 0.3409090909090909, + "English,Vietnamese,Spanish": 0.48295454545454547, + "English,Vietnamese,Malay": 0.4090909090909091, + "English,Chinese,Indonesian": 0.3806818181818182, + "English,Chinese,Filipino": 0.3181818181818182, + "English,Chinese,Spanish": 0.4090909090909091, + "English,Chinese,Malay": 0.35795454545454547, + "English,Indonesian,Filipino": 0.32954545454545453, + "English,Indonesian,Spanish": 0.42613636363636365, + "English,Indonesian,Malay": 0.38636363636363635, + "English,Filipino,Spanish": 0.3352272727272727, + "English,Filipino,Malay": 0.32954545454545453, + "English,Spanish,Malay": 0.3806818181818182, + "Vietnamese,Chinese,Indonesian": 0.4034090909090909, + "Vietnamese,Chinese,Filipino": 0.3125, + "Vietnamese,Chinese,Spanish": 0.4318181818181818, + "Vietnamese,Chinese,Malay": 0.4090909090909091, + "Vietnamese,Indonesian,Filipino": 0.36363636363636365, + "Vietnamese,Indonesian,Spanish": 0.4772727272727273, + "Vietnamese,Indonesian,Malay": 0.48863636363636365, + "Vietnamese,Filipino,Spanish": 0.38636363636363635, + "Vietnamese,Filipino,Malay": 0.3977272727272727, + "Vietnamese,Spanish,Malay": 0.4772727272727273, + "Chinese,Indonesian,Filipino": 0.3352272727272727, + "Chinese,Indonesian,Spanish": 0.3977272727272727, + "Chinese,Indonesian,Malay": 0.42045454545454547, + "Chinese,Filipino,Spanish": 0.3068181818181818, + "Chinese,Filipino,Malay": 0.35795454545454547, + "Chinese,Spanish,Malay": 0.375, + "Indonesian,Filipino,Spanish": 0.3693181818181818, + "Indonesian,Filipino,Malay": 0.4431818181818182, + "Indonesian,Spanish,Malay": 0.45454545454545453, + "Filipino,Spanish,Malay": 0.3693181818181818 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.32386363636363635, + "English,Vietnamese,Chinese,Filipino": 0.25, + "English,Vietnamese,Chinese,Spanish": 0.32954545454545453, + "English,Vietnamese,Chinese,Malay": 0.3068181818181818, + "English,Vietnamese,Indonesian,Filipino": 0.2727272727272727, + "English,Vietnamese,Indonesian,Spanish": 0.35795454545454547, + "English,Vietnamese,Indonesian,Malay": 0.3409090909090909, + "English,Vietnamese,Filipino,Spanish": 0.2897727272727273, + "English,Vietnamese,Filipino,Malay": 0.2784090909090909, + "English,Vietnamese,Spanish,Malay": 0.3409090909090909, + "English,Chinese,Indonesian,Filipino": 0.25, + "English,Chinese,Indonesian,Spanish": 0.3125, + "English,Chinese,Indonesian,Malay": 0.3068181818181818, + "English,Chinese,Filipino,Spanish": 0.24431818181818182, + "English,Chinese,Filipino,Malay": 0.25, + "English,Chinese,Spanish,Malay": 0.2897727272727273, + "English,Indonesian,Filipino,Spanish": 0.26704545454545453, + "English,Indonesian,Filipino,Malay": 0.26704545454545453, + "English,Indonesian,Spanish,Malay": 0.3181818181818182, + "English,Filipino,Spanish,Malay": 0.26704545454545453, + "Vietnamese,Chinese,Indonesian,Filipino": 0.26704545454545453, + "Vietnamese,Chinese,Indonesian,Spanish": 0.3409090909090909, + "Vietnamese,Chinese,Indonesian,Malay": 0.3409090909090909, + "Vietnamese,Chinese,Filipino,Spanish": 0.26704545454545453, + "Vietnamese,Chinese,Filipino,Malay": 0.2727272727272727, + "Vietnamese,Chinese,Spanish,Malay": 0.3409090909090909, + "Vietnamese,Indonesian,Filipino,Spanish": 0.3125, + "Vietnamese,Indonesian,Filipino,Malay": 0.3181818181818182, + "Vietnamese,Indonesian,Spanish,Malay": 0.39204545454545453, + "Vietnamese,Filipino,Spanish,Malay": 0.3181818181818182, + "Chinese,Indonesian,Filipino,Spanish": 0.26136363636363635, + "Chinese,Indonesian,Filipino,Malay": 0.29545454545454547, + "Chinese,Indonesian,Spanish,Malay": 0.32386363636363635, + "Chinese,Filipino,Spanish,Malay": 0.26136363636363635, + "Indonesian,Filipino,Spanish,Malay": 0.3125 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.2215909090909091, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.2727272727272727, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.2727272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.2159090909090909, + "English,Vietnamese,Chinese,Filipino,Malay": 0.2159090909090909, + "English,Vietnamese,Chinese,Spanish,Malay": 0.26136363636363635, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.23863636363636365, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.23295454545454544, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.2897727272727273, + "English,Vietnamese,Filipino,Spanish,Malay": 0.23863636363636365, + "English,Chinese,Indonesian,Filipino,Spanish": 0.21022727272727273, + "English,Chinese,Indonesian,Filipino,Malay": 0.2159090909090909, + "English,Chinese,Indonesian,Spanish,Malay": 0.2556818181818182, + "English,Chinese,Filipino,Spanish,Malay": 0.20454545454545456, + "English,Indonesian,Filipino,Spanish,Malay": 0.2215909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.23863636363636365, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.23863636363636365, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.29545454545454547, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.23863636363636365, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.2727272727272727, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.23295454545454544 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.19318181818181818, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.19318181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.23295454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.1875, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.20454545454545456, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.2159090909090909 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + } + }, + "AC3_2": 0.4798529284794865, + "AC3_3": 0.40438871468360177, + "AC3_4": 0.3493729496406375, + "AC3_5": 0.30687998039065467, + "AC3_6": 0.2719113346947124, + "AC3_7": 0.2422990232496414 + }, + "prompt_2": { + "overall_acc": 0.4131493506493507, + "language_acc": { + "English": 0.5, + "Vietnamese": 0.4090909090909091, + "Chinese": 0.42613636363636365, + "Indonesian": 0.4034090909090909, + "Filipino": 0.3352272727272727, + "Spanish": 0.4431818181818182, + "Malay": 0.375 + }, + "consistency_score_2": 0.5836038961038961, + "consistency_score_3": 0.415422077922078, + "consistency_score_4": 0.3258116883116883, + "consistency_score_5": 0.2713744588744588, + "consistency_score_6": 0.2353896103896104, + "consistency_score_7": 0.21022727272727273, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5852272727272727, + "English,Chinese": 0.5397727272727273, + "English,Indonesian": 0.5568181818181818, + "English,Filipino": 0.4431818181818182, + "English,Spanish": 0.6477272727272727, + "English,Malay": 0.5340909090909091, + "Vietnamese,Chinese": 0.5454545454545454, + "Vietnamese,Indonesian": 0.6193181818181818, + "Vietnamese,Filipino": 0.5227272727272727, + "Vietnamese,Spanish": 0.6818181818181818, + "Vietnamese,Malay": 0.6761363636363636, + "Chinese,Indonesian": 0.5909090909090909, + "Chinese,Filipino": 0.48863636363636365, + "Chinese,Spanish": 0.5795454545454546, + "Chinese,Malay": 0.6022727272727273, + "Indonesian,Filipino": 0.5511363636363636, + "Indonesian,Spanish": 0.6420454545454546, + "Indonesian,Malay": 0.6875, + "Filipino,Spanish": 0.5227272727272727, + "Filipino,Malay": 0.6306818181818182, + "Spanish,Malay": 0.6079545454545454 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.3806818181818182, + "English,Vietnamese,Indonesian": 0.42045454545454547, + "English,Vietnamese,Filipino": 0.3125, + "English,Vietnamese,Spanish": 0.48863636363636365, + "English,Vietnamese,Malay": 0.4431818181818182, + "English,Chinese,Indonesian": 0.3977272727272727, + "English,Chinese,Filipino": 0.30113636363636365, + "English,Chinese,Spanish": 0.4318181818181818, + "English,Chinese,Malay": 0.3806818181818182, + "English,Indonesian,Filipino": 0.3181818181818182, + "English,Indonesian,Spanish": 0.44886363636363635, + "English,Indonesian,Malay": 0.42045454545454547, + "English,Filipino,Spanish": 0.3465909090909091, + "English,Filipino,Malay": 0.3409090909090909, + "English,Spanish,Malay": 0.4318181818181818, + "Vietnamese,Chinese,Indonesian": 0.42613636363636365, + "Vietnamese,Chinese,Filipino": 0.3522727272727273, + "Vietnamese,Chinese,Spanish": 0.4318181818181818, + "Vietnamese,Chinese,Malay": 0.44886363636363635, + "Vietnamese,Indonesian,Filipino": 0.3977272727272727, + "Vietnamese,Indonesian,Spanish": 0.5, + "Vietnamese,Indonesian,Malay": 0.5113636363636364, + "Vietnamese,Filipino,Spanish": 0.3977272727272727, + "Vietnamese,Filipino,Malay": 0.4431818181818182, + "Vietnamese,Spanish,Malay": 0.5056818181818182, + "Chinese,Indonesian,Filipino": 0.38636363636363635, + "Chinese,Indonesian,Spanish": 0.4375, + "Chinese,Indonesian,Malay": 0.4772727272727273, + "Chinese,Filipino,Spanish": 0.35795454545454547, + "Chinese,Filipino,Malay": 0.3977272727272727, + "Chinese,Spanish,Malay": 0.4318181818181818, + "Indonesian,Filipino,Spanish": 0.4034090909090909, + "Indonesian,Filipino,Malay": 0.4659090909090909, + "Indonesian,Spanish,Malay": 0.4943181818181818, + "Filipino,Spanish,Malay": 0.4090909090909091 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.32954545454545453, + "English,Vietnamese,Chinese,Filipino": 0.25, + "English,Vietnamese,Chinese,Spanish": 0.3465909090909091, + "English,Vietnamese,Chinese,Malay": 0.3352272727272727, + "English,Vietnamese,Indonesian,Filipino": 0.26136363636363635, + "English,Vietnamese,Indonesian,Spanish": 0.3693181818181818, + "English,Vietnamese,Indonesian,Malay": 0.35795454545454547, + "English,Vietnamese,Filipino,Spanish": 0.2840909090909091, + "English,Vietnamese,Filipino,Malay": 0.2840909090909091, + "English,Vietnamese,Spanish,Malay": 0.3806818181818182, + "English,Chinese,Indonesian,Filipino": 0.26136363636363635, + "English,Chinese,Indonesian,Spanish": 0.3409090909090909, + "English,Chinese,Indonesian,Malay": 0.3352272727272727, + "English,Chinese,Filipino,Spanish": 0.26704545454545453, + "English,Chinese,Filipino,Malay": 0.26136363636363635, + "English,Chinese,Spanish,Malay": 0.3352272727272727, + "English,Indonesian,Filipino,Spanish": 0.2784090909090909, + "English,Indonesian,Filipino,Malay": 0.2840909090909091, + "English,Indonesian,Spanish,Malay": 0.35795454545454547, + "English,Filipino,Spanish,Malay": 0.29545454545454547, + "Vietnamese,Chinese,Indonesian,Filipino": 0.3068181818181818, + "Vietnamese,Chinese,Indonesian,Spanish": 0.3693181818181818, + "Vietnamese,Chinese,Indonesian,Malay": 0.3806818181818182, + "Vietnamese,Chinese,Filipino,Spanish": 0.3068181818181818, + "Vietnamese,Chinese,Filipino,Malay": 0.3125, + "Vietnamese,Chinese,Spanish,Malay": 0.375, + "Vietnamese,Indonesian,Filipino,Spanish": 0.3352272727272727, + "Vietnamese,Indonesian,Filipino,Malay": 0.3522727272727273, + "Vietnamese,Indonesian,Spanish,Malay": 0.42045454545454547, + "Vietnamese,Filipino,Spanish,Malay": 0.3522727272727273, + "Chinese,Indonesian,Filipino,Spanish": 0.3068181818181818, + "Chinese,Indonesian,Filipino,Malay": 0.32954545454545453, + "Chinese,Indonesian,Spanish,Malay": 0.375, + "Chinese,Filipino,Spanish,Malay": 0.3181818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.3465909090909091 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.23295454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.30113636363636365, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.30113636363636365, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.23863636363636365, + "English,Vietnamese,Chinese,Filipino,Malay": 0.23295454545454544, + "English,Vietnamese,Chinese,Spanish,Malay": 0.3068181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.24431818181818182, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.23863636363636365, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.3181818181818182, + "English,Vietnamese,Filipino,Spanish,Malay": 0.26136363636363635, + "English,Chinese,Indonesian,Filipino,Spanish": 0.23863636363636365, + "English,Chinese,Indonesian,Filipino,Malay": 0.23295454545454544, + "English,Chinese,Indonesian,Spanish,Malay": 0.30113636363636365, + "English,Chinese,Filipino,Spanish,Malay": 0.24431818181818182, + "English,Indonesian,Filipino,Spanish,Malay": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.2784090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.2727272727272727, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.3352272727272727, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.2840909090909091, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.30113636363636365, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.2784090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.2215909090909091, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.2159090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.2784090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.22727272727272727, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.22727272727272727, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.2215909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.2556818181818182 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.21022727272727273 + } + }, + "AC3_2": 0.48380192685192336, + "AC3_3": 0.41428259725081873, + "AC3_4": 0.36431930871750273, + "AC3_5": 0.3275800779768869, + "AC3_6": 0.29990816438006845, + "AC3_7": 0.2786606296901467 + }, + "prompt_3": { + "overall_acc": 0.40584415584415584, + "language_acc": { + "English": 0.48863636363636365, + "Vietnamese": 0.4034090909090909, + "Chinese": 0.42045454545454547, + "Indonesian": 0.4090909090909091, + "Filipino": 0.32954545454545453, + "Spanish": 0.42045454545454547, + "Malay": 0.3693181818181818 + }, + "consistency_score_2": 0.575487012987013, + "consistency_score_3": 0.40811688311688316, + "consistency_score_4": 0.3181818181818182, + "consistency_score_5": 0.2624458874458875, + "consistency_score_6": 0.2248376623376623, + "consistency_score_7": 0.19886363636363635, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5795454545454546, + "English,Chinese": 0.5625, + "English,Indonesian": 0.5568181818181818, + "English,Filipino": 0.44886363636363635, + "English,Spanish": 0.6306818181818182, + "English,Malay": 0.5170454545454546, + "Vietnamese,Chinese": 0.5397727272727273, + "Vietnamese,Indonesian": 0.6022727272727273, + "Vietnamese,Filipino": 0.5170454545454546, + "Vietnamese,Spanish": 0.6761363636363636, + "Vietnamese,Malay": 0.6534090909090909, + "Chinese,Indonesian": 0.5738636363636364, + "Chinese,Filipino": 0.4772727272727273, + "Chinese,Spanish": 0.5625, + "Chinese,Malay": 0.5795454545454546, + "Indonesian,Filipino": 0.5511363636363636, + "Indonesian,Spanish": 0.6363636363636364, + "Indonesian,Malay": 0.6761363636363636, + "Filipino,Spanish": 0.5227272727272727, + "Filipino,Malay": 0.625, + "Spanish,Malay": 0.5965909090909091 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.38636363636363635, + "English,Vietnamese,Indonesian": 0.42045454545454547, + "English,Vietnamese,Filipino": 0.32386363636363635, + "English,Vietnamese,Spanish": 0.48295454545454547, + "English,Vietnamese,Malay": 0.4318181818181818, + "English,Chinese,Indonesian": 0.4034090909090909, + "English,Chinese,Filipino": 0.3181818181818182, + "English,Chinese,Spanish": 0.4375, + "English,Chinese,Malay": 0.36363636363636365, + "English,Indonesian,Filipino": 0.32954545454545453, + "English,Indonesian,Spanish": 0.44886363636363635, + "English,Indonesian,Malay": 0.4090909090909091, + "English,Filipino,Spanish": 0.3465909090909091, + "English,Filipino,Malay": 0.3409090909090909, + "English,Spanish,Malay": 0.4090909090909091, + "Vietnamese,Chinese,Indonesian": 0.4147727272727273, + "Vietnamese,Chinese,Filipino": 0.3409090909090909, + "Vietnamese,Chinese,Spanish": 0.4318181818181818, + "Vietnamese,Chinese,Malay": 0.4318181818181818, + "Vietnamese,Indonesian,Filipino": 0.38636363636363635, + "Vietnamese,Indonesian,Spanish": 0.48863636363636365, + "Vietnamese,Indonesian,Malay": 0.4943181818181818, + "Vietnamese,Filipino,Spanish": 0.3977272727272727, + "Vietnamese,Filipino,Malay": 0.42613636363636365, + "Vietnamese,Spanish,Malay": 0.4943181818181818, + "Chinese,Indonesian,Filipino": 0.3693181818181818, + "Chinese,Indonesian,Spanish": 0.4318181818181818, + "Chinese,Indonesian,Malay": 0.45454545454545453, + "Chinese,Filipino,Spanish": 0.3352272727272727, + "Chinese,Filipino,Malay": 0.375, + "Chinese,Spanish,Malay": 0.4090909090909091, + "Indonesian,Filipino,Spanish": 0.3977272727272727, + "Indonesian,Filipino,Malay": 0.4602272727272727, + "Indonesian,Spanish,Malay": 0.48295454545454547, + "Filipino,Spanish,Malay": 0.4090909090909091 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.32386363636363635, + "English,Vietnamese,Chinese,Filipino": 0.26136363636363635, + "English,Vietnamese,Chinese,Spanish": 0.3522727272727273, + "English,Vietnamese,Chinese,Malay": 0.32386363636363635, + "English,Vietnamese,Indonesian,Filipino": 0.26704545454545453, + "English,Vietnamese,Indonesian,Spanish": 0.3693181818181818, + "English,Vietnamese,Indonesian,Malay": 0.3465909090909091, + "English,Vietnamese,Filipino,Spanish": 0.2897727272727273, + "English,Vietnamese,Filipino,Malay": 0.2897727272727273, + "English,Vietnamese,Spanish,Malay": 0.36363636363636365, + "English,Chinese,Indonesian,Filipino": 0.26704545454545453, + "English,Chinese,Indonesian,Spanish": 0.3409090909090909, + "English,Chinese,Indonesian,Malay": 0.3125, + "English,Chinese,Filipino,Spanish": 0.2727272727272727, + "English,Chinese,Filipino,Malay": 0.25, + "English,Chinese,Spanish,Malay": 0.3181818181818182, + "English,Indonesian,Filipino,Spanish": 0.2840909090909091, + "English,Indonesian,Filipino,Malay": 0.2727272727272727, + "English,Indonesian,Spanish,Malay": 0.3409090909090909, + "English,Filipino,Spanish,Malay": 0.2897727272727273, + "Vietnamese,Chinese,Indonesian,Filipino": 0.29545454545454547, + "Vietnamese,Chinese,Indonesian,Spanish": 0.35795454545454547, + "Vietnamese,Chinese,Indonesian,Malay": 0.35795454545454547, + "Vietnamese,Chinese,Filipino,Spanish": 0.29545454545454547, + "Vietnamese,Chinese,Filipino,Malay": 0.30113636363636365, + "Vietnamese,Chinese,Spanish,Malay": 0.3693181818181818, + "Vietnamese,Indonesian,Filipino,Spanish": 0.32386363636363635, + "Vietnamese,Indonesian,Filipino,Malay": 0.3409090909090909, + "Vietnamese,Indonesian,Spanish,Malay": 0.4090909090909091, + "Vietnamese,Filipino,Spanish,Malay": 0.3465909090909091, + "Chinese,Indonesian,Filipino,Spanish": 0.2897727272727273, + "Chinese,Indonesian,Filipino,Malay": 0.3181818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.35795454545454547, + "Chinese,Filipino,Spanish,Malay": 0.29545454545454547, + "Indonesian,Filipino,Spanish,Malay": 0.3409090909090909 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.23295454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.29545454545454547, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.2784090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.24431818181818182, + "English,Vietnamese,Chinese,Filipino,Malay": 0.23295454545454544, + "English,Vietnamese,Chinese,Spanish,Malay": 0.29545454545454547, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.25, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.23295454545454544, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.3068181818181818, + "English,Vietnamese,Filipino,Spanish,Malay": 0.26136363636363635, + "English,Chinese,Indonesian,Filipino,Spanish": 0.23863636363636365, + "English,Chinese,Indonesian,Filipino,Malay": 0.2215909090909091, + "English,Chinese,Indonesian,Spanish,Malay": 0.2784090909090909, + "English,Chinese,Filipino,Spanish,Malay": 0.23295454545454544, + "English,Indonesian,Filipino,Spanish,Malay": 0.24431818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.26136363636363635, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.3181818181818182, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.2727272727272727, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.2897727272727273, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.26136363636363635 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.2215909090909091, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.20454545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.2556818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.2215909090909091, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.2215909090909091, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.23863636363636365 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.19886363636363635 + } + }, + "AC3_2": 0.4760024920830186, + "AC3_3": 0.40697734649655315, + "AC3_4": 0.35670607414559635, + "AC3_5": 0.31876018713354193, + "AC3_6": 0.2893663607490507, + "AC3_7": 0.2669310554775174 + }, + "prompt_4": { + "overall_acc": 0.4099025974025974, + "language_acc": { + "English": 0.48295454545454547, + "Vietnamese": 0.4147727272727273, + "Chinese": 0.42613636363636365, + "Indonesian": 0.4147727272727273, + "Filipino": 0.32954545454545453, + "Spanish": 0.42045454545454547, + "Malay": 0.3806818181818182 + }, + "consistency_score_2": 0.569534632034632, + "consistency_score_3": 0.4021103896103896, + "consistency_score_4": 0.3137987012987014, + "consistency_score_5": 0.2591991341991342, + "consistency_score_6": 0.2215909090909091, + "consistency_score_7": 0.19318181818181818, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5511363636363636, + "English,Chinese": 0.5454545454545454, + "English,Indonesian": 0.5625, + "English,Filipino": 0.44886363636363635, + "English,Spanish": 0.6136363636363636, + "English,Malay": 0.48295454545454547, + "Vietnamese,Chinese": 0.5397727272727273, + "Vietnamese,Indonesian": 0.5852272727272727, + "Vietnamese,Filipino": 0.5056818181818182, + "Vietnamese,Spanish": 0.6988636363636364, + "Vietnamese,Malay": 0.6420454545454546, + "Chinese,Indonesian": 0.5909090909090909, + "Chinese,Filipino": 0.48863636363636365, + "Chinese,Spanish": 0.5965909090909091, + "Chinese,Malay": 0.5909090909090909, + "Indonesian,Filipino": 0.5625, + "Indonesian,Spanish": 0.6079545454545454, + "Indonesian,Malay": 0.6818181818181818, + "Filipino,Spanish": 0.48295454545454547, + "Filipino,Malay": 0.5965909090909091, + "Spanish,Malay": 0.5852272727272727 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.3693181818181818, + "English,Vietnamese,Indonesian": 0.4034090909090909, + "English,Vietnamese,Filipino": 0.30113636363636365, + "English,Vietnamese,Spanish": 0.4715909090909091, + "English,Vietnamese,Malay": 0.39204545454545453, + "English,Chinese,Indonesian": 0.4090909090909091, + "English,Chinese,Filipino": 0.3125, + "English,Chinese,Spanish": 0.4375, + "English,Chinese,Malay": 0.35795454545454547, + "English,Indonesian,Filipino": 0.3352272727272727, + "English,Indonesian,Spanish": 0.4431818181818182, + "English,Indonesian,Malay": 0.3977272727272727, + "English,Filipino,Spanish": 0.3409090909090909, + "English,Filipino,Malay": 0.3181818181818182, + "English,Spanish,Malay": 0.39204545454545453, + "Vietnamese,Chinese,Indonesian": 0.42045454545454547, + "Vietnamese,Chinese,Filipino": 0.32386363636363635, + "Vietnamese,Chinese,Spanish": 0.45454545454545453, + "Vietnamese,Chinese,Malay": 0.4318181818181818, + "Vietnamese,Indonesian,Filipino": 0.3693181818181818, + "Vietnamese,Indonesian,Spanish": 0.48863636363636365, + "Vietnamese,Indonesian,Malay": 0.4772727272727273, + "Vietnamese,Filipino,Spanish": 0.3806818181818182, + "Vietnamese,Filipino,Malay": 0.4090909090909091, + "Vietnamese,Spanish,Malay": 0.48863636363636365, + "Chinese,Indonesian,Filipino": 0.39204545454545453, + "Chinese,Indonesian,Spanish": 0.4375, + "Chinese,Indonesian,Malay": 0.4715909090909091, + "Chinese,Filipino,Spanish": 0.3409090909090909, + "Chinese,Filipino,Malay": 0.38636363636363635, + "Chinese,Spanish,Malay": 0.42045454545454547, + "Indonesian,Filipino,Spanish": 0.3806818181818182, + "Indonesian,Filipino,Malay": 0.4602272727272727, + "Indonesian,Spanish,Malay": 0.48295454545454547, + "Filipino,Spanish,Malay": 0.375 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.32386363636363635, + "English,Vietnamese,Chinese,Filipino": 0.23863636363636365, + "English,Vietnamese,Chinese,Spanish": 0.3352272727272727, + "English,Vietnamese,Chinese,Malay": 0.29545454545454547, + "English,Vietnamese,Indonesian,Filipino": 0.25, + "English,Vietnamese,Indonesian,Spanish": 0.3693181818181818, + "English,Vietnamese,Indonesian,Malay": 0.3181818181818182, + "English,Vietnamese,Filipino,Spanish": 0.2840909090909091, + "English,Vietnamese,Filipino,Malay": 0.25, + "English,Vietnamese,Spanish,Malay": 0.3522727272727273, + "English,Chinese,Indonesian,Filipino": 0.2727272727272727, + "English,Chinese,Indonesian,Spanish": 0.3522727272727273, + "English,Chinese,Indonesian,Malay": 0.32386363636363635, + "English,Chinese,Filipino,Spanish": 0.2727272727272727, + "English,Chinese,Filipino,Malay": 0.2556818181818182, + "English,Chinese,Spanish,Malay": 0.3125, + "English,Indonesian,Filipino,Spanish": 0.2784090909090909, + "English,Indonesian,Filipino,Malay": 0.2784090909090909, + "English,Indonesian,Spanish,Malay": 0.3409090909090909, + "English,Filipino,Spanish,Malay": 0.2727272727272727, + "Vietnamese,Chinese,Indonesian,Filipino": 0.2897727272727273, + "Vietnamese,Chinese,Indonesian,Spanish": 0.3693181818181818, + "Vietnamese,Chinese,Indonesian,Malay": 0.35795454545454547, + "Vietnamese,Chinese,Filipino,Spanish": 0.29545454545454547, + "Vietnamese,Chinese,Filipino,Malay": 0.29545454545454547, + "Vietnamese,Chinese,Spanish,Malay": 0.375, + "Vietnamese,Indonesian,Filipino,Spanish": 0.3181818181818182, + "Vietnamese,Indonesian,Filipino,Malay": 0.32954545454545453, + "Vietnamese,Indonesian,Spanish,Malay": 0.4034090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.32386363636363635, + "Chinese,Indonesian,Filipino,Spanish": 0.3068181818181818, + "Chinese,Indonesian,Filipino,Malay": 0.3409090909090909, + "Chinese,Indonesian,Spanish,Malay": 0.36363636363636365, + "Chinese,Filipino,Spanish,Malay": 0.30113636363636365, + "Indonesian,Filipino,Spanish,Malay": 0.3352272727272727 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.2215909090909091, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.29545454545454547, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.26704545454545453, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.23295454545454544, + "English,Vietnamese,Chinese,Filipino,Malay": 0.21022727272727273, + "English,Vietnamese,Chinese,Spanish,Malay": 0.2784090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.24431818181818182, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.2159090909090909, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.30113636363636365, + "English,Vietnamese,Filipino,Spanish,Malay": 0.24431818181818182, + "English,Chinese,Indonesian,Filipino,Spanish": 0.24431818181818182, + "English,Chinese,Indonesian,Filipino,Malay": 0.23863636363636365, + "English,Chinese,Indonesian,Spanish,Malay": 0.2840909090909091, + "English,Chinese,Filipino,Spanish,Malay": 0.23295454545454544, + "English,Indonesian,Filipino,Spanish,Malay": 0.24431818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.2727272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.3181818181818182, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.2727272727272727, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.2840909090909091, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.2784090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.2159090909090909, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.19318181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.25, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.21022727272727273, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.2159090909090909, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.25 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.19318181818181818 + } + }, + "AC3_2": 0.4767099267648461, + "AC3_3": 0.4059691058141524, + "AC3_4": 0.35546959204564216, + "AC3_5": 0.3175792060274967, + "AC3_6": 0.2876694320647523, + "AC3_7": 0.2626024715091302 + }, + "prompt_5": { + "overall_acc": 0.4050324675324676, + "language_acc": { + "English": 0.4715909090909091, + "Vietnamese": 0.4318181818181818, + "Chinese": 0.42613636363636365, + "Indonesian": 0.4147727272727273, + "Filipino": 0.30113636363636365, + "Spanish": 0.4090909090909091, + "Malay": 0.3806818181818182 + }, + "consistency_score_2": 0.593073593073593, + "consistency_score_3": 0.43522727272727263, + "consistency_score_4": 0.35081168831168835, + "consistency_score_5": 0.2970779220779221, + "consistency_score_6": 0.2597402597402597, + "consistency_score_7": 0.23295454545454544, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.6022727272727273, + "English,Chinese": 0.5681818181818182, + "English,Indonesian": 0.5795454545454546, + "English,Filipino": 0.4943181818181818, + "English,Spanish": 0.6136363636363636, + "English,Malay": 0.5284090909090909, + "Vietnamese,Chinese": 0.5397727272727273, + "Vietnamese,Indonesian": 0.6363636363636364, + "Vietnamese,Filipino": 0.5284090909090909, + "Vietnamese,Spanish": 0.6875, + "Vietnamese,Malay": 0.6931818181818182, + "Chinese,Indonesian": 0.6136363636363636, + "Chinese,Filipino": 0.4772727272727273, + "Chinese,Spanish": 0.6022727272727273, + "Chinese,Malay": 0.6022727272727273, + "Indonesian,Filipino": 0.5568181818181818, + "Indonesian,Spanish": 0.6590909090909091, + "Indonesian,Malay": 0.7329545454545454, + "Filipino,Spanish": 0.5113636363636364, + "Filipino,Malay": 0.5965909090909091, + "Spanish,Malay": 0.6306818181818182 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.4034090909090909, + "English,Vietnamese,Indonesian": 0.44886363636363635, + "English,Vietnamese,Filipino": 0.36363636363636365, + "English,Vietnamese,Spanish": 0.48863636363636365, + "English,Vietnamese,Malay": 0.4602272727272727, + "English,Chinese,Indonesian": 0.4375, + "English,Chinese,Filipino": 0.3352272727272727, + "English,Chinese,Spanish": 0.4431818181818182, + "English,Chinese,Malay": 0.3977272727272727, + "English,Indonesian,Filipino": 0.3693181818181818, + "English,Indonesian,Spanish": 0.44886363636363635, + "English,Indonesian,Malay": 0.4602272727272727, + "English,Filipino,Spanish": 0.3693181818181818, + "English,Filipino,Malay": 0.36363636363636365, + "English,Spanish,Malay": 0.4318181818181818, + "Vietnamese,Chinese,Indonesian": 0.45454545454545453, + "Vietnamese,Chinese,Filipino": 0.3522727272727273, + "Vietnamese,Chinese,Spanish": 0.45454545454545453, + "Vietnamese,Chinese,Malay": 0.4602272727272727, + "Vietnamese,Indonesian,Filipino": 0.3977272727272727, + "Vietnamese,Indonesian,Spanish": 0.5340909090909091, + "Vietnamese,Indonesian,Malay": 0.5511363636363636, + "Vietnamese,Filipino,Spanish": 0.4034090909090909, + "Vietnamese,Filipino,Malay": 0.4318181818181818, + "Vietnamese,Spanish,Malay": 0.5340909090909091, + "Chinese,Indonesian,Filipino": 0.3977272727272727, + "Chinese,Indonesian,Spanish": 0.4772727272727273, + "Chinese,Indonesian,Malay": 0.5113636363636364, + "Chinese,Filipino,Spanish": 0.35795454545454547, + "Chinese,Filipino,Malay": 0.39204545454545453, + "Chinese,Spanish,Malay": 0.44886363636363635, + "Indonesian,Filipino,Spanish": 0.4090909090909091, + "Indonesian,Filipino,Malay": 0.48863636363636365, + "Indonesian,Spanish,Malay": 0.5454545454545454, + "Filipino,Spanish,Malay": 0.4090909090909091 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.36363636363636365, + "English,Vietnamese,Chinese,Filipino": 0.2840909090909091, + "English,Vietnamese,Chinese,Spanish": 0.35795454545454547, + "English,Vietnamese,Chinese,Malay": 0.3522727272727273, + "English,Vietnamese,Indonesian,Filipino": 0.3125, + "English,Vietnamese,Indonesian,Spanish": 0.38636363636363635, + "English,Vietnamese,Indonesian,Malay": 0.3977272727272727, + "English,Vietnamese,Filipino,Spanish": 0.3181818181818182, + "English,Vietnamese,Filipino,Malay": 0.3181818181818182, + "English,Vietnamese,Spanish,Malay": 0.39204545454545453, + "English,Chinese,Indonesian,Filipino": 0.3125, + "English,Chinese,Indonesian,Spanish": 0.35795454545454547, + "English,Chinese,Indonesian,Malay": 0.3806818181818182, + "English,Chinese,Filipino,Spanish": 0.2840909090909091, + "English,Chinese,Filipino,Malay": 0.29545454545454547, + "English,Chinese,Spanish,Malay": 0.3409090909090909, + "English,Indonesian,Filipino,Spanish": 0.3068181818181818, + "English,Indonesian,Filipino,Malay": 0.32954545454545453, + "English,Indonesian,Spanish,Malay": 0.38636363636363635, + "English,Filipino,Spanish,Malay": 0.3068181818181818, + "Vietnamese,Chinese,Indonesian,Filipino": 0.3181818181818182, + "Vietnamese,Chinese,Indonesian,Spanish": 0.4034090909090909, + "Vietnamese,Chinese,Indonesian,Malay": 0.4147727272727273, + "Vietnamese,Chinese,Filipino,Spanish": 0.3125, + "Vietnamese,Chinese,Filipino,Malay": 0.3125, + "Vietnamese,Chinese,Spanish,Malay": 0.3977272727272727, + "Vietnamese,Indonesian,Filipino,Spanish": 0.3465909090909091, + "Vietnamese,Indonesian,Filipino,Malay": 0.35795454545454547, + "Vietnamese,Indonesian,Spanish,Malay": 0.4659090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.3522727272727273, + "Chinese,Indonesian,Filipino,Spanish": 0.3352272727272727, + "Chinese,Indonesian,Filipino,Malay": 0.35795454545454547, + "Chinese,Indonesian,Spanish,Malay": 0.42045454545454547, + "Chinese,Filipino,Spanish,Malay": 0.32954545454545453, + "Indonesian,Filipino,Spanish,Malay": 0.3693181818181818 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.2727272727272727, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.3181818181818182, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.3352272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.26136363636363635, + "English,Vietnamese,Chinese,Filipino,Malay": 0.26136363636363635, + "English,Vietnamese,Chinese,Spanish,Malay": 0.3125, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.2784090909090909, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.2840909090909091, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.3465909090909091, + "English,Vietnamese,Filipino,Spanish,Malay": 0.2840909090909091, + "English,Chinese,Indonesian,Filipino,Spanish": 0.26704545454545453, + "English,Chinese,Indonesian,Filipino,Malay": 0.2840909090909091, + "English,Chinese,Indonesian,Spanish,Malay": 0.32386363636363635, + "English,Chinese,Filipino,Spanish,Malay": 0.26136363636363635, + "English,Indonesian,Filipino,Spanish,Malay": 0.2784090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.29545454545454547, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.2897727272727273, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.3693181818181818, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.2897727272727273, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.3125, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.3125 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.25, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.25, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.29545454545454547, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.24431818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.2556818181818182, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.25, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.2727272727272727 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.23295454545454544 + } + }, + "AC3_2": 0.4813397499280115, + "AC3_3": 0.4195873436342011, + "AC3_4": 0.3759773033599061, + "AC3_5": 0.3427557990653627, + "AC3_6": 0.31650888788984777, + "AC3_7": 0.2957870691185692 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.47572815533980584 + }, + "prompt_2": { + "accuracy": 0.4368932038834951 + }, + "prompt_3": { + "accuracy": 0.4563106796116505 + }, + "prompt_4": { + "accuracy": 0.46601941747572817 + }, + "prompt_5": { + "accuracy": 0.4563106796116505 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3142857142857143 + }, + "prompt_2": { + "accuracy": 0.3047619047619048 + }, + "prompt_3": { + "accuracy": 0.3142857142857143 + }, + "prompt_4": { + "accuracy": 0.34285714285714286 + }, + "prompt_5": { + "accuracy": 0.3238095238095238 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5887850467289719 + }, + "prompt_2": { + "accuracy": 0.5981308411214953 + }, + "prompt_3": { + "accuracy": 0.5607476635514018 + }, + "prompt_4": { + "accuracy": 0.4672897196261682 + }, + "prompt_5": { + "accuracy": 0.5607476635514018 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.4, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.6, + "culture": 0.7, + "film": 0.4, + "law": 0.3, + "geography": 0.7 + } + }, + "prompt_2": { + "accuracy": 0.4, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.4, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.6, + "culture": 0.7, + "film": 0.4, + "law": 0.3, + "geography": 0.7 + } + }, + "prompt_3": { + "accuracy": 0.37, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.4, + "history": 0.2, + "literature": 0.2, + "politics": 0.6, + "culture": 0.6, + "film": 0.4, + "law": 0.3, + "geography": 0.6 + } + }, + "prompt_4": { + "accuracy": 0.37, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.4, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.5, + "culture": 0.6, + "film": 0.4, + "law": 0.3, + "geography": 0.6 + } + }, + "prompt_5": { + "accuracy": 0.39, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.4, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.6, + "culture": 0.7, + "film": 0.4, + "law": 0.3, + "geography": 0.6 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.24699554333446355 + }, + "prompt_2": { + "bleu_score": 0.24017983365854653 + }, + "prompt_3": { + "bleu_score": 0.24649095240996952 + }, + "prompt_4": { + "bleu_score": 0.2609684053416734 + }, + "prompt_5": { + "bleu_score": 0.22261873469888768 + } }, "indommlu": { "prompt_1": -1, @@ -8510,179 +74439,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.3122212028273276 + }, + "prompt_2": { + "bleu_score": 0.31359510329215484 + }, + "prompt_3": { + "bleu_score": 0.3177929583074654 + }, + "prompt_4": { + "bleu_score": 0.3101882967342109 + }, + "prompt_5": { + "bleu_score": 0.2872573651484855 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.2553068264369229 + }, + "prompt_2": { + "bleu_score": 0.24919765395494614 + }, + "prompt_3": { + "bleu_score": 0.253537960642285 + }, + "prompt_4": { + "bleu_score": 0.2523536911193856 + }, + "prompt_5": { + "bleu_score": 0.23682613378283537 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.19407457896674812 + }, + "prompt_2": { + "bleu_score": 0.19383158954603297 + }, + "prompt_3": { + "bleu_score": 0.19674878624216052 + }, + "prompt_4": { + "bleu_score": 0.19217668568036725 + }, + "prompt_5": { + "bleu_score": 0.17968078768009316 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.27961784686946833 + }, + "prompt_2": { + "bleu_score": 0.27552617087944853 + }, + "prompt_3": { + "bleu_score": 0.28298364333983317 + }, + "prompt_4": { + "bleu_score": 0.2750635881653299 + }, + "prompt_5": { + "bleu_score": 0.24901620937495633 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5029171528588098 + }, + "prompt_2": { + "accuracy": 0.5017502917152858 + }, + "prompt_3": { + "accuracy": 0.5005834305717619 + }, + "prompt_4": { + "accuracy": 0.5029171528588098 + }, + "prompt_5": { + "accuracy": 0.4970828471411902 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.487665355738291, + "category_acc": { + "high_school_european_history": 0.6402439024390244, + "business_ethics": 0.45454545454545453, + "clinical_knowledge": 0.5113636363636364, + "medical_genetics": 0.5151515151515151, + "high_school_us_history": 0.6748768472906403, + "high_school_physics": 0.3, + "high_school_world_history": 0.6822033898305084, + "virology": 0.40606060606060607, + "high_school_microeconomics": 0.47257383966244726, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.4444444444444444, + "high_school_biology": 0.5436893203883495, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.3594306049822064, + "philosophy": 0.5483870967741935, + "professional_medicine": 0.4833948339483395, + "nutrition": 0.49836065573770494, + "global_facts": 0.3333333333333333, + "machine_learning": 0.38738738738738737, + "security_studies": 0.5573770491803278, + "public_relations": 0.6055045871559633, + "professional_psychology": 0.486088379705401, + "prehistory": 0.544891640866873, + "anatomy": 0.5149253731343284, + "human_sexuality": 0.5923076923076923, + "college_medicine": 0.46511627906976744, + "high_school_government_and_politics": 0.65625, + "college_chemistry": 0.3333333333333333, + "logical_fallacies": 0.5987654320987654, + "high_school_geography": 0.6142131979695431, + "elementary_mathematics": 0.3156498673740053, + "human_aging": 0.581081081081081, + "college_mathematics": 0.31313131313131315, + "high_school_psychology": 0.6893382352941176, + "formal_logic": 0.4, + "high_school_statistics": 0.3674418604651163, + "international_law": 0.625, + "high_school_mathematics": 0.31226765799256506, + "high_school_computer_science": 0.47474747474747475, + "conceptual_physics": 0.3504273504273504, + "miscellaneous": 0.6918158567774936, + "high_school_chemistry": 0.35148514851485146, + "marketing": 0.7811158798283262, + "professional_law": 0.3789954337899543, + "management": 0.6274509803921569, + "college_physics": 0.21782178217821782, + "jurisprudence": 0.5794392523364486, + "world_religions": 0.7235294117647059, + "sociology": 0.7, + "us_foreign_policy": 0.6666666666666666, + "high_school_macroeconomics": 0.4910025706940874, + "computer_security": 0.5959595959595959, + "moral_scenarios": 0.25279642058165547, + "moral_disputes": 0.5333333333333333, + "electrical_engineering": 0.4930555555555556, + "astronomy": 0.44370860927152317, + "college_biology": 0.5104895104895105 + } + }, + "prompt_2": { + "accuracy": 0.48823739721129783, + "category_acc": { + "high_school_european_history": 0.6402439024390244, + "business_ethics": 0.46464646464646464, + "clinical_knowledge": 0.5265151515151515, + "medical_genetics": 0.45454545454545453, + "high_school_us_history": 0.6896551724137931, + "high_school_physics": 0.30666666666666664, + "high_school_world_history": 0.6694915254237288, + "virology": 0.41818181818181815, + "high_school_microeconomics": 0.4472573839662447, + "econometrics": 0.26548672566371684, + "college_computer_science": 0.45454545454545453, + "high_school_biology": 0.5598705501618123, + "abstract_algebra": 0.25252525252525254, + "professional_accounting": 0.3594306049822064, + "philosophy": 0.5516129032258065, + "professional_medicine": 0.4870848708487085, + "nutrition": 0.5016393442622951, + "global_facts": 0.3434343434343434, + "machine_learning": 0.36936936936936937, + "security_studies": 0.5450819672131147, + "public_relations": 0.6146788990825688, + "professional_psychology": 0.49427168576104746, + "prehistory": 0.541795665634675, + "anatomy": 0.5373134328358209, + "human_sexuality": 0.5538461538461539, + "college_medicine": 0.48255813953488375, + "high_school_government_and_politics": 0.671875, + "college_chemistry": 0.30303030303030304, + "logical_fallacies": 0.6234567901234568, + "high_school_geography": 0.6243654822335025, + "elementary_mathematics": 0.29973474801061006, + "human_aging": 0.5900900900900901, + "college_mathematics": 0.3333333333333333, + "high_school_psychology": 0.6764705882352942, + "formal_logic": 0.328, + "high_school_statistics": 0.3581395348837209, + "international_law": 0.6166666666666667, + "high_school_mathematics": 0.32342007434944237, + "high_school_computer_science": 0.494949494949495, + "conceptual_physics": 0.3547008547008547, + "miscellaneous": 0.6956521739130435, + "high_school_chemistry": 0.3465346534653465, + "marketing": 0.776824034334764, + "professional_law": 0.3842139595564253, + "management": 0.6372549019607843, + "college_physics": 0.22772277227722773, + "jurisprudence": 0.5700934579439252, + "world_religions": 0.7352941176470589, + "sociology": 0.685, + "us_foreign_policy": 0.6666666666666666, + "high_school_macroeconomics": 0.4832904884318766, + "computer_security": 0.6060606060606061, + "moral_scenarios": 0.24608501118568232, + "moral_disputes": 0.5362318840579711, + "electrical_engineering": 0.4861111111111111, + "astronomy": 0.4768211920529801, + "college_biology": 0.48951048951048953 + } + }, + "prompt_3": { + "accuracy": 0.48802288165892027, + "category_acc": { + "high_school_european_history": 0.6219512195121951, + "business_ethics": 0.48484848484848486, + "clinical_knowledge": 0.5189393939393939, + "medical_genetics": 0.45454545454545453, + "high_school_us_history": 0.6847290640394089, + "high_school_physics": 0.31333333333333335, + "high_school_world_history": 0.6694915254237288, + "virology": 0.4121212121212121, + "high_school_microeconomics": 0.45569620253164556, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.45454545454545453, + "high_school_biology": 0.5372168284789643, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.35587188612099646, + "philosophy": 0.5516129032258065, + "professional_medicine": 0.4797047970479705, + "nutrition": 0.5081967213114754, + "global_facts": 0.32323232323232326, + "machine_learning": 0.3783783783783784, + "security_studies": 0.5491803278688525, + "public_relations": 0.6146788990825688, + "professional_psychology": 0.5122749590834698, + "prehistory": 0.5479876160990712, + "anatomy": 0.5149253731343284, + "human_sexuality": 0.5769230769230769, + "college_medicine": 0.4476744186046512, + "high_school_government_and_politics": 0.6875, + "college_chemistry": 0.32323232323232326, + "logical_fallacies": 0.6172839506172839, + "high_school_geography": 0.6091370558375635, + "elementary_mathematics": 0.30238726790450926, + "human_aging": 0.5990990990990991, + "college_mathematics": 0.32323232323232326, + "high_school_psychology": 0.6819852941176471, + "formal_logic": 0.32, + "high_school_statistics": 0.3674418604651163, + "international_law": 0.6333333333333333, + "high_school_mathematics": 0.31226765799256506, + "high_school_computer_science": 0.494949494949495, + "conceptual_physics": 0.33760683760683763, + "miscellaneous": 0.6879795396419437, + "high_school_chemistry": 0.36633663366336633, + "marketing": 0.7811158798283262, + "professional_law": 0.38290932811480755, + "management": 0.6568627450980392, + "college_physics": 0.2376237623762376, + "jurisprudence": 0.5700934579439252, + "world_religions": 0.7235294117647059, + "sociology": 0.7, + "us_foreign_policy": 0.6565656565656566, + "high_school_macroeconomics": 0.4781491002570694, + "computer_security": 0.6262626262626263, + "moral_scenarios": 0.24608501118568232, + "moral_disputes": 0.553623188405797, + "electrical_engineering": 0.4652777777777778, + "astronomy": 0.4503311258278146, + "college_biology": 0.4965034965034965 + } + }, + "prompt_4": { + "accuracy": 0.49081158383982837, + "category_acc": { + "high_school_european_history": 0.6280487804878049, + "business_ethics": 0.5050505050505051, + "clinical_knowledge": 0.5303030303030303, + "medical_genetics": 0.48484848484848486, + "high_school_us_history": 0.6896551724137931, + "high_school_physics": 0.31333333333333335, + "high_school_world_history": 0.6864406779661016, + "virology": 0.4, + "high_school_microeconomics": 0.46835443037974683, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.4444444444444444, + "high_school_biology": 0.5533980582524272, + "abstract_algebra": 0.30303030303030304, + "professional_accounting": 0.3736654804270463, + "philosophy": 0.5483870967741935, + "professional_medicine": 0.45387453874538747, + "nutrition": 0.5081967213114754, + "global_facts": 0.3333333333333333, + "machine_learning": 0.36036036036036034, + "security_studies": 0.5737704918032787, + "public_relations": 0.5871559633027523, + "professional_psychology": 0.4877250409165303, + "prehistory": 0.5572755417956656, + "anatomy": 0.5223880597014925, + "human_sexuality": 0.6076923076923076, + "college_medicine": 0.45348837209302323, + "high_school_government_and_politics": 0.6510416666666666, + "college_chemistry": 0.30303030303030304, + "logical_fallacies": 0.6049382716049383, + "high_school_geography": 0.6243654822335025, + "elementary_mathematics": 0.3129973474801061, + "human_aging": 0.5765765765765766, + "college_mathematics": 0.3333333333333333, + "high_school_psychology": 0.6856617647058824, + "formal_logic": 0.368, + "high_school_statistics": 0.39069767441860465, + "international_law": 0.6583333333333333, + "high_school_mathematics": 0.3308550185873606, + "high_school_computer_science": 0.494949494949495, + "conceptual_physics": 0.358974358974359, + "miscellaneous": 0.6879795396419437, + "high_school_chemistry": 0.3613861386138614, + "marketing": 0.7725321888412017, + "professional_law": 0.38486627527723416, + "management": 0.6568627450980392, + "college_physics": 0.22772277227722773, + "jurisprudence": 0.5514018691588785, + "world_religions": 0.7176470588235294, + "sociology": 0.68, + "us_foreign_policy": 0.6767676767676768, + "high_school_macroeconomics": 0.4755784061696658, + "computer_security": 0.6161616161616161, + "moral_scenarios": 0.25279642058165547, + "moral_disputes": 0.5565217391304348, + "electrical_engineering": 0.4791666666666667, + "astronomy": 0.48344370860927155, + "college_biology": 0.4825174825174825 + } + }, + "prompt_5": { + "accuracy": 0.4853771898462639, + "category_acc": { + "high_school_european_history": 0.6341463414634146, + "business_ethics": 0.494949494949495, + "clinical_knowledge": 0.5113636363636364, + "medical_genetics": 0.5050505050505051, + "high_school_us_history": 0.6650246305418719, + "high_school_physics": 0.28, + "high_school_world_history": 0.6694915254237288, + "virology": 0.3939393939393939, + "high_school_microeconomics": 0.45569620253164556, + "econometrics": 0.2743362831858407, + "college_computer_science": 0.46464646464646464, + "high_school_biology": 0.5339805825242718, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.3736654804270463, + "philosophy": 0.5548387096774193, + "professional_medicine": 0.44649446494464945, + "nutrition": 0.5016393442622951, + "global_facts": 0.3434343434343434, + "machine_learning": 0.3333333333333333, + "security_studies": 0.5532786885245902, + "public_relations": 0.6055045871559633, + "professional_psychology": 0.5040916530278232, + "prehistory": 0.541795665634675, + "anatomy": 0.5074626865671642, + "human_sexuality": 0.5769230769230769, + "college_medicine": 0.45348837209302323, + "high_school_government_and_politics": 0.6510416666666666, + "college_chemistry": 0.3434343434343434, + "logical_fallacies": 0.5925925925925926, + "high_school_geography": 0.5786802030456852, + "elementary_mathematics": 0.3156498673740053, + "human_aging": 0.5585585585585585, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.6764705882352942, + "formal_logic": 0.352, + "high_school_statistics": 0.37209302325581395, + "international_law": 0.6333333333333333, + "high_school_mathematics": 0.34944237918215615, + "high_school_computer_science": 0.5050505050505051, + "conceptual_physics": 0.358974358974359, + "miscellaneous": 0.6879795396419437, + "high_school_chemistry": 0.3613861386138614, + "marketing": 0.7467811158798283, + "professional_law": 0.3776908023483366, + "management": 0.6568627450980392, + "college_physics": 0.25742574257425743, + "jurisprudence": 0.5607476635514018, + "world_religions": 0.7, + "sociology": 0.695, + "us_foreign_policy": 0.6666666666666666, + "high_school_macroeconomics": 0.442159383033419, + "computer_security": 0.5858585858585859, + "moral_scenarios": 0.24496644295302014, + "moral_disputes": 0.5565217391304348, + "electrical_engineering": 0.4930555555555556, + "astronomy": 0.4900662251655629, + "college_biology": 0.5244755244755245 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.37667161961367013 + }, + "prompt_2": { + "accuracy": 0.3774145616641902 + }, + "prompt_3": { + "accuracy": 0.37592867756315007 + }, + "prompt_4": { + "accuracy": 0.36701337295690933 + }, + "prompt_5": { + "accuracy": 0.37518573551263 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3823163138231631, + "category_acc": { + "computer_network": 0.5, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.47619047619047616, + "college_physics": 0.375, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.35714285714285715, + "metrology_engineer": 0.5517241379310345, + "high_school_mathematics": 0.08695652173913043, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.5, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.25, + "college_economics": 0.25, + "business_administration": 0.4473684210526316, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.4827586206896552, + "education_science": 0.4411764705882353, + "teacher_qualification": 0.6122448979591837, + "high_school_politics": 0.5, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.5769230769230769, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.4444444444444444, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.5, + "legal_professional": 0.4642857142857143, + "high_school_chinese": 0.375, + "high_school_history": 0.48, + "middle_school_history": 0.48148148148148145, + "civil_servant": 0.4230769230769231, + "sports_science": 0.5, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.4444444444444444, + "urban_and_rural_planner": 0.4117647058823529, + "accountant": 0.4074074074074074, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.35185185185185186, + "physician": 0.2222222222222222 + } + }, + "prompt_2": { + "accuracy": 0.38107098381070986, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.25, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.47619047619047616, + "college_physics": 0.375, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.125, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.5172413793103449, + "high_school_mathematics": 0.13043478260869565, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.25, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.5, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.31666666666666665, + "business_administration": 0.5263157894736842, + "marxism": 0.5, + "mao_zedong_thought": 0.5172413793103449, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.6122448979591837, + "high_school_politics": 0.5416666666666666, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.5384615384615384, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.48148148148148145, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.4411764705882353, + "legal_professional": 0.5357142857142857, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.48, + "middle_school_history": 0.5185185185185185, + "civil_servant": 0.40384615384615385, + "sports_science": 0.5, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.375, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.39215686274509803, + "accountant": 0.3888888888888889, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.3148148148148148, + "physician": 0.2962962962962963 + } + }, + "prompt_3": { + "accuracy": 0.37982565379825656, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.5238095238095238, + "college_physics": 0.25, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.125, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.5172413793103449, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.375, + "high_school_chemistry": 0.25, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.5, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.31666666666666665, + "business_administration": 0.4473684210526316, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.5172413793103449, + "education_science": 0.47058823529411764, + "teacher_qualification": 0.6326530612244898, + "high_school_politics": 0.5, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.5384615384615384, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.4444444444444444, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.47058823529411764, + "legal_professional": 0.5, + "high_school_chinese": 0.375, + "high_school_history": 0.48, + "middle_school_history": 0.48148148148148145, + "civil_servant": 0.40384615384615385, + "sports_science": 0.4166666666666667, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.375, + "clinical_medicine": 0.4444444444444444, + "urban_and_rural_planner": 0.43137254901960786, + "accountant": 0.3888888888888889, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.3333333333333333, + "physician": 0.18518518518518517 + } + }, + "prompt_4": { + "accuracy": 0.3823163138231631, + "category_acc": { + "computer_network": 0.5416666666666666, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.4523809523809524, + "college_physics": 0.375, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.5517241379310345, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.46153846153846156, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.31666666666666665, + "business_administration": 0.47368421052631576, + "marxism": 0.5, + "mao_zedong_thought": 0.4827586206896552, + "education_science": 0.4411764705882353, + "teacher_qualification": 0.5918367346938775, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.375, + "middle_school_politics": 0.5384615384615384, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.5185185185185185, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.47058823529411764, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.48, + "middle_school_history": 0.48148148148148145, + "civil_servant": 0.4230769230769231, + "sports_science": 0.4583333333333333, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.375, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.45098039215686275, + "accountant": 0.4444444444444444, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.3148148148148148, + "physician": 0.2777777777777778 + } + }, + "prompt_5": { + "accuracy": 0.37484433374844334, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.4523809523809524, + "college_physics": 0.4583333333333333, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.5517241379310345, + "high_school_mathematics": 0.13043478260869565, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.25, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.46153846153846156, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.25, + "college_economics": 0.31666666666666665, + "business_administration": 0.47368421052631576, + "marxism": 0.5, + "mao_zedong_thought": 0.5172413793103449, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.6122448979591837, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.6153846153846154, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.39285714285714285, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.4074074074074074, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.47058823529411764, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.44, + "middle_school_history": 0.48148148148148145, + "civil_servant": 0.40384615384615385, + "sports_science": 0.5, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.47058823529411764, + "accountant": 0.37037037037037035, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.35185185185185186, + "physician": 0.16666666666666666 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3010752688172043 + }, + "prompt_2": { + "accuracy": 0.3046594982078853 + }, + "prompt_3": { + "accuracy": 0.30824372759856633 + }, + "prompt_4": { + "accuracy": 0.3154121863799283 + }, + "prompt_5": { + "accuracy": 0.3046594982078853 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3597824209981005, + "category_acc": { + "agronomy": 0.30177514792899407, + "anatomy": 0.23648648648648649, + "ancient_chinese": 0.2804878048780488, + "arts": 0.38125, + "astronomy": 0.3393939393939394, + "business_ethics": 0.41626794258373206, + "chinese_civil_service_exam": 0.325, + "chinese_driving_rule": 0.4961832061068702, + "chinese_food_culture": 0.3382352941176471, + "chinese_foreign_policy": 0.3644859813084112, + "chinese_history": 0.3684210526315789, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.4245810055865922, + "clinical_knowledge": 0.33755274261603374, + "college_actuarial_science": 0.3018867924528302, + "college_education": 0.4766355140186916, + "college_engineering_hydrology": 0.3490566037735849, + "college_law": 0.26851851851851855, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.3113207547169811, + "college_medicine": 0.2857142857142857, + "computer_science": 0.3382352941176471, + "computer_security": 0.36257309941520466, + "conceptual_physics": 0.38095238095238093, + "construction_project_management": 0.33093525179856115, + "economics": 0.3710691823899371, + "education": 0.38650306748466257, + "electrical_engineering": 0.36046511627906974, + "elementary_chinese": 0.25, + "elementary_commonsense": 0.29797979797979796, + "elementary_information_and_technology": 0.5084033613445378, + "elementary_mathematics": 0.28695652173913044, + "ethnology": 0.3925925925925926, + "food_science": 0.3986013986013986, + "genetics": 0.3181818181818182, + "global_facts": 0.3422818791946309, + "high_school_biology": 0.31952662721893493, + "high_school_chemistry": 0.25757575757575757, + "high_school_geography": 0.3389830508474576, + "high_school_mathematics": 0.27439024390243905, + "high_school_physics": 0.2636363636363636, + "high_school_politics": 0.34265734265734266, + "human_sexuality": 0.4444444444444444, + "international_law": 0.34594594594594597, + "journalism": 0.4476744186046512, + "jurisprudence": 0.35036496350364965, + "legal_and_moral_basis": 0.6261682242990654, + "logical": 0.3333333333333333, + "machine_learning": 0.319672131147541, + "management": 0.4095238095238095, + "marketing": 0.45555555555555555, + "marxist_theory": 0.4497354497354497, + "modern_chinese": 0.3448275862068966, + "nutrition": 0.2827586206896552, + "philosophy": 0.4095238095238095, + "professional_accounting": 0.3942857142857143, + "professional_law": 0.33649289099526064, + "professional_medicine": 0.25, + "professional_psychology": 0.3922413793103448, + "public_relations": 0.40804597701149425, + "security_study": 0.3851851851851852, + "sociology": 0.45132743362831856, + "sports_science": 0.4121212121212121, + "traditional_chinese_medicine": 0.2810810810810811, + "virology": 0.38461538461538464, + "world_history": 0.4472049689440994, + "world_religions": 0.43125 + } + }, + "prompt_2": { + "accuracy": 0.35891901226040407, + "category_acc": { + "agronomy": 0.3136094674556213, + "anatomy": 0.23648648648648649, + "ancient_chinese": 0.27439024390243905, + "arts": 0.3875, + "astronomy": 0.3515151515151515, + "business_ethics": 0.4354066985645933, + "chinese_civil_service_exam": 0.3375, + "chinese_driving_rule": 0.549618320610687, + "chinese_food_culture": 0.3161764705882353, + "chinese_foreign_policy": 0.3644859813084112, + "chinese_history": 0.37770897832817335, + "chinese_literature": 0.3137254901960784, + "chinese_teacher_qualification": 0.39664804469273746, + "clinical_knowledge": 0.3459915611814346, + "college_actuarial_science": 0.32075471698113206, + "college_education": 0.4766355140186916, + "college_engineering_hydrology": 0.36792452830188677, + "college_law": 0.28703703703703703, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.32075471698113206, + "college_medicine": 0.2967032967032967, + "computer_science": 0.35784313725490197, + "computer_security": 0.3684210526315789, + "conceptual_physics": 0.32653061224489793, + "construction_project_management": 0.35251798561151076, + "economics": 0.33962264150943394, + "education": 0.37423312883435583, + "electrical_engineering": 0.3488372093023256, + "elementary_chinese": 0.23809523809523808, + "elementary_commonsense": 0.3181818181818182, + "elementary_information_and_technology": 0.49159663865546216, + "elementary_mathematics": 0.30434782608695654, + "ethnology": 0.37777777777777777, + "food_science": 0.3776223776223776, + "genetics": 0.3181818181818182, + "global_facts": 0.3624161073825503, + "high_school_biology": 0.27218934911242604, + "high_school_chemistry": 0.21212121212121213, + "high_school_geography": 0.3728813559322034, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.36363636363636365, + "human_sexuality": 0.4603174603174603, + "international_law": 0.35135135135135137, + "journalism": 0.4186046511627907, + "jurisprudence": 0.35523114355231145, + "legal_and_moral_basis": 0.6588785046728972, + "logical": 0.3089430894308943, + "machine_learning": 0.3114754098360656, + "management": 0.4238095238095238, + "marketing": 0.4388888888888889, + "marxist_theory": 0.4497354497354497, + "modern_chinese": 0.3275862068965517, + "nutrition": 0.2413793103448276, + "philosophy": 0.4380952380952381, + "professional_accounting": 0.3657142857142857, + "professional_law": 0.3412322274881517, + "professional_medicine": 0.24202127659574468, + "professional_psychology": 0.375, + "public_relations": 0.43103448275862066, + "security_study": 0.37777777777777777, + "sociology": 0.43805309734513276, + "sports_science": 0.4121212121212121, + "traditional_chinese_medicine": 0.2918918918918919, + "virology": 0.378698224852071, + "world_history": 0.45962732919254656, + "world_religions": 0.41875 + } + }, + "prompt_3": { + "accuracy": 0.3585736487653255, + "category_acc": { + "agronomy": 0.30177514792899407, + "anatomy": 0.2702702702702703, + "ancient_chinese": 0.2621951219512195, + "arts": 0.375, + "astronomy": 0.34545454545454546, + "business_ethics": 0.4354066985645933, + "chinese_civil_service_exam": 0.30625, + "chinese_driving_rule": 0.5267175572519084, + "chinese_food_culture": 0.3088235294117647, + "chinese_foreign_policy": 0.37383177570093457, + "chinese_history": 0.3931888544891641, + "chinese_literature": 0.29901960784313725, + "chinese_teacher_qualification": 0.40782122905027934, + "clinical_knowledge": 0.32489451476793246, + "college_actuarial_science": 0.29245283018867924, + "college_education": 0.4672897196261682, + "college_engineering_hydrology": 0.36792452830188677, + "college_law": 0.2777777777777778, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.3113207547169811, + "college_medicine": 0.29304029304029305, + "computer_science": 0.3333333333333333, + "computer_security": 0.3508771929824561, + "conceptual_physics": 0.3673469387755102, + "construction_project_management": 0.34532374100719426, + "economics": 0.37735849056603776, + "education": 0.3619631901840491, + "electrical_engineering": 0.3546511627906977, + "elementary_chinese": 0.23015873015873015, + "elementary_commonsense": 0.30808080808080807, + "elementary_information_and_technology": 0.5168067226890757, + "elementary_mathematics": 0.3173913043478261, + "ethnology": 0.37037037037037035, + "food_science": 0.3986013986013986, + "genetics": 0.3181818181818182, + "global_facts": 0.3959731543624161, + "high_school_biology": 0.30177514792899407, + "high_school_chemistry": 0.23484848484848486, + "high_school_geography": 0.3559322033898305, + "high_school_mathematics": 0.2682926829268293, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.3706293706293706, + "human_sexuality": 0.4603174603174603, + "international_law": 0.372972972972973, + "journalism": 0.43023255813953487, + "jurisprudence": 0.34306569343065696, + "legal_and_moral_basis": 0.6495327102803738, + "logical": 0.3170731707317073, + "machine_learning": 0.3360655737704918, + "management": 0.40476190476190477, + "marketing": 0.4388888888888889, + "marxist_theory": 0.4444444444444444, + "modern_chinese": 0.33620689655172414, + "nutrition": 0.2482758620689655, + "philosophy": 0.4, + "professional_accounting": 0.4, + "professional_law": 0.33175355450236965, + "professional_medicine": 0.2393617021276596, + "professional_psychology": 0.38362068965517243, + "public_relations": 0.42528735632183906, + "security_study": 0.37777777777777777, + "sociology": 0.415929203539823, + "sports_science": 0.3939393939393939, + "traditional_chinese_medicine": 0.3081081081081081, + "virology": 0.3609467455621302, + "world_history": 0.4472049689440994, + "world_religions": 0.43125 + } + }, + "prompt_4": { + "accuracy": 0.35788292177516834, + "category_acc": { + "agronomy": 0.33727810650887574, + "anatomy": 0.20945945945945946, + "ancient_chinese": 0.2865853658536585, + "arts": 0.38125, + "astronomy": 0.3151515151515151, + "business_ethics": 0.41148325358851673, + "chinese_civil_service_exam": 0.33125, + "chinese_driving_rule": 0.5038167938931297, + "chinese_food_culture": 0.33088235294117646, + "chinese_foreign_policy": 0.3644859813084112, + "chinese_history": 0.3684210526315789, + "chinese_literature": 0.30392156862745096, + "chinese_teacher_qualification": 0.3854748603351955, + "clinical_knowledge": 0.35864978902953587, + "college_actuarial_science": 0.3113207547169811, + "college_education": 0.4485981308411215, + "college_engineering_hydrology": 0.33962264150943394, + "college_law": 0.28703703703703703, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.2641509433962264, + "college_medicine": 0.3076923076923077, + "computer_science": 0.3431372549019608, + "computer_security": 0.38011695906432746, + "conceptual_physics": 0.36054421768707484, + "construction_project_management": 0.3381294964028777, + "economics": 0.36477987421383645, + "education": 0.36809815950920244, + "electrical_engineering": 0.3953488372093023, + "elementary_chinese": 0.27380952380952384, + "elementary_commonsense": 0.3282828282828283, + "elementary_information_and_technology": 0.5, + "elementary_mathematics": 0.2782608695652174, + "ethnology": 0.37777777777777777, + "food_science": 0.36363636363636365, + "genetics": 0.2840909090909091, + "global_facts": 0.33557046979865773, + "high_school_biology": 0.33136094674556216, + "high_school_chemistry": 0.26515151515151514, + "high_school_geography": 0.3644067796610169, + "high_school_mathematics": 0.2804878048780488, + "high_school_physics": 0.24545454545454545, + "high_school_politics": 0.36363636363636365, + "human_sexuality": 0.4126984126984127, + "international_law": 0.3567567567567568, + "journalism": 0.4186046511627907, + "jurisprudence": 0.35279805352798055, + "legal_and_moral_basis": 0.616822429906542, + "logical": 0.34959349593495936, + "machine_learning": 0.3114754098360656, + "management": 0.4238095238095238, + "marketing": 0.45555555555555555, + "marxist_theory": 0.42328042328042326, + "modern_chinese": 0.3103448275862069, + "nutrition": 0.2620689655172414, + "philosophy": 0.41904761904761906, + "professional_accounting": 0.37714285714285717, + "professional_law": 0.36492890995260663, + "professional_medicine": 0.2473404255319149, + "professional_psychology": 0.39655172413793105, + "public_relations": 0.41379310344827586, + "security_study": 0.37777777777777777, + "sociology": 0.43805309734513276, + "sports_science": 0.3878787878787879, + "traditional_chinese_medicine": 0.3081081081081081, + "virology": 0.33727810650887574, + "world_history": 0.4409937888198758, + "world_religions": 0.43125 + } + }, + "prompt_5": { + "accuracy": 0.3586599896390951, + "category_acc": { + "agronomy": 0.3254437869822485, + "anatomy": 0.23648648648648649, + "ancient_chinese": 0.2865853658536585, + "arts": 0.3625, + "astronomy": 0.3212121212121212, + "business_ethics": 0.44019138755980863, + "chinese_civil_service_exam": 0.31875, + "chinese_driving_rule": 0.5419847328244275, + "chinese_food_culture": 0.3235294117647059, + "chinese_foreign_policy": 0.37383177570093457, + "chinese_history": 0.3560371517027864, + "chinese_literature": 0.3088235294117647, + "chinese_teacher_qualification": 0.4134078212290503, + "clinical_knowledge": 0.34177215189873417, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.4766355140186916, + "college_engineering_hydrology": 0.330188679245283, + "college_law": 0.3148148148148148, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.330188679245283, + "college_medicine": 0.30036630036630035, + "computer_science": 0.3235294117647059, + "computer_security": 0.3742690058479532, + "conceptual_physics": 0.29931972789115646, + "construction_project_management": 0.34532374100719426, + "economics": 0.4025157232704403, + "education": 0.3619631901840491, + "electrical_engineering": 0.3546511627906977, + "elementary_chinese": 0.2777777777777778, + "elementary_commonsense": 0.29797979797979796, + "elementary_information_and_technology": 0.46638655462184875, + "elementary_mathematics": 0.30869565217391304, + "ethnology": 0.34814814814814815, + "food_science": 0.40559440559440557, + "genetics": 0.2897727272727273, + "global_facts": 0.3959731543624161, + "high_school_biology": 0.27218934911242604, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.3305084745762712, + "high_school_mathematics": 0.25, + "high_school_physics": 0.3, + "high_school_politics": 0.3776223776223776, + "human_sexuality": 0.3968253968253968, + "international_law": 0.3675675675675676, + "journalism": 0.4186046511627907, + "jurisprudence": 0.35766423357664234, + "legal_and_moral_basis": 0.6588785046728972, + "logical": 0.35772357723577236, + "machine_learning": 0.3442622950819672, + "management": 0.3952380952380952, + "marketing": 0.45, + "marxist_theory": 0.455026455026455, + "modern_chinese": 0.31896551724137934, + "nutrition": 0.3103448275862069, + "philosophy": 0.41904761904761906, + "professional_accounting": 0.37714285714285717, + "professional_law": 0.33649289099526064, + "professional_medicine": 0.2473404255319149, + "professional_psychology": 0.4051724137931034, + "public_relations": 0.41379310344827586, + "security_study": 0.37777777777777777, + "sociology": 0.4247787610619469, + "sports_science": 0.4121212121212121, + "traditional_chinese_medicine": 0.3027027027027027, + "virology": 0.34911242603550297, + "world_history": 0.4409937888198758, + "world_religions": 0.43125 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2727272727272727 + }, + "prompt_2": { + "accuracy": 0.24242424242424243 + }, + "prompt_3": { + "accuracy": 0.2727272727272727 + }, + "prompt_4": { + "accuracy": 0.30303030303030304 + }, + "prompt_5": { + "accuracy": 0.24242424242424243 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4818181818181818 + }, + "prompt_2": { + "accuracy": 0.4727272727272727 + }, + "prompt_3": { + "accuracy": 0.475 + }, + "prompt_4": { + "accuracy": 0.45227272727272727 + }, + "prompt_5": { + "accuracy": 0.43636363636363634 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.36542372881355933 + }, + "prompt_2": { + "accuracy": 0.3440677966101695 + }, + "prompt_3": { + "accuracy": 0.35559322033898305 + }, + "prompt_4": { + "accuracy": 0.34779661016949154 + }, + "prompt_5": { + "accuracy": 0.35491525423728815 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6518324607329843 + }, + "prompt_2": { + "accuracy": 0.6473448017950636 + }, + "prompt_3": { + "accuracy": 0.6492146596858639 + }, + "prompt_4": { + "accuracy": 0.6574420344053852 + }, + "prompt_5": { + "accuracy": 0.6492146596858639 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7765801077902988 + }, + "prompt_2": { + "accuracy": 0.766780989710926 + }, + "prompt_3": { + "accuracy": 0.772170504654581 + }, + "prompt_4": { + "accuracy": 0.7765801077902988 + }, + "prompt_5": { + "accuracy": 0.7726604605585498 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.35655117314879275, + "rouge2": 0.1486114782820443, + "rougeL": 0.27565522860423286, + "avg_rouge": 0.26027262667835666 + }, + "prompt_2": { + "rouge1": 0.3872416232292271, + "rouge2": 0.16247215028002124, + "rougeL": 0.30063195283797134, + "avg_rouge": 0.2834485754490732 + }, + "prompt_3": { + "rouge1": 0.38323063603986324, + "rouge2": 0.16178444440387335, + "rougeL": 0.30019626450712755, + "avg_rouge": 0.28173711498362136 + }, + "prompt_4": { + "rouge1": 0.3668446924934507, + "rouge2": 0.15193837903115778, + "rougeL": 0.2819556103322241, + "avg_rouge": 0.2669128939522775 + }, + "prompt_5": { + "rouge1": 0.382766827581556, + "rouge2": 0.15757256811962397, + "rougeL": 0.2967483253746856, + "avg_rouge": 0.27902924035862187 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.22886959391444991, + "rouge2": 0.06524105139359554, + "rougeL": 0.16929310489217744, + "avg_rouge": 0.15446791673340762 + }, + "prompt_2": { + "rouge1": 0.22900247841317076, + "rouge2": 0.06312768747100184, + "rougeL": 0.1698442870172634, + "avg_rouge": 0.15399148430047868 + }, + "prompt_3": { + "rouge1": 0.23187103916013074, + "rouge2": 0.06329366636769138, + "rougeL": 0.17104271505285415, + "avg_rouge": 0.1554024735268921 + }, + "prompt_4": { + "rouge1": 0.23087938182410062, + "rouge2": 0.06333023398819367, + "rougeL": 0.17020882927003325, + "avg_rouge": 0.15480614836077586 + }, + "prompt_5": { + "rouge1": 0.236219549505177, + "rouge2": 0.06617888950226161, + "rougeL": 0.1745597307631992, + "avg_rouge": 0.15898605659021262 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7740825688073395 + }, + "prompt_2": { + "accuracy": 0.8004587155963303 + }, + "prompt_3": { + "accuracy": 0.805045871559633 + }, + "prompt_4": { + "accuracy": 0.7454128440366973 + }, + "prompt_5": { + "accuracy": 0.9162844036697247 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7142857142857143 + }, + "prompt_2": { + "accuracy": 0.7056567593480345 + }, + "prompt_3": { + "accuracy": 0.7018216682646213 + }, + "prompt_4": { + "accuracy": 0.7142857142857143 + }, + "prompt_5": { + "accuracy": 0.713326941514861 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.495 + }, + "prompt_2": { + "accuracy": 0.4745 + }, + "prompt_3": { + "accuracy": 0.5045 + }, + "prompt_4": { + "accuracy": 0.46 + }, + "prompt_5": { + "accuracy": 0.453 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.392 + }, + "prompt_2": { + "accuracy": 0.368 + }, + "prompt_3": { + "accuracy": 0.3975 + }, + "prompt_4": { + "accuracy": 0.4 + }, + "prompt_5": { + "accuracy": 0.372 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5215 + }, + "prompt_2": { + "accuracy": 0.553 + }, + "prompt_3": { + "accuracy": 0.5195 + }, + "prompt_4": { + "accuracy": 0.5315 + }, + "prompt_5": { + "accuracy": 0.5245 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49295774647887325 + }, + "prompt_2": { + "accuracy": 0.43661971830985913 + }, + "prompt_3": { + "accuracy": 0.43661971830985913 + }, + "prompt_4": { + "accuracy": 0.43661971830985913 + }, + "prompt_5": { + "accuracy": 0.43661971830985913 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5270758122743683 + }, + "prompt_2": { + "accuracy": 0.5415162454873647 + }, + "prompt_3": { + "accuracy": 0.5306859205776173 + }, + "prompt_4": { + "accuracy": 0.5270758122743683 + }, + "prompt_5": { + "accuracy": 0.5379061371841155 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4632352941176471 + }, + "prompt_2": { + "accuracy": 0.46078431372549017 + }, + "prompt_3": { + "accuracy": 0.4681372549019608 + }, + "prompt_4": { + "accuracy": 0.5220588235294118 + }, + "prompt_5": { + "accuracy": 0.47058823529411764 + } } }, "five_shot": { @@ -8792,53 +75911,1733 @@ "model_link": "https://huggingface.co/lmsys/vicuna-13b-v1.5", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.5228571428571429, + "language_acc": { + "Malay": 0.47333333333333333, + "English": 0.6733333333333333, + "Vietnamese": 0.47333333333333333, + "Spanish": 0.5466666666666666, + "Indonesian": 0.5066666666666667, + "Filipino": 0.46, + "Chinese": 0.5266666666666666 + }, + "consistency_score_2": 0.5926984126984126, + "consistency_score_3": 0.4363809523809524, + "consistency_score_4": 0.35371428571428576, + "consistency_score_5": 0.3022222222222223, + "consistency_score_6": 0.26857142857142857, + "consistency_score_7": 0.24666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5733333333333334, + "Malay,Vietnamese": 0.5933333333333334, + "Malay,Spanish": 0.5666666666666667, + "Malay,Indonesian": 0.6333333333333333, + "Malay,Filipino": 0.5466666666666666, + "Malay,Chinese": 0.4866666666666667, + "English,Vietnamese": 0.6466666666666666, + "English,Spanish": 0.74, + "English,Indonesian": 0.6133333333333333, + "English,Filipino": 0.5466666666666666, + "English,Chinese": 0.6533333333333333, + "Vietnamese,Spanish": 0.62, + "Vietnamese,Indonesian": 0.6333333333333333, + "Vietnamese,Filipino": 0.5533333333333333, + "Vietnamese,Chinese": 0.6, + "Spanish,Indonesian": 0.6733333333333333, + "Spanish,Filipino": 0.52, + "Spanish,Chinese": 0.6266666666666667, + "Indonesian,Filipino": 0.5, + "Indonesian,Chinese": 0.6, + "Filipino,Chinese": 0.52 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.46, + "Malay,English,Spanish": 0.48, + "Malay,English,Indonesian": 0.44, + "Malay,English,Filipino": 0.38666666666666666, + "Malay,English,Chinese": 0.42, + "Malay,Vietnamese,Spanish": 0.44, + "Malay,Vietnamese,Indonesian": 0.4666666666666667, + "Malay,Vietnamese,Filipino": 0.4, + "Malay,Vietnamese,Chinese": 0.4066666666666667, + "Malay,Spanish,Indonesian": 0.47333333333333333, + "Malay,Spanish,Filipino": 0.36666666666666664, + "Malay,Spanish,Chinese": 0.4066666666666667, + "Malay,Indonesian,Filipino": 0.37333333333333335, + "Malay,Indonesian,Chinese": 0.4, + "Malay,Filipino,Chinese": 0.3466666666666667, + "English,Vietnamese,Spanish": 0.5333333333333333, + "English,Vietnamese,Indonesian": 0.49333333333333335, + "English,Vietnamese,Filipino": 0.4066666666666667, + "English,Vietnamese,Chinese": 0.4866666666666667, + "English,Spanish,Indonesian": 0.54, + "English,Spanish,Filipino": 0.44, + "English,Spanish,Chinese": 0.54, + "English,Indonesian,Filipino": 0.3933333333333333, + "English,Indonesian,Chinese": 0.49333333333333335, + "English,Filipino,Chinese": 0.41333333333333333, + "Vietnamese,Spanish,Indonesian": 0.49333333333333335, + "Vietnamese,Spanish,Filipino": 0.38, + "Vietnamese,Spanish,Chinese": 0.4666666666666667, + "Vietnamese,Indonesian,Filipino": 0.4, + "Vietnamese,Indonesian,Chinese": 0.46, + "Vietnamese,Filipino,Chinese": 0.3933333333333333, + "Spanish,Indonesian,Filipino": 0.3933333333333333, + "Spanish,Indonesian,Chinese": 0.5, + "Spanish,Filipino,Chinese": 0.3933333333333333, + "Indonesian,Filipino,Chinese": 0.38666666666666666 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.41333333333333333, + "Malay,English,Vietnamese,Indonesian": 0.3933333333333333, + "Malay,English,Vietnamese,Filipino": 0.3333333333333333, + "Malay,English,Vietnamese,Chinese": 0.38, + "Malay,English,Spanish,Indonesian": 0.42, + "Malay,English,Spanish,Filipino": 0.34, + "Malay,English,Spanish,Chinese": 0.38, + "Malay,English,Indonesian,Filipino": 0.31333333333333335, + "Malay,English,Indonesian,Chinese": 0.36, + "Malay,English,Filipino,Chinese": 0.30666666666666664, + "Malay,Vietnamese,Spanish,Indonesian": 0.38666666666666666, + "Malay,Vietnamese,Spanish,Filipino": 0.31333333333333335, + "Malay,Vietnamese,Spanish,Chinese": 0.36, + "Malay,Vietnamese,Indonesian,Filipino": 0.32666666666666666, + "Malay,Vietnamese,Indonesian,Chinese": 0.3466666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.30666666666666664, + "Malay,Spanish,Indonesian,Filipino": 0.31333333333333335, + "Malay,Spanish,Indonesian,Chinese": 0.36, + "Malay,Spanish,Filipino,Chinese": 0.29333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.29333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.43333333333333335, + "English,Vietnamese,Spanish,Filipino": 0.3333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.41333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.32666666666666666, + "English,Vietnamese,Indonesian,Chinese": 0.4, + "English,Vietnamese,Filipino,Chinese": 0.34, + "English,Spanish,Indonesian,Filipino": 0.35333333333333333, + "English,Spanish,Indonesian,Chinese": 0.4533333333333333, + "English,Spanish,Filipino,Chinese": 0.3466666666666667, + "English,Indonesian,Filipino,Chinese": 0.3333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.31333333333333335, + "Vietnamese,Spanish,Indonesian,Chinese": 0.4066666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.32, + "Vietnamese,Indonesian,Filipino,Chinese": 0.3333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.3333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.37333333333333335, + "Malay,English,Vietnamese,Spanish,Filipino": 0.3, + "Malay,English,Vietnamese,Spanish,Chinese": 0.3466666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.2866666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.32666666666666666, + "Malay,English,Vietnamese,Filipino,Chinese": 0.2866666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.3, + "Malay,English,Spanish,Indonesian,Chinese": 0.3466666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.28, + "Malay,English,Indonesian,Filipino,Chinese": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.28, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.32, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.2733333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.2733333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.26666666666666666, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.2866666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.36666666666666664, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.2866666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.2866666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.30666666666666664, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.2866666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.2733333333333333, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.31333333333333335, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.26666666666666666, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.25333333333333335, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.26, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.25333333333333335, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.26 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667 + } + }, + "AC3_2": 0.5555915114551965, + "AC3_3": 0.47572109378905386, + "AC3_4": 0.4219668466644711, + "AC3_5": 0.3830396305807398, + "AC3_6": 0.3548633315693958, + "AC3_7": 0.33519801975842106 + }, + "prompt_2": { + "overall_acc": 0.5161904761904762, + "language_acc": { + "Malay": 0.4533333333333333, + "English": 0.6733333333333333, + "Vietnamese": 0.48, + "Spanish": 0.52, + "Indonesian": 0.5066666666666667, + "Filipino": 0.4533333333333333, + "Chinese": 0.5266666666666666 + }, + "consistency_score_2": 0.5860317460317459, + "consistency_score_3": 0.42400000000000004, + "consistency_score_4": 0.33352380952380956, + "consistency_score_5": 0.273968253968254, + "consistency_score_6": 0.23142857142857148, + "consistency_score_7": 0.2, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.54, + "Malay,Vietnamese": 0.58, + "Malay,Spanish": 0.5533333333333333, + "Malay,Indonesian": 0.6266666666666667, + "Malay,Filipino": 0.5333333333333333, + "Malay,Chinese": 0.4866666666666667, + "English,Vietnamese": 0.6333333333333333, + "English,Spanish": 0.68, + "English,Indonesian": 0.6066666666666667, + "English,Filipino": 0.5333333333333333, + "English,Chinese": 0.6266666666666667, + "Vietnamese,Spanish": 0.6333333333333333, + "Vietnamese,Indonesian": 0.6866666666666666, + "Vietnamese,Filipino": 0.54, + "Vietnamese,Chinese": 0.6133333333333333, + "Spanish,Indonesian": 0.6666666666666666, + "Spanish,Filipino": 0.5266666666666666, + "Spanish,Chinese": 0.5866666666666667, + "Indonesian,Filipino": 0.5133333333333333, + "Indonesian,Chinese": 0.6, + "Filipino,Chinese": 0.54 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.42, + "Malay,English,Spanish": 0.43333333333333335, + "Malay,English,Indonesian": 0.41333333333333333, + "Malay,English,Filipino": 0.35333333333333333, + "Malay,English,Chinese": 0.37333333333333335, + "Malay,Vietnamese,Spanish": 0.43333333333333335, + "Malay,Vietnamese,Indonesian": 0.49333333333333335, + "Malay,Vietnamese,Filipino": 0.38666666666666666, + "Malay,Vietnamese,Chinese": 0.4066666666666667, + "Malay,Spanish,Indonesian": 0.46, + "Malay,Spanish,Filipino": 0.35333333333333333, + "Malay,Spanish,Chinese": 0.36666666666666664, + "Malay,Indonesian,Filipino": 0.38666666666666666, + "Malay,Indonesian,Chinese": 0.4, + "Malay,Filipino,Chinese": 0.3466666666666667, + "English,Vietnamese,Spanish": 0.5066666666666667, + "English,Vietnamese,Indonesian": 0.49333333333333335, + "English,Vietnamese,Filipino": 0.3933333333333333, + "English,Vietnamese,Chinese": 0.47333333333333333, + "English,Spanish,Indonesian": 0.5066666666666667, + "English,Spanish,Filipino": 0.4266666666666667, + "English,Spanish,Chinese": 0.47333333333333333, + "English,Indonesian,Filipino": 0.36666666666666664, + "English,Indonesian,Chinese": 0.46, + "English,Filipino,Chinese": 0.4066666666666667, + "Vietnamese,Spanish,Indonesian": 0.5266666666666666, + "Vietnamese,Spanish,Filipino": 0.3933333333333333, + "Vietnamese,Spanish,Chinese": 0.4533333333333333, + "Vietnamese,Indonesian,Filipino": 0.4, + "Vietnamese,Indonesian,Chinese": 0.48, + "Vietnamese,Filipino,Chinese": 0.41333333333333333, + "Spanish,Indonesian,Filipino": 0.4, + "Spanish,Indonesian,Chinese": 0.47333333333333333, + "Spanish,Filipino,Chinese": 0.38, + "Indonesian,Filipino,Chinese": 0.38666666666666666 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.36, + "Malay,English,Vietnamese,Indonesian": 0.38, + "Malay,English,Vietnamese,Filipino": 0.29333333333333333, + "Malay,English,Vietnamese,Chinese": 0.34, + "Malay,English,Spanish,Indonesian": 0.38, + "Malay,English,Spanish,Filipino": 0.29333333333333333, + "Malay,English,Spanish,Chinese": 0.30666666666666664, + "Malay,English,Indonesian,Filipino": 0.2733333333333333, + "Malay,English,Indonesian,Chinese": 0.31333333333333335, + "Malay,English,Filipino,Chinese": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Indonesian": 0.41333333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.3, + "Malay,Vietnamese,Spanish,Chinese": 0.34, + "Malay,Vietnamese,Indonesian,Filipino": 0.32666666666666666, + "Malay,Vietnamese,Indonesian,Chinese": 0.36666666666666664, + "Malay,Vietnamese,Filipino,Chinese": 0.31333333333333335, + "Malay,Spanish,Indonesian,Filipino": 0.3, + "Malay,Spanish,Indonesian,Chinese": 0.34, + "Malay,Spanish,Filipino,Chinese": 0.26666666666666666, + "Malay,Indonesian,Filipino,Chinese": 0.2866666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.42, + "English,Vietnamese,Spanish,Filipino": 0.32, + "English,Vietnamese,Spanish,Chinese": 0.38666666666666666, + "English,Vietnamese,Indonesian,Filipino": 0.30666666666666664, + "English,Vietnamese,Indonesian,Chinese": 0.38, + "English,Vietnamese,Filipino,Chinese": 0.32666666666666666, + "English,Spanish,Indonesian,Filipino": 0.3333333333333333, + "English,Spanish,Indonesian,Chinese": 0.4066666666666667, + "English,Spanish,Filipino,Chinese": 0.32, + "English,Indonesian,Filipino,Chinese": 0.30666666666666664, + "Vietnamese,Spanish,Indonesian,Filipino": 0.3333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.4066666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.30666666666666664, + "Vietnamese,Indonesian,Filipino,Chinese": 0.3333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.32666666666666666 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.3466666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.24666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.29333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.25333333333333335, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.3, + "Malay,English,Vietnamese,Filipino,Chinese": 0.25333333333333335, + "Malay,English,Spanish,Indonesian,Filipino": 0.25333333333333335, + "Malay,English,Spanish,Indonesian,Chinese": 0.29333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.22, + "Malay,English,Indonesian,Filipino,Chinese": 0.22, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.28, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.32666666666666666, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.25333333333333335, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.2733333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.2733333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.34, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.26, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.26, + "English,Spanish,Indonesian,Filipino,Chinese": 0.28, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.28 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.28, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.21333333333333335, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.2 + } + }, + "AC3_2": 0.5488983980191056, + "AC3_3": 0.4655753646182279, + "AC3_4": 0.40522283062235054, + "AC3_5": 0.35795289746565156, + "AC3_6": 0.3195777979554342, + "AC3_7": 0.2882978723001711 + }, + "prompt_3": { + "overall_acc": 0.5009523809523809, + "language_acc": { + "Malay": 0.43333333333333335, + "English": 0.6533333333333333, + "Vietnamese": 0.4666666666666667, + "Spanish": 0.52, + "Indonesian": 0.49333333333333335, + "Filipino": 0.43333333333333335, + "Chinese": 0.5066666666666667 + }, + "consistency_score_2": 0.5904761904761905, + "consistency_score_3": 0.42780952380952386, + "consistency_score_4": 0.33638095238095245, + "consistency_score_5": 0.27714285714285714, + "consistency_score_6": 0.23619047619047623, + "consistency_score_7": 0.20666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.56, + "Malay,Vietnamese": 0.6066666666666667, + "Malay,Spanish": 0.58, + "Malay,Indonesian": 0.6266666666666667, + "Malay,Filipino": 0.5, + "Malay,Chinese": 0.4866666666666667, + "English,Vietnamese": 0.64, + "English,Spanish": 0.7266666666666667, + "English,Indonesian": 0.62, + "English,Filipino": 0.5333333333333333, + "English,Chinese": 0.6466666666666666, + "Vietnamese,Spanish": 0.6466666666666666, + "Vietnamese,Indonesian": 0.6666666666666666, + "Vietnamese,Filipino": 0.5333333333333333, + "Vietnamese,Chinese": 0.6, + "Spanish,Indonesian": 0.6933333333333334, + "Spanish,Filipino": 0.54, + "Spanish,Chinese": 0.6, + "Indonesian,Filipino": 0.49333333333333335, + "Indonesian,Chinese": 0.5933333333333334, + "Filipino,Chinese": 0.5066666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.44, + "Malay,English,Spanish": 0.4666666666666667, + "Malay,English,Indonesian": 0.43333333333333335, + "Malay,English,Filipino": 0.34, + "Malay,English,Chinese": 0.4, + "Malay,Vietnamese,Spanish": 0.46, + "Malay,Vietnamese,Indonesian": 0.4866666666666667, + "Malay,Vietnamese,Filipino": 0.38, + "Malay,Vietnamese,Chinese": 0.4, + "Malay,Spanish,Indonesian": 0.48, + "Malay,Spanish,Filipino": 0.36, + "Malay,Spanish,Chinese": 0.4, + "Malay,Indonesian,Filipino": 0.36, + "Malay,Indonesian,Chinese": 0.4066666666666667, + "Malay,Filipino,Chinese": 0.32, + "English,Vietnamese,Spanish": 0.5266666666666666, + "English,Vietnamese,Indonesian": 0.49333333333333335, + "English,Vietnamese,Filipino": 0.38, + "English,Vietnamese,Chinese": 0.48, + "English,Spanish,Indonesian": 0.54, + "English,Spanish,Filipino": 0.43333333333333335, + "English,Spanish,Chinese": 0.52, + "English,Indonesian,Filipino": 0.36, + "English,Indonesian,Chinese": 0.4866666666666667, + "English,Filipino,Chinese": 0.3933333333333333, + "Vietnamese,Spanish,Indonesian": 0.5266666666666666, + "Vietnamese,Spanish,Filipino": 0.38666666666666666, + "Vietnamese,Spanish,Chinese": 0.44666666666666666, + "Vietnamese,Indonesian,Filipino": 0.38666666666666666, + "Vietnamese,Indonesian,Chinese": 0.4666666666666667, + "Vietnamese,Filipino,Chinese": 0.38, + "Spanish,Indonesian,Filipino": 0.4066666666666667, + "Spanish,Indonesian,Chinese": 0.4866666666666667, + "Spanish,Filipino,Chinese": 0.37333333333333335, + "Indonesian,Filipino,Chinese": 0.36666666666666664 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.3933333333333333, + "Malay,English,Vietnamese,Indonesian": 0.37333333333333335, + "Malay,English,Vietnamese,Filipino": 0.2866666666666667, + "Malay,English,Vietnamese,Chinese": 0.35333333333333333, + "Malay,English,Spanish,Indonesian": 0.4066666666666667, + "Malay,English,Spanish,Filipino": 0.29333333333333333, + "Malay,English,Spanish,Chinese": 0.36, + "Malay,English,Indonesian,Filipino": 0.26666666666666666, + "Malay,English,Indonesian,Chinese": 0.35333333333333333, + "Malay,English,Filipino,Chinese": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Indonesian": 0.41333333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.31333333333333335, + "Malay,Vietnamese,Spanish,Chinese": 0.3466666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.32, + "Malay,Vietnamese,Indonesian,Chinese": 0.35333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.28, + "Malay,Spanish,Indonesian,Filipino": 0.30666666666666664, + "Malay,Spanish,Indonesian,Chinese": 0.36, + "Malay,Spanish,Filipino,Chinese": 0.26666666666666666, + "Malay,Indonesian,Filipino,Chinese": 0.2733333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.43333333333333335, + "English,Vietnamese,Spanish,Filipino": 0.31333333333333335, + "English,Vietnamese,Spanish,Chinese": 0.4, + "English,Vietnamese,Indonesian,Filipino": 0.29333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.3933333333333333, + "English,Vietnamese,Filipino,Chinese": 0.30666666666666664, + "English,Spanish,Indonesian,Filipino": 0.34, + "English,Spanish,Indonesian,Chinese": 0.43333333333333335, + "English,Spanish,Filipino,Chinese": 0.32666666666666666, + "English,Indonesian,Filipino,Chinese": 0.3, + "Vietnamese,Spanish,Indonesian,Filipino": 0.3333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.4, + "Vietnamese,Spanish,Filipino,Chinese": 0.29333333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.31333333333333335, + "Spanish,Indonesian,Filipino,Chinese": 0.32 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.35333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.26, + "Malay,English,Vietnamese,Spanish,Chinese": 0.32, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.31333333333333335, + "Malay,English,Vietnamese,Filipino,Chinese": 0.23333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.26, + "Malay,English,Spanish,Indonesian,Chinese": 0.3333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.23333333333333334, + "Malay,English,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.2866666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.32, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.25333333333333335, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.2733333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.35333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.25333333333333335, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.25333333333333335, + "English,Spanish,Indonesian,Filipino,Chinese": 0.28, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.2733333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.24, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.29333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.22, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.21333333333333335, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.22, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667 + } + }, + "AC3_2": 0.5420427158150418, + "AC3_3": 0.4615008398268678, + "AC3_4": 0.4024940421547999, + "AC3_5": 0.35685959079169577, + "AC3_6": 0.3210237479569425, + "AC3_7": 0.2926155226145474 + }, + "prompt_4": { + "overall_acc": 0.5266666666666666, + "language_acc": { + "Malay": 0.44, + "English": 0.6733333333333333, + "Vietnamese": 0.4866666666666667, + "Spanish": 0.56, + "Indonesian": 0.5266666666666666, + "Filipino": 0.4666666666666667, + "Chinese": 0.5333333333333333 + }, + "consistency_score_2": 0.594920634920635, + "consistency_score_3": 0.4384761904761904, + "consistency_score_4": 0.35561904761904756, + "consistency_score_5": 0.30158730158730157, + "consistency_score_6": 0.26285714285714284, + "consistency_score_7": 0.23333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.56, + "Malay,Vietnamese": 0.56, + "Malay,Spanish": 0.5733333333333334, + "Malay,Indonesian": 0.64, + "Malay,Filipino": 0.54, + "Malay,Chinese": 0.52, + "English,Vietnamese": 0.64, + "English,Spanish": 0.7333333333333333, + "English,Indonesian": 0.6333333333333333, + "English,Filipino": 0.5533333333333333, + "English,Chinese": 0.6333333333333333, + "Vietnamese,Spanish": 0.62, + "Vietnamese,Indonesian": 0.6266666666666667, + "Vietnamese,Filipino": 0.5466666666666666, + "Vietnamese,Chinese": 0.6333333333333333, + "Spanish,Indonesian": 0.7, + "Spanish,Filipino": 0.54, + "Spanish,Chinese": 0.6133333333333333, + "Indonesian,Filipino": 0.5133333333333333, + "Indonesian,Chinese": 0.5933333333333334, + "Filipino,Chinese": 0.52 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.4533333333333333, + "Malay,English,Spanish": 0.47333333333333333, + "Malay,English,Indonesian": 0.44666666666666666, + "Malay,English,Filipino": 0.38666666666666666, + "Malay,English,Chinese": 0.42, + "Malay,Vietnamese,Spanish": 0.43333333333333335, + "Malay,Vietnamese,Indonesian": 0.46, + "Malay,Vietnamese,Filipino": 0.38666666666666666, + "Malay,Vietnamese,Chinese": 0.42, + "Malay,Spanish,Indonesian": 0.48, + "Malay,Spanish,Filipino": 0.38, + "Malay,Spanish,Chinese": 0.41333333333333333, + "Malay,Indonesian,Filipino": 0.38666666666666666, + "Malay,Indonesian,Chinese": 0.41333333333333333, + "Malay,Filipino,Chinese": 0.35333333333333333, + "English,Vietnamese,Spanish": 0.5266666666666666, + "English,Vietnamese,Indonesian": 0.5, + "English,Vietnamese,Filipino": 0.4, + "English,Vietnamese,Chinese": 0.4866666666666667, + "English,Spanish,Indonesian": 0.56, + "English,Spanish,Filipino": 0.44666666666666666, + "English,Spanish,Chinese": 0.52, + "English,Indonesian,Filipino": 0.3933333333333333, + "English,Indonesian,Chinese": 0.4866666666666667, + "English,Filipino,Chinese": 0.3933333333333333, + "Vietnamese,Spanish,Indonesian": 0.5066666666666667, + "Vietnamese,Spanish,Filipino": 0.4, + "Vietnamese,Spanish,Chinese": 0.4666666666666667, + "Vietnamese,Indonesian,Filipino": 0.4, + "Vietnamese,Indonesian,Chinese": 0.47333333333333333, + "Vietnamese,Filipino,Chinese": 0.4, + "Spanish,Indonesian,Filipino": 0.42, + "Spanish,Indonesian,Chinese": 0.5066666666666667, + "Spanish,Filipino,Chinese": 0.38, + "Indonesian,Filipino,Chinese": 0.37333333333333335 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.4066666666666667, + "Malay,English,Vietnamese,Indonesian": 0.3933333333333333, + "Malay,English,Vietnamese,Filipino": 0.34, + "Malay,English,Vietnamese,Chinese": 0.37333333333333335, + "Malay,English,Spanish,Indonesian": 0.42, + "Malay,English,Spanish,Filipino": 0.34, + "Malay,English,Spanish,Chinese": 0.36666666666666664, + "Malay,English,Indonesian,Filipino": 0.30666666666666664, + "Malay,English,Indonesian,Chinese": 0.36, + "Malay,English,Filipino,Chinese": 0.3, + "Malay,Vietnamese,Spanish,Indonesian": 0.4, + "Malay,Vietnamese,Spanish,Filipino": 0.32666666666666666, + "Malay,Vietnamese,Spanish,Chinese": 0.36, + "Malay,Vietnamese,Indonesian,Filipino": 0.3333333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.36666666666666664, + "Malay,Vietnamese,Filipino,Chinese": 0.32, + "Malay,Spanish,Indonesian,Filipino": 0.32, + "Malay,Spanish,Indonesian,Chinese": 0.37333333333333335, + "Malay,Spanish,Filipino,Chinese": 0.2866666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.29333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.44666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.3466666666666667, + "English,Vietnamese,Spanish,Chinese": 0.41333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.32, + "English,Vietnamese,Indonesian,Chinese": 0.4066666666666667, + "English,Vietnamese,Filipino,Chinese": 0.3333333333333333, + "English,Spanish,Indonesian,Filipino": 0.36, + "English,Spanish,Indonesian,Chinese": 0.4533333333333333, + "English,Spanish,Filipino,Chinese": 0.32666666666666666, + "English,Indonesian,Filipino,Chinese": 0.31333333333333335, + "Vietnamese,Spanish,Indonesian,Filipino": 0.34, + "Vietnamese,Spanish,Indonesian,Chinese": 0.42, + "Vietnamese,Spanish,Filipino,Chinese": 0.32, + "Vietnamese,Indonesian,Filipino,Chinese": 0.3333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.32666666666666666 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.37333333333333335, + "Malay,English,Vietnamese,Spanish,Filipino": 0.30666666666666664, + "Malay,English,Vietnamese,Spanish,Chinese": 0.3333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.2866666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.32666666666666666, + "Malay,English,Vietnamese,Filipino,Chinese": 0.2866666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.2866666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.3466666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.26, + "Malay,English,Indonesian,Filipino,Chinese": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.29333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.34, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.2733333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.28, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.26, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.3, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.38, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.28, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.28, + "English,Spanish,Indonesian,Filipino,Chinese": 0.29333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.29333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.2733333333333333, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.31333333333333335, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.25333333333333335, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.24666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.24, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.25333333333333335, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.26 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334 + } + }, + "AC3_2": 0.5587168600313248, + "AC3_3": 0.47854220112139295, + "AC3_4": 0.4245624639703018, + "AC3_5": 0.3835441420260062, + "AC3_6": 0.35068757534762096, + "AC3_7": 0.3233918128229455 + }, + "prompt_5": { + "overall_acc": 0.5085714285714286, + "language_acc": { + "Malay": 0.41333333333333333, + "English": 0.6533333333333333, + "Vietnamese": 0.4666666666666667, + "Spanish": 0.5333333333333333, + "Indonesian": 0.4866666666666667, + "Filipino": 0.49333333333333335, + "Chinese": 0.5133333333333333 + }, + "consistency_score_2": 0.5917460317460317, + "consistency_score_3": 0.4316190476190475, + "consistency_score_4": 0.3449523809523809, + "consistency_score_5": 0.2882539682539683, + "consistency_score_6": 0.24666666666666665, + "consistency_score_7": 0.21333333333333335, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5333333333333333, + "Malay,Vietnamese": 0.5666666666666667, + "Malay,Spanish": 0.58, + "Malay,Indonesian": 0.64, + "Malay,Filipino": 0.5066666666666667, + "Malay,Chinese": 0.52, + "English,Vietnamese": 0.6133333333333333, + "English,Spanish": 0.7133333333333334, + "English,Indonesian": 0.64, + "English,Filipino": 0.58, + "English,Chinese": 0.6266666666666667, + "Vietnamese,Spanish": 0.6, + "Vietnamese,Indonesian": 0.6266666666666667, + "Vietnamese,Filipino": 0.5666666666666667, + "Vietnamese,Chinese": 0.6333333333333333, + "Spanish,Indonesian": 0.6466666666666666, + "Spanish,Filipino": 0.56, + "Spanish,Chinese": 0.62, + "Indonesian,Filipino": 0.5066666666666667, + "Indonesian,Chinese": 0.5933333333333334, + "Filipino,Chinese": 0.5533333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.43333333333333335, + "Malay,English,Spanish": 0.46, + "Malay,English,Indonesian": 0.44, + "Malay,English,Filipino": 0.37333333333333335, + "Malay,English,Chinese": 0.41333333333333333, + "Malay,Vietnamese,Spanish": 0.42, + "Malay,Vietnamese,Indonesian": 0.44666666666666666, + "Malay,Vietnamese,Filipino": 0.38, + "Malay,Vietnamese,Chinese": 0.41333333333333333, + "Malay,Spanish,Indonesian": 0.4666666666666667, + "Malay,Spanish,Filipino": 0.36666666666666664, + "Malay,Spanish,Chinese": 0.41333333333333333, + "Malay,Indonesian,Filipino": 0.36, + "Malay,Indonesian,Chinese": 0.42, + "Malay,Filipino,Chinese": 0.3466666666666667, + "English,Vietnamese,Spanish": 0.5066666666666667, + "English,Vietnamese,Indonesian": 0.49333333333333335, + "English,Vietnamese,Filipino": 0.41333333333333333, + "English,Vietnamese,Chinese": 0.47333333333333333, + "English,Spanish,Indonesian": 0.52, + "English,Spanish,Filipino": 0.4666666666666667, + "English,Spanish,Chinese": 0.52, + "English,Indonesian,Filipino": 0.3933333333333333, + "English,Indonesian,Chinese": 0.4866666666666667, + "English,Filipino,Chinese": 0.4266666666666667, + "Vietnamese,Spanish,Indonesian": 0.47333333333333333, + "Vietnamese,Spanish,Filipino": 0.4, + "Vietnamese,Spanish,Chinese": 0.4533333333333333, + "Vietnamese,Indonesian,Filipino": 0.38666666666666666, + "Vietnamese,Indonesian,Chinese": 0.46, + "Vietnamese,Filipino,Chinese": 0.42, + "Spanish,Indonesian,Filipino": 0.3933333333333333, + "Spanish,Indonesian,Chinese": 0.47333333333333333, + "Spanish,Filipino,Chinese": 0.4066666666666667, + "Indonesian,Filipino,Chinese": 0.38666666666666666 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.38, + "Malay,English,Vietnamese,Indonesian": 0.38666666666666666, + "Malay,English,Vietnamese,Filipino": 0.31333333333333335, + "Malay,English,Vietnamese,Chinese": 0.36, + "Malay,English,Spanish,Indonesian": 0.4, + "Malay,English,Spanish,Filipino": 0.32666666666666666, + "Malay,English,Spanish,Chinese": 0.36, + "Malay,English,Indonesian,Filipino": 0.2866666666666667, + "Malay,English,Indonesian,Chinese": 0.35333333333333333, + "Malay,English,Filipino,Chinese": 0.29333333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.38, + "Malay,Vietnamese,Spanish,Filipino": 0.3, + "Malay,Vietnamese,Spanish,Chinese": 0.35333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.30666666666666664, + "Malay,Vietnamese,Indonesian,Chinese": 0.35333333333333333, + "Malay,Vietnamese,Filipino,Chinese": 0.30666666666666664, + "Malay,Spanish,Indonesian,Filipino": 0.29333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.36666666666666664, + "Malay,Spanish,Filipino,Chinese": 0.28, + "Malay,Indonesian,Filipino,Chinese": 0.2866666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.42, + "English,Vietnamese,Spanish,Filipino": 0.3466666666666667, + "English,Vietnamese,Spanish,Chinese": 0.41333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.32, + "English,Vietnamese,Indonesian,Chinese": 0.38666666666666666, + "English,Vietnamese,Filipino,Chinese": 0.35333333333333333, + "English,Spanish,Indonesian,Filipino": 0.3466666666666667, + "English,Spanish,Indonesian,Chinese": 0.44, + "English,Spanish,Filipino,Chinese": 0.36, + "English,Indonesian,Filipino,Chinese": 0.32666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino": 0.32, + "Vietnamese,Spanish,Indonesian,Chinese": 0.38666666666666666, + "Vietnamese,Spanish,Filipino,Chinese": 0.32666666666666666, + "Vietnamese,Indonesian,Filipino,Chinese": 0.32, + "Spanish,Indonesian,Filipino,Chinese": 0.32 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.35333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.2733333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.32666666666666666, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.31333333333333335, + "Malay,English,Vietnamese,Filipino,Chinese": 0.2733333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.26666666666666666, + "Malay,English,Spanish,Indonesian,Chinese": 0.3333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.25333333333333335, + "Malay,English,Indonesian,Filipino,Chinese": 0.24, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.32666666666666666, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.26, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.26, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.2866666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.36, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.3, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.2733333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.3, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.2733333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.3, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.24, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.25333333333333335 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.21333333333333335 + } + }, + "AC3_2": 0.547015085268891, + "AC3_3": 0.46694605227726244, + "AC3_4": 0.4110791595781963, + "AC3_5": 0.3679544677967366, + "AC3_6": 0.3322068095398717, + "AC3_7": 0.3005804748923998 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.4131493506493506, + "language_acc": { + "English": 0.5056818181818182, + "Vietnamese": 0.38636363636363635, + "Chinese": 0.44886363636363635, + "Indonesian": 0.42045454545454547, + "Filipino": 0.3068181818181818, + "Spanish": 0.4659090909090909, + "Malay": 0.35795454545454547 + }, + "consistency_score_2": 0.5384199134199135, + "consistency_score_3": 0.35633116883116883, + "consistency_score_4": 0.25876623376623376, + "consistency_score_5": 0.19778138528138528, + "consistency_score_6": 0.15584415584415584, + "consistency_score_7": 0.125, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5454545454545454, + "English,Chinese": 0.6193181818181818, + "English,Indonesian": 0.5340909090909091, + "English,Filipino": 0.42045454545454547, + "English,Spanish": 0.6931818181818182, + "English,Malay": 0.5113636363636364, + "Vietnamese,Chinese": 0.5227272727272727, + "Vietnamese,Indonesian": 0.5454545454545454, + "Vietnamese,Filipino": 0.4659090909090909, + "Vietnamese,Spanish": 0.6136363636363636, + "Vietnamese,Malay": 0.5227272727272727, + "Chinese,Indonesian": 0.5170454545454546, + "Chinese,Filipino": 0.4602272727272727, + "Chinese,Spanish": 0.6136363636363636, + "Chinese,Malay": 0.5227272727272727, + "Indonesian,Filipino": 0.5284090909090909, + "Indonesian,Spanish": 0.6136363636363636, + "Indonesian,Malay": 0.5738636363636364, + "Filipino,Spanish": 0.48863636363636365, + "Filipino,Malay": 0.48863636363636365, + "Spanish,Malay": 0.5056818181818182 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.39204545454545453, + "English,Vietnamese,Indonesian": 0.3409090909090909, + "English,Vietnamese,Filipino": 0.26704545454545453, + "English,Vietnamese,Spanish": 0.4659090909090909, + "English,Vietnamese,Malay": 0.3409090909090909, + "English,Chinese,Indonesian": 0.3806818181818182, + "English,Chinese,Filipino": 0.3125, + "English,Chinese,Spanish": 0.4943181818181818, + "English,Chinese,Malay": 0.36363636363636365, + "English,Indonesian,Filipino": 0.3068181818181818, + "English,Indonesian,Spanish": 0.4431818181818182, + "English,Indonesian,Malay": 0.3522727272727273, + "English,Filipino,Spanish": 0.36363636363636365, + "English,Filipino,Malay": 0.29545454545454547, + "English,Spanish,Malay": 0.38636363636363635, + "Vietnamese,Chinese,Indonesian": 0.32954545454545453, + "Vietnamese,Chinese,Filipino": 0.2840909090909091, + "Vietnamese,Chinese,Spanish": 0.4090909090909091, + "Vietnamese,Chinese,Malay": 0.3465909090909091, + "Vietnamese,Indonesian,Filipino": 0.3181818181818182, + "Vietnamese,Indonesian,Spanish": 0.42045454545454547, + "Vietnamese,Indonesian,Malay": 0.35795454545454547, + "Vietnamese,Filipino,Spanish": 0.3352272727272727, + "Vietnamese,Filipino,Malay": 0.3068181818181818, + "Vietnamese,Spanish,Malay": 0.35795454545454547, + "Chinese,Indonesian,Filipino": 0.3068181818181818, + "Chinese,Indonesian,Spanish": 0.4034090909090909, + "Chinese,Indonesian,Malay": 0.3522727272727273, + "Chinese,Filipino,Spanish": 0.3352272727272727, + "Chinese,Filipino,Malay": 0.3181818181818182, + "Chinese,Spanish,Malay": 0.38636363636363635, + "Indonesian,Filipino,Spanish": 0.35795454545454547, + "Indonesian,Filipino,Malay": 0.3465909090909091, + "Indonesian,Spanish,Malay": 0.375, + "Filipino,Spanish,Malay": 0.3181818181818182 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.26704545454545453, + "English,Vietnamese,Chinese,Filipino": 0.20454545454545456, + "English,Vietnamese,Chinese,Spanish": 0.35795454545454547, + "English,Vietnamese,Chinese,Malay": 0.2727272727272727, + "English,Vietnamese,Indonesian,Filipino": 0.19886363636363635, + "English,Vietnamese,Indonesian,Spanish": 0.3068181818181818, + "English,Vietnamese,Indonesian,Malay": 0.23863636363636365, + "English,Vietnamese,Filipino,Spanish": 0.24431818181818182, + "English,Vietnamese,Filipino,Malay": 0.2159090909090909, + "English,Vietnamese,Spanish,Malay": 0.2897727272727273, + "English,Chinese,Indonesian,Filipino": 0.2215909090909091, + "English,Chinese,Indonesian,Spanish": 0.3352272727272727, + "English,Chinese,Indonesian,Malay": 0.2727272727272727, + "English,Chinese,Filipino,Spanish": 0.2784090909090909, + "English,Chinese,Filipino,Malay": 0.23863636363636365, + "English,Chinese,Spanish,Malay": 0.3125, + "English,Indonesian,Filipino,Spanish": 0.26136363636363635, + "English,Indonesian,Filipino,Malay": 0.24431818181818182, + "English,Indonesian,Spanish,Malay": 0.2897727272727273, + "English,Filipino,Spanish,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Indonesian,Filipino": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2840909090909091, + "Vietnamese,Chinese,Indonesian,Malay": 0.24431818181818182, + "Vietnamese,Chinese,Filipino,Spanish": 0.22727272727272727, + "Vietnamese,Chinese,Filipino,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Spanish,Malay": 0.2840909090909091, + "Vietnamese,Indonesian,Filipino,Spanish": 0.25, + "Vietnamese,Indonesian,Filipino,Malay": 0.24431818181818182, + "Vietnamese,Indonesian,Spanish,Malay": 0.2727272727272727, + "Vietnamese,Filipino,Spanish,Malay": 0.24431818181818182, + "Chinese,Indonesian,Filipino,Spanish": 0.24431818181818182, + "Chinese,Indonesian,Filipino,Malay": 0.23295454545454544, + "Chinese,Indonesian,Spanish,Malay": 0.2840909090909091, + "Chinese,Filipino,Spanish,Malay": 0.2556818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.2556818181818182 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.14772727272727273, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.24431818181818182, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.19886363636363635, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.19318181818181818, + "English,Vietnamese,Chinese,Filipino,Malay": 0.17613636363636365, + "English,Vietnamese,Chinese,Spanish,Malay": 0.24431818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.17613636363636365, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.17613636363636365, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.2159090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.19886363636363635, + "English,Chinese,Indonesian,Filipino,Spanish": 0.19886363636363635, + "English,Chinese,Indonesian,Filipino,Malay": 0.1875, + "English,Chinese,Indonesian,Spanish,Malay": 0.23863636363636365, + "English,Chinese,Filipino,Spanish,Malay": 0.2159090909090909, + "English,Indonesian,Filipino,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.19318181818181818, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.19886363636363635, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.19318181818181818 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.13636363636363635, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.18181818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.16477272727272727, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.1590909090909091, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.14772727272727273 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.125 + } + }, + "AC3_2": 0.4675389296023921, + "AC3_3": 0.3826425420073716, + "AC3_4": 0.31822182403336674, + "AC3_5": 0.26750414107638665, + "AC3_6": 0.22631861714902737, + "AC3_7": 0.19193061836554168 + }, + "prompt_2": { + "overall_acc": 0.422077922077922, + "language_acc": { + "English": 0.5227272727272727, + "Vietnamese": 0.3977272727272727, + "Chinese": 0.44886363636363635, + "Indonesian": 0.4147727272727273, + "Filipino": 0.32954545454545453, + "Spanish": 0.45454545454545453, + "Malay": 0.38636363636363635 + }, + "consistency_score_2": 0.5451839826839827, + "consistency_score_3": 0.3620129870129871, + "consistency_score_4": 0.2626623376623377, + "consistency_score_5": 0.20075757575757572, + "consistency_score_6": 0.1590909090909091, + "consistency_score_7": 0.13068181818181818, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5965909090909091, + "English,Chinese": 0.5965909090909091, + "English,Indonesian": 0.5340909090909091, + "English,Filipino": 0.4375, + "English,Spanish": 0.7045454545454546, + "English,Malay": 0.5113636363636364, + "Vietnamese,Chinese": 0.5511363636363636, + "Vietnamese,Indonesian": 0.5568181818181818, + "Vietnamese,Filipino": 0.4715909090909091, + "Vietnamese,Spanish": 0.5965909090909091, + "Vietnamese,Malay": 0.5284090909090909, + "Chinese,Indonesian": 0.5170454545454546, + "Chinese,Filipino": 0.4375, + "Chinese,Spanish": 0.5681818181818182, + "Chinese,Malay": 0.5227272727272727, + "Indonesian,Filipino": 0.5568181818181818, + "Indonesian,Spanish": 0.6079545454545454, + "Indonesian,Malay": 0.6363636363636364, + "Filipino,Spanish": 0.48863636363636365, + "Filipino,Malay": 0.5227272727272727, + "Spanish,Malay": 0.5056818181818182 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.4090909090909091, + "English,Vietnamese,Indonesian": 0.3806818181818182, + "English,Vietnamese,Filipino": 0.30113636363636365, + "English,Vietnamese,Spanish": 0.4715909090909091, + "English,Vietnamese,Malay": 0.36363636363636365, + "English,Chinese,Indonesian": 0.36363636363636365, + "English,Chinese,Filipino": 0.29545454545454547, + "English,Chinese,Spanish": 0.4602272727272727, + "English,Chinese,Malay": 0.35795454545454547, + "English,Indonesian,Filipino": 0.3181818181818182, + "English,Indonesian,Spanish": 0.4375, + "English,Indonesian,Malay": 0.3806818181818182, + "English,Filipino,Spanish": 0.3522727272727273, + "English,Filipino,Malay": 0.3181818181818182, + "English,Spanish,Malay": 0.38636363636363635, + "Vietnamese,Chinese,Indonesian": 0.35795454545454547, + "Vietnamese,Chinese,Filipino": 0.2840909090909091, + "Vietnamese,Chinese,Spanish": 0.39204545454545453, + "Vietnamese,Chinese,Malay": 0.3522727272727273, + "Vietnamese,Indonesian,Filipino": 0.32954545454545453, + "Vietnamese,Indonesian,Spanish": 0.4147727272727273, + "Vietnamese,Indonesian,Malay": 0.38636363636363635, + "Vietnamese,Filipino,Spanish": 0.32386363636363635, + "Vietnamese,Filipino,Malay": 0.32386363636363635, + "Vietnamese,Spanish,Malay": 0.3522727272727273, + "Chinese,Indonesian,Filipino": 0.3181818181818182, + "Chinese,Indonesian,Spanish": 0.375, + "Chinese,Indonesian,Malay": 0.38636363636363635, + "Chinese,Filipino,Spanish": 0.29545454545454547, + "Chinese,Filipino,Malay": 0.3352272727272727, + "Chinese,Spanish,Malay": 0.3522727272727273, + "Indonesian,Filipino,Spanish": 0.3806818181818182, + "Indonesian,Filipino,Malay": 0.39204545454545453, + "Indonesian,Spanish,Malay": 0.3977272727272727, + "Filipino,Spanish,Malay": 0.32386363636363635 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.2840909090909091, + "English,Vietnamese,Chinese,Filipino": 0.21022727272727273, + "English,Vietnamese,Chinese,Spanish": 0.32954545454545453, + "English,Vietnamese,Chinese,Malay": 0.2727272727272727, + "English,Vietnamese,Indonesian,Filipino": 0.2159090909090909, + "English,Vietnamese,Indonesian,Spanish": 0.3181818181818182, + "English,Vietnamese,Indonesian,Malay": 0.2784090909090909, + "English,Vietnamese,Filipino,Spanish": 0.25, + "English,Vietnamese,Filipino,Malay": 0.23295454545454544, + "English,Vietnamese,Spanish,Malay": 0.2840909090909091, + "English,Chinese,Indonesian,Filipino": 0.2215909090909091, + "English,Chinese,Indonesian,Spanish": 0.3068181818181818, + "English,Chinese,Indonesian,Malay": 0.2784090909090909, + "English,Chinese,Filipino,Spanish": 0.23863636363636365, + "English,Chinese,Filipino,Malay": 0.2556818181818182, + "English,Chinese,Spanish,Malay": 0.2784090909090909, + "English,Indonesian,Filipino,Spanish": 0.26704545454545453, + "English,Indonesian,Filipino,Malay": 0.26136363636363635, + "English,Indonesian,Spanish,Malay": 0.3181818181818182, + "English,Filipino,Spanish,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Indonesian,Filipino": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2897727272727273, + "Vietnamese,Chinese,Indonesian,Malay": 0.26704545454545453, + "Vietnamese,Chinese,Filipino,Spanish": 0.2159090909090909, + "Vietnamese,Chinese,Filipino,Malay": 0.23295454545454544, + "Vietnamese,Chinese,Spanish,Malay": 0.26136363636363635, + "Vietnamese,Indonesian,Filipino,Spanish": 0.25, + "Vietnamese,Indonesian,Filipino,Malay": 0.26136363636363635, + "Vietnamese,Indonesian,Spanish,Malay": 0.2840909090909091, + "Vietnamese,Filipino,Spanish,Malay": 0.23863636363636365, + "Chinese,Indonesian,Filipino,Spanish": 0.24431818181818182, + "Chinese,Indonesian,Filipino,Malay": 0.26704545454545453, + "Chinese,Indonesian,Spanish,Malay": 0.2784090909090909, + "Chinese,Filipino,Spanish,Malay": 0.24431818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.2840909090909091 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.1534090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.24431818181818182, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.2159090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.17613636363636365, + "English,Vietnamese,Chinese,Filipino,Malay": 0.1875, + "English,Vietnamese,Chinese,Spanish,Malay": 0.2159090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.18181818181818182, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.1875, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.23295454545454544, + "English,Vietnamese,Filipino,Spanish,Malay": 0.19318181818181818, + "English,Chinese,Indonesian,Filipino,Spanish": 0.19318181818181818, + "English,Chinese,Indonesian,Filipino,Malay": 0.20454545454545456, + "English,Chinese,Indonesian,Spanish,Malay": 0.23295454545454544, + "English,Chinese,Filipino,Spanish,Malay": 0.20454545454545456, + "English,Indonesian,Filipino,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.18181818181818182, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.20454545454545456, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.21022727272727273 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.14772727272727273, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.18181818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.1534090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.16477272727272727, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818 + } + }, + "AC3_2": 0.4757969302931948, + "AC3_3": 0.38974483068891297, + "AC3_4": 0.3238132185027708, + "AC3_5": 0.2720954110612814, + "AC3_6": 0.23108176735485497, + "AC3_7": 0.19957282068209034 + }, + "prompt_3": { + "overall_acc": 0.4155844155844156, + "language_acc": { + "English": 0.4943181818181818, + "Vietnamese": 0.38636363636363635, + "Chinese": 0.4431818181818182, + "Indonesian": 0.42613636363636365, + "Filipino": 0.32954545454545453, + "Spanish": 0.45454545454545453, + "Malay": 0.375 + }, + "consistency_score_2": 0.5465367965367965, + "consistency_score_3": 0.36769480519480524, + "consistency_score_4": 0.272077922077922, + "consistency_score_5": 0.21266233766233766, + "consistency_score_6": 0.17207792207792208, + "consistency_score_7": 0.14204545454545456, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5795454545454546, + "English,Chinese": 0.6079545454545454, + "English,Indonesian": 0.5454545454545454, + "English,Filipino": 0.4602272727272727, + "English,Spanish": 0.7159090909090909, + "English,Malay": 0.5227272727272727, + "Vietnamese,Chinese": 0.5454545454545454, + "Vietnamese,Indonesian": 0.5681818181818182, + "Vietnamese,Filipino": 0.44886363636363635, + "Vietnamese,Spanish": 0.5965909090909091, + "Vietnamese,Malay": 0.5511363636363636, + "Chinese,Indonesian": 0.5056818181818182, + "Chinese,Filipino": 0.4147727272727273, + "Chinese,Spanish": 0.5909090909090909, + "Chinese,Malay": 0.5170454545454546, + "Indonesian,Filipino": 0.5454545454545454, + "Indonesian,Spanish": 0.5965909090909091, + "Indonesian,Malay": 0.625, + "Filipino,Spanish": 0.48863636363636365, + "Filipino,Malay": 0.5227272727272727, + "Spanish,Malay": 0.5284090909090909 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.4034090909090909, + "English,Vietnamese,Indonesian": 0.375, + "English,Vietnamese,Filipino": 0.29545454545454547, + "English,Vietnamese,Spanish": 0.4715909090909091, + "English,Vietnamese,Malay": 0.375, + "English,Chinese,Indonesian": 0.375, + "English,Chinese,Filipino": 0.30113636363636365, + "English,Chinese,Spanish": 0.48295454545454547, + "English,Chinese,Malay": 0.375, + "English,Indonesian,Filipino": 0.32954545454545453, + "English,Indonesian,Spanish": 0.45454545454545453, + "English,Indonesian,Malay": 0.38636363636363635, + "English,Filipino,Spanish": 0.3693181818181818, + "English,Filipino,Malay": 0.3181818181818182, + "English,Spanish,Malay": 0.42613636363636365, + "Vietnamese,Chinese,Indonesian": 0.35795454545454547, + "Vietnamese,Chinese,Filipino": 0.2784090909090909, + "Vietnamese,Chinese,Spanish": 0.4090909090909091, + "Vietnamese,Chinese,Malay": 0.36363636363636365, + "Vietnamese,Indonesian,Filipino": 0.3352272727272727, + "Vietnamese,Indonesian,Spanish": 0.4090909090909091, + "Vietnamese,Indonesian,Malay": 0.3977272727272727, + "Vietnamese,Filipino,Spanish": 0.3181818181818182, + "Vietnamese,Filipino,Malay": 0.3352272727272727, + "Vietnamese,Spanish,Malay": 0.3693181818181818, + "Chinese,Indonesian,Filipino": 0.30113636363636365, + "Chinese,Indonesian,Spanish": 0.3806818181818182, + "Chinese,Indonesian,Malay": 0.3693181818181818, + "Chinese,Filipino,Spanish": 0.3068181818181818, + "Chinese,Filipino,Malay": 0.32386363636363635, + "Chinese,Spanish,Malay": 0.375, + "Indonesian,Filipino,Spanish": 0.36363636363636365, + "Indonesian,Filipino,Malay": 0.39204545454545453, + "Indonesian,Spanish,Malay": 0.4034090909090909, + "Filipino,Spanish,Malay": 0.3409090909090909 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.29545454545454547, + "English,Vietnamese,Chinese,Filipino": 0.22727272727272727, + "English,Vietnamese,Chinese,Spanish": 0.3409090909090909, + "English,Vietnamese,Chinese,Malay": 0.29545454545454547, + "English,Vietnamese,Indonesian,Filipino": 0.22727272727272727, + "English,Vietnamese,Indonesian,Spanish": 0.32386363636363635, + "English,Vietnamese,Indonesian,Malay": 0.2727272727272727, + "English,Vietnamese,Filipino,Spanish": 0.2556818181818182, + "English,Vietnamese,Filipino,Malay": 0.23863636363636365, + "English,Vietnamese,Spanish,Malay": 0.3125, + "English,Chinese,Indonesian,Filipino": 0.23295454545454544, + "English,Chinese,Indonesian,Spanish": 0.32954545454545453, + "English,Chinese,Indonesian,Malay": 0.2840909090909091, + "English,Chinese,Filipino,Spanish": 0.26136363636363635, + "English,Chinese,Filipino,Malay": 0.25, + "English,Chinese,Spanish,Malay": 0.32386363636363635, + "English,Indonesian,Filipino,Spanish": 0.2727272727272727, + "English,Indonesian,Filipino,Malay": 0.26136363636363635, + "English,Indonesian,Spanish,Malay": 0.3352272727272727, + "English,Filipino,Spanish,Malay": 0.2784090909090909, + "Vietnamese,Chinese,Indonesian,Filipino": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.29545454545454547, + "Vietnamese,Chinese,Indonesian,Malay": 0.2727272727272727, + "Vietnamese,Chinese,Filipino,Spanish": 0.2215909090909091, + "Vietnamese,Chinese,Filipino,Malay": 0.23863636363636365, + "Vietnamese,Chinese,Spanish,Malay": 0.2897727272727273, + "Vietnamese,Indonesian,Filipino,Spanish": 0.25, + "Vietnamese,Indonesian,Filipino,Malay": 0.2727272727272727, + "Vietnamese,Indonesian,Spanish,Malay": 0.2784090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.25, + "Chinese,Indonesian,Filipino,Spanish": 0.23863636363636365, + "Chinese,Indonesian,Filipino,Malay": 0.2556818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.2840909090909091, + "Chinese,Filipino,Spanish,Malay": 0.2556818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.2840909090909091 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.18181818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.26136363636363635, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.22727272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.19886363636363635, + "English,Vietnamese,Chinese,Filipino,Malay": 0.19886363636363635, + "English,Vietnamese,Chinese,Spanish,Malay": 0.2556818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.19318181818181818, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.19318181818181818, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.23863636363636365, + "English,Vietnamese,Filipino,Spanish,Malay": 0.21022727272727273, + "English,Chinese,Indonesian,Filipino,Spanish": 0.20454545454545456, + "English,Chinese,Indonesian,Filipino,Malay": 0.19886363636363635, + "English,Chinese,Indonesian,Spanish,Malay": 0.2556818181818182, + "English,Chinese,Filipino,Spanish,Malay": 0.2215909090909091, + "English,Indonesian,Filipino,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.17613636363636365, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.2215909090909091, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.19886363636363635, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.20454545454545456, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.20454545454545456 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.1590909090909091, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.1590909090909091, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.20454545454545456, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.17613636363636365, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1590909090909091 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456 + } + }, + "AC3_2": 0.4721487735620171, + "AC3_3": 0.3901756274315139, + "AC3_4": 0.328857167336257, + "AC3_5": 0.2813517231674047, + "AC3_6": 0.2433809284223863, + "AC3_7": 0.2117242291535095 + }, + "prompt_4": { + "overall_acc": 0.4269480519480519, + "language_acc": { + "English": 0.5056818181818182, + "Vietnamese": 0.4090909090909091, + "Chinese": 0.4602272727272727, + "Indonesian": 0.42045454545454547, + "Filipino": 0.32954545454545453, + "Spanish": 0.4943181818181818, + "Malay": 0.3693181818181818 + }, + "consistency_score_2": 0.568181818181818, + "consistency_score_3": 0.3933441558441559, + "consistency_score_4": 0.29512987012987013, + "consistency_score_5": 0.23079004329004324, + "consistency_score_6": 0.18425324675324672, + "consistency_score_7": 0.14772727272727273, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.6136363636363636, + "English,Chinese": 0.625, + "English,Indonesian": 0.5965909090909091, + "English,Filipino": 0.48295454545454547, + "English,Spanish": 0.75, + "English,Malay": 0.5397727272727273, + "Vietnamese,Chinese": 0.5340909090909091, + "Vietnamese,Indonesian": 0.5909090909090909, + "Vietnamese,Filipino": 0.48863636363636365, + "Vietnamese,Spanish": 0.6477272727272727, + "Vietnamese,Malay": 0.5511363636363636, + "Chinese,Indonesian": 0.5227272727272727, + "Chinese,Filipino": 0.4772727272727273, + "Chinese,Spanish": 0.6363636363636364, + "Chinese,Malay": 0.4943181818181818, + "Indonesian,Filipino": 0.5397727272727273, + "Indonesian,Spanish": 0.6590909090909091, + "Indonesian,Malay": 0.5965909090909091, + "Filipino,Spanish": 0.5397727272727273, + "Filipino,Malay": 0.5113636363636364, + "Spanish,Malay": 0.5340909090909091 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.4318181818181818, + "English,Vietnamese,Indonesian": 0.42613636363636365, + "English,Vietnamese,Filipino": 0.3352272727272727, + "English,Vietnamese,Spanish": 0.5227272727272727, + "English,Vietnamese,Malay": 0.3977272727272727, + "English,Chinese,Indonesian": 0.42045454545454547, + "English,Chinese,Filipino": 0.3352272727272727, + "English,Chinese,Spanish": 0.5227272727272727, + "English,Chinese,Malay": 0.36363636363636365, + "English,Indonesian,Filipino": 0.3522727272727273, + "English,Indonesian,Spanish": 0.5227272727272727, + "English,Indonesian,Malay": 0.4090909090909091, + "English,Filipino,Spanish": 0.42045454545454547, + "English,Filipino,Malay": 0.32954545454545453, + "English,Spanish,Malay": 0.4375, + "Vietnamese,Chinese,Indonesian": 0.36363636363636365, + "Vietnamese,Chinese,Filipino": 0.29545454545454547, + "Vietnamese,Chinese,Spanish": 0.44886363636363635, + "Vietnamese,Chinese,Malay": 0.3409090909090909, + "Vietnamese,Indonesian,Filipino": 0.3409090909090909, + "Vietnamese,Indonesian,Spanish": 0.4772727272727273, + "Vietnamese,Indonesian,Malay": 0.3977272727272727, + "Vietnamese,Filipino,Spanish": 0.3806818181818182, + "Vietnamese,Filipino,Malay": 0.32954545454545453, + "Vietnamese,Spanish,Malay": 0.4034090909090909, + "Chinese,Indonesian,Filipino": 0.32954545454545453, + "Chinese,Indonesian,Spanish": 0.4375, + "Chinese,Indonesian,Malay": 0.3522727272727273, + "Chinese,Filipino,Spanish": 0.3693181818181818, + "Chinese,Filipino,Malay": 0.32386363636363635, + "Chinese,Spanish,Malay": 0.38636363636363635, + "Indonesian,Filipino,Spanish": 0.4090909090909091, + "Indonesian,Filipino,Malay": 0.3806818181818182, + "Indonesian,Spanish,Malay": 0.42045454545454547, + "Filipino,Spanish,Malay": 0.3522727272727273 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.32954545454545453, + "English,Vietnamese,Chinese,Filipino": 0.23863636363636365, + "English,Vietnamese,Chinese,Spanish": 0.39204545454545453, + "English,Vietnamese,Chinese,Malay": 0.2897727272727273, + "English,Vietnamese,Indonesian,Filipino": 0.25, + "English,Vietnamese,Indonesian,Spanish": 0.38636363636363635, + "English,Vietnamese,Indonesian,Malay": 0.3068181818181818, + "English,Vietnamese,Filipino,Spanish": 0.3068181818181818, + "English,Vietnamese,Filipino,Malay": 0.2556818181818182, + "English,Vietnamese,Spanish,Malay": 0.3409090909090909, + "English,Chinese,Indonesian,Filipino": 0.2556818181818182, + "English,Chinese,Indonesian,Spanish": 0.3806818181818182, + "English,Chinese,Indonesian,Malay": 0.29545454545454547, + "English,Chinese,Filipino,Spanish": 0.30113636363636365, + "English,Chinese,Filipino,Malay": 0.2556818181818182, + "English,Chinese,Spanish,Malay": 0.3181818181818182, + "English,Indonesian,Filipino,Spanish": 0.3068181818181818, + "English,Indonesian,Filipino,Malay": 0.2840909090909091, + "English,Indonesian,Spanish,Malay": 0.35795454545454547, + "English,Filipino,Spanish,Malay": 0.29545454545454547, + "Vietnamese,Chinese,Indonesian,Filipino": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.32954545454545453, + "Vietnamese,Chinese,Indonesian,Malay": 0.26704545454545453, + "Vietnamese,Chinese,Filipino,Spanish": 0.26136363636363635, + "Vietnamese,Chinese,Filipino,Malay": 0.23295454545454544, + "Vietnamese,Chinese,Spanish,Malay": 0.3068181818181818, + "Vietnamese,Indonesian,Filipino,Spanish": 0.2897727272727273, + "Vietnamese,Indonesian,Filipino,Malay": 0.2727272727272727, + "Vietnamese,Indonesian,Spanish,Malay": 0.32386363636363635, + "Vietnamese,Filipino,Spanish,Malay": 0.2784090909090909, + "Chinese,Indonesian,Filipino,Spanish": 0.2840909090909091, + "Chinese,Indonesian,Filipino,Malay": 0.2556818181818182, + "Chinese,Indonesian,Spanish,Malay": 0.29545454545454547, + "Chinese,Filipino,Spanish,Malay": 0.26704545454545453, + "Indonesian,Filipino,Spanish,Malay": 0.30113636363636365 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.1875, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.30113636363636365, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.23863636363636365, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.2215909090909091, + "English,Vietnamese,Chinese,Filipino,Malay": 0.19886363636363635, + "English,Vietnamese,Chinese,Spanish,Malay": 0.26704545454545453, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.22727272727272727, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.2159090909090909, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.2784090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.23863636363636365, + "English,Chinese,Indonesian,Filipino,Spanish": 0.22727272727272727, + "English,Chinese,Indonesian,Filipino,Malay": 0.2159090909090909, + "English,Chinese,Indonesian,Spanish,Malay": 0.26136363636363635, + "English,Chinese,Filipino,Spanish,Malay": 0.22727272727272727, + "English,Indonesian,Filipino,Spanish,Malay": 0.25, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.23863636363636365, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.23295454545454544, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.2215909090909091 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.17045454545454544, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.16477272727272727, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.2159090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.18181818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.19886363636363635, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.14772727272727273 + } + }, + "AC3_2": 0.48754263675954923, + "AC3_3": 0.4094577992258131, + "AC3_4": 0.34900699565380583, + "AC3_5": 0.29961883032194053, + "AC3_6": 0.2574162225135483, + "AC3_7": 0.21950436565261014 + }, + "prompt_5": { + "overall_acc": 0.4261363636363636, + "language_acc": { + "English": 0.5170454545454546, + "Vietnamese": 0.42613636363636365, + "Chinese": 0.4715909090909091, + "Indonesian": 0.42613636363636365, + "Filipino": 0.3352272727272727, + "Spanish": 0.4659090909090909, + "Malay": 0.3409090909090909 + }, + "consistency_score_2": 0.5627705627705626, + "consistency_score_3": 0.38538961038961034, + "consistency_score_4": 0.2892857142857143, + "consistency_score_5": 0.22970779220779214, + "consistency_score_6": 0.18912337662337664, + "consistency_score_7": 0.1590909090909091, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.6420454545454546, + "English,Chinese": 0.6193181818181818, + "English,Indonesian": 0.5795454545454546, + "English,Filipino": 0.48863636363636365, + "English,Spanish": 0.7386363636363636, + "English,Malay": 0.5284090909090909, + "Vietnamese,Chinese": 0.5397727272727273, + "Vietnamese,Indonesian": 0.5454545454545454, + "Vietnamese,Filipino": 0.45454545454545453, + "Vietnamese,Spanish": 0.625, + "Vietnamese,Malay": 0.4943181818181818, + "Chinese,Indonesian": 0.5568181818181818, + "Chinese,Filipino": 0.48295454545454547, + "Chinese,Spanish": 0.6477272727272727, + "Chinese,Malay": 0.5738636363636364, + "Indonesian,Filipino": 0.5568181818181818, + "Indonesian,Spanish": 0.6136363636363636, + "Indonesian,Malay": 0.5397727272727273, + "Filipino,Spanish": 0.5454545454545454, + "Filipino,Malay": 0.5397727272727273, + "Spanish,Malay": 0.5056818181818182 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.44886363636363635, + "English,Vietnamese,Indonesian": 0.42613636363636365, + "English,Vietnamese,Filipino": 0.3352272727272727, + "English,Vietnamese,Spanish": 0.5284090909090909, + "English,Vietnamese,Malay": 0.3693181818181818, + "English,Chinese,Indonesian": 0.42613636363636365, + "English,Chinese,Filipino": 0.3409090909090909, + "English,Chinese,Spanish": 0.5284090909090909, + "English,Chinese,Malay": 0.38636363636363635, + "English,Indonesian,Filipino": 0.3522727272727273, + "English,Indonesian,Spanish": 0.48295454545454547, + "English,Indonesian,Malay": 0.375, + "English,Filipino,Spanish": 0.4147727272727273, + "English,Filipino,Malay": 0.3409090909090909, + "English,Spanish,Malay": 0.4147727272727273, + "Vietnamese,Chinese,Indonesian": 0.36363636363636365, + "Vietnamese,Chinese,Filipino": 0.2784090909090909, + "Vietnamese,Chinese,Spanish": 0.4375, + "Vietnamese,Chinese,Malay": 0.3409090909090909, + "Vietnamese,Indonesian,Filipino": 0.3181818181818182, + "Vietnamese,Indonesian,Spanish": 0.42613636363636365, + "Vietnamese,Indonesian,Malay": 0.32954545454545453, + "Vietnamese,Filipino,Spanish": 0.35795454545454547, + "Vietnamese,Filipino,Malay": 0.30113636363636365, + "Vietnamese,Spanish,Malay": 0.3465909090909091, + "Chinese,Indonesian,Filipino": 0.3465909090909091, + "Chinese,Indonesian,Spanish": 0.4375, + "Chinese,Indonesian,Malay": 0.3806818181818182, + "Chinese,Filipino,Spanish": 0.375, + "Chinese,Filipino,Malay": 0.35795454545454547, + "Chinese,Spanish,Malay": 0.4090909090909091, + "Indonesian,Filipino,Spanish": 0.4034090909090909, + "Indonesian,Filipino,Malay": 0.3806818181818182, + "Indonesian,Spanish,Malay": 0.3693181818181818, + "Filipino,Spanish,Malay": 0.35795454545454547 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.3352272727272727, + "English,Vietnamese,Chinese,Filipino": 0.23863636363636365, + "English,Vietnamese,Chinese,Spanish": 0.38636363636363635, + "English,Vietnamese,Chinese,Malay": 0.2897727272727273, + "English,Vietnamese,Indonesian,Filipino": 0.25, + "English,Vietnamese,Indonesian,Spanish": 0.375, + "English,Vietnamese,Indonesian,Malay": 0.2727272727272727, + "English,Vietnamese,Filipino,Spanish": 0.30113636363636365, + "English,Vietnamese,Filipino,Malay": 0.25, + "English,Vietnamese,Spanish,Malay": 0.30113636363636365, + "English,Chinese,Indonesian,Filipino": 0.26136363636363635, + "English,Chinese,Indonesian,Spanish": 0.375, + "English,Chinese,Indonesian,Malay": 0.30113636363636365, + "English,Chinese,Filipino,Spanish": 0.3125, + "English,Chinese,Filipino,Malay": 0.2727272727272727, + "English,Chinese,Spanish,Malay": 0.3409090909090909, + "English,Indonesian,Filipino,Spanish": 0.3125, + "English,Indonesian,Filipino,Malay": 0.2840909090909091, + "English,Indonesian,Spanish,Malay": 0.32386363636363635, + "English,Filipino,Spanish,Malay": 0.30113636363636365, + "Vietnamese,Chinese,Indonesian,Filipino": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.3181818181818182, + "Vietnamese,Chinese,Indonesian,Malay": 0.24431818181818182, + "Vietnamese,Chinese,Filipino,Spanish": 0.26136363636363635, + "Vietnamese,Chinese,Filipino,Malay": 0.2215909090909091, + "Vietnamese,Chinese,Spanish,Malay": 0.2897727272727273, + "Vietnamese,Indonesian,Filipino,Spanish": 0.26136363636363635, + "Vietnamese,Indonesian,Filipino,Malay": 0.25, + "Vietnamese,Indonesian,Spanish,Malay": 0.26136363636363635, + "Vietnamese,Filipino,Spanish,Malay": 0.2556818181818182, + "Chinese,Indonesian,Filipino,Spanish": 0.29545454545454547, + "Chinese,Indonesian,Filipino,Malay": 0.2784090909090909, + "Chinese,Indonesian,Spanish,Malay": 0.3068181818181818, + "Chinese,Filipino,Spanish,Malay": 0.2897727272727273, + "Indonesian,Filipino,Spanish,Malay": 0.2897727272727273 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.19318181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.29545454545454547, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.22727272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.22727272727272727, + "English,Vietnamese,Chinese,Filipino,Malay": 0.19886363636363635, + "English,Vietnamese,Chinese,Spanish,Malay": 0.2556818181818182, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.22727272727272727, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.2159090909090909, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.24431818181818182, + "English,Vietnamese,Filipino,Spanish,Malay": 0.22727272727272727, + "English,Chinese,Indonesian,Filipino,Spanish": 0.24431818181818182, + "English,Chinese,Indonesian,Filipino,Malay": 0.22727272727272727, + "English,Chinese,Indonesian,Spanish,Malay": 0.2727272727272727, + "English,Chinese,Filipino,Spanish,Malay": 0.25, + "English,Indonesian,Filipino,Spanish,Malay": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.21022727272727273, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.23863636363636365 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.18181818181818182, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.17045454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.20454545454545456, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.1875, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.19886363636363635, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1590909090909091 + } + }, + "AC3_2": 0.48501430165472265, + "AC3_3": 0.40474003886700033, + "AC3_4": 0.3446221919191542, + "AC3_5": 0.29850641309579923, + "AC3_6": 0.26197829211381235, + "AC3_7": 0.23168578989862806 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5631067961165048 + }, + "prompt_2": { + "accuracy": 0.5242718446601942 + }, + "prompt_3": { + "accuracy": 0.5436893203883495 + }, + "prompt_4": { + "accuracy": 0.5728155339805825 + }, + "prompt_5": { + "accuracy": 0.5728155339805825 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3333333333333333 + }, + "prompt_2": { + "accuracy": 0.3238095238095238 + }, + "prompt_3": { + "accuracy": 0.3142857142857143 + }, + "prompt_4": { + "accuracy": 0.3333333333333333 + }, + "prompt_5": { + "accuracy": 0.3523809523809524 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5981308411214953 + }, + "prompt_2": { + "accuracy": 0.6074766355140186 + }, + "prompt_3": { + "accuracy": 0.5981308411214953 + }, + "prompt_4": { + "accuracy": 0.6448598130841121 + }, + "prompt_5": { + "accuracy": 0.6448598130841121 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.52, + "category_acc": { + "brand": 0.6, + "demographics": 0.2, + "biology": 0.3, + "history": 0.3333333333333333, + "literature": 0.4, + "politics": 0.8, + "culture": 0.7, + "film": 0.5, + "law": 0.5, + "geography": 0.8 + } + }, + "prompt_2": { + "accuracy": 0.53, + "category_acc": { + "brand": 0.6, + "demographics": 0.2, + "biology": 0.3, + "history": 0.26666666666666666, + "literature": 0.4, + "politics": 0.9, + "culture": 0.7, + "film": 0.6, + "law": 0.5, + "geography": 0.8 + } + }, + "prompt_3": { + "accuracy": 0.53, + "category_acc": { + "brand": 0.6, + "demographics": 0.2, + "biology": 0.3, + "history": 0.3333333333333333, + "literature": 0.5, + "politics": 0.8, + "culture": 0.7, + "film": 0.6, + "law": 0.4, + "geography": 0.8 + } + }, + "prompt_4": { + "accuracy": 0.52, + "category_acc": { + "brand": 0.6, + "demographics": 0.4, + "biology": 0.3, + "history": 0.26666666666666666, + "literature": 0.5, + "politics": 0.7, + "culture": 0.7, + "film": 0.5, + "law": 0.5, + "geography": 0.8 + } + }, + "prompt_5": { + "accuracy": 0.55, + "category_acc": { + "brand": 0.6, + "demographics": 0.6, + "biology": 0.3, + "history": 0.26666666666666666, + "literature": 0.5, + "politics": 0.8, + "culture": 0.7, + "film": 0.6, + "law": 0.5, + "geography": 0.8 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.2821064910608106 + }, + "prompt_2": { + "bleu_score": 0.2756306866130451 + }, + "prompt_3": { + "bleu_score": 0.2815262175028489 + }, + "prompt_4": { + "bleu_score": 0.2816407927506464 + }, + "prompt_5": { + "bleu_score": 0.2463115071916279 + } }, "indommlu": { "prompt_1": -1, @@ -8848,179 +77647,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.34141331543740916 + }, + "prompt_2": { + "bleu_score": 0.3449912984700076 + }, + "prompt_3": { + "bleu_score": 0.3445615848807213 + }, + "prompt_4": { + "bleu_score": 0.340986680964171 + }, + "prompt_5": { + "bleu_score": 0.32412330377534115 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.2772482581672767 + }, + "prompt_2": { + "bleu_score": 0.28219932675261594 + }, + "prompt_3": { + "bleu_score": 0.2812695182267722 + }, + "prompt_4": { + "bleu_score": 0.27492909142296673 + }, + "prompt_5": { + "bleu_score": 0.2636925084347246 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.21730914527444836 + }, + "prompt_2": { + "bleu_score": 0.21986784265716008 + }, + "prompt_3": { + "bleu_score": 0.22271539895507222 + }, + "prompt_4": { + "bleu_score": 0.21887672629086805 + }, + "prompt_5": { + "bleu_score": 0.2105616703617541 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.3256610646918388 + }, + "prompt_2": { + "bleu_score": 0.3320879523973804 + }, + "prompt_3": { + "bleu_score": 0.32982324910563166 + }, + "prompt_4": { + "bleu_score": 0.32845712754631945 + }, + "prompt_5": { + "bleu_score": 0.3050090244182611 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5705950991831972 + }, + "prompt_2": { + "accuracy": 0.5530921820303384 + }, + "prompt_3": { + "accuracy": 0.5659276546091015 + }, + "prompt_4": { + "accuracy": 0.5659276546091015 + }, + "prompt_5": { + "accuracy": 0.5577596266044341 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5410797282803004, + "category_acc": { + "high_school_european_history": 0.7073170731707317, + "business_ethics": 0.5454545454545454, + "clinical_knowledge": 0.5795454545454546, + "medical_genetics": 0.5656565656565656, + "high_school_us_history": 0.7487684729064039, + "high_school_physics": 0.32, + "high_school_world_history": 0.7457627118644068, + "virology": 0.41818181818181815, + "high_school_microeconomics": 0.5527426160337553, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.42424242424242425, + "high_school_biology": 0.6504854368932039, + "abstract_algebra": 0.3333333333333333, + "professional_accounting": 0.4199288256227758, + "philosophy": 0.6064516129032258, + "professional_medicine": 0.5498154981549815, + "nutrition": 0.5967213114754099, + "global_facts": 0.41414141414141414, + "machine_learning": 0.36936936936936937, + "security_studies": 0.5860655737704918, + "public_relations": 0.6972477064220184, + "professional_psychology": 0.5417348608837971, + "prehistory": 0.6160990712074303, + "anatomy": 0.5074626865671642, + "human_sexuality": 0.6307692307692307, + "college_medicine": 0.5348837209302325, + "high_school_government_and_politics": 0.7760416666666666, + "college_chemistry": 0.37373737373737376, + "logical_fallacies": 0.6851851851851852, + "high_school_geography": 0.7055837563451777, + "elementary_mathematics": 0.33156498673740054, + "human_aging": 0.6216216216216216, + "college_mathematics": 0.3838383838383838, + "high_school_psychology": 0.7628676470588235, + "formal_logic": 0.36, + "high_school_statistics": 0.4232558139534884, + "international_law": 0.6916666666666667, + "high_school_mathematics": 0.2899628252788104, + "high_school_computer_science": 0.5353535353535354, + "conceptual_physics": 0.3888888888888889, + "miscellaneous": 0.7710997442455243, + "high_school_chemistry": 0.44554455445544555, + "marketing": 0.8326180257510729, + "professional_law": 0.42139595564253096, + "management": 0.7156862745098039, + "college_physics": 0.24752475247524752, + "jurisprudence": 0.6542056074766355, + "world_religions": 0.7352941176470589, + "sociology": 0.775, + "us_foreign_policy": 0.7676767676767676, + "high_school_macroeconomics": 0.5475578406169666, + "computer_security": 0.6565656565656566, + "moral_scenarios": 0.24161073825503357, + "moral_disputes": 0.6144927536231884, + "electrical_engineering": 0.5069444444444444, + "astronomy": 0.5496688741721855, + "college_biology": 0.6013986013986014 + } + }, + "prompt_2": { + "accuracy": 0.5402216660707901, + "category_acc": { + "high_school_european_history": 0.6951219512195121, + "business_ethics": 0.5353535353535354, + "clinical_knowledge": 0.5833333333333334, + "medical_genetics": 0.5555555555555556, + "high_school_us_history": 0.7438423645320197, + "high_school_physics": 0.31333333333333335, + "high_school_world_history": 0.7372881355932204, + "virology": 0.41818181818181815, + "high_school_microeconomics": 0.5822784810126582, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.43434343434343436, + "high_school_biology": 0.6537216828478964, + "abstract_algebra": 0.30303030303030304, + "professional_accounting": 0.39501779359430605, + "philosophy": 0.6096774193548387, + "professional_medicine": 0.5535055350553506, + "nutrition": 0.5868852459016394, + "global_facts": 0.42424242424242425, + "machine_learning": 0.36036036036036034, + "security_studies": 0.5778688524590164, + "public_relations": 0.6880733944954128, + "professional_psychology": 0.5548281505728314, + "prehistory": 0.6068111455108359, + "anatomy": 0.5223880597014925, + "human_sexuality": 0.6461538461538462, + "college_medicine": 0.563953488372093, + "high_school_government_and_politics": 0.7864583333333334, + "college_chemistry": 0.3939393939393939, + "logical_fallacies": 0.6851851851851852, + "high_school_geography": 0.700507614213198, + "elementary_mathematics": 0.3050397877984085, + "human_aging": 0.6036036036036037, + "college_mathematics": 0.42424242424242425, + "high_school_psychology": 0.7628676470588235, + "formal_logic": 0.32, + "high_school_statistics": 0.4418604651162791, + "international_law": 0.7083333333333334, + "high_school_mathematics": 0.3048327137546468, + "high_school_computer_science": 0.5454545454545454, + "conceptual_physics": 0.405982905982906, + "miscellaneous": 0.7647058823529411, + "high_school_chemistry": 0.43564356435643564, + "marketing": 0.8369098712446352, + "professional_law": 0.4155251141552511, + "management": 0.7254901960784313, + "college_physics": 0.21782178217821782, + "jurisprudence": 0.6542056074766355, + "world_religions": 0.7588235294117647, + "sociology": 0.77, + "us_foreign_policy": 0.7878787878787878, + "high_school_macroeconomics": 0.5449871465295629, + "computer_security": 0.6262626262626263, + "moral_scenarios": 0.24496644295302014, + "moral_disputes": 0.6115942028985507, + "electrical_engineering": 0.4930555555555556, + "astronomy": 0.5629139072847682, + "college_biology": 0.5874125874125874 + } + }, + "prompt_3": { + "accuracy": 0.5406506971755453, + "category_acc": { + "high_school_european_history": 0.7195121951219512, + "business_ethics": 0.5555555555555556, + "clinical_knowledge": 0.5833333333333334, + "medical_genetics": 0.5757575757575758, + "high_school_us_history": 0.7438423645320197, + "high_school_physics": 0.32666666666666666, + "high_school_world_history": 0.7372881355932204, + "virology": 0.4121212121212121, + "high_school_microeconomics": 0.5654008438818565, + "econometrics": 0.26548672566371684, + "college_computer_science": 0.45454545454545453, + "high_school_biology": 0.6537216828478964, + "abstract_algebra": 0.35353535353535354, + "professional_accounting": 0.4092526690391459, + "philosophy": 0.6129032258064516, + "professional_medicine": 0.5571955719557196, + "nutrition": 0.5836065573770491, + "global_facts": 0.42424242424242425, + "machine_learning": 0.3063063063063063, + "security_studies": 0.6065573770491803, + "public_relations": 0.6880733944954128, + "professional_psychology": 0.5450081833060556, + "prehistory": 0.6006191950464397, + "anatomy": 0.5298507462686567, + "human_sexuality": 0.6538461538461539, + "college_medicine": 0.5406976744186046, + "high_school_government_and_politics": 0.8020833333333334, + "college_chemistry": 0.3838383838383838, + "logical_fallacies": 0.7037037037037037, + "high_school_geography": 0.7157360406091371, + "elementary_mathematics": 0.3129973474801061, + "human_aging": 0.5990990990990991, + "college_mathematics": 0.3838383838383838, + "high_school_psychology": 0.7426470588235294, + "formal_logic": 0.312, + "high_school_statistics": 0.4325581395348837, + "international_law": 0.7166666666666667, + "high_school_mathematics": 0.3048327137546468, + "high_school_computer_science": 0.5757575757575758, + "conceptual_physics": 0.41025641025641024, + "miscellaneous": 0.7672634271099744, + "high_school_chemistry": 0.400990099009901, + "marketing": 0.8326180257510729, + "professional_law": 0.41748206131767773, + "management": 0.7156862745098039, + "college_physics": 0.22772277227722773, + "jurisprudence": 0.6355140186915887, + "world_religions": 0.7470588235294118, + "sociology": 0.76, + "us_foreign_policy": 0.7878787878787878, + "high_school_macroeconomics": 0.5526992287917738, + "computer_security": 0.6464646464646465, + "moral_scenarios": 0.24049217002237136, + "moral_disputes": 0.6318840579710145, + "electrical_engineering": 0.5069444444444444, + "astronomy": 0.5562913907284768, + "college_biology": 0.5734265734265734 + } + }, + "prompt_4": { + "accuracy": 0.5447979978548445, + "category_acc": { + "high_school_european_history": 0.7134146341463414, + "business_ethics": 0.5454545454545454, + "clinical_knowledge": 0.5946969696969697, + "medical_genetics": 0.5252525252525253, + "high_school_us_history": 0.7635467980295566, + "high_school_physics": 0.34, + "high_school_world_history": 0.7330508474576272, + "virology": 0.3878787878787879, + "high_school_microeconomics": 0.5611814345991561, + "econometrics": 0.26548672566371684, + "college_computer_science": 0.42424242424242425, + "high_school_biology": 0.6763754045307443, + "abstract_algebra": 0.31313131313131315, + "professional_accounting": 0.40569395017793597, + "philosophy": 0.6225806451612903, + "professional_medicine": 0.5498154981549815, + "nutrition": 0.5967213114754099, + "global_facts": 0.40404040404040403, + "machine_learning": 0.34234234234234234, + "security_studies": 0.6229508196721312, + "public_relations": 0.6697247706422018, + "professional_psychology": 0.5499181669394435, + "prehistory": 0.5944272445820433, + "anatomy": 0.5223880597014925, + "human_sexuality": 0.6384615384615384, + "college_medicine": 0.5348837209302325, + "high_school_government_and_politics": 0.7604166666666666, + "college_chemistry": 0.3939393939393939, + "logical_fallacies": 0.6975308641975309, + "high_school_geography": 0.700507614213198, + "elementary_mathematics": 0.3421750663129973, + "human_aging": 0.6126126126126126, + "college_mathematics": 0.41414141414141414, + "high_school_psychology": 0.7738970588235294, + "formal_logic": 0.36, + "high_school_statistics": 0.4, + "international_law": 0.6833333333333333, + "high_school_mathematics": 0.30855018587360594, + "high_school_computer_science": 0.5555555555555556, + "conceptual_physics": 0.4188034188034188, + "miscellaneous": 0.7634271099744245, + "high_school_chemistry": 0.4801980198019802, + "marketing": 0.8326180257510729, + "professional_law": 0.4318330071754729, + "management": 0.7156862745098039, + "college_physics": 0.24752475247524752, + "jurisprudence": 0.6355140186915887, + "world_religions": 0.7588235294117647, + "sociology": 0.755, + "us_foreign_policy": 0.8080808080808081, + "high_school_macroeconomics": 0.5501285347043702, + "computer_security": 0.6464646464646465, + "moral_scenarios": 0.23937360178970918, + "moral_disputes": 0.6202898550724638, + "electrical_engineering": 0.5, + "astronomy": 0.5827814569536424, + "college_biology": 0.5804195804195804 + } + }, + "prompt_5": { + "accuracy": 0.5400786557025384, + "category_acc": { + "high_school_european_history": 0.7134146341463414, + "business_ethics": 0.5656565656565656, + "clinical_knowledge": 0.5681818181818182, + "medical_genetics": 0.5353535353535354, + "high_school_us_history": 0.7586206896551724, + "high_school_physics": 0.32, + "high_school_world_history": 0.7457627118644068, + "virology": 0.40606060606060607, + "high_school_microeconomics": 0.5569620253164557, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.4444444444444444, + "high_school_biology": 0.6537216828478964, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.40213523131672596, + "philosophy": 0.6161290322580645, + "professional_medicine": 0.5461254612546126, + "nutrition": 0.5868852459016394, + "global_facts": 0.3939393939393939, + "machine_learning": 0.34234234234234234, + "security_studies": 0.6024590163934426, + "public_relations": 0.6880733944954128, + "professional_psychology": 0.5319148936170213, + "prehistory": 0.5789473684210527, + "anatomy": 0.5223880597014925, + "human_sexuality": 0.6538461538461539, + "college_medicine": 0.5174418604651163, + "high_school_government_and_politics": 0.7395833333333334, + "college_chemistry": 0.41414141414141414, + "logical_fallacies": 0.6975308641975309, + "high_school_geography": 0.6954314720812182, + "elementary_mathematics": 0.32360742705570295, + "human_aging": 0.6216216216216216, + "college_mathematics": 0.43434343434343436, + "high_school_psychology": 0.7702205882352942, + "formal_logic": 0.36, + "high_school_statistics": 0.413953488372093, + "international_law": 0.6916666666666667, + "high_school_mathematics": 0.31970260223048325, + "high_school_computer_science": 0.5151515151515151, + "conceptual_physics": 0.39316239316239315, + "miscellaneous": 0.768542199488491, + "high_school_chemistry": 0.47029702970297027, + "marketing": 0.8197424892703863, + "professional_law": 0.4292237442922374, + "management": 0.7254901960784313, + "college_physics": 0.22772277227722773, + "jurisprudence": 0.6448598130841121, + "world_religions": 0.7470588235294118, + "sociology": 0.745, + "us_foreign_policy": 0.7777777777777778, + "high_school_macroeconomics": 0.5347043701799485, + "computer_security": 0.6161616161616161, + "moral_scenarios": 0.23937360178970918, + "moral_disputes": 0.6144927536231884, + "electrical_engineering": 0.5069444444444444, + "astronomy": 0.5960264900662252, + "college_biology": 0.5944055944055944 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.40193164933135217 + }, + "prompt_2": { + "accuracy": 0.4078751857355126 + }, + "prompt_3": { + "accuracy": 0.4145616641901932 + }, + "prompt_4": { + "accuracy": 0.40193164933135217 + }, + "prompt_5": { + "accuracy": 0.3974739970282318 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.40473225404732255, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.35714285714285715, + "college_physics": 0.375, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.391304347826087, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.6206896551724138, + "high_school_mathematics": 0.391304347826087, + "high_school_physics": 0.375, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.5, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.35714285714285715, + "college_economics": 0.38333333333333336, + "business_administration": 0.39473684210526316, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.4482758620689655, + "education_science": 0.35294117647058826, + "teacher_qualification": 0.6938775510204082, + "high_school_politics": 0.5833333333333334, + "high_school_geography": 0.25, + "middle_school_politics": 0.5384615384615384, + "middle_school_geography": 0.5294117647058824, + "modern_chinese_history": 0.5, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.48148148148148145, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.39473684210526316, + "professional_tour_guide": 0.5, + "legal_professional": 0.5357142857142857, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.44, + "middle_school_history": 0.4074074074074074, + "civil_servant": 0.5192307692307693, + "sports_science": 0.3333333333333333, + "plant_protection": 0.4444444444444444, + "basic_medicine": 0.5, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.3888888888888889, + "fire_engineer": 0.5277777777777778, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.3333333333333333, + "physician": 0.3888888888888889 + } + }, + "prompt_2": { + "accuracy": 0.40099626400996263, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.30952380952380953, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.391304347826087, + "discrete_mathematics": 0.09523809523809523, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.5862068965517241, + "high_school_mathematics": 0.391304347826087, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.25, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.5, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.35714285714285715, + "college_economics": 0.3333333333333333, + "business_administration": 0.5, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.4482758620689655, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.673469387755102, + "high_school_politics": 0.625, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.5769230769230769, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.4583333333333333, + "logic": 0.5185185185185185, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.4473684210526316, + "professional_tour_guide": 0.47058823529411764, + "legal_professional": 0.4642857142857143, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.48, + "middle_school_history": 0.4074074074074074, + "civil_servant": 0.46153846153846156, + "sports_science": 0.3333333333333333, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.5416666666666666, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.39215686274509803, + "accountant": 0.37037037037037035, + "fire_engineer": 0.5277777777777778, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.3333333333333333, + "physician": 0.4444444444444444 + } + }, + "prompt_3": { + "accuracy": 0.40846824408468246, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.35714285714285715, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.43478260869565216, + "discrete_mathematics": 0.38095238095238093, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.5862068965517241, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.25, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.5769230769230769, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.35714285714285715, + "college_economics": 0.35, + "business_administration": 0.47368421052631576, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.4482758620689655, + "education_science": 0.4411764705882353, + "teacher_qualification": 0.6938775510204082, + "high_school_politics": 0.5833333333333334, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.5384615384615384, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.5, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.5555555555555556, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.5294117647058824, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.375, + "high_school_history": 0.48, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.4807692307692308, + "sports_science": 0.3333333333333333, + "plant_protection": 0.4444444444444444, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.3333333333333333, + "accountant": 0.3888888888888889, + "fire_engineer": 0.5277777777777778, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.35185185185185186, + "physician": 0.4444444444444444 + } + }, + "prompt_4": { + "accuracy": 0.3972602739726027, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.38095238095238093, + "college_physics": 0.4166666666666667, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.391304347826087, + "discrete_mathematics": 0.09523809523809523, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.5862068965517241, + "high_school_mathematics": 0.391304347826087, + "high_school_physics": 0.375, + "high_school_chemistry": 0.16666666666666666, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.5, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.48, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.35, + "business_administration": 0.42105263157894735, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.4482758620689655, + "education_science": 0.38235294117647056, + "teacher_qualification": 0.6530612244897959, + "high_school_politics": 0.5416666666666666, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.5769230769230769, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.5, + "ideological_and_moral_cultivation": 0.4583333333333333, + "logic": 0.5925925925925926, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.5, + "legal_professional": 0.4642857142857143, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.48, + "middle_school_history": 0.48148148148148145, + "civil_servant": 0.5192307692307693, + "sports_science": 0.3333333333333333, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.3888888888888889, + "fire_engineer": 0.5555555555555556, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.3333333333333333, + "physician": 0.37037037037037035 + } + }, + "prompt_5": { + "accuracy": 0.40971357409713577, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.38095238095238093, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.43478260869565216, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.6206896551724138, + "high_school_mathematics": 0.391304347826087, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.25, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.5, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.48, + "veterinary_medicine": 0.35714285714285715, + "college_economics": 0.36666666666666664, + "business_administration": 0.39473684210526316, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.4827586206896552, + "education_science": 0.38235294117647056, + "teacher_qualification": 0.6938775510204082, + "high_school_politics": 0.5416666666666666, + "high_school_geography": 0.25, + "middle_school_politics": 0.5, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.48148148148148145, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.47368421052631576, + "professional_tour_guide": 0.5, + "legal_professional": 0.4642857142857143, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.52, + "middle_school_history": 0.5185185185185185, + "civil_servant": 0.46153846153846156, + "sports_science": 0.3333333333333333, + "plant_protection": 0.4444444444444444, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.39215686274509803, + "accountant": 0.37037037037037035, + "fire_engineer": 0.6111111111111112, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.3148148148148148, + "physician": 0.46296296296296297 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.37992831541218636 + }, + "prompt_2": { + "accuracy": 0.36917562724014336 + }, + "prompt_3": { + "accuracy": 0.3763440860215054 + }, + "prompt_4": { + "accuracy": 0.3978494623655914 + }, + "prompt_5": { + "accuracy": 0.3763440860215054 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.40148506302883785, + "category_acc": { + "agronomy": 0.35502958579881655, + "anatomy": 0.2702702702702703, + "ancient_chinese": 0.27439024390243905, + "arts": 0.44375, + "astronomy": 0.2787878787878788, + "business_ethics": 0.46411483253588515, + "chinese_civil_service_exam": 0.3625, + "chinese_driving_rule": 0.5877862595419847, + "chinese_food_culture": 0.34558823529411764, + "chinese_foreign_policy": 0.514018691588785, + "chinese_history": 0.48297213622291024, + "chinese_literature": 0.29901960784313725, + "chinese_teacher_qualification": 0.49162011173184356, + "clinical_knowledge": 0.3333333333333333, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.45794392523364486, + "college_engineering_hydrology": 0.3867924528301887, + "college_law": 0.2962962962962963, + "college_mathematics": 0.17142857142857143, + "college_medical_statistics": 0.330188679245283, + "college_medicine": 0.31868131868131866, + "computer_science": 0.4068627450980392, + "computer_security": 0.43859649122807015, + "conceptual_physics": 0.3469387755102041, + "construction_project_management": 0.3381294964028777, + "economics": 0.44654088050314467, + "education": 0.44171779141104295, + "electrical_engineering": 0.38953488372093026, + "elementary_chinese": 0.29365079365079366, + "elementary_commonsense": 0.35353535353535354, + "elementary_information_and_technology": 0.634453781512605, + "elementary_mathematics": 0.3217391304347826, + "ethnology": 0.4, + "food_science": 0.46153846153846156, + "genetics": 0.375, + "global_facts": 0.40268456375838924, + "high_school_biology": 0.30177514792899407, + "high_school_chemistry": 0.30303030303030304, + "high_school_geography": 0.3813559322033898, + "high_school_mathematics": 0.2682926829268293, + "high_school_physics": 0.3181818181818182, + "high_school_politics": 0.46853146853146854, + "human_sexuality": 0.42857142857142855, + "international_law": 0.41081081081081083, + "journalism": 0.48255813953488375, + "jurisprudence": 0.41605839416058393, + "legal_and_moral_basis": 0.7149532710280374, + "logical": 0.4065040650406504, + "machine_learning": 0.32786885245901637, + "management": 0.43333333333333335, + "marketing": 0.49444444444444446, + "marxist_theory": 0.5026455026455027, + "modern_chinese": 0.3017241379310345, + "nutrition": 0.4068965517241379, + "philosophy": 0.4666666666666667, + "professional_accounting": 0.4228571428571429, + "professional_law": 0.35545023696682465, + "professional_medicine": 0.2978723404255319, + "professional_psychology": 0.4353448275862069, + "public_relations": 0.4942528735632184, + "security_study": 0.43703703703703706, + "sociology": 0.45132743362831856, + "sports_science": 0.4666666666666667, + "traditional_chinese_medicine": 0.34594594594594597, + "virology": 0.46745562130177515, + "world_history": 0.4658385093167702, + "world_religions": 0.4375 + } + }, + "prompt_2": { + "accuracy": 0.40390260749438783, + "category_acc": { + "agronomy": 0.39644970414201186, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.27439024390243905, + "arts": 0.46875, + "astronomy": 0.24848484848484848, + "business_ethics": 0.45454545454545453, + "chinese_civil_service_exam": 0.35, + "chinese_driving_rule": 0.5801526717557252, + "chinese_food_culture": 0.3602941176470588, + "chinese_foreign_policy": 0.5046728971962616, + "chinese_history": 0.46439628482972134, + "chinese_literature": 0.3382352941176471, + "chinese_teacher_qualification": 0.4748603351955307, + "clinical_knowledge": 0.3670886075949367, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.48598130841121495, + "college_engineering_hydrology": 0.3867924528301887, + "college_law": 0.3148148148148148, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.32075471698113206, + "college_medicine": 0.326007326007326, + "computer_science": 0.4068627450980392, + "computer_security": 0.4444444444444444, + "conceptual_physics": 0.3469387755102041, + "construction_project_management": 0.3597122302158273, + "economics": 0.41509433962264153, + "education": 0.4539877300613497, + "electrical_engineering": 0.3953488372093023, + "elementary_chinese": 0.2976190476190476, + "elementary_commonsense": 0.3434343434343434, + "elementary_information_and_technology": 0.6218487394957983, + "elementary_mathematics": 0.32608695652173914, + "ethnology": 0.4148148148148148, + "food_science": 0.46853146853146854, + "genetics": 0.3806818181818182, + "global_facts": 0.3959731543624161, + "high_school_biology": 0.30177514792899407, + "high_school_chemistry": 0.30303030303030304, + "high_school_geography": 0.4406779661016949, + "high_school_mathematics": 0.22560975609756098, + "high_school_physics": 0.34545454545454546, + "high_school_politics": 0.46853146853146854, + "human_sexuality": 0.3968253968253968, + "international_law": 0.43243243243243246, + "journalism": 0.46511627906976744, + "jurisprudence": 0.41362530413625304, + "legal_and_moral_basis": 0.7149532710280374, + "logical": 0.4065040650406504, + "machine_learning": 0.30327868852459017, + "management": 0.49047619047619045, + "marketing": 0.49444444444444446, + "marxist_theory": 0.48677248677248675, + "modern_chinese": 0.31896551724137934, + "nutrition": 0.4, + "philosophy": 0.49523809523809526, + "professional_accounting": 0.4, + "professional_law": 0.38388625592417064, + "professional_medicine": 0.2978723404255319, + "professional_psychology": 0.41810344827586204, + "public_relations": 0.4942528735632184, + "security_study": 0.42962962962962964, + "sociology": 0.45132743362831856, + "sports_science": 0.44242424242424244, + "traditional_chinese_medicine": 0.32432432432432434, + "virology": 0.46153846153846156, + "world_history": 0.4906832298136646, + "world_religions": 0.44375 + } + }, + "prompt_3": { + "accuracy": 0.39768606458297356, + "category_acc": { + "agronomy": 0.3668639053254438, + "anatomy": 0.2702702702702703, + "ancient_chinese": 0.2865853658536585, + "arts": 0.45625, + "astronomy": 0.2787878787878788, + "business_ethics": 0.45933014354066987, + "chinese_civil_service_exam": 0.36875, + "chinese_driving_rule": 0.5114503816793893, + "chinese_food_culture": 0.29411764705882354, + "chinese_foreign_policy": 0.514018691588785, + "chinese_history": 0.44891640866873067, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.48044692737430167, + "clinical_knowledge": 0.35443037974683544, + "college_actuarial_science": 0.3018867924528302, + "college_education": 0.48598130841121495, + "college_engineering_hydrology": 0.4056603773584906, + "college_law": 0.32407407407407407, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.3490566037735849, + "college_medicine": 0.32234432234432236, + "computer_science": 0.4117647058823529, + "computer_security": 0.4619883040935672, + "conceptual_physics": 0.32653061224489793, + "construction_project_management": 0.3597122302158273, + "economics": 0.4276729559748428, + "education": 0.44785276073619634, + "electrical_engineering": 0.4011627906976744, + "elementary_chinese": 0.30158730158730157, + "elementary_commonsense": 0.31313131313131315, + "elementary_information_and_technology": 0.6428571428571429, + "elementary_mathematics": 0.34347826086956523, + "ethnology": 0.4148148148148148, + "food_science": 0.4405594405594406, + "genetics": 0.4090909090909091, + "global_facts": 0.3959731543624161, + "high_school_biology": 0.3076923076923077, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.4067796610169492, + "high_school_mathematics": 0.24390243902439024, + "high_school_physics": 0.3181818181818182, + "high_school_politics": 0.44755244755244755, + "human_sexuality": 0.3968253968253968, + "international_law": 0.41621621621621624, + "journalism": 0.47093023255813954, + "jurisprudence": 0.39172749391727496, + "legal_and_moral_basis": 0.7102803738317757, + "logical": 0.3902439024390244, + "machine_learning": 0.3524590163934426, + "management": 0.4380952380952381, + "marketing": 0.5, + "marxist_theory": 0.4708994708994709, + "modern_chinese": 0.28448275862068967, + "nutrition": 0.38620689655172413, + "philosophy": 0.42857142857142855, + "professional_accounting": 0.38857142857142857, + "professional_law": 0.33649289099526064, + "professional_medicine": 0.2978723404255319, + "professional_psychology": 0.4267241379310345, + "public_relations": 0.4942528735632184, + "security_study": 0.42962962962962964, + "sociology": 0.4247787610619469, + "sports_science": 0.47878787878787876, + "traditional_chinese_medicine": 0.32432432432432434, + "virology": 0.4378698224852071, + "world_history": 0.4720496894409938, + "world_religions": 0.45 + } + }, + "prompt_4": { + "accuracy": 0.3967363149715075, + "category_acc": { + "agronomy": 0.3668639053254438, + "anatomy": 0.25, + "ancient_chinese": 0.2682926829268293, + "arts": 0.41875, + "astronomy": 0.28484848484848485, + "business_ethics": 0.44976076555023925, + "chinese_civil_service_exam": 0.375, + "chinese_driving_rule": 0.5343511450381679, + "chinese_food_culture": 0.3382352941176471, + "chinese_foreign_policy": 0.5233644859813084, + "chinese_history": 0.47058823529411764, + "chinese_literature": 0.30392156862745096, + "chinese_teacher_qualification": 0.4581005586592179, + "clinical_knowledge": 0.3037974683544304, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.4672897196261682, + "college_engineering_hydrology": 0.42452830188679247, + "college_law": 0.3055555555555556, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.33962264150943394, + "college_medicine": 0.326007326007326, + "computer_science": 0.4215686274509804, + "computer_security": 0.4327485380116959, + "conceptual_physics": 0.38095238095238093, + "construction_project_management": 0.3237410071942446, + "economics": 0.44654088050314467, + "education": 0.44785276073619634, + "electrical_engineering": 0.3953488372093023, + "elementary_chinese": 0.27380952380952384, + "elementary_commonsense": 0.3282828282828283, + "elementary_information_and_technology": 0.634453781512605, + "elementary_mathematics": 0.30434782608695654, + "ethnology": 0.37037037037037035, + "food_science": 0.4405594405594406, + "genetics": 0.375, + "global_facts": 0.38926174496644295, + "high_school_biology": 0.3076923076923077, + "high_school_chemistry": 0.30303030303030304, + "high_school_geography": 0.3983050847457627, + "high_school_mathematics": 0.2621951219512195, + "high_school_physics": 0.3181818181818182, + "high_school_politics": 0.4125874125874126, + "human_sexuality": 0.4126984126984127, + "international_law": 0.41081081081081083, + "journalism": 0.4941860465116279, + "jurisprudence": 0.41605839416058393, + "legal_and_moral_basis": 0.7009345794392523, + "logical": 0.3983739837398374, + "machine_learning": 0.3442622950819672, + "management": 0.44285714285714284, + "marketing": 0.4777777777777778, + "marxist_theory": 0.4973544973544973, + "modern_chinese": 0.29310344827586204, + "nutrition": 0.42758620689655175, + "philosophy": 0.4666666666666667, + "professional_accounting": 0.38857142857142857, + "professional_law": 0.3886255924170616, + "professional_medicine": 0.29521276595744683, + "professional_psychology": 0.41379310344827586, + "public_relations": 0.4827586206896552, + "security_study": 0.4222222222222222, + "sociology": 0.4247787610619469, + "sports_science": 0.4666666666666667, + "traditional_chinese_medicine": 0.34054054054054056, + "virology": 0.4437869822485207, + "world_history": 0.4658385093167702, + "world_religions": 0.45 + } + }, + "prompt_5": { + "accuracy": 0.4015714039026075, + "category_acc": { + "agronomy": 0.3668639053254438, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.29878048780487804, + "arts": 0.45625, + "astronomy": 0.296969696969697, + "business_ethics": 0.45454545454545453, + "chinese_civil_service_exam": 0.375, + "chinese_driving_rule": 0.5954198473282443, + "chinese_food_culture": 0.3235294117647059, + "chinese_foreign_policy": 0.514018691588785, + "chinese_history": 0.4891640866873065, + "chinese_literature": 0.31862745098039214, + "chinese_teacher_qualification": 0.48044692737430167, + "clinical_knowledge": 0.33755274261603374, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.514018691588785, + "college_engineering_hydrology": 0.4339622641509434, + "college_law": 0.2962962962962963, + "college_mathematics": 0.2, + "college_medical_statistics": 0.33962264150943394, + "college_medicine": 0.3076923076923077, + "computer_science": 0.4166666666666667, + "computer_security": 0.4327485380116959, + "conceptual_physics": 0.32653061224489793, + "construction_project_management": 0.3597122302158273, + "economics": 0.44025157232704404, + "education": 0.4233128834355828, + "electrical_engineering": 0.4011627906976744, + "elementary_chinese": 0.29365079365079366, + "elementary_commonsense": 0.35353535353535354, + "elementary_information_and_technology": 0.6092436974789915, + "elementary_mathematics": 0.3391304347826087, + "ethnology": 0.4, + "food_science": 0.4755244755244755, + "genetics": 0.38636363636363635, + "global_facts": 0.37583892617449666, + "high_school_biology": 0.30177514792899407, + "high_school_chemistry": 0.2878787878787879, + "high_school_geography": 0.3983050847457627, + "high_school_mathematics": 0.27439024390243905, + "high_school_physics": 0.36363636363636365, + "high_school_politics": 0.45454545454545453, + "human_sexuality": 0.40476190476190477, + "international_law": 0.42162162162162165, + "journalism": 0.48255813953488375, + "jurisprudence": 0.38929440389294406, + "legal_and_moral_basis": 0.7242990654205608, + "logical": 0.3983739837398374, + "machine_learning": 0.32786885245901637, + "management": 0.44285714285714284, + "marketing": 0.4722222222222222, + "marxist_theory": 0.48148148148148145, + "modern_chinese": 0.3103448275862069, + "nutrition": 0.4206896551724138, + "philosophy": 0.44761904761904764, + "professional_accounting": 0.38857142857142857, + "professional_law": 0.36492890995260663, + "professional_medicine": 0.2898936170212766, + "professional_psychology": 0.40948275862068967, + "public_relations": 0.5, + "security_study": 0.4666666666666667, + "sociology": 0.4469026548672566, + "sports_science": 0.4909090909090909, + "traditional_chinese_medicine": 0.32972972972972975, + "virology": 0.4437869822485207, + "world_history": 0.4720496894409938, + "world_religions": 0.4375 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.30303030303030304 + }, + "prompt_2": { + "accuracy": 0.2727272727272727 + }, + "prompt_3": { + "accuracy": 0.3333333333333333 + }, + "prompt_4": { + "accuracy": 0.24242424242424243 + }, + "prompt_5": { + "accuracy": 0.2727272727272727 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4590909090909091 + }, + "prompt_2": { + "accuracy": 0.4590909090909091 + }, + "prompt_3": { + "accuracy": 0.4340909090909091 + }, + "prompt_4": { + "accuracy": 0.41818181818181815 + }, + "prompt_5": { + "accuracy": 0.4318181818181818 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.35491525423728815 + }, + "prompt_2": { + "accuracy": 0.3576271186440678 + }, + "prompt_3": { + "accuracy": 0.3525423728813559 + }, + "prompt_4": { + "accuracy": 0.3369491525423729 + }, + "prompt_5": { + "accuracy": 0.33322033898305087 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7378459237097981 + }, + "prompt_2": { + "accuracy": 0.7378459237097981 + }, + "prompt_3": { + "accuracy": 0.7378459237097981 + }, + "prompt_4": { + "accuracy": 0.7415856394913987 + }, + "prompt_5": { + "accuracy": 0.7382198952879581 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8388045075943165 + }, + "prompt_2": { + "accuracy": 0.8309652131308183 + }, + "prompt_3": { + "accuracy": 0.8319451249387555 + }, + "prompt_4": { + "accuracy": 0.8383145516903479 + }, + "prompt_5": { + "accuracy": 0.8304752572268496 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.35185478484124616, + "rouge2": 0.14181904540121956, + "rougeL": 0.2689576475232696, + "avg_rouge": 0.2542104925885784 + }, + "prompt_2": { + "rouge1": 0.3723213705247076, + "rouge2": 0.1528643753942022, + "rougeL": 0.28748626820586326, + "avg_rouge": 0.27089067137492434 + }, + "prompt_3": { + "rouge1": 0.3656993440823091, + "rouge2": 0.14648126506847492, + "rougeL": 0.280153560979756, + "avg_rouge": 0.2641113900435133 + }, + "prompt_4": { + "rouge1": 0.3578361081815546, + "rouge2": 0.14432165145765763, + "rougeL": 0.2747355178992767, + "avg_rouge": 0.25896442584616297 + }, + "prompt_5": { + "rouge1": 0.36672198351717084, + "rouge2": 0.14630474105530808, + "rougeL": 0.28363156336945433, + "avg_rouge": 0.2655527626473111 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.23109817785492331, + "rouge2": 0.06584073865888068, + "rougeL": 0.17204709285699302, + "avg_rouge": 0.1563286697902657 + }, + "prompt_2": { + "rouge1": 0.2291486662888163, + "rouge2": 0.06411793160872988, + "rougeL": 0.16897189248118805, + "avg_rouge": 0.1540794967929114 + }, + "prompt_3": { + "rouge1": 0.22678784735577454, + "rouge2": 0.06448925560430474, + "rougeL": 0.16750465010191423, + "avg_rouge": 0.15292725102066448 + }, + "prompt_4": { + "rouge1": 0.2278911755892714, + "rouge2": 0.06404630089186289, + "rougeL": 0.16784012914813337, + "avg_rouge": 0.15325920187642258 + }, + "prompt_5": { + "rouge1": 0.22771313050280845, + "rouge2": 0.061091683611133145, + "rougeL": 0.17234118352421832, + "avg_rouge": 0.1537153325460533 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8543577981651376 + }, + "prompt_2": { + "accuracy": 0.8211009174311926 + }, + "prompt_3": { + "accuracy": 0.8268348623853211 + }, + "prompt_4": { + "accuracy": 0.7855504587155964 + }, + "prompt_5": { + "accuracy": 0.6548165137614679 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7555129434324065 + }, + "prompt_2": { + "accuracy": 0.7430488974113135 + }, + "prompt_3": { + "accuracy": 0.7718120805369127 + }, + "prompt_4": { + "accuracy": 0.7718120805369127 + }, + "prompt_5": { + "accuracy": 0.7420901246404602 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.583 + }, + "prompt_2": { + "accuracy": 0.635 + }, + "prompt_3": { + "accuracy": 0.614 + }, + "prompt_4": { + "accuracy": 0.606 + }, + "prompt_5": { + "accuracy": 0.6445 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.515 + }, + "prompt_2": { + "accuracy": 0.466 + }, + "prompt_3": { + "accuracy": 0.5155 + }, + "prompt_4": { + "accuracy": 0.538 + }, + "prompt_5": { + "accuracy": 0.527 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.589 + }, + "prompt_2": { + "accuracy": 0.622 + }, + "prompt_3": { + "accuracy": 0.5335 + }, + "prompt_4": { + "accuracy": 0.553 + }, + "prompt_5": { + "accuracy": 0.5335 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5492957746478874 + }, + "prompt_2": { + "accuracy": 0.6056338028169014 + }, + "prompt_3": { + "accuracy": 0.5070422535211268 + }, + "prompt_4": { + "accuracy": 0.43661971830985913 + }, + "prompt_5": { + "accuracy": 0.5352112676056338 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5595667870036101 + }, + "prompt_2": { + "accuracy": 0.6859205776173285 + }, + "prompt_3": { + "accuracy": 0.6895306859205776 + }, + "prompt_4": { + "accuracy": 0.5523465703971119 + }, + "prompt_5": { + "accuracy": 0.6353790613718412 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6323529411764706 + }, + "prompt_2": { + "accuracy": 0.6470588235294118 + }, + "prompt_3": { + "accuracy": 0.6740196078431373 + }, + "prompt_4": { + "accuracy": 0.7156862745098039 + }, + "prompt_5": { + "accuracy": 0.6838235294117647 + } } }, "five_shot": { @@ -9130,235 +79119,3134 @@ "model_link": "https://huggingface.co/bigscience/bloomz-7b1", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3961904761904762, + "language_acc": { + "Malay": 0.38, + "English": 0.4666666666666667, + "Vietnamese": 0.37333333333333335, + "Spanish": 0.46, + "Indonesian": 0.38, + "Filipino": 0.26, + "Chinese": 0.4533333333333333 + }, + "consistency_score_2": 0.6180952380952383, + "consistency_score_3": 0.4651428571428572, + "consistency_score_4": 0.3758095238095238, + "consistency_score_5": 0.31428571428571417, + "consistency_score_6": 0.26857142857142857, + "consistency_score_7": 0.23333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.6666666666666666, + "Malay,Vietnamese": 0.7066666666666667, + "Malay,Spanish": 0.6466666666666666, + "Malay,Indonesian": 0.72, + "Malay,Filipino": 0.44666666666666666, + "Malay,Chinese": 0.7, + "English,Vietnamese": 0.66, + "English,Spanish": 0.7666666666666667, + "English,Indonesian": 0.6866666666666666, + "English,Filipino": 0.4666666666666667, + "English,Chinese": 0.7333333333333333, + "Vietnamese,Spanish": 0.64, + "Vietnamese,Indonesian": 0.66, + "Vietnamese,Filipino": 0.41333333333333333, + "Vietnamese,Chinese": 0.6466666666666666, + "Spanish,Indonesian": 0.7266666666666667, + "Spanish,Filipino": 0.48, + "Spanish,Chinese": 0.64, + "Indonesian,Filipino": 0.44, + "Indonesian,Chinese": 0.6866666666666666, + "Filipino,Chinese": 0.44666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.5466666666666666, + "Malay,English,Spanish": 0.5533333333333333, + "Malay,English,Indonesian": 0.5666666666666667, + "Malay,English,Filipino": 0.36, + "Malay,English,Chinese": 0.5666666666666667, + "Malay,Vietnamese,Spanish": 0.52, + "Malay,Vietnamese,Indonesian": 0.5533333333333333, + "Malay,Vietnamese,Filipino": 0.32666666666666666, + "Malay,Vietnamese,Chinese": 0.56, + "Malay,Spanish,Indonesian": 0.56, + "Malay,Spanish,Filipino": 0.35333333333333333, + "Malay,Spanish,Chinese": 0.5133333333333333, + "Malay,Indonesian,Filipino": 0.35333333333333333, + "Malay,Indonesian,Chinese": 0.58, + "Malay,Filipino,Chinese": 0.36, + "English,Vietnamese,Spanish": 0.5466666666666666, + "English,Vietnamese,Indonesian": 0.5266666666666666, + "English,Vietnamese,Filipino": 0.32666666666666666, + "English,Vietnamese,Chinese": 0.5466666666666666, + "English,Spanish,Indonesian": 0.6, + "English,Spanish,Filipino": 0.4, + "English,Spanish,Chinese": 0.58, + "English,Indonesian,Filipino": 0.36666666666666664, + "English,Indonesian,Chinese": 0.5866666666666667, + "English,Filipino,Chinese": 0.37333333333333335, + "Vietnamese,Spanish,Indonesian": 0.5266666666666666, + "Vietnamese,Spanish,Filipino": 0.32666666666666666, + "Vietnamese,Spanish,Chinese": 0.5, + "Vietnamese,Indonesian,Filipino": 0.32, + "Vietnamese,Indonesian,Chinese": 0.5333333333333333, + "Vietnamese,Filipino,Chinese": 0.32, + "Spanish,Indonesian,Filipino": 0.38, + "Spanish,Indonesian,Chinese": 0.5466666666666666, + "Spanish,Filipino,Chinese": 0.3466666666666667, + "Indonesian,Filipino,Chinese": 0.35333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.46, + "Malay,English,Vietnamese,Indonesian": 0.4666666666666667, + "Malay,English,Vietnamese,Filipino": 0.28, + "Malay,English,Vietnamese,Chinese": 0.48, + "Malay,English,Spanish,Indonesian": 0.4866666666666667, + "Malay,English,Spanish,Filipino": 0.32666666666666666, + "Malay,English,Spanish,Chinese": 0.47333333333333333, + "Malay,English,Indonesian,Filipino": 0.32, + "Malay,English,Indonesian,Chinese": 0.5066666666666667, + "Malay,English,Filipino,Chinese": 0.32666666666666666, + "Malay,Vietnamese,Spanish,Indonesian": 0.4533333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.28, + "Malay,Vietnamese,Spanish,Chinese": 0.4533333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.28, + "Malay,Vietnamese,Indonesian,Chinese": 0.48, + "Malay,Vietnamese,Filipino,Chinese": 0.3, + "Malay,Spanish,Indonesian,Filipino": 0.32, + "Malay,Spanish,Indonesian,Chinese": 0.4666666666666667, + "Malay,Spanish,Filipino,Chinese": 0.31333333333333335, + "Malay,Indonesian,Filipino,Chinese": 0.32, + "English,Vietnamese,Spanish,Indonesian": 0.4666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.29333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.46, + "English,Vietnamese,Indonesian,Filipino": 0.28, + "English,Vietnamese,Indonesian,Chinese": 0.4666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.2866666666666667, + "English,Spanish,Indonesian,Filipino": 0.34, + "English,Spanish,Indonesian,Chinese": 0.5066666666666667, + "English,Spanish,Filipino,Chinese": 0.32, + "English,Indonesian,Filipino,Chinese": 0.32666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino": 0.2866666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.44666666666666666, + "Vietnamese,Spanish,Filipino,Chinese": 0.28, + "Vietnamese,Indonesian,Filipino,Chinese": 0.28, + "Spanish,Indonesian,Filipino,Chinese": 0.32 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.4066666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.26, + "Malay,English,Vietnamese,Spanish,Chinese": 0.41333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.25333333333333335, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.4266666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.26666666666666666, + "Malay,English,Spanish,Indonesian,Filipino": 0.3, + "Malay,English,Spanish,Indonesian,Chinese": 0.43333333333333335, + "Malay,English,Spanish,Filipino,Chinese": 0.29333333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.3, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.4066666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.26666666666666666, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.26666666666666666, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.29333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.26666666666666666, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.41333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.26, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.26, + "English,Spanish,Indonesian,Filipino,Chinese": 0.3, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.26 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.24, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.37333333333333335, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.24666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.24666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.28, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.23333333333333334 + } + }, + "AC3_2": 0.4828687681169361, + "AC3_3": 0.4279067415943465, + "AC3_4": 0.38573096936716267, + "AC3_5": 0.35051704322910177, + "AC3_6": 0.3201309864438608, + "AC3_7": 0.2936964195196599 + }, + "prompt_2": { + "overall_acc": 0.41619047619047617, + "language_acc": { + "Malay": 0.38, + "English": 0.49333333333333335, + "Vietnamese": 0.38666666666666666, + "Spanish": 0.48, + "Indonesian": 0.4066666666666667, + "Filipino": 0.31333333333333335, + "Chinese": 0.4533333333333333 + }, + "consistency_score_2": 0.6206349206349208, + "consistency_score_3": 0.4683809523809525, + "consistency_score_4": 0.3767619047619048, + "consistency_score_5": 0.31206349206349204, + "consistency_score_6": 0.2638095238095238, + "consistency_score_7": 0.22666666666666666, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.64, + "Malay,Vietnamese": 0.7133333333333334, + "Malay,Spanish": 0.6533333333333333, + "Malay,Indonesian": 0.7, + "Malay,Filipino": 0.4533333333333333, + "Malay,Chinese": 0.6466666666666666, + "English,Vietnamese": 0.6533333333333333, + "English,Spanish": 0.7933333333333333, + "English,Indonesian": 0.7066666666666667, + "English,Filipino": 0.46, + "English,Chinese": 0.7266666666666667, + "Vietnamese,Spanish": 0.6733333333333333, + "Vietnamese,Indonesian": 0.6933333333333334, + "Vietnamese,Filipino": 0.42, + "Vietnamese,Chinese": 0.6733333333333333, + "Spanish,Indonesian": 0.74, + "Spanish,Filipino": 0.4533333333333333, + "Spanish,Chinese": 0.6266666666666667, + "Indonesian,Filipino": 0.46, + "Indonesian,Chinese": 0.68, + "Filipino,Chinese": 0.4666666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.54, + "Malay,English,Spanish": 0.5666666666666667, + "Malay,English,Indonesian": 0.5533333333333333, + "Malay,English,Filipino": 0.34, + "Malay,English,Chinese": 0.52, + "Malay,Vietnamese,Spanish": 0.54, + "Malay,Vietnamese,Indonesian": 0.5666666666666667, + "Malay,Vietnamese,Filipino": 0.3333333333333333, + "Malay,Vietnamese,Chinese": 0.56, + "Malay,Spanish,Indonesian": 0.56, + "Malay,Spanish,Filipino": 0.3466666666666667, + "Malay,Spanish,Chinese": 0.4866666666666667, + "Malay,Indonesian,Filipino": 0.35333333333333333, + "Malay,Indonesian,Chinese": 0.5466666666666666, + "Malay,Filipino,Chinese": 0.3466666666666667, + "English,Vietnamese,Spanish": 0.58, + "English,Vietnamese,Indonesian": 0.5466666666666666, + "English,Vietnamese,Filipino": 0.3333333333333333, + "English,Vietnamese,Chinese": 0.5466666666666666, + "English,Spanish,Indonesian": 0.64, + "English,Spanish,Filipino": 0.38666666666666666, + "English,Spanish,Chinese": 0.5866666666666667, + "English,Indonesian,Filipino": 0.36666666666666664, + "English,Indonesian,Chinese": 0.58, + "English,Filipino,Chinese": 0.36666666666666664, + "Vietnamese,Spanish,Indonesian": 0.56, + "Vietnamese,Spanish,Filipino": 0.3333333333333333, + "Vietnamese,Spanish,Chinese": 0.52, + "Vietnamese,Indonesian,Filipino": 0.34, + "Vietnamese,Indonesian,Chinese": 0.5533333333333333, + "Vietnamese,Filipino,Chinese": 0.3466666666666667, + "Spanish,Indonesian,Filipino": 0.38666666666666666, + "Spanish,Indonesian,Chinese": 0.5533333333333333, + "Spanish,Filipino,Chinese": 0.34, + "Indonesian,Filipino,Chinese": 0.36666666666666664 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.4866666666666667, + "Malay,English,Vietnamese,Indonesian": 0.47333333333333333, + "Malay,English,Vietnamese,Filipino": 0.2733333333333333, + "Malay,English,Vietnamese,Chinese": 0.4666666666666667, + "Malay,English,Spanish,Indonesian": 0.5, + "Malay,English,Spanish,Filipino": 0.30666666666666664, + "Malay,English,Spanish,Chinese": 0.46, + "Malay,English,Indonesian,Filipino": 0.30666666666666664, + "Malay,English,Indonesian,Chinese": 0.48, + "Malay,English,Filipino,Chinese": 0.29333333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.47333333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.28, + "Malay,Vietnamese,Spanish,Chinese": 0.44666666666666666, + "Malay,Vietnamese,Indonesian,Filipino": 0.29333333333333333, + "Malay,Vietnamese,Indonesian,Chinese": 0.4866666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.3, + "Malay,Spanish,Indonesian,Filipino": 0.31333333333333335, + "Malay,Spanish,Indonesian,Chinese": 0.44666666666666666, + "Malay,Spanish,Filipino,Chinese": 0.2866666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.30666666666666664, + "English,Vietnamese,Spanish,Indonesian": 0.5066666666666667, + "English,Vietnamese,Spanish,Filipino": 0.3, + "English,Vietnamese,Spanish,Chinese": 0.4866666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.2866666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.48, + "English,Vietnamese,Filipino,Chinese": 0.2866666666666667, + "English,Spanish,Indonesian,Filipino": 0.3466666666666667, + "English,Spanish,Indonesian,Chinese": 0.52, + "English,Spanish,Filipino,Chinese": 0.31333333333333335, + "English,Indonesian,Filipino,Chinese": 0.32, + "Vietnamese,Spanish,Indonesian,Filipino": 0.3, + "Vietnamese,Spanish,Indonesian,Chinese": 0.4666666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.28, + "Vietnamese,Indonesian,Filipino,Chinese": 0.29333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.32 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.43333333333333335, + "Malay,English,Vietnamese,Spanish,Filipino": 0.25333333333333335, + "Malay,English,Vietnamese,Spanish,Chinese": 0.42, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.25333333333333335, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.4266666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.25333333333333335, + "Malay,English,Spanish,Indonesian,Filipino": 0.2866666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.4266666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.26666666666666666, + "Malay,English,Indonesian,Filipino,Chinese": 0.28, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.26, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.4066666666666667, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.25333333333333335, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.26666666666666666, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.2733333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.2733333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.44, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.26, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.26, + "English,Spanish,Indonesian,Filipino,Chinese": 0.3, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.26 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.24, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.38666666666666666, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.23333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.24, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.26, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666 + } + }, + "AC3_2": 0.49825620384195085, + "AC3_3": 0.44074607271175076, + "AC3_4": 0.39549592207587997, + "AC3_5": 0.35668285792420273, + "AC3_6": 0.3229265038874181, + "AC3_7": 0.29349135797903714 + }, + "prompt_3": { + "overall_acc": 0.42, + "language_acc": { + "Malay": 0.36, + "English": 0.5133333333333333, + "Vietnamese": 0.42, + "Spanish": 0.4866666666666667, + "Indonesian": 0.4066666666666667, + "Filipino": 0.2866666666666667, + "Chinese": 0.4666666666666667 + }, + "consistency_score_2": 0.6228571428571429, + "consistency_score_3": 0.46971428571428575, + "consistency_score_4": 0.3767619047619047, + "consistency_score_5": 0.3117460317460317, + "consistency_score_6": 0.2638095238095238, + "consistency_score_7": 0.22666666666666666, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.6333333333333333, + "Malay,Vietnamese": 0.6933333333333334, + "Malay,Spanish": 0.6133333333333333, + "Malay,Indonesian": 0.7066666666666667, + "Malay,Filipino": 0.44, + "Malay,Chinese": 0.68, + "English,Vietnamese": 0.68, + "English,Spanish": 0.7666666666666667, + "English,Indonesian": 0.6866666666666666, + "English,Filipino": 0.4666666666666667, + "English,Chinese": 0.7866666666666666, + "Vietnamese,Spanish": 0.6666666666666666, + "Vietnamese,Indonesian": 0.6866666666666666, + "Vietnamese,Filipino": 0.43333333333333335, + "Vietnamese,Chinese": 0.7066666666666667, + "Spanish,Indonesian": 0.68, + "Spanish,Filipino": 0.46, + "Spanish,Chinese": 0.6733333333333333, + "Indonesian,Filipino": 0.4533333333333333, + "Indonesian,Chinese": 0.7, + "Filipino,Chinese": 0.4666666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.5266666666666666, + "Malay,English,Spanish": 0.5333333333333333, + "Malay,English,Indonesian": 0.5466666666666666, + "Malay,English,Filipino": 0.32666666666666666, + "Malay,English,Chinese": 0.5666666666666667, + "Malay,Vietnamese,Spanish": 0.5133333333333333, + "Malay,Vietnamese,Indonesian": 0.56, + "Malay,Vietnamese,Filipino": 0.34, + "Malay,Vietnamese,Chinese": 0.5666666666666667, + "Malay,Spanish,Indonesian": 0.5266666666666666, + "Malay,Spanish,Filipino": 0.32666666666666666, + "Malay,Spanish,Chinese": 0.5133333333333333, + "Malay,Indonesian,Filipino": 0.35333333333333333, + "Malay,Indonesian,Chinese": 0.5733333333333334, + "Malay,Filipino,Chinese": 0.3466666666666667, + "English,Vietnamese,Spanish": 0.58, + "English,Vietnamese,Indonesian": 0.5466666666666666, + "English,Vietnamese,Filipino": 0.34, + "English,Vietnamese,Chinese": 0.5933333333333334, + "English,Spanish,Indonesian": 0.5866666666666667, + "English,Spanish,Filipino": 0.38, + "English,Spanish,Chinese": 0.6333333333333333, + "English,Indonesian,Filipino": 0.36, + "English,Indonesian,Chinese": 0.6066666666666667, + "English,Filipino,Chinese": 0.38, + "Vietnamese,Spanish,Indonesian": 0.5266666666666666, + "Vietnamese,Spanish,Filipino": 0.32666666666666666, + "Vietnamese,Spanish,Chinese": 0.5533333333333333, + "Vietnamese,Indonesian,Filipino": 0.36, + "Vietnamese,Indonesian,Chinese": 0.5733333333333334, + "Vietnamese,Filipino,Chinese": 0.35333333333333333, + "Spanish,Indonesian,Filipino": 0.35333333333333333, + "Spanish,Indonesian,Chinese": 0.5533333333333333, + "Spanish,Filipino,Chinese": 0.3466666666666667, + "Indonesian,Filipino,Chinese": 0.36666666666666664 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.46, + "Malay,English,Vietnamese,Indonesian": 0.46, + "Malay,English,Vietnamese,Filipino": 0.26, + "Malay,English,Vietnamese,Chinese": 0.48, + "Malay,English,Spanish,Indonesian": 0.4666666666666667, + "Malay,English,Spanish,Filipino": 0.3, + "Malay,English,Spanish,Chinese": 0.48, + "Malay,English,Indonesian,Filipino": 0.29333333333333333, + "Malay,English,Indonesian,Chinese": 0.5133333333333333, + "Malay,English,Filipino,Chinese": 0.29333333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.4533333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.2733333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.46, + "Malay,Vietnamese,Indonesian,Filipino": 0.3, + "Malay,Vietnamese,Indonesian,Chinese": 0.49333333333333335, + "Malay,Vietnamese,Filipino,Chinese": 0.29333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.29333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.4666666666666667, + "Malay,Spanish,Filipino,Chinese": 0.2866666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.31333333333333335, + "English,Vietnamese,Spanish,Indonesian": 0.47333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.3, + "English,Vietnamese,Spanish,Chinese": 0.52, + "English,Vietnamese,Indonesian,Filipino": 0.3, + "English,Vietnamese,Indonesian,Chinese": 0.5066666666666667, + "English,Vietnamese,Filipino,Chinese": 0.2866666666666667, + "English,Spanish,Indonesian,Filipino": 0.32666666666666666, + "English,Spanish,Indonesian,Chinese": 0.52, + "English,Spanish,Filipino,Chinese": 0.32, + "English,Indonesian,Filipino,Chinese": 0.32, + "Vietnamese,Spanish,Indonesian,Filipino": 0.29333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.47333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.28, + "Vietnamese,Indonesian,Filipino,Chinese": 0.31333333333333335, + "Spanish,Indonesian,Filipino,Chinese": 0.31333333333333335 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.4066666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.24666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.4266666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.44, + "Malay,English,Vietnamese,Filipino,Chinese": 0.24, + "Malay,English,Spanish,Indonesian,Filipino": 0.2733333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.44, + "Malay,English,Spanish,Filipino,Chinese": 0.26666666666666666, + "Malay,English,Indonesian,Filipino,Chinese": 0.28, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.42, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.2733333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.2733333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.2733333333333333, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.44666666666666666, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.26, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.2733333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.29333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.26666666666666666 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.3933333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.22666666666666666, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.24, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.26, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.25333333333333335 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666 + } + }, + "AC3_2": 0.5016986300888782, + "AC3_3": 0.4434682080426417, + "AC3_4": 0.39720774558724975, + "AC3_5": 0.35786550971248254, + "AC3_6": 0.32406685232029664, + "AC3_7": 0.29443298964519077 + }, + "prompt_4": { + "overall_acc": 0.4047619047619047, + "language_acc": { + "Malay": 0.38666666666666666, + "English": 0.4666666666666667, + "Vietnamese": 0.38666666666666666, + "Spanish": 0.4533333333333333, + "Indonesian": 0.4066666666666667, + "Filipino": 0.28, + "Chinese": 0.4533333333333333 + }, + "consistency_score_2": 0.6092063492063492, + "consistency_score_3": 0.45142857142857146, + "consistency_score_4": 0.3577142857142859, + "consistency_score_5": 0.29396825396825405, + "consistency_score_6": 0.24761904761904757, + "consistency_score_7": 0.21333333333333335, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.6333333333333333, + "Malay,Vietnamese": 0.68, + "Malay,Spanish": 0.64, + "Malay,Indonesian": 0.6933333333333334, + "Malay,Filipino": 0.44666666666666666, + "Malay,Chinese": 0.6666666666666666, + "English,Vietnamese": 0.62, + "English,Spanish": 0.7133333333333334, + "English,Indonesian": 0.7066666666666667, + "English,Filipino": 0.4666666666666667, + "English,Chinese": 0.7333333333333333, + "Vietnamese,Spanish": 0.6333333333333333, + "Vietnamese,Indonesian": 0.6333333333333333, + "Vietnamese,Filipino": 0.4266666666666667, + "Vietnamese,Chinese": 0.6733333333333333, + "Spanish,Indonesian": 0.7333333333333333, + "Spanish,Filipino": 0.4666666666666667, + "Spanish,Chinese": 0.6466666666666666, + "Indonesian,Filipino": 0.43333333333333335, + "Indonesian,Chinese": 0.7, + "Filipino,Chinese": 0.44666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.5066666666666667, + "Malay,English,Spanish": 0.5133333333333333, + "Malay,English,Indonesian": 0.5466666666666666, + "Malay,English,Filipino": 0.3333333333333333, + "Malay,English,Chinese": 0.54, + "Malay,Vietnamese,Spanish": 0.5066666666666667, + "Malay,Vietnamese,Indonesian": 0.5266666666666666, + "Malay,Vietnamese,Filipino": 0.3333333333333333, + "Malay,Vietnamese,Chinese": 0.54, + "Malay,Spanish,Indonesian": 0.5333333333333333, + "Malay,Spanish,Filipino": 0.3333333333333333, + "Malay,Spanish,Chinese": 0.5, + "Malay,Indonesian,Filipino": 0.34, + "Malay,Indonesian,Chinese": 0.5666666666666667, + "Malay,Filipino,Chinese": 0.32666666666666666, + "English,Vietnamese,Spanish": 0.5066666666666667, + "English,Vietnamese,Indonesian": 0.5066666666666667, + "English,Vietnamese,Filipino": 0.32, + "English,Vietnamese,Chinese": 0.54, + "English,Spanish,Indonesian": 0.5933333333333334, + "English,Spanish,Filipino": 0.37333333333333335, + "English,Spanish,Chinese": 0.56, + "English,Indonesian,Filipino": 0.36666666666666664, + "English,Indonesian,Chinese": 0.5933333333333334, + "English,Filipino,Chinese": 0.38, + "Vietnamese,Spanish,Indonesian": 0.5133333333333333, + "Vietnamese,Spanish,Filipino": 0.31333333333333335, + "Vietnamese,Spanish,Chinese": 0.5133333333333333, + "Vietnamese,Indonesian,Filipino": 0.31333333333333335, + "Vietnamese,Indonesian,Chinese": 0.5266666666666666, + "Vietnamese,Filipino,Chinese": 0.32666666666666666, + "Spanish,Indonesian,Filipino": 0.36, + "Spanish,Indonesian,Chinese": 0.56, + "Spanish,Filipino,Chinese": 0.34, + "Indonesian,Filipino,Chinese": 0.3466666666666667 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.43333333333333335, + "Malay,English,Vietnamese,Indonesian": 0.44666666666666666, + "Malay,English,Vietnamese,Filipino": 0.26666666666666666, + "Malay,English,Vietnamese,Chinese": 0.4533333333333333, + "Malay,English,Spanish,Indonesian": 0.44, + "Malay,English,Spanish,Filipino": 0.29333333333333333, + "Malay,English,Spanish,Chinese": 0.44, + "Malay,English,Indonesian,Filipino": 0.3, + "Malay,English,Indonesian,Chinese": 0.49333333333333335, + "Malay,English,Filipino,Chinese": 0.2866666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.43333333333333335, + "Malay,Vietnamese,Spanish,Filipino": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Chinese": 0.43333333333333335, + "Malay,Vietnamese,Indonesian,Filipino": 0.28, + "Malay,Vietnamese,Indonesian,Chinese": 0.46, + "Malay,Vietnamese,Filipino,Chinese": 0.2866666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.29333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.4533333333333333, + "Malay,Spanish,Filipino,Chinese": 0.2866666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.29333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.43333333333333335, + "English,Vietnamese,Spanish,Filipino": 0.2733333333333333, + "English,Vietnamese,Spanish,Chinese": 0.44666666666666666, + "English,Vietnamese,Indonesian,Filipino": 0.2733333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.46, + "English,Vietnamese,Filipino,Chinese": 0.2866666666666667, + "English,Spanish,Indonesian,Filipino": 0.32666666666666666, + "English,Spanish,Indonesian,Chinese": 0.49333333333333335, + "English,Spanish,Filipino,Chinese": 0.30666666666666664, + "English,Indonesian,Filipino,Chinese": 0.32, + "Vietnamese,Spanish,Indonesian,Filipino": 0.2733333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.44666666666666666, + "Vietnamese,Spanish,Filipino,Chinese": 0.26666666666666666, + "Vietnamese,Indonesian,Filipino,Chinese": 0.26666666666666666, + "Spanish,Indonesian,Filipino,Chinese": 0.30666666666666664 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.37333333333333335, + "Malay,English,Vietnamese,Spanish,Filipino": 0.24, + "Malay,English,Vietnamese,Spanish,Chinese": 0.38666666666666666, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.41333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.25333333333333335, + "Malay,English,Spanish,Indonesian,Filipino": 0.26666666666666666, + "Malay,English,Spanish,Indonesian,Chinese": 0.4, + "Malay,English,Spanish,Filipino,Chinese": 0.26, + "Malay,English,Indonesian,Filipino,Chinese": 0.2733333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.24, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.38666666666666666, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.25333333333333335, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.26666666666666666, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.24666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.3933333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.24666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.25333333333333335, + "English,Spanish,Indonesian,Filipino,Chinese": 0.2866666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.22, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.3466666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.22666666666666666, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.24, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.24666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.21333333333333335 + } + }, + "AC3_2": 0.4863732593371893, + "AC3_3": 0.42682345458227944, + "AC3_4": 0.3797865885801465, + "AC3_5": 0.3405811210866718, + "AC3_6": 0.3072645115969747, + "AC3_7": 0.27940421156239736 + }, + "prompt_5": { + "overall_acc": 0.4133333333333333, + "language_acc": { + "Malay": 0.38666666666666666, + "English": 0.5, + "Vietnamese": 0.38666666666666666, + "Spanish": 0.48, + "Indonesian": 0.3933333333333333, + "Filipino": 0.2866666666666667, + "Chinese": 0.46 + }, + "consistency_score_2": 0.6234920634920635, + "consistency_score_3": 0.4720000000000001, + "consistency_score_4": 0.3820952380952381, + "consistency_score_5": 0.32, + "consistency_score_6": 0.27428571428571435, + "consistency_score_7": 0.24, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.6333333333333333, + "Malay,Vietnamese": 0.72, + "Malay,Spanish": 0.66, + "Malay,Indonesian": 0.7, + "Malay,Filipino": 0.4666666666666667, + "Malay,Chinese": 0.68, + "English,Vietnamese": 0.6466666666666666, + "English,Spanish": 0.7533333333333333, + "English,Indonesian": 0.6866666666666666, + "English,Filipino": 0.4666666666666667, + "English,Chinese": 0.78, + "Vietnamese,Spanish": 0.64, + "Vietnamese,Indonesian": 0.6333333333333333, + "Vietnamese,Filipino": 0.44666666666666666, + "Vietnamese,Chinese": 0.6866666666666666, + "Spanish,Indonesian": 0.74, + "Spanish,Filipino": 0.44, + "Spanish,Chinese": 0.7066666666666667, + "Indonesian,Filipino": 0.44666666666666666, + "Indonesian,Chinese": 0.72, + "Filipino,Chinese": 0.44 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.52, + "Malay,English,Spanish": 0.5533333333333333, + "Malay,English,Indonesian": 0.5466666666666666, + "Malay,English,Filipino": 0.3333333333333333, + "Malay,English,Chinese": 0.5733333333333334, + "Malay,Vietnamese,Spanish": 0.54, + "Malay,Vietnamese,Indonesian": 0.5533333333333333, + "Malay,Vietnamese,Filipino": 0.36666666666666664, + "Malay,Vietnamese,Chinese": 0.5666666666666667, + "Malay,Spanish,Indonesian": 0.5666666666666667, + "Malay,Spanish,Filipino": 0.34, + "Malay,Spanish,Chinese": 0.5533333333333333, + "Malay,Indonesian,Filipino": 0.37333333333333335, + "Malay,Indonesian,Chinese": 0.5866666666666667, + "Malay,Filipino,Chinese": 0.35333333333333333, + "English,Vietnamese,Spanish": 0.54, + "English,Vietnamese,Indonesian": 0.5, + "English,Vietnamese,Filipino": 0.34, + "English,Vietnamese,Chinese": 0.5666666666666667, + "English,Spanish,Indonesian": 0.6066666666666667, + "English,Spanish,Filipino": 0.36666666666666664, + "English,Spanish,Chinese": 0.6266666666666667, + "English,Indonesian,Filipino": 0.36666666666666664, + "English,Indonesian,Chinese": 0.6133333333333333, + "English,Filipino,Chinese": 0.38666666666666666, + "Vietnamese,Spanish,Indonesian": 0.52, + "Vietnamese,Spanish,Filipino": 0.31333333333333335, + "Vietnamese,Spanish,Chinese": 0.5466666666666666, + "Vietnamese,Indonesian,Filipino": 0.34, + "Vietnamese,Indonesian,Chinese": 0.5466666666666666, + "Vietnamese,Filipino,Chinese": 0.3333333333333333, + "Spanish,Indonesian,Filipino": 0.35333333333333333, + "Spanish,Indonesian,Chinese": 0.6066666666666667, + "Spanish,Filipino,Chinese": 0.3466666666666667, + "Indonesian,Filipino,Chinese": 0.37333333333333335 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.46, + "Malay,English,Vietnamese,Indonesian": 0.4533333333333333, + "Malay,English,Vietnamese,Filipino": 0.28, + "Malay,English,Vietnamese,Chinese": 0.48, + "Malay,English,Spanish,Indonesian": 0.48, + "Malay,English,Spanish,Filipino": 0.30666666666666664, + "Malay,English,Spanish,Chinese": 0.5, + "Malay,English,Indonesian,Filipino": 0.30666666666666664, + "Malay,English,Indonesian,Chinese": 0.5266666666666666, + "Malay,English,Filipino,Chinese": 0.30666666666666664, + "Malay,Vietnamese,Spanish,Indonesian": 0.47333333333333333, + "Malay,Vietnamese,Spanish,Filipino": 0.2866666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.47333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.31333333333333335, + "Malay,Vietnamese,Indonesian,Chinese": 0.49333333333333335, + "Malay,Vietnamese,Filipino,Chinese": 0.3, + "Malay,Spanish,Indonesian,Filipino": 0.30666666666666664, + "Malay,Spanish,Indonesian,Chinese": 0.5066666666666667, + "Malay,Spanish,Filipino,Chinese": 0.30666666666666664, + "Malay,Indonesian,Filipino,Chinese": 0.3333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.44666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.2866666666666667, + "English,Vietnamese,Spanish,Chinese": 0.49333333333333335, + "English,Vietnamese,Indonesian,Filipino": 0.29333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.47333333333333333, + "English,Vietnamese,Filipino,Chinese": 0.29333333333333333, + "English,Spanish,Indonesian,Filipino": 0.32666666666666666, + "English,Spanish,Indonesian,Chinese": 0.54, + "English,Spanish,Filipino,Chinese": 0.32, + "English,Indonesian,Filipino,Chinese": 0.34, + "Vietnamese,Spanish,Indonesian,Filipino": 0.2866666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.47333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.28, + "Vietnamese,Indonesian,Filipino,Chinese": 0.3, + "Spanish,Indonesian,Filipino,Chinese": 0.32666666666666666 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.4066666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.26, + "Malay,English,Vietnamese,Spanish,Chinese": 0.4266666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.26666666666666666, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.44, + "Malay,English,Vietnamese,Filipino,Chinese": 0.26, + "Malay,English,Spanish,Indonesian,Filipino": 0.28, + "Malay,English,Spanish,Indonesian,Chinese": 0.46, + "Malay,English,Spanish,Filipino,Chinese": 0.28, + "Malay,English,Indonesian,Filipino,Chinese": 0.3, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.43333333333333335, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.26, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.2866666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.29333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.26666666666666666, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.4266666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.26, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.2733333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.30666666666666664, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.26666666666666666 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.3933333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.24, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.26, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.2733333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.25333333333333335, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.25333333333333335 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.24 + } + }, + "AC3_2": 0.49711369662484456, + "AC3_3": 0.44072289151648464, + "AC3_4": 0.3971008939475228, + "AC3_5": 0.3607272726780826, + "AC3_6": 0.32975069247282024, + "AC3_7": 0.3036734693412744 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.4034090909090909, + "language_acc": { + "English": 0.4431818181818182, + "Vietnamese": 0.3977272727272727, + "Chinese": 0.4375, + "Indonesian": 0.4034090909090909, + "Filipino": 0.3068181818181818, + "Spanish": 0.4147727272727273, + "Malay": 0.42045454545454547 + }, + "consistency_score_2": 0.523538961038961, + "consistency_score_3": 0.33896103896103885, + "consistency_score_4": 0.23603896103896105, + "consistency_score_5": 0.17018398268398263, + "consistency_score_6": 0.12418831168831168, + "consistency_score_7": 0.09090909090909091, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5738636363636364, + "English,Chinese": 0.5909090909090909, + "English,Indonesian": 0.5795454545454546, + "English,Filipino": 0.3409090909090909, + "English,Spanish": 0.7272727272727273, + "English,Malay": 0.5284090909090909, + "Vietnamese,Chinese": 0.5113636363636364, + "Vietnamese,Indonesian": 0.5965909090909091, + "Vietnamese,Filipino": 0.3181818181818182, + "Vietnamese,Spanish": 0.5909090909090909, + "Vietnamese,Malay": 0.6136363636363636, + "Chinese,Indonesian": 0.5568181818181818, + "Chinese,Filipino": 0.3522727272727273, + "Chinese,Spanish": 0.5852272727272727, + "Chinese,Malay": 0.5738636363636364, + "Indonesian,Filipino": 0.3806818181818182, + "Indonesian,Spanish": 0.625, + "Indonesian,Malay": 0.6363636363636364, + "Filipino,Spanish": 0.375, + "Filipino,Malay": 0.35795454545454547, + "Spanish,Malay": 0.5795454545454546 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.3806818181818182, + "English,Vietnamese,Indonesian": 0.4147727272727273, + "English,Vietnamese,Filipino": 0.19886363636363635, + "English,Vietnamese,Spanish": 0.4772727272727273, + "English,Vietnamese,Malay": 0.4034090909090909, + "English,Chinese,Indonesian": 0.4034090909090909, + "English,Chinese,Filipino": 0.2159090909090909, + "English,Chinese,Spanish": 0.48863636363636365, + "English,Chinese,Malay": 0.38636363636363635, + "English,Indonesian,Filipino": 0.2215909090909091, + "English,Indonesian,Spanish": 0.4943181818181818, + "English,Indonesian,Malay": 0.4034090909090909, + "English,Filipino,Spanish": 0.2784090909090909, + "English,Filipino,Malay": 0.19886363636363635, + "English,Spanish,Malay": 0.4431818181818182, + "Vietnamese,Chinese,Indonesian": 0.375, + "Vietnamese,Chinese,Filipino": 0.19886363636363635, + "Vietnamese,Chinese,Spanish": 0.39204545454545453, + "Vietnamese,Chinese,Malay": 0.4034090909090909, + "Vietnamese,Indonesian,Filipino": 0.2159090909090909, + "Vietnamese,Indonesian,Spanish": 0.4318181818181818, + "Vietnamese,Indonesian,Malay": 0.4602272727272727, + "Vietnamese,Filipino,Spanish": 0.22727272727272727, + "Vietnamese,Filipino,Malay": 0.2215909090909091, + "Vietnamese,Spanish,Malay": 0.4318181818181818, + "Chinese,Indonesian,Filipino": 0.22727272727272727, + "Chinese,Indonesian,Spanish": 0.3977272727272727, + "Chinese,Indonesian,Malay": 0.42613636363636365, + "Chinese,Filipino,Spanish": 0.22727272727272727, + "Chinese,Filipino,Malay": 0.22727272727272727, + "Chinese,Spanish,Malay": 0.42045454545454547, + "Indonesian,Filipino,Spanish": 0.25, + "Indonesian,Filipino,Malay": 0.24431818181818182, + "Indonesian,Spanish,Malay": 0.44886363636363635, + "Filipino,Spanish,Malay": 0.22727272727272727 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.29545454545454547, + "English,Vietnamese,Chinese,Filipino": 0.1534090909090909, + "English,Vietnamese,Chinese,Spanish": 0.3352272727272727, + "English,Vietnamese,Chinese,Malay": 0.30113636363636365, + "English,Vietnamese,Indonesian,Filipino": 0.14772727272727273, + "English,Vietnamese,Indonesian,Spanish": 0.3806818181818182, + "English,Vietnamese,Indonesian,Malay": 0.32386363636363635, + "English,Vietnamese,Filipino,Spanish": 0.1875, + "English,Vietnamese,Filipino,Malay": 0.14204545454545456, + "English,Vietnamese,Spanish,Malay": 0.3522727272727273, + "English,Chinese,Indonesian,Filipino": 0.1534090909090909, + "English,Chinese,Indonesian,Spanish": 0.3522727272727273, + "English,Chinese,Indonesian,Malay": 0.30113636363636365, + "English,Chinese,Filipino,Spanish": 0.19886363636363635, + "English,Chinese,Filipino,Malay": 0.1534090909090909, + "English,Chinese,Spanish,Malay": 0.3465909090909091, + "English,Indonesian,Filipino,Spanish": 0.19318181818181818, + "English,Indonesian,Filipino,Malay": 0.1590909090909091, + "English,Indonesian,Spanish,Malay": 0.3693181818181818, + "English,Filipino,Spanish,Malay": 0.17613636363636365, + "Vietnamese,Chinese,Indonesian,Filipino": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Spanish": 0.30113636363636365, + "Vietnamese,Chinese,Indonesian,Malay": 0.3181818181818182, + "Vietnamese,Chinese,Filipino,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Filipino,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Spanish,Malay": 0.32954545454545453, + "Vietnamese,Indonesian,Filipino,Spanish": 0.17045454545454544, + "Vietnamese,Indonesian,Filipino,Malay": 0.16477272727272727, + "Vietnamese,Indonesian,Spanish,Malay": 0.3465909090909091, + "Vietnamese,Filipino,Spanish,Malay": 0.16477272727272727, + "Chinese,Indonesian,Filipino,Spanish": 0.1590909090909091, + "Chinese,Indonesian,Filipino,Malay": 0.17045454545454544, + "Chinese,Indonesian,Spanish,Malay": 0.32386363636363635, + "Chinese,Filipino,Spanish,Malay": 0.1590909090909091, + "Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.11363636363636363, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.2727272727272727, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.24431818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.14772727272727273, + "English,Vietnamese,Chinese,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Chinese,Spanish,Malay": 0.2784090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.14772727272727273, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.3068181818181818, + "English,Vietnamese,Filipino,Spanish,Malay": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino,Spanish": 0.14204545454545456, + "English,Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Chinese,Indonesian,Spanish,Malay": 0.2784090909090909, + "English,Chinese,Filipino,Spanish,Malay": 0.13636363636363635, + "English,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.125, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.125 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.23295454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.11363636363636363, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091 + } + }, + "AC3_2": 0.45568977864849936, + "AC3_3": 0.36838757126819077, + "AC3_4": 0.2978201665854733, + "AC3_5": 0.23938143220526453, + "AC3_6": 0.18991258737659164, + "AC3_7": 0.14838035524688983 + }, + "prompt_2": { + "overall_acc": 0.40665584415584416, + "language_acc": { + "English": 0.4715909090909091, + "Vietnamese": 0.4147727272727273, + "Chinese": 0.4431818181818182, + "Indonesian": 0.4090909090909091, + "Filipino": 0.30113636363636365, + "Spanish": 0.39204545454545453, + "Malay": 0.4147727272727273 + }, + "consistency_score_2": 0.5162337662337662, + "consistency_score_3": 0.33262987012987005, + "consistency_score_4": 0.23230519480519482, + "consistency_score_5": 0.1672077922077922, + "consistency_score_6": 0.11931818181818181, + "consistency_score_7": 0.07954545454545454, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5454545454545454, + "English,Chinese": 0.5454545454545454, + "English,Indonesian": 0.5909090909090909, + "English,Filipino": 0.3181818181818182, + "English,Spanish": 0.7045454545454546, + "English,Malay": 0.5625, + "Vietnamese,Chinese": 0.5454545454545454, + "Vietnamese,Indonesian": 0.6022727272727273, + "Vietnamese,Filipino": 0.32386363636363635, + "Vietnamese,Spanish": 0.5568181818181818, + "Vietnamese,Malay": 0.5738636363636364, + "Chinese,Indonesian": 0.5852272727272727, + "Chinese,Filipino": 0.36363636363636365, + "Chinese,Spanish": 0.5795454545454546, + "Chinese,Malay": 0.5568181818181818, + "Indonesian,Filipino": 0.36363636363636365, + "Indonesian,Spanish": 0.625, + "Indonesian,Malay": 0.625, + "Filipino,Spanish": 0.3522727272727273, + "Filipino,Malay": 0.3522727272727273, + "Spanish,Malay": 0.5681818181818182 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.3693181818181818, + "English,Vietnamese,Indonesian": 0.4147727272727273, + "English,Vietnamese,Filipino": 0.1875, + "English,Vietnamese,Spanish": 0.42613636363636365, + "English,Vietnamese,Malay": 0.39204545454545453, + "English,Chinese,Indonesian": 0.4090909090909091, + "English,Chinese,Filipino": 0.20454545454545456, + "English,Chinese,Spanish": 0.4431818181818182, + "English,Chinese,Malay": 0.38636363636363635, + "English,Indonesian,Filipino": 0.21022727272727273, + "English,Indonesian,Spanish": 0.4772727272727273, + "English,Indonesian,Malay": 0.4375, + "English,Filipino,Spanish": 0.2556818181818182, + "English,Filipino,Malay": 0.19318181818181818, + "English,Spanish,Malay": 0.44886363636363635, + "Vietnamese,Chinese,Indonesian": 0.4034090909090909, + "Vietnamese,Chinese,Filipino": 0.2159090909090909, + "Vietnamese,Chinese,Spanish": 0.38636363636363635, + "Vietnamese,Chinese,Malay": 0.39204545454545453, + "Vietnamese,Indonesian,Filipino": 0.2159090909090909, + "Vietnamese,Indonesian,Spanish": 0.4090909090909091, + "Vietnamese,Indonesian,Malay": 0.4318181818181818, + "Vietnamese,Filipino,Spanish": 0.21022727272727273, + "Vietnamese,Filipino,Malay": 0.20454545454545456, + "Vietnamese,Spanish,Malay": 0.4090909090909091, + "Chinese,Indonesian,Filipino": 0.25, + "Chinese,Indonesian,Spanish": 0.42045454545454547, + "Chinese,Indonesian,Malay": 0.42613636363636365, + "Chinese,Filipino,Spanish": 0.23295454545454544, + "Chinese,Filipino,Malay": 0.2215909090909091, + "Chinese,Spanish,Malay": 0.4090909090909091, + "Indonesian,Filipino,Spanish": 0.25, + "Indonesian,Filipino,Malay": 0.25, + "Indonesian,Spanish,Malay": 0.4375, + "Filipino,Spanish,Malay": 0.21022727272727273 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.30113636363636365, + "English,Vietnamese,Chinese,Filipino": 0.14204545454545456, + "English,Vietnamese,Chinese,Spanish": 0.3125, + "English,Vietnamese,Chinese,Malay": 0.2897727272727273, + "English,Vietnamese,Indonesian,Filipino": 0.14772727272727273, + "English,Vietnamese,Indonesian,Spanish": 0.3465909090909091, + "English,Vietnamese,Indonesian,Malay": 0.32954545454545453, + "English,Vietnamese,Filipino,Spanish": 0.16477272727272727, + "English,Vietnamese,Filipino,Malay": 0.13636363636363635, + "English,Vietnamese,Spanish,Malay": 0.32954545454545453, + "English,Chinese,Indonesian,Filipino": 0.16477272727272727, + "English,Chinese,Indonesian,Spanish": 0.35795454545454547, + "English,Chinese,Indonesian,Malay": 0.32954545454545453, + "English,Chinese,Filipino,Spanish": 0.1875, + "English,Chinese,Filipino,Malay": 0.1534090909090909, + "English,Chinese,Spanish,Malay": 0.3352272727272727, + "English,Indonesian,Filipino,Spanish": 0.19318181818181818, + "English,Indonesian,Filipino,Malay": 0.1534090909090909, + "English,Indonesian,Spanish,Malay": 0.375, + "English,Filipino,Spanish,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Spanish": 0.29545454545454547, + "Vietnamese,Chinese,Indonesian,Malay": 0.3181818181818182, + "Vietnamese,Chinese,Filipino,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Filipino,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Spanish,Malay": 0.3181818181818182, + "Vietnamese,Indonesian,Filipino,Spanish": 0.1590909090909091, + "Vietnamese,Indonesian,Filipino,Malay": 0.1590909090909091, + "Vietnamese,Indonesian,Spanish,Malay": 0.32954545454545453, + "Vietnamese,Filipino,Spanish,Malay": 0.14772727272727273, + "Chinese,Indonesian,Filipino,Spanish": 0.17613636363636365, + "Chinese,Indonesian,Filipino,Malay": 0.17613636363636365, + "Chinese,Indonesian,Spanish,Malay": 0.3352272727272727, + "Chinese,Filipino,Spanish,Malay": 0.1534090909090909, + "Indonesian,Filipino,Spanish,Malay": 0.16477272727272727 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.11931818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.26704545454545453, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.2556818181818182, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.13068181818181818, + "English,Vietnamese,Chinese,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Chinese,Spanish,Malay": 0.26136363636363635, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.2897727272727273, + "English,Vietnamese,Filipino,Spanish,Malay": 0.125, + "English,Chinese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "English,Chinese,Indonesian,Filipino,Malay": 0.125, + "English,Chinese,Indonesian,Spanish,Malay": 0.29545454545454547, + "English,Chinese,Filipino,Spanish,Malay": 0.13636363636363635, + "English,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.2556818181818182, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.125 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.23295454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.10227272727272728, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454 + } + }, + "AC3_2": 0.45493951953402395, + "AC3_3": 0.36593668184568123, + "AC3_4": 0.29569335004874975, + "AC3_5": 0.2369762486039778, + "AC3_6": 0.1845012625911846, + "AC3_7": 0.13306268019724898 + }, + "prompt_3": { + "overall_acc": 0.3896103896103896, + "language_acc": { + "English": 0.4431818181818182, + "Vietnamese": 0.4147727272727273, + "Chinese": 0.42045454545454547, + "Indonesian": 0.375, + "Filipino": 0.3125, + "Spanish": 0.375, + "Malay": 0.38636363636363635 + }, + "consistency_score_2": 0.5211038961038961, + "consistency_score_3": 0.3397727272727273, + "consistency_score_4": 0.23538961038961037, + "consistency_score_5": 0.163961038961039, + "consistency_score_6": 0.11038961038961038, + "consistency_score_7": 0.06818181818181818, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.6079545454545454, + "English,Chinese": 0.5738636363636364, + "English,Indonesian": 0.6193181818181818, + "English,Filipino": 0.35795454545454547, + "English,Spanish": 0.7386363636363636, + "English,Malay": 0.5795454545454546, + "Vietnamese,Chinese": 0.5113636363636364, + "Vietnamese,Indonesian": 0.5965909090909091, + "Vietnamese,Filipino": 0.3409090909090909, + "Vietnamese,Spanish": 0.6079545454545454, + "Vietnamese,Malay": 0.5795454545454546, + "Chinese,Indonesian": 0.5056818181818182, + "Chinese,Filipino": 0.26704545454545453, + "Chinese,Spanish": 0.5625, + "Chinese,Malay": 0.5056818181818182, + "Indonesian,Filipino": 0.32386363636363635, + "Indonesian,Spanish": 0.6704545454545454, + "Indonesian,Malay": 0.6306818181818182, + "Filipino,Spanish": 0.35795454545454547, + "Filipino,Malay": 0.3806818181818182, + "Spanish,Malay": 0.625 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.38636363636363635, + "English,Vietnamese,Indonesian": 0.45454545454545453, + "English,Vietnamese,Filipino": 0.23863636363636365, + "English,Vietnamese,Spanish": 0.4943181818181818, + "English,Vietnamese,Malay": 0.4318181818181818, + "English,Chinese,Indonesian": 0.39204545454545453, + "English,Chinese,Filipino": 0.20454545454545456, + "English,Chinese,Spanish": 0.4715909090909091, + "English,Chinese,Malay": 0.39204545454545453, + "English,Indonesian,Filipino": 0.23863636363636365, + "English,Indonesian,Spanish": 0.5397727272727273, + "English,Indonesian,Malay": 0.44886363636363635, + "English,Filipino,Spanish": 0.2840909090909091, + "English,Filipino,Malay": 0.25, + "English,Spanish,Malay": 0.5056818181818182, + "Vietnamese,Chinese,Indonesian": 0.3409090909090909, + "Vietnamese,Chinese,Filipino": 0.16477272727272727, + "Vietnamese,Chinese,Spanish": 0.375, + "Vietnamese,Chinese,Malay": 0.3693181818181818, + "Vietnamese,Indonesian,Filipino": 0.21022727272727273, + "Vietnamese,Indonesian,Spanish": 0.45454545454545453, + "Vietnamese,Indonesian,Malay": 0.4318181818181818, + "Vietnamese,Filipino,Spanish": 0.23295454545454544, + "Vietnamese,Filipino,Malay": 0.2159090909090909, + "Vietnamese,Spanish,Malay": 0.4431818181818182, + "Chinese,Indonesian,Filipino": 0.16477272727272727, + "Chinese,Indonesian,Spanish": 0.3977272727272727, + "Chinese,Indonesian,Malay": 0.3693181818181818, + "Chinese,Filipino,Spanish": 0.19318181818181818, + "Chinese,Filipino,Malay": 0.18181818181818182, + "Chinese,Spanish,Malay": 0.3977272727272727, + "Indonesian,Filipino,Spanish": 0.24431818181818182, + "Indonesian,Filipino,Malay": 0.23863636363636365, + "Indonesian,Spanish,Malay": 0.48863636363636365, + "Filipino,Spanish,Malay": 0.24431818181818182 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.2840909090909091, + "English,Vietnamese,Chinese,Filipino": 0.14204545454545456, + "English,Vietnamese,Chinese,Spanish": 0.32386363636363635, + "English,Vietnamese,Chinese,Malay": 0.30113636363636365, + "English,Vietnamese,Indonesian,Filipino": 0.1875, + "English,Vietnamese,Indonesian,Spanish": 0.4034090909090909, + "English,Vietnamese,Indonesian,Malay": 0.3522727272727273, + "English,Vietnamese,Filipino,Spanish": 0.20454545454545456, + "English,Vietnamese,Filipino,Malay": 0.17613636363636365, + "English,Vietnamese,Spanish,Malay": 0.3806818181818182, + "English,Chinese,Indonesian,Filipino": 0.14204545454545456, + "English,Chinese,Indonesian,Spanish": 0.35795454545454547, + "English,Chinese,Indonesian,Malay": 0.3068181818181818, + "English,Chinese,Filipino,Spanish": 0.17613636363636365, + "English,Chinese,Filipino,Malay": 0.1534090909090909, + "English,Chinese,Spanish,Malay": 0.3465909090909091, + "English,Indonesian,Filipino,Spanish": 0.21022727272727273, + "English,Indonesian,Filipino,Malay": 0.18181818181818182, + "English,Indonesian,Spanish,Malay": 0.4147727272727273, + "English,Filipino,Spanish,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2840909090909091, + "Vietnamese,Chinese,Indonesian,Malay": 0.2840909090909091, + "Vietnamese,Chinese,Filipino,Spanish": 0.13068181818181818, + "Vietnamese,Chinese,Filipino,Malay": 0.125, + "Vietnamese,Chinese,Spanish,Malay": 0.3068181818181818, + "Vietnamese,Indonesian,Filipino,Spanish": 0.17045454545454544, + "Vietnamese,Indonesian,Filipino,Malay": 0.16477272727272727, + "Vietnamese,Indonesian,Spanish,Malay": 0.35795454545454547, + "Vietnamese,Filipino,Spanish,Malay": 0.1590909090909091, + "Chinese,Indonesian,Filipino,Spanish": 0.125, + "Chinese,Indonesian,Filipino,Malay": 0.13068181818181818, + "Chinese,Indonesian,Spanish,Malay": 0.3125, + "Chinese,Filipino,Spanish,Malay": 0.13068181818181818, + "Indonesian,Filipino,Spanish,Malay": 0.1875 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.10795454545454546, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.26136363636363635, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.23863636363636365, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Chinese,Spanish,Malay": 0.26704545454545453, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.16477272727272727, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.14772727272727273, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.32386363636363635, + "English,Vietnamese,Filipino,Spanish,Malay": 0.14772727272727273, + "English,Chinese,Indonesian,Filipino,Spanish": 0.125, + "English,Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Spanish,Malay": 0.2840909090909091, + "English,Chinese,Filipino,Spanish,Malay": 0.125, + "English,Indonesian,Filipino,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.24431818181818182, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.2215909090909091, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.125, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818 + } + }, + "AC3_2": 0.44586429608271816, + "AC3_3": 0.36298889176173815, + "AC3_4": 0.29347276095827285, + "AC3_5": 0.23079559732282023, + "AC3_6": 0.17203575641693372, + "AC3_7": 0.11605415858199924 + }, + "prompt_4": { + "overall_acc": 0.39691558441558433, + "language_acc": { + "English": 0.4375, + "Vietnamese": 0.4318181818181818, + "Chinese": 0.4431818181818182, + "Indonesian": 0.38636363636363635, + "Filipino": 0.29545454545454547, + "Spanish": 0.3977272727272727, + "Malay": 0.38636363636363635 + }, + "consistency_score_2": 0.5116341991341992, + "consistency_score_3": 0.33051948051948055, + "consistency_score_4": 0.22905844155844157, + "consistency_score_5": 0.16152597402597402, + "consistency_score_6": 0.11201298701298701, + "consistency_score_7": 0.07386363636363637, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5852272727272727, + "English,Chinese": 0.5511363636363636, + "English,Indonesian": 0.5681818181818182, + "English,Filipino": 0.3522727272727273, + "English,Spanish": 0.6875, + "English,Malay": 0.5511363636363636, + "Vietnamese,Chinese": 0.48863636363636365, + "Vietnamese,Indonesian": 0.5738636363636364, + "Vietnamese,Filipino": 0.32386363636363635, + "Vietnamese,Spanish": 0.5852272727272727, + "Vietnamese,Malay": 0.6022727272727273, + "Chinese,Indonesian": 0.5340909090909091, + "Chinese,Filipino": 0.3352272727272727, + "Chinese,Spanish": 0.5681818181818182, + "Chinese,Malay": 0.5340909090909091, + "Indonesian,Filipino": 0.35795454545454547, + "Indonesian,Spanish": 0.5965909090909091, + "Indonesian,Malay": 0.6534090909090909, + "Filipino,Spanish": 0.36363636363636365, + "Filipino,Malay": 0.375, + "Spanish,Malay": 0.5568181818181818 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.36363636363636365, + "English,Vietnamese,Indonesian": 0.4147727272727273, + "English,Vietnamese,Filipino": 0.23295454545454544, + "English,Vietnamese,Spanish": 0.45454545454545453, + "English,Vietnamese,Malay": 0.42045454545454547, + "English,Chinese,Indonesian": 0.3806818181818182, + "English,Chinese,Filipino": 0.22727272727272727, + "English,Chinese,Spanish": 0.44886363636363635, + "English,Chinese,Malay": 0.375, + "English,Indonesian,Filipino": 0.22727272727272727, + "English,Indonesian,Spanish": 0.4602272727272727, + "English,Indonesian,Malay": 0.42045454545454547, + "English,Filipino,Spanish": 0.2784090909090909, + "English,Filipino,Malay": 0.23295454545454544, + "English,Spanish,Malay": 0.42613636363636365, + "Vietnamese,Chinese,Indonesian": 0.3409090909090909, + "Vietnamese,Chinese,Filipino": 0.17613636363636365, + "Vietnamese,Chinese,Spanish": 0.375, + "Vietnamese,Chinese,Malay": 0.36363636363636365, + "Vietnamese,Indonesian,Filipino": 0.20454545454545456, + "Vietnamese,Indonesian,Spanish": 0.4090909090909091, + "Vietnamese,Indonesian,Malay": 0.4431818181818182, + "Vietnamese,Filipino,Spanish": 0.25, + "Vietnamese,Filipino,Malay": 0.2215909090909091, + "Vietnamese,Spanish,Malay": 0.4147727272727273, + "Chinese,Indonesian,Filipino": 0.21022727272727273, + "Chinese,Indonesian,Spanish": 0.39204545454545453, + "Chinese,Indonesian,Malay": 0.3977272727272727, + "Chinese,Filipino,Spanish": 0.22727272727272727, + "Chinese,Filipino,Malay": 0.2159090909090909, + "Chinese,Spanish,Malay": 0.39204545454545453, + "Indonesian,Filipino,Spanish": 0.23295454545454544, + "Indonesian,Filipino,Malay": 0.26136363636363635, + "Indonesian,Spanish,Malay": 0.4431818181818182, + "Filipino,Spanish,Malay": 0.23295454545454544 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.26704545454545453, + "English,Vietnamese,Chinese,Filipino": 0.1534090909090909, + "English,Vietnamese,Chinese,Spanish": 0.3125, + "English,Vietnamese,Chinese,Malay": 0.2784090909090909, + "English,Vietnamese,Indonesian,Filipino": 0.1590909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.3409090909090909, + "English,Vietnamese,Indonesian,Malay": 0.32386363636363635, + "English,Vietnamese,Filipino,Spanish": 0.21022727272727273, + "English,Vietnamese,Filipino,Malay": 0.1590909090909091, + "English,Vietnamese,Spanish,Malay": 0.3409090909090909, + "English,Chinese,Indonesian,Filipino": 0.1590909090909091, + "English,Chinese,Indonesian,Spanish": 0.3352272727272727, + "English,Chinese,Indonesian,Malay": 0.29545454545454547, + "English,Chinese,Filipino,Spanish": 0.19318181818181818, + "English,Chinese,Filipino,Malay": 0.16477272727272727, + "English,Chinese,Spanish,Malay": 0.32386363636363635, + "English,Indonesian,Filipino,Spanish": 0.19318181818181818, + "English,Indonesian,Filipino,Malay": 0.17613636363636365, + "English,Indonesian,Spanish,Malay": 0.3465909090909091, + "English,Filipino,Spanish,Malay": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian,Filipino": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2727272727272727, + "Vietnamese,Chinese,Indonesian,Malay": 0.2840909090909091, + "Vietnamese,Chinese,Filipino,Spanish": 0.14772727272727273, + "Vietnamese,Chinese,Filipino,Malay": 0.13068181818181818, + "Vietnamese,Chinese,Spanish,Malay": 0.2897727272727273, + "Vietnamese,Indonesian,Filipino,Spanish": 0.17613636363636365, + "Vietnamese,Indonesian,Filipino,Malay": 0.16477272727272727, + "Vietnamese,Indonesian,Spanish,Malay": 0.3409090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.17613636363636365, + "Chinese,Indonesian,Filipino,Spanish": 0.1534090909090909, + "Chinese,Indonesian,Filipino,Malay": 0.16477272727272727, + "Chinese,Indonesian,Spanish,Malay": 0.3181818181818182, + "Chinese,Filipino,Spanish,Malay": 0.1590909090909091, + "Indonesian,Filipino,Spanish,Malay": 0.1875 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.23295454545454544, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.2215909090909091, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Chinese,Spanish,Malay": 0.25, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.14772727272727273, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.11931818181818182, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.2784090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.14772727272727273, + "English,Chinese,Indonesian,Filipino,Spanish": 0.13636363636363635, + "English,Chinese,Indonesian,Filipino,Malay": 0.13068181818181818, + "English,Chinese,Indonesian,Spanish,Malay": 0.26704545454545453, + "English,Chinese,Filipino,Spanish,Malay": 0.14204545454545456, + "English,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.23863636363636365, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.14204545454545456, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.13068181818181818 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.20454545454545456, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.09659090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637 + } + }, + "AC3_2": 0.4470323823986054, + "AC3_3": 0.3606874044896398, + "AC3_4": 0.2904812704984032, + "AC3_5": 0.22961105024580597, + "AC3_6": 0.1747188218136777, + "AC3_7": 0.12454937301429635 + }, + "prompt_5": { + "overall_acc": 0.3887987012987013, + "language_acc": { + "English": 0.45454545454545453, + "Vietnamese": 0.4147727272727273, + "Chinese": 0.4318181818181818, + "Indonesian": 0.3693181818181818, + "Filipino": 0.29545454545454547, + "Spanish": 0.3806818181818182, + "Malay": 0.375 + }, + "consistency_score_2": 0.5449134199134199, + "consistency_score_3": 0.37370129870129876, + "consistency_score_4": 0.27435064935064934, + "consistency_score_5": 0.20589826839826836, + "consistency_score_6": 0.15422077922077923, + "consistency_score_7": 0.11363636363636363, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.6647727272727273, + "English,Chinese": 0.5965909090909091, + "English,Indonesian": 0.6590909090909091, + "English,Filipino": 0.39204545454545453, + "English,Spanish": 0.75, + "English,Malay": 0.625, + "Vietnamese,Chinese": 0.5397727272727273, + "Vietnamese,Indonesian": 0.6136363636363636, + "Vietnamese,Filipino": 0.3522727272727273, + "Vietnamese,Spanish": 0.625, + "Vietnamese,Malay": 0.6022727272727273, + "Chinese,Indonesian": 0.5227272727272727, + "Chinese,Filipino": 0.3068181818181818, + "Chinese,Spanish": 0.5681818181818182, + "Chinese,Malay": 0.5227272727272727, + "Indonesian,Filipino": 0.3465909090909091, + "Indonesian,Spanish": 0.7045454545454546, + "Indonesian,Malay": 0.6306818181818182, + "Filipino,Spanish": 0.4034090909090909, + "Filipino,Malay": 0.4090909090909091, + "Spanish,Malay": 0.6079545454545454 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.4431818181818182, + "English,Vietnamese,Indonesian": 0.5056818181818182, + "English,Vietnamese,Filipino": 0.26704545454545453, + "English,Vietnamese,Spanish": 0.5397727272727273, + "English,Vietnamese,Malay": 0.4772727272727273, + "English,Chinese,Indonesian": 0.4375, + "English,Chinese,Filipino": 0.25, + "English,Chinese,Spanish": 0.48863636363636365, + "English,Chinese,Malay": 0.4147727272727273, + "English,Indonesian,Filipino": 0.2727272727272727, + "English,Indonesian,Spanish": 0.5738636363636364, + "English,Indonesian,Malay": 0.48863636363636365, + "English,Filipino,Spanish": 0.32954545454545453, + "English,Filipino,Malay": 0.2897727272727273, + "English,Spanish,Malay": 0.5056818181818182, + "Vietnamese,Chinese,Indonesian": 0.3806818181818182, + "Vietnamese,Chinese,Filipino": 0.19886363636363635, + "Vietnamese,Chinese,Spanish": 0.4090909090909091, + "Vietnamese,Chinese,Malay": 0.39204545454545453, + "Vietnamese,Indonesian,Filipino": 0.23295454545454544, + "Vietnamese,Indonesian,Spanish": 0.4943181818181818, + "Vietnamese,Indonesian,Malay": 0.4715909090909091, + "Vietnamese,Filipino,Spanish": 0.2897727272727273, + "Vietnamese,Filipino,Malay": 0.2556818181818182, + "Vietnamese,Spanish,Malay": 0.4602272727272727, + "Chinese,Indonesian,Filipino": 0.19886363636363635, + "Chinese,Indonesian,Spanish": 0.4318181818181818, + "Chinese,Indonesian,Malay": 0.3806818181818182, + "Chinese,Filipino,Spanish": 0.2556818181818182, + "Chinese,Filipino,Malay": 0.21022727272727273, + "Chinese,Spanish,Malay": 0.4034090909090909, + "Indonesian,Filipino,Spanish": 0.2840909090909091, + "Indonesian,Filipino,Malay": 0.26704545454545453, + "Indonesian,Spanish,Malay": 0.5, + "Filipino,Spanish,Malay": 0.2784090909090909 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.3409090909090909, + "English,Vietnamese,Chinese,Filipino": 0.17613636363636365, + "English,Vietnamese,Chinese,Spanish": 0.375, + "English,Vietnamese,Chinese,Malay": 0.32954545454545453, + "English,Vietnamese,Indonesian,Filipino": 0.20454545454545456, + "English,Vietnamese,Indonesian,Spanish": 0.45454545454545453, + "English,Vietnamese,Indonesian,Malay": 0.4034090909090909, + "English,Vietnamese,Filipino,Spanish": 0.25, + "English,Vietnamese,Filipino,Malay": 0.20454545454545456, + "English,Vietnamese,Spanish,Malay": 0.4147727272727273, + "English,Chinese,Indonesian,Filipino": 0.1875, + "English,Chinese,Indonesian,Spanish": 0.4034090909090909, + "English,Chinese,Indonesian,Malay": 0.3465909090909091, + "English,Chinese,Filipino,Spanish": 0.22727272727272727, + "English,Chinese,Filipino,Malay": 0.1875, + "English,Chinese,Spanish,Malay": 0.35795454545454547, + "English,Indonesian,Filipino,Spanish": 0.2556818181818182, + "English,Indonesian,Filipino,Malay": 0.22727272727272727, + "English,Indonesian,Spanish,Malay": 0.4431818181818182, + "English,Filipino,Spanish,Malay": 0.25, + "Vietnamese,Chinese,Indonesian,Filipino": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Spanish": 0.32386363636363635, + "Vietnamese,Chinese,Indonesian,Malay": 0.3125, + "Vietnamese,Chinese,Filipino,Spanish": 0.1875, + "Vietnamese,Chinese,Filipino,Malay": 0.14772727272727273, + "Vietnamese,Chinese,Spanish,Malay": 0.32386363636363635, + "Vietnamese,Indonesian,Filipino,Spanish": 0.2159090909090909, + "Vietnamese,Indonesian,Filipino,Malay": 0.20454545454545456, + "Vietnamese,Indonesian,Spanish,Malay": 0.39204545454545453, + "Vietnamese,Filipino,Spanish,Malay": 0.22727272727272727, + "Chinese,Indonesian,Filipino,Spanish": 0.1875, + "Chinese,Indonesian,Filipino,Malay": 0.1590909090909091, + "Chinese,Indonesian,Spanish,Malay": 0.3352272727272727, + "Chinese,Filipino,Spanish,Malay": 0.18181818181818182, + "Indonesian,Filipino,Spanish,Malay": 0.23295454545454544 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.13068181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.3181818181818182, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.2840909090909091, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.17045454545454544, + "English,Vietnamese,Chinese,Filipino,Malay": 0.13068181818181818, + "English,Vietnamese,Chinese,Spanish,Malay": 0.29545454545454547, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.19886363636363635, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.17613636363636365, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.3693181818181818, + "English,Vietnamese,Filipino,Spanish,Malay": 0.19886363636363635, + "English,Chinese,Indonesian,Filipino,Spanish": 0.18181818181818182, + "English,Chinese,Indonesian,Filipino,Malay": 0.1590909090909091, + "English,Chinese,Indonesian,Spanish,Malay": 0.32386363636363635, + "English,Chinese,Filipino,Spanish,Malay": 0.17045454545454544, + "English,Indonesian,Filipino,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.2727272727272727, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.1875, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.13068181818181818, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.26704545454545453, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.13068181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.17045454545454544, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363 + } + }, + "AC3_2": 0.4538050329363786, + "AC3_3": 0.38110053662858484, + "AC3_4": 0.3216988029844416, + "AC3_5": 0.2692227585428201, + "AC3_6": 0.2208423116088172, + "AC3_7": 0.17587017179639605 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5048543689320388 + }, + "prompt_2": { + "accuracy": 0.49514563106796117 + }, + "prompt_3": { + "accuracy": 0.5242718446601942 + }, + "prompt_4": { + "accuracy": 0.49514563106796117 + }, + "prompt_5": { + "accuracy": 0.46601941747572817 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5333333333333333 + }, + "prompt_2": { + "accuracy": 0.5619047619047619 + }, + "prompt_3": { + "accuracy": 0.49523809523809526 + }, + "prompt_4": { + "accuracy": 0.4666666666666667 + }, + "prompt_5": { + "accuracy": 0.44761904761904764 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.411214953271028 + }, + "prompt_2": { + "accuracy": 0.3925233644859813 + }, + "prompt_3": { + "accuracy": 0.411214953271028 + }, + "prompt_4": { + "accuracy": 0.4392523364485981 + }, + "prompt_5": { + "accuracy": 0.4485981308411215 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.38, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.6, + "history": 0.4666666666666667, + "literature": 0.3, + "politics": 0.5, + "culture": 0.1, + "film": 0.2, + "law": 0.4, + "geography": 0.7 + } + }, + "prompt_2": { + "accuracy": 0.34, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.5, + "history": 0.4666666666666667, + "literature": 0.3, + "politics": 0.4, + "culture": 0.1, + "film": 0.2, + "law": 0.4, + "geography": 0.5 + } + }, + "prompt_3": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.4, + "demographics": 0.2, + "biology": 0.3, + "history": 0.4, + "literature": 0.3, + "politics": 0.4, + "culture": 0.1, + "film": 0.1, + "law": 0.3, + "geography": 0.6 + } + }, + "prompt_4": { + "accuracy": 0.33, + "category_acc": { + "brand": 0.2, + "demographics": 0.2, + "biology": 0.4, + "history": 0.4666666666666667, + "literature": 0.3, + "politics": 0.4, + "culture": 0.1, + "film": 0.2, + "law": 0.4, + "geography": 0.5 + } + }, + "prompt_5": { + "accuracy": 0.34, + "category_acc": { + "brand": 0.4, + "demographics": 0.2, + "biology": 0.4, + "history": 0.4666666666666667, + "literature": 0.4, + "politics": 0.4, + "culture": 0.1, + "film": 0.1, + "law": 0.3, + "geography": 0.5 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.14618315842245094 + }, + "prompt_2": { + "bleu_score": 0.1449608039549639 + }, + "prompt_3": { + "bleu_score": 0.17199703149273404 + }, + "prompt_4": { + "bleu_score": 0.1706466117685109 + }, + "prompt_5": { + "bleu_score": 0.16115075683396307 + } }, "indommlu": { - "prompt_1": -1, + "prompt_1": { + "accuracy": 0.43574337405701313, + "category_acc": { + "History": 0.3714859437751004, + "Geography": 0.3836734693877551, + "Lampungic": 0.272108843537415, + "Social science": 0.6477462437395659, + "Balinese": 0.2929936305732484, + "Makassarese": 0.27956989247311825, + "Banjarese": 0.3333333333333333, + "Chemistry": 0.2934306569343066, + "Biology": 0.39289940828402364, + "Science": 0.5335397316821465, + "Christian religion": 0.5771144278606966, + "Art": 0.5158069883527454, + "Islam religion": 0.4907539118065434, + "Hindu religion": 0.3933333333333333, + "Madurese": 0.2745762711864407, + "Sport": 0.4864864864864865, + "Indonesian language": 0.5146326276463262, + "Physics": 0.3474747474747475, + "Minangkabau culture": 0.38190954773869346, + "Dayak language": 0.25688073394495414, + "Sociology": 0.4576612903225806, + "Economy": 0.3831967213114754, + "Sundanese": 0.3439930855661193, + "Javanese": 0.3387096774193548, + "Civic education": 0.5407725321888412 + } + }, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.575615422859229 + }, + "prompt_2": { + "bleu_score": 0.6228866335793971 + }, + "prompt_3": { + "bleu_score": 0.5922155762723892 + }, + "prompt_4": { + "bleu_score": 0.5737102434209379 + }, + "prompt_5": { + "bleu_score": 0.571437439183794 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.5389269550567992 + }, + "prompt_2": { + "bleu_score": 0.5816244659711519 + }, + "prompt_3": { + "bleu_score": 0.5422631034724344 + }, + "prompt_4": { + "bleu_score": 0.5301540187518774 + }, + "prompt_5": { + "bleu_score": 0.5493798635473321 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.4445508601711929 + }, + "prompt_2": { + "bleu_score": 0.49792645991224727 + }, + "prompt_3": { + "bleu_score": 0.4566313739381107 + }, + "prompt_4": { + "bleu_score": 0.4472236614664973 + }, + "prompt_5": { + "bleu_score": 0.4464139377348458 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.5232589594049941 + }, + "prompt_2": { + "bleu_score": 0.574577083676138 + }, + "prompt_3": { + "bleu_score": 0.5340262091477525 + }, + "prompt_4": { + "bleu_score": 0.5023095444128485 + }, + "prompt_5": { + "bleu_score": 0.5241891791372568 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4457409568261377 + }, + "prompt_2": { + "accuracy": 0.45274212368728123 + }, + "prompt_3": { + "accuracy": 0.4515752625437573 + }, + "prompt_4": { + "accuracy": 0.4364060676779463 + }, + "prompt_5": { + "accuracy": 0.4469078179696616 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4390418305327136, + "category_acc": { + "high_school_european_history": 0.5853658536585366, + "business_ethics": 0.5050505050505051, + "clinical_knowledge": 0.5, + "medical_genetics": 0.5252525252525253, + "high_school_us_history": 0.5123152709359606, + "high_school_physics": 0.36, + "high_school_world_history": 0.673728813559322, + "virology": 0.3393939393939394, + "high_school_microeconomics": 0.45569620253164556, + "econometrics": 0.336283185840708, + "college_computer_science": 0.41414141414141414, + "high_school_biology": 0.5080906148867314, + "abstract_algebra": 0.35353535353535354, + "professional_accounting": 0.3202846975088968, + "philosophy": 0.44516129032258067, + "professional_medicine": 0.45387453874538747, + "nutrition": 0.45901639344262296, + "global_facts": 0.29292929292929293, + "machine_learning": 0.2882882882882883, + "security_studies": 0.45081967213114754, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.4533551554828151, + "prehistory": 0.43653250773993807, + "anatomy": 0.4626865671641791, + "human_sexuality": 0.3769230769230769, + "college_medicine": 0.43023255813953487, + "high_school_government_and_politics": 0.5520833333333334, + "college_chemistry": 0.3838383838383838, + "logical_fallacies": 0.4567901234567901, + "high_school_geography": 0.5736040609137056, + "elementary_mathematics": 0.2864721485411141, + "human_aging": 0.536036036036036, + "college_mathematics": 0.3333333333333333, + "high_school_psychology": 0.6415441176470589, + "formal_logic": 0.32, + "high_school_statistics": 0.3674418604651163, + "international_law": 0.5666666666666667, + "high_school_mathematics": 0.26765799256505574, + "high_school_computer_science": 0.5252525252525253, + "conceptual_physics": 0.41025641025641024, + "miscellaneous": 0.5652173913043478, + "high_school_chemistry": 0.3613861386138614, + "marketing": 0.6866952789699571, + "professional_law": 0.32093933463796476, + "management": 0.6176470588235294, + "college_physics": 0.32673267326732675, + "jurisprudence": 0.4766355140186916, + "world_religions": 0.5529411764705883, + "sociology": 0.56, + "us_foreign_policy": 0.5050505050505051, + "high_school_macroeconomics": 0.4370179948586118, + "computer_security": 0.5151515151515151, + "moral_scenarios": 0.2785234899328859, + "moral_disputes": 0.42318840579710143, + "electrical_engineering": 0.4027777777777778, + "astronomy": 0.46357615894039733, + "college_biology": 0.48951048951048953 + } + }, + "prompt_2": { + "accuracy": 0.44097247050411154, + "category_acc": { + "high_school_european_history": 0.5975609756097561, + "business_ethics": 0.47474747474747475, + "clinical_knowledge": 0.5, + "medical_genetics": 0.5252525252525253, + "high_school_us_history": 0.5123152709359606, + "high_school_physics": 0.3466666666666667, + "high_school_world_history": 0.6822033898305084, + "virology": 0.34545454545454546, + "high_school_microeconomics": 0.45569620253164556, + "econometrics": 0.3274336283185841, + "college_computer_science": 0.41414141414141414, + "high_school_biology": 0.5048543689320388, + "abstract_algebra": 0.30303030303030304, + "professional_accounting": 0.3167259786476868, + "philosophy": 0.45806451612903226, + "professional_medicine": 0.44649446494464945, + "nutrition": 0.46557377049180326, + "global_facts": 0.2727272727272727, + "machine_learning": 0.3153153153153153, + "security_studies": 0.47540983606557374, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.4402618657937807, + "prehistory": 0.44272445820433437, + "anatomy": 0.4552238805970149, + "human_sexuality": 0.36153846153846153, + "college_medicine": 0.4186046511627907, + "high_school_government_and_politics": 0.5625, + "college_chemistry": 0.37373737373737376, + "logical_fallacies": 0.4691358024691358, + "high_school_geography": 0.5634517766497462, + "elementary_mathematics": 0.3103448275862069, + "human_aging": 0.527027027027027, + "college_mathematics": 0.30303030303030304, + "high_school_psychology": 0.6452205882352942, + "formal_logic": 0.312, + "high_school_statistics": 0.39069767441860465, + "international_law": 0.5416666666666666, + "high_school_mathematics": 0.275092936802974, + "high_school_computer_science": 0.5151515151515151, + "conceptual_physics": 0.4017094017094017, + "miscellaneous": 0.5677749360613811, + "high_school_chemistry": 0.3613861386138614, + "marketing": 0.6909871244635193, + "professional_law": 0.32746249184605347, + "management": 0.6274509803921569, + "college_physics": 0.3465346534653465, + "jurisprudence": 0.48598130841121495, + "world_religions": 0.5117647058823529, + "sociology": 0.585, + "us_foreign_policy": 0.5151515151515151, + "high_school_macroeconomics": 0.4318766066838046, + "computer_security": 0.5353535353535354, + "moral_scenarios": 0.2897091722595078, + "moral_disputes": 0.4289855072463768, + "electrical_engineering": 0.4027777777777778, + "astronomy": 0.46357615894039733, + "college_biology": 0.48951048951048953 + } + }, + "prompt_3": { + "accuracy": 0.4366821594565606, + "category_acc": { + "high_school_european_history": 0.573170731707317, + "business_ethics": 0.46464646464646464, + "clinical_knowledge": 0.5, + "medical_genetics": 0.5252525252525253, + "high_school_us_history": 0.5024630541871922, + "high_school_physics": 0.35333333333333333, + "high_school_world_history": 0.673728813559322, + "virology": 0.3393939393939394, + "high_school_microeconomics": 0.4430379746835443, + "econometrics": 0.3008849557522124, + "college_computer_science": 0.40404040404040403, + "high_school_biology": 0.49514563106796117, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.30604982206405695, + "philosophy": 0.4483870967741935, + "professional_medicine": 0.46494464944649444, + "nutrition": 0.46557377049180326, + "global_facts": 0.2727272727272727, + "machine_learning": 0.35135135135135137, + "security_studies": 0.45491803278688525, + "public_relations": 0.5504587155963303, + "professional_psychology": 0.4369885433715221, + "prehistory": 0.43343653250773995, + "anatomy": 0.4626865671641791, + "human_sexuality": 0.36153846153846153, + "college_medicine": 0.42441860465116277, + "high_school_government_and_politics": 0.5677083333333334, + "college_chemistry": 0.3838383838383838, + "logical_fallacies": 0.4691358024691358, + "high_school_geography": 0.5685279187817259, + "elementary_mathematics": 0.2917771883289125, + "human_aging": 0.5180180180180181, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.6415441176470589, + "formal_logic": 0.328, + "high_school_statistics": 0.37209302325581395, + "international_law": 0.55, + "high_school_mathematics": 0.2788104089219331, + "high_school_computer_science": 0.494949494949495, + "conceptual_physics": 0.41025641025641024, + "miscellaneous": 0.5601023017902813, + "high_school_chemistry": 0.3613861386138614, + "marketing": 0.6866952789699571, + "professional_law": 0.3294194390084801, + "management": 0.6078431372549019, + "college_physics": 0.32673267326732675, + "jurisprudence": 0.4953271028037383, + "world_religions": 0.5, + "sociology": 0.575, + "us_foreign_policy": 0.494949494949495, + "high_school_macroeconomics": 0.4370179948586118, + "computer_security": 0.494949494949495, + "moral_scenarios": 0.2785234899328859, + "moral_disputes": 0.43768115942028984, + "electrical_engineering": 0.3958333333333333, + "astronomy": 0.4370860927152318, + "college_biology": 0.4755244755244755 + } + }, + "prompt_4": { + "accuracy": 0.4260278870218091, + "category_acc": { + "high_school_european_history": 0.6036585365853658, + "business_ethics": 0.43434343434343436, + "clinical_knowledge": 0.5, + "medical_genetics": 0.5353535353535354, + "high_school_us_history": 0.47783251231527096, + "high_school_physics": 0.36, + "high_school_world_history": 0.6483050847457628, + "virology": 0.3393939393939394, + "high_school_microeconomics": 0.43037974683544306, + "econometrics": 0.3185840707964602, + "college_computer_science": 0.43434343434343436, + "high_school_biology": 0.4919093851132686, + "abstract_algebra": 0.3333333333333333, + "professional_accounting": 0.31316725978647686, + "philosophy": 0.43870967741935485, + "professional_medicine": 0.43911439114391143, + "nutrition": 0.4491803278688525, + "global_facts": 0.2828282828282828, + "machine_learning": 0.3153153153153153, + "security_studies": 0.4180327868852459, + "public_relations": 0.5321100917431193, + "professional_psychology": 0.42225859247135844, + "prehistory": 0.44272445820433437, + "anatomy": 0.4626865671641791, + "human_sexuality": 0.38461538461538464, + "college_medicine": 0.43023255813953487, + "high_school_government_and_politics": 0.53125, + "college_chemistry": 0.35353535353535354, + "logical_fallacies": 0.4691358024691358, + "high_school_geography": 0.5736040609137056, + "elementary_mathematics": 0.27320954907161804, + "human_aging": 0.509009009009009, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.6213235294117647, + "formal_logic": 0.312, + "high_school_statistics": 0.3488372093023256, + "international_law": 0.525, + "high_school_mathematics": 0.24535315985130113, + "high_school_computer_science": 0.47474747474747475, + "conceptual_physics": 0.41025641025641024, + "miscellaneous": 0.5485933503836317, + "high_school_chemistry": 0.3316831683168317, + "marketing": 0.6266094420600858, + "professional_law": 0.32093933463796476, + "management": 0.6078431372549019, + "college_physics": 0.3465346534653465, + "jurisprudence": 0.4392523364485981, + "world_religions": 0.5294117647058824, + "sociology": 0.56, + "us_foreign_policy": 0.48484848484848486, + "high_school_macroeconomics": 0.4241645244215938, + "computer_security": 0.5252525252525253, + "moral_scenarios": 0.26286353467561524, + "moral_disputes": 0.41739130434782606, + "electrical_engineering": 0.3611111111111111, + "astronomy": 0.48344370860927155, + "college_biology": 0.45454545454545453 + } + }, + "prompt_5": { + "accuracy": 0.43253485877726133, + "category_acc": { + "high_school_european_history": 0.573170731707317, + "business_ethics": 0.43434343434343436, + "clinical_knowledge": 0.5037878787878788, + "medical_genetics": 0.5555555555555556, + "high_school_us_history": 0.49261083743842365, + "high_school_physics": 0.3466666666666667, + "high_school_world_history": 0.6567796610169492, + "virology": 0.3333333333333333, + "high_school_microeconomics": 0.43037974683544306, + "econometrics": 0.34513274336283184, + "college_computer_science": 0.3838383838383838, + "high_school_biology": 0.49514563106796117, + "abstract_algebra": 0.29292929292929293, + "professional_accounting": 0.3167259786476868, + "philosophy": 0.4612903225806452, + "professional_medicine": 0.45018450184501846, + "nutrition": 0.4557377049180328, + "global_facts": 0.2828282828282828, + "machine_learning": 0.3153153153153153, + "security_studies": 0.45491803278688525, + "public_relations": 0.5504587155963303, + "professional_psychology": 0.425531914893617, + "prehistory": 0.44272445820433437, + "anatomy": 0.4626865671641791, + "human_sexuality": 0.36923076923076925, + "college_medicine": 0.4186046511627907, + "high_school_government_and_politics": 0.5625, + "college_chemistry": 0.3434343434343434, + "logical_fallacies": 0.47530864197530864, + "high_school_geography": 0.5482233502538071, + "elementary_mathematics": 0.27055702917771884, + "human_aging": 0.509009009009009, + "college_mathematics": 0.32323232323232326, + "high_school_psychology": 0.6360294117647058, + "formal_logic": 0.304, + "high_school_statistics": 0.3302325581395349, + "international_law": 0.55, + "high_school_mathematics": 0.24907063197026022, + "high_school_computer_science": 0.5252525252525253, + "conceptual_physics": 0.41452991452991456, + "miscellaneous": 0.5677749360613811, + "high_school_chemistry": 0.33663366336633666, + "marketing": 0.6566523605150214, + "professional_law": 0.3287671232876712, + "management": 0.5882352941176471, + "college_physics": 0.297029702970297, + "jurisprudence": 0.45794392523364486, + "world_religions": 0.4588235294117647, + "sociology": 0.595, + "us_foreign_policy": 0.47474747474747475, + "high_school_macroeconomics": 0.442159383033419, + "computer_security": 0.5454545454545454, + "moral_scenarios": 0.28859060402684567, + "moral_disputes": 0.42028985507246375, + "electrical_engineering": 0.4027777777777778, + "astronomy": 0.46357615894039733, + "college_biology": 0.4825174825174825 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.42570579494799404 + }, + "prompt_2": { + "accuracy": 0.4182763744427935 + }, + "prompt_3": { + "accuracy": 0.4086181277860327 + }, + "prompt_4": { + "accuracy": 0.4160475482912333 + }, + "prompt_5": { + "accuracy": 0.4197622585438336 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.40597758405977585, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.40476190476190477, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.5238095238095238, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.5517241379310345, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.375, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.5384615384615384, + "middle_school_physics": 0.625, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.35, + "business_administration": 0.3684210526315789, + "marxism": 0.5833333333333334, + "mao_zedong_thought": 0.6551724137931034, + "education_science": 0.5882352941176471, + "teacher_qualification": 0.5510204081632653, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.375, + "middle_school_politics": 0.46153846153846156, + "middle_school_geography": 0.7058823529411765, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.625, + "logic": 0.2962962962962963, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.5789473684210527, + "professional_tour_guide": 0.4117647058823529, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.125, + "high_school_history": 0.52, + "middle_school_history": 0.4074074074074074, + "civil_servant": 0.3269230769230769, + "sports_science": 0.4166666666666667, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.5416666666666666, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.42592592592592593, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.4074074074074074, + "physician": 0.4444444444444444 + } + }, + "prompt_2": { + "accuracy": 0.39975093399750933, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.15384615384615385, + "college_programming": 0.40476190476190477, + "college_physics": 0.25, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.38095238095238093, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.4827586206896552, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.375, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.5384615384615384, + "middle_school_physics": 0.625, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.36666666666666664, + "business_administration": 0.3684210526315789, + "marxism": 0.5833333333333334, + "mao_zedong_thought": 0.6206896551724138, + "education_science": 0.5588235294117647, + "teacher_qualification": 0.6122448979591837, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.46153846153846156, + "middle_school_geography": 0.5882352941176471, + "modern_chinese_history": 0.5, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.25925925925925924, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.6052631578947368, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.48, + "middle_school_history": 0.4074074074074074, + "civil_servant": 0.36538461538461536, + "sports_science": 0.4166666666666667, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.5, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.3888888888888889, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.4074074074074074, + "physician": 0.4444444444444444 + } + }, + "prompt_3": { + "accuracy": 0.4078455790784558, + "category_acc": { + "computer_network": 0.5, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.15384615384615385, + "college_programming": 0.38095238095238093, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.47619047619047616, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.5172413793103449, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.4166666666666667, + "high_school_chemistry": 0.25, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.5769230769230769, + "middle_school_physics": 0.5833333333333334, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.38333333333333336, + "business_administration": 0.42105263157894735, + "marxism": 0.5833333333333334, + "mao_zedong_thought": 0.6206896551724138, + "education_science": 0.5588235294117647, + "teacher_qualification": 0.6122448979591837, + "high_school_politics": 0.375, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.46153846153846156, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.5, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.37037037037037035, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.5789473684210527, + "professional_tour_guide": 0.4411764705882353, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.52, + "middle_school_history": 0.48148148148148145, + "civil_servant": 0.40384615384615385, + "sports_science": 0.4583333333333333, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.5416666666666666, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.37037037037037035, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.3888888888888889, + "tax_accountant": 0.4074074074074074, + "physician": 0.4444444444444444 + } + }, + "prompt_4": { + "accuracy": 0.39975093399750933, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.35714285714285715, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.47619047619047616, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.25, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.5384615384615384, + "middle_school_physics": 0.5833333333333334, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.38333333333333336, + "business_administration": 0.39473684210526316, + "marxism": 0.5, + "mao_zedong_thought": 0.6206896551724138, + "education_science": 0.5588235294117647, + "teacher_qualification": 0.5102040816326531, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.4230769230769231, + "middle_school_geography": 0.6470588235294118, + "modern_chinese_history": 0.5, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.37037037037037035, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.631578947368421, + "professional_tour_guide": 0.4411764705882353, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.52, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.38461538461538464, + "sports_science": 0.5833333333333334, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.5416666666666666, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.39215686274509803, + "accountant": 0.3333333333333333, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.3888888888888889, + "tax_accountant": 0.35185185185185186, + "physician": 0.4444444444444444 + } + }, + "prompt_5": { + "accuracy": 0.3991282689912827, + "category_acc": { + "computer_network": 0.5, + "operating_system": 0.375, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.35714285714285715, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.47619047619047616, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.375, + "high_school_chemistry": 0.25, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.5769230769230769, + "middle_school_physics": 0.5833333333333334, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.36666666666666664, + "business_administration": 0.34210526315789475, + "marxism": 0.5416666666666666, + "mao_zedong_thought": 0.5517241379310345, + "education_science": 0.6470588235294118, + "teacher_qualification": 0.5714285714285714, + "high_school_politics": 0.5, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.5, + "middle_school_geography": 0.5882352941176471, + "modern_chinese_history": 0.5357142857142857, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.18518518518518517, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.47368421052631576, + "professional_tour_guide": 0.4411764705882353, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.16666666666666666, + "high_school_history": 0.52, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.36538461538461536, + "sports_science": 0.5, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.5416666666666666, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.35185185185185186, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.3888888888888889, + "physician": 0.42592592592592593 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4050179211469534 + }, + "prompt_2": { + "accuracy": 0.43727598566308246 + }, + "prompt_3": { + "accuracy": 0.4157706093189964 + }, + "prompt_4": { + "accuracy": 0.4014336917562724 + }, + "prompt_5": { + "accuracy": 0.41935483870967744 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.42376100846140563, + "category_acc": { + "agronomy": 0.4437869822485207, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.2926829268292683, + "arts": 0.575, + "astronomy": 0.36363636363636365, + "business_ethics": 0.4784688995215311, + "chinese_civil_service_exam": 0.35, + "chinese_driving_rule": 0.5954198473282443, + "chinese_food_culture": 0.39705882352941174, + "chinese_foreign_policy": 0.514018691588785, + "chinese_history": 0.5232198142414861, + "chinese_literature": 0.3235294117647059, + "chinese_teacher_qualification": 0.5586592178770949, + "clinical_knowledge": 0.3459915611814346, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.5420560747663551, + "college_engineering_hydrology": 0.3490566037735849, + "college_law": 0.3148148148148148, + "college_mathematics": 0.2, + "college_medical_statistics": 0.36792452830188677, + "college_medicine": 0.38461538461538464, + "computer_science": 0.44607843137254904, + "computer_security": 0.5029239766081871, + "conceptual_physics": 0.36054421768707484, + "construction_project_management": 0.35251798561151076, + "economics": 0.4276729559748428, + "education": 0.5460122699386503, + "electrical_engineering": 0.38953488372093026, + "elementary_chinese": 0.373015873015873, + "elementary_commonsense": 0.41919191919191917, + "elementary_information_and_technology": 0.6092436974789915, + "elementary_mathematics": 0.29130434782608694, + "ethnology": 0.4222222222222222, + "food_science": 0.4195804195804196, + "genetics": 0.3522727272727273, + "global_facts": 0.48322147651006714, + "high_school_biology": 0.2781065088757396, + "high_school_chemistry": 0.2196969696969697, + "high_school_geography": 0.423728813559322, + "high_school_mathematics": 0.2621951219512195, + "high_school_physics": 0.36363636363636365, + "high_school_politics": 0.3916083916083916, + "human_sexuality": 0.46825396825396826, + "international_law": 0.3675675675675676, + "journalism": 0.5290697674418605, + "jurisprudence": 0.4257907542579075, + "legal_and_moral_basis": 0.794392523364486, + "logical": 0.3902439024390244, + "machine_learning": 0.29508196721311475, + "management": 0.45714285714285713, + "marketing": 0.4888888888888889, + "marxist_theory": 0.5132275132275133, + "modern_chinese": 0.3620689655172414, + "nutrition": 0.4206896551724138, + "philosophy": 0.4380952380952381, + "professional_accounting": 0.48, + "professional_law": 0.3459715639810427, + "professional_medicine": 0.3058510638297872, + "professional_psychology": 0.4870689655172414, + "public_relations": 0.5229885057471264, + "security_study": 0.5037037037037037, + "sociology": 0.4778761061946903, + "sports_science": 0.44242424242424244, + "traditional_chinese_medicine": 0.372972972972973, + "virology": 0.4437869822485207, + "world_history": 0.4658385093167702, + "world_religions": 0.5 + } + }, + "prompt_2": { + "accuracy": 0.42384734933517526, + "category_acc": { + "agronomy": 0.4378698224852071, + "anatomy": 0.2972972972972973, + "ancient_chinese": 0.3170731707317073, + "arts": 0.58125, + "astronomy": 0.3696969696969697, + "business_ethics": 0.45933014354066987, + "chinese_civil_service_exam": 0.35625, + "chinese_driving_rule": 0.5877862595419847, + "chinese_food_culture": 0.40441176470588236, + "chinese_foreign_policy": 0.5046728971962616, + "chinese_history": 0.5170278637770898, + "chinese_literature": 0.3333333333333333, + "chinese_teacher_qualification": 0.5418994413407822, + "clinical_knowledge": 0.3333333333333333, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.5233644859813084, + "college_engineering_hydrology": 0.3867924528301887, + "college_law": 0.3148148148148148, + "college_mathematics": 0.18095238095238095, + "college_medical_statistics": 0.3490566037735849, + "college_medicine": 0.39194139194139194, + "computer_science": 0.4264705882352941, + "computer_security": 0.5321637426900585, + "conceptual_physics": 0.35374149659863946, + "construction_project_management": 0.4028776978417266, + "economics": 0.44654088050314467, + "education": 0.5214723926380368, + "electrical_engineering": 0.3953488372093023, + "elementary_chinese": 0.3968253968253968, + "elementary_commonsense": 0.40404040404040403, + "elementary_information_and_technology": 0.5840336134453782, + "elementary_mathematics": 0.28695652173913044, + "ethnology": 0.45925925925925926, + "food_science": 0.4405594405594406, + "genetics": 0.3409090909090909, + "global_facts": 0.42953020134228187, + "high_school_biology": 0.2781065088757396, + "high_school_chemistry": 0.2803030303030303, + "high_school_geography": 0.4322033898305085, + "high_school_mathematics": 0.25, + "high_school_physics": 0.37272727272727274, + "high_school_politics": 0.42657342657342656, + "human_sexuality": 0.47619047619047616, + "international_law": 0.3675675675675676, + "journalism": 0.5, + "jurisprudence": 0.41849148418491483, + "legal_and_moral_basis": 0.8084112149532711, + "logical": 0.4065040650406504, + "machine_learning": 0.28688524590163933, + "management": 0.4666666666666667, + "marketing": 0.4722222222222222, + "marxist_theory": 0.5026455026455027, + "modern_chinese": 0.3275862068965517, + "nutrition": 0.4206896551724138, + "philosophy": 0.4857142857142857, + "professional_accounting": 0.4742857142857143, + "professional_law": 0.35071090047393366, + "professional_medicine": 0.31382978723404253, + "professional_psychology": 0.4870689655172414, + "public_relations": 0.4885057471264368, + "security_study": 0.5259259259259259, + "sociology": 0.4823008849557522, + "sports_science": 0.4, + "traditional_chinese_medicine": 0.3783783783783784, + "virology": 0.4437869822485207, + "world_history": 0.4720496894409938, + "world_religions": 0.5 + } + }, + "prompt_3": { + "accuracy": 0.4215161457433949, + "category_acc": { + "agronomy": 0.4260355029585799, + "anatomy": 0.2905405405405405, + "ancient_chinese": 0.29878048780487804, + "arts": 0.59375, + "astronomy": 0.34545454545454546, + "business_ethics": 0.45933014354066987, + "chinese_civil_service_exam": 0.36875, + "chinese_driving_rule": 0.5954198473282443, + "chinese_food_culture": 0.38235294117647056, + "chinese_foreign_policy": 0.48598130841121495, + "chinese_history": 0.5046439628482973, + "chinese_literature": 0.3088235294117647, + "chinese_teacher_qualification": 0.5642458100558659, + "clinical_knowledge": 0.33755274261603374, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.5794392523364486, + "college_engineering_hydrology": 0.3867924528301887, + "college_law": 0.35185185185185186, + "college_mathematics": 0.19047619047619047, + "college_medical_statistics": 0.33962264150943394, + "college_medicine": 0.39194139194139194, + "computer_science": 0.45588235294117646, + "computer_security": 0.49707602339181284, + "conceptual_physics": 0.3401360544217687, + "construction_project_management": 0.39568345323741005, + "economics": 0.4339622641509434, + "education": 0.5214723926380368, + "electrical_engineering": 0.3953488372093023, + "elementary_chinese": 0.36507936507936506, + "elementary_commonsense": 0.41414141414141414, + "elementary_information_and_technology": 0.5882352941176471, + "elementary_mathematics": 0.3, + "ethnology": 0.4074074074074074, + "food_science": 0.4195804195804196, + "genetics": 0.35795454545454547, + "global_facts": 0.4697986577181208, + "high_school_biology": 0.22485207100591717, + "high_school_chemistry": 0.25, + "high_school_geography": 0.4322033898305085, + "high_school_mathematics": 0.23170731707317074, + "high_school_physics": 0.36363636363636365, + "high_school_politics": 0.40559440559440557, + "human_sexuality": 0.4444444444444444, + "international_law": 0.372972972972973, + "journalism": 0.5116279069767442, + "jurisprudence": 0.41119221411192214, + "legal_and_moral_basis": 0.7757009345794392, + "logical": 0.43089430894308944, + "machine_learning": 0.29508196721311475, + "management": 0.47619047619047616, + "marketing": 0.48333333333333334, + "marxist_theory": 0.48148148148148145, + "modern_chinese": 0.3275862068965517, + "nutrition": 0.45517241379310347, + "philosophy": 0.4380952380952381, + "professional_accounting": 0.4685714285714286, + "professional_law": 0.33175355450236965, + "professional_medicine": 0.30851063829787234, + "professional_psychology": 0.4827586206896552, + "public_relations": 0.5057471264367817, + "security_study": 0.5259259259259259, + "sociology": 0.49557522123893805, + "sports_science": 0.41818181818181815, + "traditional_chinese_medicine": 0.3945945945945946, + "virology": 0.4556213017751479, + "world_history": 0.4968944099378882, + "world_religions": 0.49375 + } + }, + "prompt_4": { + "accuracy": 0.41486789846313243, + "category_acc": { + "agronomy": 0.4437869822485207, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.2865853658536585, + "arts": 0.5625, + "astronomy": 0.3393939393939394, + "business_ethics": 0.4784688995215311, + "chinese_civil_service_exam": 0.35625, + "chinese_driving_rule": 0.5954198473282443, + "chinese_food_culture": 0.40441176470588236, + "chinese_foreign_policy": 0.48598130841121495, + "chinese_history": 0.4953560371517028, + "chinese_literature": 0.3333333333333333, + "chinese_teacher_qualification": 0.5418994413407822, + "clinical_knowledge": 0.32489451476793246, + "college_actuarial_science": 0.29245283018867924, + "college_education": 0.5700934579439252, + "college_engineering_hydrology": 0.3584905660377358, + "college_law": 0.3611111111111111, + "college_mathematics": 0.19047619047619047, + "college_medical_statistics": 0.3490566037735849, + "college_medicine": 0.38095238095238093, + "computer_science": 0.43137254901960786, + "computer_security": 0.49122807017543857, + "conceptual_physics": 0.32653061224489793, + "construction_project_management": 0.3381294964028777, + "economics": 0.4528301886792453, + "education": 0.48466257668711654, + "electrical_engineering": 0.37209302325581395, + "elementary_chinese": 0.373015873015873, + "elementary_commonsense": 0.41414141414141414, + "elementary_information_and_technology": 0.6218487394957983, + "elementary_mathematics": 0.26521739130434785, + "ethnology": 0.4148148148148148, + "food_science": 0.44755244755244755, + "genetics": 0.3693181818181818, + "global_facts": 0.4563758389261745, + "high_school_biology": 0.20710059171597633, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.4406779661016949, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.36363636363636365, + "high_school_politics": 0.35664335664335667, + "human_sexuality": 0.42857142857142855, + "international_law": 0.3567567567567568, + "journalism": 0.4883720930232558, + "jurisprudence": 0.41119221411192214, + "legal_and_moral_basis": 0.7710280373831776, + "logical": 0.3821138211382114, + "machine_learning": 0.26229508196721313, + "management": 0.46190476190476193, + "marketing": 0.49444444444444446, + "marxist_theory": 0.5185185185185185, + "modern_chinese": 0.3017241379310345, + "nutrition": 0.43448275862068964, + "philosophy": 0.44761904761904764, + "professional_accounting": 0.4857142857142857, + "professional_law": 0.32701421800947866, + "professional_medicine": 0.2978723404255319, + "professional_psychology": 0.46120689655172414, + "public_relations": 0.5057471264367817, + "security_study": 0.5111111111111111, + "sociology": 0.4646017699115044, + "sports_science": 0.43636363636363634, + "traditional_chinese_medicine": 0.372972972972973, + "virology": 0.4378698224852071, + "world_history": 0.484472049689441, + "world_religions": 0.4875 + } + }, + "prompt_5": { + "accuracy": 0.4240200310827146, + "category_acc": { + "agronomy": 0.4437869822485207, + "anatomy": 0.2702702702702703, + "ancient_chinese": 0.2926829268292683, + "arts": 0.5875, + "astronomy": 0.38181818181818183, + "business_ethics": 0.4880382775119617, + "chinese_civil_service_exam": 0.375, + "chinese_driving_rule": 0.6030534351145038, + "chinese_food_culture": 0.4264705882352941, + "chinese_foreign_policy": 0.4672897196261682, + "chinese_history": 0.5170278637770898, + "chinese_literature": 0.35294117647058826, + "chinese_teacher_qualification": 0.553072625698324, + "clinical_knowledge": 0.3291139240506329, + "college_actuarial_science": 0.29245283018867924, + "college_education": 0.5607476635514018, + "college_engineering_hydrology": 0.330188679245283, + "college_law": 0.32407407407407407, + "college_mathematics": 0.2, + "college_medical_statistics": 0.3584905660377358, + "college_medicine": 0.37362637362637363, + "computer_science": 0.4362745098039216, + "computer_security": 0.49707602339181284, + "conceptual_physics": 0.3401360544217687, + "construction_project_management": 0.381294964028777, + "economics": 0.4088050314465409, + "education": 0.5214723926380368, + "electrical_engineering": 0.43023255813953487, + "elementary_chinese": 0.36904761904761907, + "elementary_commonsense": 0.4090909090909091, + "elementary_information_and_technology": 0.6008403361344538, + "elementary_mathematics": 0.2782608695652174, + "ethnology": 0.4222222222222222, + "food_science": 0.42657342657342656, + "genetics": 0.3693181818181818, + "global_facts": 0.4563758389261745, + "high_school_biology": 0.2603550295857988, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.423728813559322, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.34545454545454546, + "high_school_politics": 0.3986013986013986, + "human_sexuality": 0.46825396825396826, + "international_law": 0.372972972972973, + "journalism": 0.47674418604651164, + "jurisprudence": 0.4257907542579075, + "legal_and_moral_basis": 0.7990654205607477, + "logical": 0.3902439024390244, + "machine_learning": 0.27049180327868855, + "management": 0.4666666666666667, + "marketing": 0.4777777777777778, + "marxist_theory": 0.5026455026455027, + "modern_chinese": 0.3706896551724138, + "nutrition": 0.42758620689655175, + "philosophy": 0.49523809523809526, + "professional_accounting": 0.49714285714285716, + "professional_law": 0.3886255924170616, + "professional_medicine": 0.30319148936170215, + "professional_psychology": 0.47413793103448276, + "public_relations": 0.5114942528735632, + "security_study": 0.5185185185185185, + "sociology": 0.4690265486725664, + "sports_science": 0.43636363636363634, + "traditional_chinese_medicine": 0.372972972972973, + "virology": 0.46745562130177515, + "world_history": 0.4906832298136646, + "world_religions": 0.5 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2727272727272727 + }, + "prompt_2": { + "accuracy": 0.30303030303030304 + }, + "prompt_3": { + "accuracy": 0.36363636363636365 + }, + "prompt_4": { + "accuracy": 0.30303030303030304 + }, + "prompt_5": { + "accuracy": 0.30303030303030304 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49772727272727274 + }, + "prompt_2": { + "accuracy": 0.425 + }, + "prompt_3": { + "accuracy": 0.4340909090909091 + }, + "prompt_4": { + "accuracy": 0.5386363636363637 + }, + "prompt_5": { + "accuracy": 0.5318181818181819 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.36 + }, + "prompt_2": { + "accuracy": 0.33796610169491526 + }, + "prompt_3": { + "accuracy": 0.33322033898305087 + }, + "prompt_4": { + "accuracy": 0.36 + }, + "prompt_5": { + "accuracy": 0.39016949152542374 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8462976813762154 + }, + "prompt_2": { + "accuracy": 0.8474195961106956 + }, + "prompt_3": { + "accuracy": 0.8451757666417352 + }, + "prompt_4": { + "accuracy": 0.850037397157816 + }, + "prompt_5": { + "accuracy": 0.8477935676888556 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8309652131308183 + }, + "prompt_2": { + "accuracy": 0.821656050955414 + }, + "prompt_3": { + "accuracy": 0.8275355218030377 + }, + "prompt_4": { + "accuracy": 0.8187163155316022 + }, + "prompt_5": { + "accuracy": 0.8270455658990691 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.4822572463382286, + "rouge2": 0.24179752223275194, + "rougeL": 0.40685265935505044, + "avg_rouge": 0.3769691426420103 + }, + "prompt_2": { + "rouge1": 0.4846505036032038, + "rouge2": 0.24004678672438085, + "rougeL": 0.40652330967105027, + "avg_rouge": 0.3770735333328783 + }, + "prompt_3": { + "rouge1": 0.46741426339092534, + "rouge2": 0.23548723662909044, + "rougeL": 0.39586316988696874, + "avg_rouge": 0.36625488996899486 + }, + "prompt_4": { + "rouge1": 0.4745661391560416, + "rouge2": 0.24224734394256742, + "rougeL": 0.401643858204577, + "avg_rouge": 0.37281911376772864 + }, + "prompt_5": { + "rouge1": 0.46212615742005675, + "rouge2": 0.23491715926325557, + "rougeL": 0.3931437108357111, + "avg_rouge": 0.3633956758396744 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.21569667653689323, + "rouge2": 0.055964371788382455, + "rougeL": 0.16733119107331074, + "avg_rouge": 0.14633074646619548 + }, + "prompt_2": { + "rouge1": 0.22457067857048188, + "rouge2": 0.061917739962859285, + "rougeL": 0.1692818486991533, + "avg_rouge": 0.15192342241083148 + }, + "prompt_3": { + "rouge1": 0.2336676833747752, + "rouge2": 0.0640031805303293, + "rougeL": 0.1803814561637721, + "avg_rouge": 0.1593507733562922 + }, + "prompt_4": { + "rouge1": 0.23148382643270657, + "rouge2": 0.0616428377285849, + "rougeL": 0.1787858952052932, + "avg_rouge": 0.1573041864555282 + }, + "prompt_5": { + "rouge1": 0.2262787923951814, + "rouge2": 0.05505546442252213, + "rougeL": 0.18798340087733187, + "avg_rouge": 0.15643921923167847 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.9403669724770642 + }, + "prompt_2": { + "accuracy": 0.9392201834862385 + }, + "prompt_3": { + "accuracy": 0.9357798165137615 + }, + "prompt_4": { + "accuracy": 0.9311926605504587 + }, + "prompt_5": { + "accuracy": 0.9323394495412844 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6059443911792906 + }, + "prompt_2": { + "accuracy": 0.6097794822627037 + }, + "prompt_3": { + "accuracy": 0.6164908916586769 + }, + "prompt_4": { + "accuracy": 0.6567593480345159 + }, + "prompt_5": { + "accuracy": 0.6299137104506232 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.693 + }, + "prompt_2": { + "accuracy": 0.769 + }, + "prompt_3": { + "accuracy": 0.7385 + }, + "prompt_4": { + "accuracy": 0.722 + }, + "prompt_5": { + "accuracy": 0.731 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.519 + }, + "prompt_2": { + "accuracy": 0.466 + }, + "prompt_3": { + "accuracy": 0.4875 + }, + "prompt_4": { + "accuracy": 0.4965 + }, + "prompt_5": { + "accuracy": 0.4825 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.601 + }, + "prompt_2": { + "accuracy": 0.648 + }, + "prompt_3": { + "accuracy": 0.633 + }, + "prompt_4": { + "accuracy": 0.6035 + }, + "prompt_5": { + "accuracy": 0.6115 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49295774647887325 + }, + "prompt_2": { + "accuracy": 0.5211267605633803 + }, + "prompt_3": { + "accuracy": 0.5352112676056338 + }, + "prompt_4": { + "accuracy": 0.5633802816901409 + }, + "prompt_5": { + "accuracy": 0.5915492957746479 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.592057761732852 + }, + "prompt_2": { + "accuracy": 0.5631768953068592 + }, + "prompt_3": { + "accuracy": 0.6570397111913358 + }, + "prompt_4": { + "accuracy": 0.6823104693140795 + }, + "prompt_5": { + "accuracy": 0.6498194945848376 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7230392156862745 + }, + "prompt_2": { + "accuracy": 0.6102941176470589 + }, + "prompt_3": { + "accuracy": 0.7132352941176471 + }, + "prompt_4": { + "accuracy": 0.47794117647058826 + }, + "prompt_5": { + "accuracy": 0.6470588235294118 + } } }, "five_shot": { @@ -9384,7 +82272,36 @@ "prompt_1": -1 }, "indommlu": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.3591695039722278, + "category_acc": { + "History": 0.25903614457831325, + "Geography": 0.2938775510204082, + "Lampungic": 0.272108843537415, + "Social science": 0.5609348914858097, + "Balinese": 0.267515923566879, + "Makassarese": 0.24193548387096775, + "Banjarese": 0.2361111111111111, + "Chemistry": 0.24671532846715327, + "Biology": 0.34437869822485206, + "Science": 0.41279669762641896, + "Christian religion": 0.44776119402985076, + "Art": 0.4059900166389351, + "Islam religion": 0.40256045519203415, + "Hindu religion": 0.34, + "Madurese": 0.2542372881355932, + "Sport": 0.32432432432432434, + "Indonesian language": 0.4349315068493151, + "Physics": 0.296969696969697, + "Minangkabau culture": 0.3417085427135678, + "Dayak language": 0.21100917431192662, + "Sociology": 0.3326612903225806, + "Economy": 0.29918032786885246, + "Sundanese": 0.2929991356957649, + "Javanese": 0.28830645161290325, + "Civic education": 0.43490701001430615 + } + } }, "flores_ind2eng": { "prompt_1": -1 @@ -9806,53 +82723,1733 @@ "model_link": "https://huggingface.co/hpcai-tech/Colossal-LLaMA-2-7b-base", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3733333333333334, + "language_acc": { + "Malay": 0.32, + "English": 0.5266666666666666, + "Vietnamese": 0.34, + "Spanish": 0.3933333333333333, + "Indonesian": 0.32, + "Filipino": 0.30666666666666664, + "Chinese": 0.4066666666666667 + }, + "consistency_score_2": 0.3384126984126984, + "consistency_score_3": 0.1434285714285714, + "consistency_score_4": 0.0676190476190476, + "consistency_score_5": 0.03174603174603175, + "consistency_score_6": 0.014285714285714287, + "consistency_score_7": 0.006666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.3, + "Malay,Vietnamese": 0.32666666666666666, + "Malay,Spanish": 0.2733333333333333, + "Malay,Indonesian": 0.3466666666666667, + "Malay,Filipino": 0.32666666666666666, + "Malay,Chinese": 0.36, + "English,Vietnamese": 0.34, + "English,Spanish": 0.4066666666666667, + "English,Indonesian": 0.31333333333333335, + "English,Filipino": 0.31333333333333335, + "English,Chinese": 0.41333333333333333, + "Vietnamese,Spanish": 0.38666666666666666, + "Vietnamese,Indonesian": 0.35333333333333333, + "Vietnamese,Filipino": 0.3, + "Vietnamese,Chinese": 0.32666666666666666, + "Spanish,Indonesian": 0.38666666666666666, + "Spanish,Filipino": 0.32666666666666666, + "Spanish,Chinese": 0.3466666666666667, + "Indonesian,Filipino": 0.32, + "Indonesian,Chinese": 0.31333333333333335, + "Filipino,Chinese": 0.32666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.12, + "Malay,English,Spanish": 0.10666666666666667, + "Malay,English,Indonesian": 0.11333333333333333, + "Malay,English,Filipino": 0.11333333333333333, + "Malay,English,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish": 0.13333333333333333, + "Malay,Vietnamese,Indonesian": 0.14666666666666667, + "Malay,Vietnamese,Filipino": 0.14666666666666667, + "Malay,Vietnamese,Chinese": 0.12666666666666668, + "Malay,Spanish,Indonesian": 0.13333333333333333, + "Malay,Spanish,Filipino": 0.11333333333333333, + "Malay,Spanish,Chinese": 0.10666666666666667, + "Malay,Indonesian,Filipino": 0.17333333333333334, + "Malay,Indonesian,Chinese": 0.13333333333333333, + "Malay,Filipino,Chinese": 0.12666666666666668, + "English,Vietnamese,Spanish": 0.18, + "English,Vietnamese,Indonesian": 0.14666666666666667, + "English,Vietnamese,Filipino": 0.12, + "English,Vietnamese,Chinese": 0.17333333333333334, + "English,Spanish,Indonesian": 0.18666666666666668, + "English,Spanish,Filipino": 0.14, + "English,Spanish,Chinese": 0.2, + "English,Indonesian,Filipino": 0.12, + "English,Indonesian,Chinese": 0.14666666666666667, + "English,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Indonesian": 0.18666666666666668, + "Vietnamese,Spanish,Filipino": 0.16, + "Vietnamese,Spanish,Chinese": 0.14666666666666667, + "Vietnamese,Indonesian,Filipino": 0.14, + "Vietnamese,Indonesian,Chinese": 0.14, + "Vietnamese,Filipino,Chinese": 0.14666666666666667, + "Spanish,Indonesian,Filipino": 0.14, + "Spanish,Indonesian,Chinese": 0.16, + "Spanish,Filipino,Chinese": 0.16666666666666666, + "Indonesian,Filipino,Chinese": 0.12666666666666668 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.04666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.06, + "Malay,English,Vietnamese,Filipino": 0.04666666666666667, + "Malay,English,Vietnamese,Chinese": 0.06, + "Malay,English,Spanish,Indonesian": 0.05333333333333334, + "Malay,English,Spanish,Filipino": 0.03333333333333333, + "Malay,English,Spanish,Chinese": 0.06, + "Malay,English,Indonesian,Filipino": 0.06, + "Malay,English,Indonesian,Chinese": 0.06, + "Malay,English,Filipino,Chinese": 0.06, + "Malay,Vietnamese,Spanish,Indonesian": 0.06666666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.05333333333333334, + "Malay,Vietnamese,Indonesian,Filipino": 0.09333333333333334, + "Malay,Vietnamese,Indonesian,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.07333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.06666666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.04, + "Malay,Spanish,Filipino,Chinese": 0.05333333333333334, + "Malay,Indonesian,Filipino,Chinese": 0.07333333333333333, + "English,Vietnamese,Spanish,Indonesian": 0.1, + "English,Vietnamese,Spanish,Filipino": 0.08, + "English,Vietnamese,Spanish,Chinese": 0.06666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.06, + "English,Vietnamese,Indonesian,Chinese": 0.06, + "English,Vietnamese,Filipino,Chinese": 0.07333333333333333, + "English,Spanish,Indonesian,Filipino": 0.07333333333333333, + "English,Spanish,Indonesian,Chinese": 0.1, + "English,Spanish,Filipino,Chinese": 0.08666666666666667, + "English,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.07333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.07333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.08666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.07333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.03333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.02, + "Malay,English,Vietnamese,Spanish,Chinese": 0.013333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.03333333333333333, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.02666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.02666666666666667, + "Malay,English,Spanish,Indonesian,Filipino": 0.02666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.02, + "Malay,English,Spanish,Filipino,Chinese": 0.02, + "Malay,English,Indonesian,Filipino,Chinese": 0.04, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.04, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.02, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.03333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.04, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.03333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.04, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.04, + "English,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.02, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.006666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.02, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667 + } + }, + "AC3_2": 0.355016354395556, + "AC3_3": 0.20723921853711735, + "AC3_4": 0.11449964000283125, + "AC3_5": 0.05851619643278533, + "AC3_6": 0.027518427511328172, + "AC3_7": 0.01309941520123115 + }, + "prompt_2": { + "overall_acc": 0.36, + "language_acc": { + "Malay": 0.26666666666666666, + "English": 0.5066666666666667, + "Vietnamese": 0.36666666666666664, + "Spanish": 0.3333333333333333, + "Indonesian": 0.26666666666666666, + "Filipino": 0.34, + "Chinese": 0.44 + }, + "consistency_score_2": 0.3228571428571428, + "consistency_score_3": 0.1220952380952381, + "consistency_score_4": 0.051428571428571435, + "consistency_score_5": 0.02317460317460317, + "consistency_score_6": 0.011428571428571429, + "consistency_score_7": 0.006666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.25333333333333335, + "Malay,Vietnamese": 0.29333333333333333, + "Malay,Spanish": 0.3, + "Malay,Indonesian": 0.2733333333333333, + "Malay,Filipino": 0.32, + "Malay,Chinese": 0.3333333333333333, + "English,Vietnamese": 0.35333333333333333, + "English,Spanish": 0.36, + "English,Indonesian": 0.3, + "English,Filipino": 0.4, + "English,Chinese": 0.36666666666666664, + "Vietnamese,Spanish": 0.28, + "Vietnamese,Indonesian": 0.29333333333333333, + "Vietnamese,Filipino": 0.3333333333333333, + "Vietnamese,Chinese": 0.28, + "Spanish,Indonesian": 0.26666666666666666, + "Spanish,Filipino": 0.34, + "Spanish,Chinese": 0.36666666666666664, + "Indonesian,Filipino": 0.36, + "Indonesian,Chinese": 0.32666666666666666, + "Filipino,Chinese": 0.38 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.12, + "Malay,English,Spanish": 0.11333333333333333, + "Malay,English,Indonesian": 0.06666666666666667, + "Malay,English,Filipino": 0.11333333333333333, + "Malay,English,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish": 0.06666666666666667, + "Malay,Vietnamese,Indonesian": 0.08, + "Malay,Vietnamese,Filipino": 0.1, + "Malay,Vietnamese,Chinese": 0.09333333333333334, + "Malay,Spanish,Indonesian": 0.1, + "Malay,Spanish,Filipino": 0.11333333333333333, + "Malay,Spanish,Chinese": 0.13333333333333333, + "Malay,Indonesian,Filipino": 0.13333333333333333, + "Malay,Indonesian,Chinese": 0.09333333333333334, + "Malay,Filipino,Chinese": 0.15333333333333332, + "English,Vietnamese,Spanish": 0.10666666666666667, + "English,Vietnamese,Indonesian": 0.12, + "English,Vietnamese,Filipino": 0.16666666666666666, + "English,Vietnamese,Chinese": 0.11333333333333333, + "English,Spanish,Indonesian": 0.10666666666666667, + "English,Spanish,Filipino": 0.16, + "English,Spanish,Chinese": 0.18666666666666668, + "English,Indonesian,Filipino": 0.16, + "English,Indonesian,Chinese": 0.12, + "English,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Indonesian": 0.06666666666666667, + "Vietnamese,Spanish,Filipino": 0.09333333333333334, + "Vietnamese,Spanish,Chinese": 0.1, + "Vietnamese,Indonesian,Filipino": 0.14666666666666667, + "Vietnamese,Indonesian,Chinese": 0.12, + "Vietnamese,Filipino,Chinese": 0.11333333333333333, + "Spanish,Indonesian,Filipino": 0.14, + "Spanish,Indonesian,Chinese": 0.14, + "Spanish,Filipino,Chinese": 0.16666666666666666, + "Indonesian,Filipino,Chinese": 0.16 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.04, + "Malay,English,Vietnamese,Indonesian": 0.03333333333333333, + "Malay,English,Vietnamese,Filipino": 0.04666666666666667, + "Malay,English,Vietnamese,Chinese": 0.05333333333333334, + "Malay,English,Spanish,Indonesian": 0.02666666666666667, + "Malay,English,Spanish,Filipino": 0.05333333333333334, + "Malay,English,Spanish,Chinese": 0.07333333333333333, + "Malay,English,Indonesian,Filipino": 0.04, + "Malay,English,Indonesian,Chinese": 0.04, + "Malay,English,Filipino,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.02666666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.02666666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.03333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.05333333333333334, + "Malay,Vietnamese,Indonesian,Chinese": 0.04, + "Malay,Vietnamese,Filipino,Chinese": 0.04666666666666667, + "Malay,Spanish,Indonesian,Filipino": 0.06, + "Malay,Spanish,Indonesian,Chinese": 0.05333333333333334, + "Malay,Spanish,Filipino,Chinese": 0.06666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.06666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.02666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.04666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.04, + "English,Vietnamese,Indonesian,Filipino": 0.08, + "English,Vietnamese,Indonesian,Chinese": 0.05333333333333334, + "English,Vietnamese,Filipino,Chinese": 0.04666666666666667, + "English,Spanish,Indonesian,Filipino": 0.07333333333333333, + "English,Spanish,Indonesian,Chinese": 0.07333333333333333, + "English,Spanish,Filipino,Chinese": 0.07333333333333333, + "English,Indonesian,Filipino,Chinese": 0.08, + "Vietnamese,Spanish,Indonesian,Filipino": 0.03333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.03333333333333333, + "Vietnamese,Spanish,Filipino,Chinese": 0.03333333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.06, + "Spanish,Indonesian,Filipino,Chinese": 0.1 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.013333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino": 0.02, + "Malay,English,Vietnamese,Spanish,Chinese": 0.02, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.02666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.02666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.02, + "Malay,English,Spanish,Indonesian,Filipino": 0.02, + "Malay,English,Spanish,Indonesian,Chinese": 0.02, + "Malay,English,Spanish,Filipino,Chinese": 0.03333333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.02, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.013333333333333334, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.013333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.03333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.04, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.02, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.013333333333333334, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.03333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.013333333333333334, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.006666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.02, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667 + } + }, + "AC3_2": 0.34041840999198886, + "AC3_3": 0.18234689842128327, + "AC3_4": 0.08999999997812501, + "AC3_5": 0.04354598176162635, + "AC3_6": 0.022153846147881656, + "AC3_7": 0.013090909087338844 + }, + "prompt_3": { + "overall_acc": 0.3295238095238095, + "language_acc": { + "Malay": 0.25333333333333335, + "English": 0.44, + "Vietnamese": 0.34, + "Spanish": 0.37333333333333335, + "Indonesian": 0.34, + "Filipino": 0.28, + "Chinese": 0.28 + }, + "consistency_score_2": 0.34, + "consistency_score_3": 0.136952380952381, + "consistency_score_4": 0.061333333333333344, + "consistency_score_5": 0.029523809523809525, + "consistency_score_6": 0.014285714285714287, + "consistency_score_7": 0.006666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.32666666666666666, + "Malay,Vietnamese": 0.37333333333333335, + "Malay,Spanish": 0.3, + "Malay,Indonesian": 0.3333333333333333, + "Malay,Filipino": 0.36666666666666664, + "Malay,Chinese": 0.36, + "English,Vietnamese": 0.3466666666666667, + "English,Spanish": 0.36, + "English,Indonesian": 0.35333333333333333, + "English,Filipino": 0.3, + "English,Chinese": 0.36, + "Vietnamese,Spanish": 0.37333333333333335, + "Vietnamese,Indonesian": 0.38666666666666666, + "Vietnamese,Filipino": 0.3, + "Vietnamese,Chinese": 0.30666666666666664, + "Spanish,Indonesian": 0.36, + "Spanish,Filipino": 0.32, + "Spanish,Chinese": 0.36, + "Indonesian,Filipino": 0.3333333333333333, + "Indonesian,Chinese": 0.35333333333333333, + "Filipino,Chinese": 0.26666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.16666666666666666, + "Malay,English,Spanish": 0.14, + "Malay,English,Indonesian": 0.14, + "Malay,English,Filipino": 0.12, + "Malay,English,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish": 0.14666666666666667, + "Malay,Vietnamese,Indonesian": 0.18666666666666668, + "Malay,Vietnamese,Filipino": 0.15333333333333332, + "Malay,Vietnamese,Chinese": 0.14666666666666667, + "Malay,Spanish,Indonesian": 0.1, + "Malay,Spanish,Filipino": 0.11333333333333333, + "Malay,Spanish,Chinese": 0.16, + "Malay,Indonesian,Filipino": 0.14, + "Malay,Indonesian,Chinese": 0.12, + "Malay,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish": 0.18, + "English,Vietnamese,Indonesian": 0.16, + "English,Vietnamese,Filipino": 0.14666666666666667, + "English,Vietnamese,Chinese": 0.13333333333333333, + "English,Spanish,Indonesian": 0.18, + "English,Spanish,Filipino": 0.13333333333333333, + "English,Spanish,Chinese": 0.16, + "English,Indonesian,Filipino": 0.13333333333333333, + "English,Indonesian,Chinese": 0.14, + "English,Filipino,Chinese": 0.1, + "Vietnamese,Spanish,Indonesian": 0.15333333333333332, + "Vietnamese,Spanish,Filipino": 0.11333333333333333, + "Vietnamese,Spanish,Chinese": 0.16, + "Vietnamese,Indonesian,Filipino": 0.14, + "Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "Vietnamese,Filipino,Chinese": 0.07333333333333333, + "Spanish,Indonesian,Filipino": 0.11333333333333333, + "Spanish,Indonesian,Chinese": 0.16, + "Spanish,Filipino,Chinese": 0.09333333333333334, + "Indonesian,Filipino,Chinese": 0.09333333333333334 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.08666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.08666666666666667, + "Malay,English,Vietnamese,Filipino": 0.06666666666666667, + "Malay,English,Vietnamese,Chinese": 0.05333333333333334, + "Malay,English,Spanish,Indonesian": 0.06666666666666667, + "Malay,English,Spanish,Filipino": 0.04666666666666667, + "Malay,English,Spanish,Chinese": 0.08, + "Malay,English,Indonesian,Filipino": 0.06666666666666667, + "Malay,English,Indonesian,Chinese": 0.06, + "Malay,English,Filipino,Chinese": 0.04666666666666667, + "Malay,Vietnamese,Spanish,Indonesian": 0.06666666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.05333333333333334, + "Malay,Vietnamese,Spanish,Chinese": 0.06666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.08, + "Malay,Vietnamese,Indonesian,Chinese": 0.08, + "Malay,Vietnamese,Filipino,Chinese": 0.03333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.03333333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.06666666666666667, + "Malay,Spanish,Filipino,Chinese": 0.04666666666666667, + "Malay,Indonesian,Filipino,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.1, + "English,Vietnamese,Spanish,Filipino": 0.06666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.08, + "English,Vietnamese,Indonesian,Filipino": 0.07333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.06666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.05333333333333334, + "English,Spanish,Indonesian,Filipino": 0.06666666666666667, + "English,Spanish,Indonesian,Chinese": 0.08666666666666667, + "English,Spanish,Filipino,Chinese": 0.03333333333333333, + "English,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Vietnamese,Spanish,Indonesian,Filipino": 0.04666666666666667, + "Vietnamese,Spanish,Indonesian,Chinese": 0.08, + "Vietnamese,Spanish,Filipino,Chinese": 0.02666666666666667, + "Vietnamese,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.04666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.02666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.03333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.04666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.03333333333333333, + "Malay,English,Vietnamese,Filipino,Chinese": 0.02, + "Malay,English,Spanish,Indonesian,Filipino": 0.02666666666666667, + "Malay,English,Spanish,Indonesian,Chinese": 0.04, + "Malay,English,Spanish,Filipino,Chinese": 0.02, + "Malay,English,Indonesian,Filipino,Chinese": 0.03333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.02, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.04, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.013333333333333334, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.02, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.04, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.02, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.03333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.02, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.02, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.02, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.006666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.02, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667 + } + }, + "AC3_2": 0.334679943051008, + "AC3_3": 0.1934892764367727, + "AC3_4": 0.10341780374222174, + "AC3_5": 0.054192244521978794, + "AC3_6": 0.02738425009096659, + "AC3_7": 0.013068932951731149 + }, + "prompt_4": { + "overall_acc": 0.379047619047619, + "language_acc": { + "Malay": 0.3, + "English": 0.5266666666666666, + "Vietnamese": 0.32666666666666666, + "Spanish": 0.43333333333333335, + "Indonesian": 0.3466666666666667, + "Filipino": 0.34, + "Chinese": 0.38 + }, + "consistency_score_2": 0.34222222222222215, + "consistency_score_3": 0.14495238095238097, + "consistency_score_4": 0.07161904761904762, + "consistency_score_5": 0.03968253968253969, + "consistency_score_6": 0.02380952380952381, + "consistency_score_7": 0.013333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.31333333333333335, + "Malay,Vietnamese": 0.32, + "Malay,Spanish": 0.28, + "Malay,Indonesian": 0.2733333333333333, + "Malay,Filipino": 0.3466666666666667, + "Malay,Chinese": 0.2866666666666667, + "English,Vietnamese": 0.32666666666666666, + "English,Spanish": 0.44, + "English,Indonesian": 0.38666666666666666, + "English,Filipino": 0.3466666666666667, + "English,Chinese": 0.4666666666666667, + "Vietnamese,Spanish": 0.35333333333333333, + "Vietnamese,Indonesian": 0.29333333333333333, + "Vietnamese,Filipino": 0.34, + "Vietnamese,Chinese": 0.38666666666666666, + "Spanish,Indonesian": 0.30666666666666664, + "Spanish,Filipino": 0.2866666666666667, + "Spanish,Chinese": 0.4066666666666667, + "Indonesian,Filipino": 0.30666666666666664, + "Indonesian,Chinese": 0.3333333333333333, + "Filipino,Chinese": 0.38666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.14, + "Malay,English,Spanish": 0.13333333333333333, + "Malay,English,Indonesian": 0.11333333333333333, + "Malay,English,Filipino": 0.13333333333333333, + "Malay,English,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Spanish": 0.11333333333333333, + "Malay,Vietnamese,Indonesian": 0.1, + "Malay,Vietnamese,Filipino": 0.14666666666666667, + "Malay,Vietnamese,Chinese": 0.13333333333333333, + "Malay,Spanish,Indonesian": 0.08666666666666667, + "Malay,Spanish,Filipino": 0.12666666666666668, + "Malay,Spanish,Chinese": 0.12666666666666668, + "Malay,Indonesian,Filipino": 0.12, + "Malay,Indonesian,Chinese": 0.08, + "Malay,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish": 0.16666666666666666, + "English,Vietnamese,Indonesian": 0.12666666666666668, + "English,Vietnamese,Filipino": 0.13333333333333333, + "English,Vietnamese,Chinese": 0.21333333333333335, + "English,Spanish,Indonesian": 0.17333333333333334, + "English,Spanish,Filipino": 0.16666666666666666, + "English,Spanish,Chinese": 0.24666666666666667, + "English,Indonesian,Filipino": 0.14666666666666667, + "English,Indonesian,Chinese": 0.18, + "English,Filipino,Chinese": 0.2, + "Vietnamese,Spanish,Indonesian": 0.12666666666666668, + "Vietnamese,Spanish,Filipino": 0.15333333333333332, + "Vietnamese,Spanish,Chinese": 0.18666666666666668, + "Vietnamese,Indonesian,Filipino": 0.09333333333333334, + "Vietnamese,Indonesian,Chinese": 0.14, + "Vietnamese,Filipino,Chinese": 0.17333333333333334, + "Spanish,Indonesian,Filipino": 0.11333333333333333, + "Spanish,Indonesian,Chinese": 0.16, + "Spanish,Filipino,Chinese": 0.16666666666666666, + "Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.06666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.05333333333333334, + "Malay,English,Vietnamese,Filipino": 0.05333333333333334, + "Malay,English,Vietnamese,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Indonesian": 0.04666666666666667, + "Malay,English,Spanish,Filipino": 0.06, + "Malay,English,Spanish,Chinese": 0.08666666666666667, + "Malay,English,Indonesian,Filipino": 0.05333333333333334, + "Malay,English,Indonesian,Chinese": 0.04666666666666667, + "Malay,English,Filipino,Chinese": 0.09333333333333334, + "Malay,Vietnamese,Spanish,Indonesian": 0.04666666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.07333333333333333, + "Malay,Vietnamese,Spanish,Chinese": 0.06, + "Malay,Vietnamese,Indonesian,Filipino": 0.04666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.04, + "Malay,Vietnamese,Filipino,Chinese": 0.07333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.04, + "Malay,Spanish,Indonesian,Chinese": 0.04666666666666667, + "Malay,Spanish,Filipino,Chinese": 0.08, + "Malay,Indonesian,Filipino,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish,Indonesian": 0.06666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.09333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.12, + "English,Vietnamese,Indonesian,Filipino": 0.04666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.08, + "English,Vietnamese,Filipino,Chinese": 0.10666666666666667, + "English,Spanish,Indonesian,Filipino": 0.08, + "English,Spanish,Indonesian,Chinese": 0.1, + "English,Spanish,Filipino,Chinese": 0.12666666666666668, + "English,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino": 0.06, + "Vietnamese,Spanish,Indonesian,Chinese": 0.08666666666666667, + "Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Vietnamese,Indonesian,Filipino,Chinese": 0.06, + "Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.02666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino": 0.03333333333333333, + "Malay,English,Vietnamese,Spanish,Chinese": 0.04666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.013333333333333334, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.02666666666666667, + "Malay,English,Vietnamese,Filipino,Chinese": 0.05333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.02, + "Malay,English,Spanish,Indonesian,Chinese": 0.02666666666666667, + "Malay,English,Spanish,Filipino,Chinese": 0.06, + "Malay,English,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.03333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.03333333333333333, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.04666666666666667, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.04, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.05333333333333334, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.08, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.04, + "English,Spanish,Indonesian,Filipino,Chinese": 0.06, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.05333333333333334 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.013333333333333334, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.02, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.03333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.013333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.02, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.04 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334 + } + }, + "AC3_2": 0.35969483563088145, + "AC3_3": 0.20970936971296938, + "AC3_4": 0.12047498286583269, + "AC3_5": 0.0718437488546429, + "AC3_6": 0.04480468309143372, + "AC3_7": 0.025760517792787575 + }, + "prompt_5": { + "overall_acc": 0.34, + "language_acc": { + "Malay": 0.23333333333333334, + "English": 0.4866666666666667, + "Vietnamese": 0.26, + "Spanish": 0.4066666666666667, + "Indonesian": 0.2866666666666667, + "Filipino": 0.3333333333333333, + "Chinese": 0.37333333333333335 + }, + "consistency_score_2": 0.32285714285714284, + "consistency_score_3": 0.12304761904761902, + "consistency_score_4": 0.052571428571428554, + "consistency_score_5": 0.025396825396825397, + "consistency_score_6": 0.013333333333333334, + "consistency_score_7": 0.006666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.2733333333333333, + "Malay,Vietnamese": 0.29333333333333333, + "Malay,Spanish": 0.2733333333333333, + "Malay,Indonesian": 0.3333333333333333, + "Malay,Filipino": 0.29333333333333333, + "Malay,Chinese": 0.3333333333333333, + "English,Vietnamese": 0.3333333333333333, + "English,Spanish": 0.37333333333333335, + "English,Indonesian": 0.4, + "English,Filipino": 0.30666666666666664, + "English,Chinese": 0.3933333333333333, + "Vietnamese,Spanish": 0.32, + "Vietnamese,Indonesian": 0.34, + "Vietnamese,Filipino": 0.3, + "Vietnamese,Chinese": 0.2733333333333333, + "Spanish,Indonesian": 0.38666666666666666, + "Spanish,Filipino": 0.29333333333333333, + "Spanish,Chinese": 0.35333333333333333, + "Indonesian,Filipino": 0.30666666666666664, + "Indonesian,Chinese": 0.31333333333333335, + "Filipino,Chinese": 0.2866666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.10666666666666667, + "Malay,English,Spanish": 0.10666666666666667, + "Malay,English,Indonesian": 0.11333333333333333, + "Malay,English,Filipino": 0.08, + "Malay,English,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish": 0.08666666666666667, + "Malay,Vietnamese,Indonesian": 0.11333333333333333, + "Malay,Vietnamese,Filipino": 0.10666666666666667, + "Malay,Vietnamese,Chinese": 0.08, + "Malay,Spanish,Indonesian": 0.12, + "Malay,Spanish,Filipino": 0.07333333333333333, + "Malay,Spanish,Chinese": 0.12666666666666668, + "Malay,Indonesian,Filipino": 0.13333333333333333, + "Malay,Indonesian,Chinese": 0.11333333333333333, + "Malay,Filipino,Chinese": 0.1, + "English,Vietnamese,Spanish": 0.13333333333333333, + "English,Vietnamese,Indonesian": 0.14, + "English,Vietnamese,Filipino": 0.12, + "English,Vietnamese,Chinese": 0.13333333333333333, + "English,Spanish,Indonesian": 0.19333333333333333, + "English,Spanish,Filipino": 0.14666666666666667, + "English,Spanish,Chinese": 0.19333333333333333, + "English,Indonesian,Filipino": 0.15333333333333332, + "English,Indonesian,Chinese": 0.17333333333333334, + "English,Filipino,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Indonesian": 0.13333333333333333, + "Vietnamese,Spanish,Filipino": 0.10666666666666667, + "Vietnamese,Spanish,Chinese": 0.12666666666666668, + "Vietnamese,Indonesian,Filipino": 0.12666666666666668, + "Vietnamese,Indonesian,Chinese": 0.10666666666666667, + "Vietnamese,Filipino,Chinese": 0.08666666666666667, + "Spanish,Indonesian,Filipino": 0.12666666666666668, + "Spanish,Indonesian,Chinese": 0.13333333333333333, + "Spanish,Filipino,Chinese": 0.11333333333333333, + "Indonesian,Filipino,Chinese": 0.12 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.04, + "Malay,English,Vietnamese,Indonesian": 0.04666666666666667, + "Malay,English,Vietnamese,Filipino": 0.04, + "Malay,English,Vietnamese,Chinese": 0.04666666666666667, + "Malay,English,Spanish,Indonesian": 0.04, + "Malay,English,Spanish,Filipino": 0.03333333333333333, + "Malay,English,Spanish,Chinese": 0.06666666666666667, + "Malay,English,Indonesian,Filipino": 0.05333333333333334, + "Malay,English,Indonesian,Chinese": 0.04, + "Malay,English,Filipino,Chinese": 0.04, + "Malay,Vietnamese,Spanish,Indonesian": 0.04, + "Malay,Vietnamese,Spanish,Filipino": 0.02666666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.04, + "Malay,Vietnamese,Indonesian,Filipino": 0.04666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.02666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.03333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.04, + "Malay,Spanish,Indonesian,Chinese": 0.04666666666666667, + "Malay,Spanish,Filipino,Chinese": 0.04, + "Malay,Indonesian,Filipino,Chinese": 0.06, + "English,Vietnamese,Spanish,Indonesian": 0.06666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.06, + "English,Vietnamese,Spanish,Chinese": 0.07333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.07333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.06, + "English,Vietnamese,Filipino,Chinese": 0.05333333333333334, + "English,Spanish,Indonesian,Filipino": 0.08666666666666667, + "English,Spanish,Indonesian,Chinese": 0.09333333333333334, + "English,Spanish,Filipino,Chinese": 0.08, + "English,Indonesian,Filipino,Chinese": 0.07333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.05333333333333334, + "Vietnamese,Spanish,Indonesian,Chinese": 0.05333333333333334, + "Vietnamese,Spanish,Filipino,Chinese": 0.05333333333333334, + "Vietnamese,Indonesian,Filipino,Chinese": 0.05333333333333334, + "Spanish,Indonesian,Filipino,Chinese": 0.06 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.013333333333333334, + "Malay,English,Vietnamese,Spanish,Filipino": 0.013333333333333334, + "Malay,English,Vietnamese,Spanish,Chinese": 0.02666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.02666666666666667, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.013333333333333334, + "Malay,English,Vietnamese,Filipino,Chinese": 0.02, + "Malay,English,Spanish,Indonesian,Filipino": 0.02, + "Malay,English,Spanish,Indonesian,Chinese": 0.02, + "Malay,English,Spanish,Filipino,Chinese": 0.02666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.02666666666666667, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.013333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.02, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.02, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.02, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.02666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.04, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.03333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.03333333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.04, + "English,Spanish,Indonesian,Filipino,Chinese": 0.04666666666666667, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.03333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.006666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.006666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.013333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.013333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.02, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.013333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.02 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.006666666666666667 + } + }, + "AC3_2": 0.3312068965017576, + "AC3_3": 0.18069930066027676, + "AC3_4": 0.09106259095205828, + "AC3_5": 0.047263249335457926, + "AC3_6": 0.025660377351228197, + "AC3_7": 0.013076923073150887 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.37094155844155846, + "language_acc": { + "English": 0.4602272727272727, + "Vietnamese": 0.3068181818181818, + "Chinese": 0.4318181818181818, + "Indonesian": 0.39204545454545453, + "Filipino": 0.3068181818181818, + "Spanish": 0.375, + "Malay": 0.32386363636363635 + }, + "consistency_score_2": 0.36904761904761907, + "consistency_score_3": 0.16964285714285715, + "consistency_score_4": 0.08831168831168831, + "consistency_score_5": 0.04978354978354977, + "consistency_score_6": 0.02922077922077922, + "consistency_score_7": 0.017045454545454544, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3977272727272727, + "English,Chinese": 0.44886363636363635, + "English,Indonesian": 0.3693181818181818, + "English,Filipino": 0.32386363636363635, + "English,Spanish": 0.4431818181818182, + "English,Malay": 0.3693181818181818, + "Vietnamese,Chinese": 0.35795454545454547, + "Vietnamese,Indonesian": 0.3522727272727273, + "Vietnamese,Filipino": 0.3465909090909091, + "Vietnamese,Spanish": 0.4147727272727273, + "Vietnamese,Malay": 0.35795454545454547, + "Chinese,Indonesian": 0.3465909090909091, + "Chinese,Filipino": 0.2727272727272727, + "Chinese,Spanish": 0.4034090909090909, + "Chinese,Malay": 0.42045454545454547, + "Indonesian,Filipino": 0.3806818181818182, + "Indonesian,Spanish": 0.375, + "Indonesian,Malay": 0.3465909090909091, + "Filipino,Spanish": 0.3181818181818182, + "Filipino,Malay": 0.32954545454545453, + "Spanish,Malay": 0.375 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.1875, + "English,Vietnamese,Indonesian": 0.18181818181818182, + "English,Vietnamese,Filipino": 0.16477272727272727, + "English,Vietnamese,Spanish": 0.22727272727272727, + "English,Vietnamese,Malay": 0.17045454545454544, + "English,Chinese,Indonesian": 0.1875, + "English,Chinese,Filipino": 0.14772727272727273, + "English,Chinese,Spanish": 0.22727272727272727, + "English,Chinese,Malay": 0.20454545454545456, + "English,Indonesian,Filipino": 0.14772727272727273, + "English,Indonesian,Spanish": 0.20454545454545456, + "English,Indonesian,Malay": 0.18181818181818182, + "English,Filipino,Spanish": 0.1534090909090909, + "English,Filipino,Malay": 0.14204545454545456, + "English,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian": 0.16477272727272727, + "Vietnamese,Chinese,Filipino": 0.11931818181818182, + "Vietnamese,Chinese,Spanish": 0.20454545454545456, + "Vietnamese,Chinese,Malay": 0.1875, + "Vietnamese,Indonesian,Filipino": 0.1534090909090909, + "Vietnamese,Indonesian,Spanish": 0.1875, + "Vietnamese,Indonesian,Malay": 0.14772727272727273, + "Vietnamese,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Filipino,Malay": 0.14204545454545456, + "Vietnamese,Spanish,Malay": 0.18181818181818182, + "Chinese,Indonesian,Filipino": 0.1534090909090909, + "Chinese,Indonesian,Spanish": 0.1534090909090909, + "Chinese,Indonesian,Malay": 0.17045454545454544, + "Chinese,Filipino,Spanish": 0.10227272727272728, + "Chinese,Filipino,Malay": 0.1590909090909091, + "Chinese,Spanish,Malay": 0.19886363636363635, + "Indonesian,Filipino,Spanish": 0.1534090909090909, + "Indonesian,Filipino,Malay": 0.17045454545454544, + "Indonesian,Spanish,Malay": 0.16477272727272727, + "Filipino,Spanish,Malay": 0.11931818181818182 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.10227272727272728, + "English,Vietnamese,Chinese,Filipino": 0.07386363636363637, + "English,Vietnamese,Chinese,Spanish": 0.13068181818181818, + "English,Vietnamese,Chinese,Malay": 0.10227272727272728, + "English,Vietnamese,Indonesian,Filipino": 0.08522727272727272, + "English,Vietnamese,Indonesian,Spanish": 0.125, + "English,Vietnamese,Indonesian,Malay": 0.09659090909090909, + "English,Vietnamese,Filipino,Spanish": 0.10227272727272728, + "English,Vietnamese,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Spanish,Malay": 0.11931818181818182, + "English,Chinese,Indonesian,Filipino": 0.08522727272727272, + "English,Chinese,Indonesian,Spanish": 0.11363636363636363, + "English,Chinese,Indonesian,Malay": 0.09090909090909091, + "English,Chinese,Filipino,Spanish": 0.07386363636363637, + "English,Chinese,Filipino,Malay": 0.08522727272727272, + "English,Chinese,Spanish,Malay": 0.11931818181818182, + "English,Indonesian,Filipino,Spanish": 0.08522727272727272, + "English,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Indonesian,Spanish,Malay": 0.11363636363636363, + "English,Filipino,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Filipino": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Spanish": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Filipino,Spanish": 0.056818181818181816, + "Vietnamese,Chinese,Filipino,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino,Malay": 0.07386363636363637, + "Vietnamese,Indonesian,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Filipino,Spanish,Malay": 0.0625, + "Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "Chinese,Indonesian,Filipino,Malay": 0.09659090909090909, + "Chinese,Indonesian,Spanish,Malay": 0.07386363636363637, + "Chinese,Filipino,Spanish,Malay": 0.056818181818181816, + "Indonesian,Filipino,Spanish,Malay": 0.0625 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Malay": 0.03977272727272727, + "English,Vietnamese,Chinese,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.0625, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino,Malay": 0.05113636363636364, + "English,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Filipino,Spanish,Malay": 0.03977272727272727, + "English,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.05113636363636364, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03977272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + } + }, + "AC3_2": 0.36999216500615073, + "AC3_3": 0.2328131702701034, + "AC3_4": 0.142659743901215, + "AC3_5": 0.08778552634987007, + "AC3_6": 0.054174020691922856, + "AC3_7": 0.032593191319100365 + }, + "prompt_2": { + "overall_acc": 0.36607142857142855, + "language_acc": { + "English": 0.4318181818181818, + "Vietnamese": 0.2840909090909091, + "Chinese": 0.4318181818181818, + "Indonesian": 0.35795454545454547, + "Filipino": 0.32386363636363635, + "Spanish": 0.36363636363636365, + "Malay": 0.3693181818181818 + }, + "consistency_score_2": 0.353896103896104, + "consistency_score_3": 0.15665584415584416, + "consistency_score_4": 0.08262987012987014, + "consistency_score_5": 0.0514069264069264, + "consistency_score_6": 0.036525974025974024, + "consistency_score_7": 0.028409090909090908, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.32386363636363635, + "English,Chinese": 0.42613636363636365, + "English,Indonesian": 0.3352272727272727, + "English,Filipino": 0.35795454545454547, + "English,Spanish": 0.3977272727272727, + "English,Malay": 0.3409090909090909, + "Vietnamese,Chinese": 0.3522727272727273, + "Vietnamese,Indonesian": 0.36363636363636365, + "Vietnamese,Filipino": 0.3125, + "Vietnamese,Spanish": 0.375, + "Vietnamese,Malay": 0.3125, + "Chinese,Indonesian": 0.3977272727272727, + "Chinese,Filipino": 0.3181818181818182, + "Chinese,Spanish": 0.3522727272727273, + "Chinese,Malay": 0.3806818181818182, + "Indonesian,Filipino": 0.375, + "Indonesian,Spanish": 0.3806818181818182, + "Indonesian,Malay": 0.32954545454545453, + "Filipino,Spanish": 0.2897727272727273, + "Filipino,Malay": 0.35795454545454547, + "Spanish,Malay": 0.3522727272727273 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.16477272727272727, + "English,Vietnamese,Indonesian": 0.14772727272727273, + "English,Vietnamese,Filipino": 0.125, + "English,Vietnamese,Spanish": 0.16477272727272727, + "English,Vietnamese,Malay": 0.11931818181818182, + "English,Chinese,Indonesian": 0.17613636363636365, + "English,Chinese,Filipino": 0.17045454545454544, + "English,Chinese,Spanish": 0.1875, + "English,Chinese,Malay": 0.19318181818181818, + "English,Indonesian,Filipino": 0.1534090909090909, + "English,Indonesian,Spanish": 0.17613636363636365, + "English,Indonesian,Malay": 0.14204545454545456, + "English,Filipino,Spanish": 0.1590909090909091, + "English,Filipino,Malay": 0.17045454545454544, + "English,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian": 0.17613636363636365, + "Vietnamese,Chinese,Filipino": 0.11931818181818182, + "Vietnamese,Chinese,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Malay": 0.1590909090909091, + "Vietnamese,Indonesian,Filipino": 0.1590909090909091, + "Vietnamese,Indonesian,Spanish": 0.16477272727272727, + "Vietnamese,Indonesian,Malay": 0.13636363636363635, + "Vietnamese,Filipino,Spanish": 0.13068181818181818, + "Vietnamese,Filipino,Malay": 0.11931818181818182, + "Vietnamese,Spanish,Malay": 0.13636363636363635, + "Chinese,Indonesian,Filipino": 0.1534090909090909, + "Chinese,Indonesian,Spanish": 0.19886363636363635, + "Chinese,Indonesian,Malay": 0.19886363636363635, + "Chinese,Filipino,Spanish": 0.125, + "Chinese,Filipino,Malay": 0.1590909090909091, + "Chinese,Spanish,Malay": 0.17613636363636365, + "Indonesian,Filipino,Spanish": 0.1590909090909091, + "Indonesian,Filipino,Malay": 0.16477272727272727, + "Indonesian,Spanish,Malay": 0.14772727272727273, + "Filipino,Spanish,Malay": 0.13068181818181818 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.09090909090909091, + "English,Vietnamese,Chinese,Filipino": 0.07954545454545454, + "English,Vietnamese,Chinese,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Malay": 0.07954545454545454, + "English,Vietnamese,Indonesian,Filipino": 0.06818181818181818, + "English,Vietnamese,Indonesian,Spanish": 0.08522727272727272, + "English,Vietnamese,Indonesian,Malay": 0.0625, + "English,Vietnamese,Filipino,Spanish": 0.07386363636363637, + "English,Vietnamese,Filipino,Malay": 0.056818181818181816, + "English,Vietnamese,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino": 0.09659090909090909, + "English,Chinese,Indonesian,Spanish": 0.10795454545454546, + "English,Chinese,Indonesian,Malay": 0.09659090909090909, + "English,Chinese,Filipino,Spanish": 0.07954545454545454, + "English,Chinese,Filipino,Malay": 0.10227272727272728, + "English,Chinese,Spanish,Malay": 0.10795454545454546, + "English,Indonesian,Filipino,Spanish": 0.07954545454545454, + "English,Indonesian,Filipino,Malay": 0.09659090909090909, + "English,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Filipino,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Filipino": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Filipino,Spanish": 0.07386363636363637, + "Vietnamese,Chinese,Filipino,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino,Malay": 0.0625, + "Vietnamese,Indonesian,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Filipino,Spanish,Malay": 0.056818181818181816, + "Chinese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "Chinese,Indonesian,Filipino,Malay": 0.09090909090909091, + "Chinese,Indonesian,Spanish,Malay": 0.10227272727272728, + "Chinese,Filipino,Spanish,Malay": 0.07386363636363637, + "Indonesian,Filipino,Spanish,Malay": 0.07386363636363637 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Spanish,Malay": 0.0625, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.03977272727272727, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Filipino,Spanish,Malay": 0.03977272727272727, + "English,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "English,Chinese,Indonesian,Filipino,Malay": 0.06818181818181818, + "English,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.05113636363636364, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.03977272727272727, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.03977272727272727, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908 + } + }, + "AC3_2": 0.359880818117191, + "AC3_3": 0.2194154835427631, + "AC3_4": 0.13482659703424787, + "AC3_5": 0.0901536894516021, + "AC3_6": 0.0664242511355748, + "AC3_7": 0.05272633743519365 + }, + "prompt_3": { + "overall_acc": 0.3733766233766233, + "language_acc": { + "English": 0.44886363636363635, + "Vietnamese": 0.3409090909090909, + "Chinese": 0.3977272727272727, + "Indonesian": 0.3352272727272727, + "Filipino": 0.2784090909090909, + "Spanish": 0.4375, + "Malay": 0.375 + }, + "consistency_score_2": 0.36769480519480513, + "consistency_score_3": 0.1699675324675325, + "consistency_score_4": 0.09237012987012985, + "consistency_score_5": 0.05735930735930736, + "consistency_score_6": 0.03896103896103896, + "consistency_score_7": 0.028409090909090908, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.3693181818181818, + "English,Chinese": 0.45454545454545453, + "English,Indonesian": 0.3977272727272727, + "English,Filipino": 0.3522727272727273, + "English,Spanish": 0.4715909090909091, + "English,Malay": 0.3977272727272727, + "Vietnamese,Chinese": 0.3068181818181818, + "Vietnamese,Indonesian": 0.3465909090909091, + "Vietnamese,Filipino": 0.30113636363636365, + "Vietnamese,Spanish": 0.38636363636363635, + "Vietnamese,Malay": 0.30113636363636365, + "Chinese,Indonesian": 0.3977272727272727, + "Chinese,Filipino": 0.3465909090909091, + "Chinese,Spanish": 0.39204545454545453, + "Chinese,Malay": 0.30113636363636365, + "Indonesian,Filipino": 0.2727272727272727, + "Indonesian,Spanish": 0.4715909090909091, + "Indonesian,Malay": 0.38636363636363635, + "Filipino,Spanish": 0.375, + "Filipino,Malay": 0.3068181818181818, + "Spanish,Malay": 0.38636363636363635 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.17613636363636365, + "English,Vietnamese,Indonesian": 0.17613636363636365, + "English,Vietnamese,Filipino": 0.13068181818181818, + "English,Vietnamese,Spanish": 0.22727272727272727, + "English,Vietnamese,Malay": 0.17045454545454544, + "English,Chinese,Indonesian": 0.24431818181818182, + "English,Chinese,Filipino": 0.18181818181818182, + "English,Chinese,Spanish": 0.25, + "English,Chinese,Malay": 0.19318181818181818, + "English,Indonesian,Filipino": 0.14772727272727273, + "English,Indonesian,Spanish": 0.26704545454545453, + "English,Indonesian,Malay": 0.18181818181818182, + "English,Filipino,Spanish": 0.1875, + "English,Filipino,Malay": 0.14772727272727273, + "English,Spanish,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian": 0.1590909090909091, + "Vietnamese,Chinese,Filipino": 0.11363636363636363, + "Vietnamese,Chinese,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Malay": 0.10795454545454546, + "Vietnamese,Indonesian,Filipino": 0.13068181818181818, + "Vietnamese,Indonesian,Spanish": 0.21022727272727273, + "Vietnamese,Indonesian,Malay": 0.1590909090909091, + "Vietnamese,Filipino,Spanish": 0.1590909090909091, + "Vietnamese,Filipino,Malay": 0.09659090909090909, + "Vietnamese,Spanish,Malay": 0.1534090909090909, + "Chinese,Indonesian,Filipino": 0.14772727272727273, + "Chinese,Indonesian,Spanish": 0.24431818181818182, + "Chinese,Indonesian,Malay": 0.1534090909090909, + "Chinese,Filipino,Spanish": 0.18181818181818182, + "Chinese,Filipino,Malay": 0.10795454545454546, + "Chinese,Spanish,Malay": 0.1534090909090909, + "Indonesian,Filipino,Spanish": 0.17045454545454544, + "Indonesian,Filipino,Malay": 0.10795454545454546, + "Indonesian,Spanish,Malay": 0.19318181818181818, + "Filipino,Spanish,Malay": 0.14204545454545456 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.11931818181818182, + "English,Vietnamese,Chinese,Filipino": 0.07386363636363637, + "English,Vietnamese,Chinese,Spanish": 0.125, + "English,Vietnamese,Chinese,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Filipino": 0.06818181818181818, + "English,Vietnamese,Indonesian,Spanish": 0.13636363636363635, + "English,Vietnamese,Indonesian,Malay": 0.11363636363636363, + "English,Vietnamese,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Filipino,Malay": 0.0625, + "English,Vietnamese,Spanish,Malay": 0.11931818181818182, + "English,Chinese,Indonesian,Filipino": 0.10795454545454546, + "English,Chinese,Indonesian,Spanish": 0.1875, + "English,Chinese,Indonesian,Malay": 0.10227272727272728, + "English,Chinese,Filipino,Spanish": 0.11363636363636363, + "English,Chinese,Filipino,Malay": 0.06818181818181818, + "English,Chinese,Spanish,Malay": 0.11363636363636363, + "English,Indonesian,Filipino,Spanish": 0.10227272727272728, + "English,Indonesian,Filipino,Malay": 0.07386363636363637, + "English,Indonesian,Spanish,Malay": 0.13636363636363635, + "English,Filipino,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Filipino": 0.0625, + "Vietnamese,Chinese,Indonesian,Spanish": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Filipino,Spanish": 0.07386363636363637, + "Vietnamese,Chinese,Filipino,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Spanish,Malay": 0.07954545454545454, + "Vietnamese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino,Malay": 0.056818181818181816, + "Vietnamese,Indonesian,Spanish,Malay": 0.10227272727272728, + "Vietnamese,Filipino,Spanish,Malay": 0.0625, + "Chinese,Indonesian,Filipino,Spanish": 0.09659090909090909, + "Chinese,Indonesian,Filipino,Malay": 0.06818181818181818, + "Chinese,Indonesian,Spanish,Malay": 0.09090909090909091, + "Chinese,Filipino,Spanish,Malay": 0.07386363636363637, + "Indonesian,Filipino,Spanish,Malay": 0.0625 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.0625, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.0625, + "English,Vietnamese,Chinese,Filipino,Malay": 0.03409090909090909, + "English,Vietnamese,Chinese,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.09090909090909091, + "English,Vietnamese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Chinese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "English,Chinese,Indonesian,Filipino,Malay": 0.05113636363636364, + "English,Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908 + } + }, + "AC3_2": 0.3705139328954802, + "AC3_3": 0.23359744523655607, + "AC3_4": 0.14810128871082848, + "AC3_5": 0.09944201524807447, + "AC3_6": 0.07055936188083253, + "AC3_7": 0.05280073460577492 + }, + "prompt_4": { + "overall_acc": 0.3717532467532468, + "language_acc": { + "English": 0.48863636363636365, + "Vietnamese": 0.3181818181818182, + "Chinese": 0.4090909090909091, + "Indonesian": 0.3181818181818182, + "Filipino": 0.23863636363636365, + "Spanish": 0.4375, + "Malay": 0.39204545454545453 + }, + "consistency_score_2": 0.3533549783549783, + "consistency_score_3": 0.15503246753246752, + "consistency_score_4": 0.07857142857142856, + "consistency_score_5": 0.044372294372294376, + "consistency_score_6": 0.026785714285714284, + "consistency_score_7": 0.017045454545454544, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.45454545454545453, + "English,Chinese": 0.4147727272727273, + "English,Indonesian": 0.3409090909090909, + "English,Filipino": 0.2215909090909091, + "English,Spanish": 0.48295454545454547, + "English,Malay": 0.3522727272727273, + "Vietnamese,Chinese": 0.3693181818181818, + "Vietnamese,Indonesian": 0.3352272727272727, + "Vietnamese,Filipino": 0.26704545454545453, + "Vietnamese,Spanish": 0.42613636363636365, + "Vietnamese,Malay": 0.3693181818181818, + "Chinese,Indonesian": 0.4090909090909091, + "Chinese,Filipino": 0.2556818181818182, + "Chinese,Spanish": 0.4034090909090909, + "Chinese,Malay": 0.3352272727272727, + "Indonesian,Filipino": 0.32386363636363635, + "Indonesian,Spanish": 0.375, + "Indonesian,Malay": 0.3806818181818182, + "Filipino,Spanish": 0.26704545454545453, + "Filipino,Malay": 0.32954545454545453, + "Spanish,Malay": 0.3068181818181818 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.22727272727272727, + "English,Vietnamese,Indonesian": 0.1875, + "English,Vietnamese,Filipino": 0.11931818181818182, + "English,Vietnamese,Spanish": 0.2727272727272727, + "English,Vietnamese,Malay": 0.18181818181818182, + "English,Chinese,Indonesian": 0.19318181818181818, + "English,Chinese,Filipino": 0.09090909090909091, + "English,Chinese,Spanish": 0.23295454545454544, + "English,Chinese,Malay": 0.18181818181818182, + "English,Indonesian,Filipino": 0.09090909090909091, + "English,Indonesian,Spanish": 0.2215909090909091, + "English,Indonesian,Malay": 0.1534090909090909, + "English,Filipino,Spanish": 0.11931818181818182, + "English,Filipino,Malay": 0.11363636363636363, + "English,Spanish,Malay": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian": 0.1875, + "Vietnamese,Chinese,Filipino": 0.09659090909090909, + "Vietnamese,Chinese,Spanish": 0.19886363636363635, + "Vietnamese,Chinese,Malay": 0.13636363636363635, + "Vietnamese,Indonesian,Filipino": 0.10227272727272728, + "Vietnamese,Indonesian,Spanish": 0.20454545454545456, + "Vietnamese,Indonesian,Malay": 0.16477272727272727, + "Vietnamese,Filipino,Spanish": 0.11363636363636363, + "Vietnamese,Filipino,Malay": 0.13636363636363635, + "Vietnamese,Spanish,Malay": 0.14772727272727273, + "Chinese,Indonesian,Filipino": 0.11931818181818182, + "Chinese,Indonesian,Spanish": 0.19886363636363635, + "Chinese,Indonesian,Malay": 0.18181818181818182, + "Chinese,Filipino,Spanish": 0.13068181818181818, + "Chinese,Filipino,Malay": 0.10795454545454546, + "Chinese,Spanish,Malay": 0.1534090909090909, + "Indonesian,Filipino,Spanish": 0.11931818181818182, + "Indonesian,Filipino,Malay": 0.13068181818181818, + "Indonesian,Spanish,Malay": 0.14772727272727273, + "Filipino,Spanish,Malay": 0.09659090909090909 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.13068181818181818, + "English,Vietnamese,Chinese,Filipino": 0.06818181818181818, + "English,Vietnamese,Chinese,Spanish": 0.1534090909090909, + "English,Vietnamese,Chinese,Malay": 0.10227272727272728, + "English,Vietnamese,Indonesian,Filipino": 0.056818181818181816, + "English,Vietnamese,Indonesian,Spanish": 0.14204545454545456, + "English,Vietnamese,Indonesian,Malay": 0.10795454545454546, + "English,Vietnamese,Filipino,Spanish": 0.07386363636363637, + "English,Vietnamese,Filipino,Malay": 0.06818181818181818, + "English,Vietnamese,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino": 0.056818181818181816, + "English,Chinese,Indonesian,Spanish": 0.13068181818181818, + "English,Chinese,Indonesian,Malay": 0.09659090909090909, + "English,Chinese,Filipino,Spanish": 0.06818181818181818, + "English,Chinese,Filipino,Malay": 0.045454545454545456, + "English,Chinese,Spanish,Malay": 0.09659090909090909, + "English,Indonesian,Filipino,Spanish": 0.06818181818181818, + "English,Indonesian,Filipino,Malay": 0.045454545454545456, + "English,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Filipino,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Spanish": 0.125, + "Vietnamese,Chinese,Indonesian,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Filipino,Spanish": 0.0625, + "Vietnamese,Chinese,Filipino,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "Vietnamese,Indonesian,Filipino,Malay": 0.05113636363636364, + "Vietnamese,Indonesian,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.045454545454545456, + "Chinese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "Chinese,Indonesian,Filipino,Malay": 0.056818181818181816, + "Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "Chinese,Filipino,Spanish,Malay": 0.056818181818181816, + "Indonesian,Filipino,Spanish,Malay": 0.056818181818181816 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.03977272727272727, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Malay": 0.03977272727272727, + "English,Vietnamese,Chinese,Spanish,Malay": 0.0625, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.03409090909090909, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.07954545454545454, + "English,Vietnamese,Filipino,Spanish,Malay": 0.03409090909090909, + "English,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "English,Chinese,Indonesian,Filipino,Malay": 0.028409090909090908, + "English,Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03977272727272727, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03977272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + } + }, + "AC3_2": 0.36232070163640556, + "AC3_3": 0.21881315906437976, + "AC3_4": 0.1297249973962625, + "AC3_5": 0.07928157666174414, + "AC3_6": 0.04997090484635004, + "AC3_7": 0.03259631807853965 + }, + "prompt_5": { + "overall_acc": 0.36525974025974023, + "language_acc": { + "English": 0.44886363636363635, + "Vietnamese": 0.3352272727272727, + "Chinese": 0.4090909090909091, + "Indonesian": 0.39204545454545453, + "Filipino": 0.24431818181818182, + "Spanish": 0.3693181818181818, + "Malay": 0.35795454545454547 + }, + "consistency_score_2": 0.3503787878787878, + "consistency_score_3": 0.1530844155844156, + "consistency_score_4": 0.07646103896103895, + "consistency_score_5": 0.04058441558441558, + "consistency_score_6": 0.021915584415584412, + "consistency_score_7": 0.011363636363636364, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.38636363636363635, + "English,Chinese": 0.39204545454545453, + "English,Indonesian": 0.36363636363636365, + "English,Filipino": 0.3068181818181818, + "English,Spanish": 0.4659090909090909, + "English,Malay": 0.4090909090909091, + "Vietnamese,Chinese": 0.3181818181818182, + "Vietnamese,Indonesian": 0.3693181818181818, + "Vietnamese,Filipino": 0.3465909090909091, + "Vietnamese,Spanish": 0.29545454545454547, + "Vietnamese,Malay": 0.3522727272727273, + "Chinese,Indonesian": 0.3806818181818182, + "Chinese,Filipino": 0.23863636363636365, + "Chinese,Spanish": 0.39204545454545453, + "Chinese,Malay": 0.32954545454545453, + "Indonesian,Filipino": 0.2784090909090909, + "Indonesian,Spanish": 0.35795454545454547, + "Indonesian,Malay": 0.36363636363636365, + "Filipino,Spanish": 0.3068181818181818, + "Filipino,Malay": 0.36363636363636365, + "Spanish,Malay": 0.3409090909090909 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.1590909090909091, + "English,Vietnamese,Indonesian": 0.1590909090909091, + "English,Vietnamese,Filipino": 0.14204545454545456, + "English,Vietnamese,Spanish": 0.18181818181818182, + "English,Vietnamese,Malay": 0.18181818181818182, + "English,Chinese,Indonesian": 0.16477272727272727, + "English,Chinese,Filipino": 0.11363636363636363, + "English,Chinese,Spanish": 0.2159090909090909, + "English,Chinese,Malay": 0.17045454545454544, + "English,Indonesian,Filipino": 0.14204545454545456, + "English,Indonesian,Spanish": 0.21022727272727273, + "English,Indonesian,Malay": 0.20454545454545456, + "English,Filipino,Spanish": 0.17613636363636365, + "English,Filipino,Malay": 0.1590909090909091, + "English,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian": 0.13636363636363635, + "Vietnamese,Chinese,Filipino": 0.10795454545454546, + "Vietnamese,Chinese,Spanish": 0.14204545454545456, + "Vietnamese,Chinese,Malay": 0.11931818181818182, + "Vietnamese,Indonesian,Filipino": 0.1590909090909091, + "Vietnamese,Indonesian,Spanish": 0.13636363636363635, + "Vietnamese,Indonesian,Malay": 0.14772727272727273, + "Vietnamese,Filipino,Spanish": 0.11931818181818182, + "Vietnamese,Filipino,Malay": 0.17045454545454544, + "Vietnamese,Spanish,Malay": 0.14204545454545456, + "Chinese,Indonesian,Filipino": 0.09090909090909091, + "Chinese,Indonesian,Spanish": 0.16477272727272727, + "Chinese,Indonesian,Malay": 0.14204545454545456, + "Chinese,Filipino,Spanish": 0.13068181818181818, + "Chinese,Filipino,Malay": 0.10227272727272728, + "Chinese,Spanish,Malay": 0.1590909090909091, + "Indonesian,Filipino,Spanish": 0.13636363636363635, + "Indonesian,Filipino,Malay": 0.1534090909090909, + "Indonesian,Spanish,Malay": 0.1590909090909091, + "Filipino,Spanish,Malay": 0.14772727272727273 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.0625, + "English,Vietnamese,Chinese,Filipino": 0.0625, + "English,Vietnamese,Chinese,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Malay": 0.06818181818181818, + "English,Vietnamese,Indonesian,Filipino": 0.07386363636363637, + "English,Vietnamese,Indonesian,Spanish": 0.09659090909090909, + "English,Vietnamese,Indonesian,Malay": 0.09090909090909091, + "English,Vietnamese,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Spanish,Malay": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino": 0.05113636363636364, + "English,Chinese,Indonesian,Spanish": 0.10227272727272728, + "English,Chinese,Indonesian,Malay": 0.08522727272727272, + "English,Chinese,Filipino,Spanish": 0.08522727272727272, + "English,Chinese,Filipino,Malay": 0.05113636363636364, + "English,Chinese,Spanish,Malay": 0.10227272727272728, + "English,Indonesian,Filipino,Spanish": 0.10227272727272728, + "English,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Indonesian,Spanish,Malay": 0.11363636363636363, + "English,Filipino,Spanish,Malay": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Filipino": 0.05113636363636364, + "Vietnamese,Chinese,Indonesian,Spanish": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Malay": 0.05113636363636364, + "Vietnamese,Chinese,Filipino,Spanish": 0.06818181818181818, + "Vietnamese,Chinese,Filipino,Malay": 0.0625, + "Vietnamese,Chinese,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "Vietnamese,Indonesian,Filipino,Malay": 0.07954545454545454, + "Vietnamese,Indonesian,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Filipino,Spanish,Malay": 0.07954545454545454, + "Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "Chinese,Indonesian,Filipino,Malay": 0.03977272727272727, + "Chinese,Indonesian,Spanish,Malay": 0.0625, + "Chinese,Filipino,Spanish,Malay": 0.0625, + "Indonesian,Filipino,Spanish,Malay": 0.08522727272727272 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.028409090909090908, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Malay": 0.03409090909090909, + "English,Vietnamese,Chinese,Spanish,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.045454545454545456, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, + "English,Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, + "English,Chinese,Filipino,Spanish,Malay": 0.03977272727272727, + "English,Indonesian,Filipino,Spanish,Malay": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.022727272727272728, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.028409090909090908, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.022727272727272728, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.011363636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.017045454545454544, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.017045454545454544 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.011363636363636364 + } + }, + "AC3_2": 0.3576645471230753, + "AC3_3": 0.21574690573033825, + "AC3_4": 0.126451552828659, + "AC3_5": 0.07305194803394804, + "AC3_6": 0.04135015926400761, + "AC3_7": 0.02204153604430433 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4368932038834951 + }, + "prompt_2": { + "accuracy": 0.44660194174757284 + }, + "prompt_3": { + "accuracy": 0.39805825242718446 + }, + "prompt_4": { + "accuracy": 0.4174757281553398 + }, + "prompt_5": { + "accuracy": 0.4174757281553398 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5333333333333333 + }, + "prompt_2": { + "accuracy": 0.44761904761904764 + }, + "prompt_3": { + "accuracy": 0.4380952380952381 + }, + "prompt_4": { + "accuracy": 0.45714285714285713 + }, + "prompt_5": { + "accuracy": 0.4666666666666667 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.42990654205607476 + }, + "prompt_2": { + "accuracy": 0.5420560747663551 + }, + "prompt_3": { + "accuracy": 0.5046728971962616 + }, + "prompt_4": { + "accuracy": 0.514018691588785 + }, + "prompt_5": { + "accuracy": 0.48598130841121495 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.33, + "category_acc": { + "brand": 0.1, + "demographics": 0.2, + "biology": 0.2, + "history": 0.2, + "literature": 0.3, + "politics": 0.5, + "culture": 0.4, + "film": 0.6, + "law": 0.2, + "geography": 0.6 + } + }, + "prompt_2": { + "accuracy": 0.29, + "category_acc": { + "brand": 0.0, + "demographics": 0.6, + "biology": 0.2, + "history": 0.06666666666666667, + "literature": 0.4, + "politics": 0.4, + "culture": 0.4, + "film": 0.3, + "law": 0.3, + "geography": 0.5 + } + }, + "prompt_3": { + "accuracy": 0.38, + "category_acc": { + "brand": 0.0, + "demographics": 0.2, + "biology": 0.3, + "history": 0.3333333333333333, + "literature": 0.4, + "politics": 0.4, + "culture": 0.3, + "film": 0.6, + "law": 0.5, + "geography": 0.7 + } + }, + "prompt_4": { + "accuracy": 0.34, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.3, + "history": 0.2, + "literature": 0.3, + "politics": 0.4, + "culture": 0.4, + "film": 0.3, + "law": 0.2, + "geography": 0.7 + } + }, + "prompt_5": { + "accuracy": 0.36, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.3, + "history": 0.13333333333333333, + "literature": 0.4, + "politics": 0.5, + "culture": 0.4, + "film": 0.4, + "law": 0.3, + "geography": 0.5 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.13552946305798164 + }, + "prompt_2": { + "bleu_score": 0.13606630561200056 + }, + "prompt_3": { + "bleu_score": 0.1234834484703555 + }, + "prompt_4": { + "bleu_score": 0.14231527496434374 + }, + "prompt_5": { + "bleu_score": 0.10838259129294284 + } }, "indommlu": { "prompt_1": -1, @@ -9862,179 +84459,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.1789026717487701 + }, + "prompt_2": { + "bleu_score": 0.2157008166227879 + }, + "prompt_3": { + "bleu_score": 0.19552290431281896 + }, + "prompt_4": { + "bleu_score": 0.212579965134183 + }, + "prompt_5": { + "bleu_score": 0.16842225525851923 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.1526949174992 + }, + "prompt_2": { + "bleu_score": 0.16024595393231858 + }, + "prompt_3": { + "bleu_score": 0.15108897782891365 + }, + "prompt_4": { + "bleu_score": 0.17204869696628058 + }, + "prompt_5": { + "bleu_score": 0.13507216111669612 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.1306177582732538 + }, + "prompt_2": { + "bleu_score": 0.13441143088775806 + }, + "prompt_3": { + "bleu_score": 0.12133060307579903 + }, + "prompt_4": { + "bleu_score": 0.13621806733716263 + }, + "prompt_5": { + "bleu_score": 0.1010815321821953 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.15494052058307634 + }, + "prompt_2": { + "bleu_score": 0.16691562991577336 + }, + "prompt_3": { + "bleu_score": 0.15775901356075947 + }, + "prompt_4": { + "bleu_score": 0.1683834541429431 + }, + "prompt_5": { + "bleu_score": 0.14110094385633967 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4725787631271879 + }, + "prompt_2": { + "accuracy": 0.456242707117853 + }, + "prompt_3": { + "accuracy": 0.45274212368728123 + }, + "prompt_4": { + "accuracy": 0.47024504084014 + }, + "prompt_5": { + "accuracy": 0.4282380396732789 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4410439756882374, + "category_acc": { + "high_school_european_history": 0.5975609756097561, + "business_ethics": 0.5959595959595959, + "clinical_knowledge": 0.5113636363636364, + "medical_genetics": 0.5050505050505051, + "high_school_us_history": 0.6551724137931034, + "high_school_physics": 0.31333333333333335, + "high_school_world_history": 0.652542372881356, + "virology": 0.40606060606060607, + "high_school_microeconomics": 0.4430379746835443, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.2727272727272727, + "high_school_biology": 0.5048543689320388, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.30604982206405695, + "philosophy": 0.43870967741935485, + "professional_medicine": 0.4317343173431734, + "nutrition": 0.49508196721311476, + "global_facts": 0.2828282828282828, + "machine_learning": 0.34234234234234234, + "security_studies": 0.4139344262295082, + "public_relations": 0.5045871559633027, + "professional_psychology": 0.4026186579378069, + "prehistory": 0.46439628482972134, + "anatomy": 0.41044776119402987, + "human_sexuality": 0.43846153846153846, + "college_medicine": 0.4418604651162791, + "high_school_government_and_politics": 0.6041666666666666, + "college_chemistry": 0.37373737373737376, + "logical_fallacies": 0.5740740740740741, + "high_school_geography": 0.5126903553299492, + "elementary_mathematics": 0.32625994694960214, + "human_aging": 0.4954954954954955, + "college_mathematics": 0.20202020202020202, + "high_school_psychology": 0.5974264705882353, + "formal_logic": 0.336, + "high_school_statistics": 0.2651162790697674, + "international_law": 0.5083333333333333, + "high_school_mathematics": 0.24535315985130113, + "high_school_computer_science": 0.494949494949495, + "conceptual_physics": 0.3717948717948718, + "miscellaneous": 0.6457800511508951, + "high_school_chemistry": 0.30198019801980197, + "marketing": 0.6952789699570815, + "professional_law": 0.32811480756686234, + "management": 0.6568627450980392, + "college_physics": 0.21782178217821782, + "jurisprudence": 0.5607476635514018, + "world_religions": 0.6588235294117647, + "sociology": 0.655, + "us_foreign_policy": 0.6161616161616161, + "high_school_macroeconomics": 0.4473007712082262, + "computer_security": 0.5757575757575758, + "moral_scenarios": 0.25279642058165547, + "moral_disputes": 0.43768115942028984, + "electrical_engineering": 0.4166666666666667, + "astronomy": 0.46357615894039733, + "college_biology": 0.5034965034965035 + } + }, + "prompt_2": { + "accuracy": 0.44533428673578834, + "category_acc": { + "high_school_european_history": 0.6402439024390244, + "business_ethics": 0.5757575757575758, + "clinical_knowledge": 0.5, + "medical_genetics": 0.5050505050505051, + "high_school_us_history": 0.6108374384236454, + "high_school_physics": 0.25333333333333335, + "high_school_world_history": 0.6398305084745762, + "virology": 0.3393939393939394, + "high_school_microeconomics": 0.42616033755274263, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.26262626262626265, + "high_school_biology": 0.5533980582524272, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.3202846975088968, + "philosophy": 0.4838709677419355, + "professional_medicine": 0.3800738007380074, + "nutrition": 0.4852459016393443, + "global_facts": 0.32323232323232326, + "machine_learning": 0.2972972972972973, + "security_studies": 0.4098360655737705, + "public_relations": 0.48623853211009177, + "professional_psychology": 0.45171849427168576, + "prehistory": 0.4582043343653251, + "anatomy": 0.40298507462686567, + "human_sexuality": 0.43846153846153846, + "college_medicine": 0.43023255813953487, + "high_school_government_and_politics": 0.6197916666666666, + "college_chemistry": 0.3939393939393939, + "logical_fallacies": 0.5679012345679012, + "high_school_geography": 0.5634517766497462, + "elementary_mathematics": 0.35013262599469497, + "human_aging": 0.536036036036036, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.6029411764705882, + "formal_logic": 0.328, + "high_school_statistics": 0.33488372093023255, + "international_law": 0.575, + "high_school_mathematics": 0.2788104089219331, + "high_school_computer_science": 0.5252525252525253, + "conceptual_physics": 0.3717948717948718, + "miscellaneous": 0.6214833759590793, + "high_school_chemistry": 0.297029702970297, + "marketing": 0.6995708154506438, + "professional_law": 0.345075016307893, + "management": 0.6176470588235294, + "college_physics": 0.2871287128712871, + "jurisprudence": 0.48598130841121495, + "world_religions": 0.6352941176470588, + "sociology": 0.68, + "us_foreign_policy": 0.6262626262626263, + "high_school_macroeconomics": 0.4087403598971722, + "computer_security": 0.5454545454545454, + "moral_scenarios": 0.25279642058165547, + "moral_disputes": 0.46956521739130436, + "electrical_engineering": 0.3541666666666667, + "astronomy": 0.48344370860927155, + "college_biology": 0.4825174825174825 + } + }, + "prompt_3": { + "accuracy": 0.44390418305327134, + "category_acc": { + "high_school_european_history": 0.6463414634146342, + "business_ethics": 0.5454545454545454, + "clinical_knowledge": 0.5151515151515151, + "medical_genetics": 0.5555555555555556, + "high_school_us_history": 0.5615763546798029, + "high_school_physics": 0.24, + "high_school_world_history": 0.690677966101695, + "virology": 0.3575757575757576, + "high_school_microeconomics": 0.45147679324894513, + "econometrics": 0.26548672566371684, + "college_computer_science": 0.30303030303030304, + "high_school_biology": 0.49514563106796117, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.3701067615658363, + "philosophy": 0.43870967741935485, + "professional_medicine": 0.36900369003690037, + "nutrition": 0.4819672131147541, + "global_facts": 0.24242424242424243, + "machine_learning": 0.36936936936936937, + "security_studies": 0.45491803278688525, + "public_relations": 0.5045871559633027, + "professional_psychology": 0.44680851063829785, + "prehistory": 0.46439628482972134, + "anatomy": 0.3805970149253731, + "human_sexuality": 0.46923076923076923, + "college_medicine": 0.43023255813953487, + "high_school_government_and_politics": 0.671875, + "college_chemistry": 0.35353535353535354, + "logical_fallacies": 0.47530864197530864, + "high_school_geography": 0.5076142131979695, + "elementary_mathematics": 0.33156498673740054, + "human_aging": 0.4864864864864865, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.6102941176470589, + "formal_logic": 0.304, + "high_school_statistics": 0.3116279069767442, + "international_law": 0.5, + "high_school_mathematics": 0.25650557620817843, + "high_school_computer_science": 0.5353535353535354, + "conceptual_physics": 0.37606837606837606, + "miscellaneous": 0.6214833759590793, + "high_school_chemistry": 0.3564356435643564, + "marketing": 0.6995708154506438, + "professional_law": 0.35812133072407043, + "management": 0.6274509803921569, + "college_physics": 0.3564356435643564, + "jurisprudence": 0.4953271028037383, + "world_religions": 0.6294117647058823, + "sociology": 0.68, + "us_foreign_policy": 0.6666666666666666, + "high_school_macroeconomics": 0.4241645244215938, + "computer_security": 0.5757575757575758, + "moral_scenarios": 0.2483221476510067, + "moral_disputes": 0.4463768115942029, + "electrical_engineering": 0.3333333333333333, + "astronomy": 0.4503311258278146, + "college_biology": 0.4125874125874126 + } + }, + "prompt_4": { + "accuracy": 0.4454057919199142, + "category_acc": { + "high_school_european_history": 0.5487804878048781, + "business_ethics": 0.5050505050505051, + "clinical_knowledge": 0.48863636363636365, + "medical_genetics": 0.47474747474747475, + "high_school_us_history": 0.6600985221674877, + "high_school_physics": 0.3, + "high_school_world_history": 0.635593220338983, + "virology": 0.3696969696969697, + "high_school_microeconomics": 0.4177215189873418, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.35353535353535354, + "high_school_biology": 0.5372168284789643, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.34519572953736655, + "philosophy": 0.43870967741935485, + "professional_medicine": 0.3800738007380074, + "nutrition": 0.4524590163934426, + "global_facts": 0.30303030303030304, + "machine_learning": 0.34234234234234234, + "security_studies": 0.45901639344262296, + "public_relations": 0.5412844036697247, + "professional_psychology": 0.40589198036006546, + "prehistory": 0.5232198142414861, + "anatomy": 0.4626865671641791, + "human_sexuality": 0.45384615384615384, + "college_medicine": 0.436046511627907, + "high_school_government_and_politics": 0.59375, + "college_chemistry": 0.3333333333333333, + "logical_fallacies": 0.5308641975308642, + "high_school_geography": 0.5736040609137056, + "elementary_mathematics": 0.35013262599469497, + "human_aging": 0.481981981981982, + "college_mathematics": 0.30303030303030304, + "high_school_psychology": 0.6617647058823529, + "formal_logic": 0.312, + "high_school_statistics": 0.27906976744186046, + "international_law": 0.575, + "high_school_mathematics": 0.26765799256505574, + "high_school_computer_science": 0.37373737373737376, + "conceptual_physics": 0.37606837606837606, + "miscellaneous": 0.629156010230179, + "high_school_chemistry": 0.29207920792079206, + "marketing": 0.6952789699570815, + "professional_law": 0.3607305936073059, + "management": 0.6078431372549019, + "college_physics": 0.297029702970297, + "jurisprudence": 0.5327102803738317, + "world_religions": 0.6058823529411764, + "sociology": 0.58, + "us_foreign_policy": 0.6161616161616161, + "high_school_macroeconomics": 0.39331619537275064, + "computer_security": 0.5151515151515151, + "moral_scenarios": 0.26174496644295303, + "moral_disputes": 0.4956521739130435, + "electrical_engineering": 0.4166666666666667, + "astronomy": 0.423841059602649, + "college_biology": 0.4755244755244755 + } + }, + "prompt_5": { + "accuracy": 0.408509116910976, + "category_acc": { + "high_school_european_history": 0.6036585365853658, + "business_ethics": 0.45454545454545453, + "clinical_knowledge": 0.3977272727272727, + "medical_genetics": 0.47474747474747475, + "high_school_us_history": 0.5665024630541872, + "high_school_physics": 0.23333333333333334, + "high_school_world_history": 0.6398305084745762, + "virology": 0.3515151515151515, + "high_school_microeconomics": 0.3881856540084388, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.3939393939393939, + "high_school_biology": 0.49514563106796117, + "abstract_algebra": 0.1919191919191919, + "professional_accounting": 0.3274021352313167, + "philosophy": 0.4096774193548387, + "professional_medicine": 0.42066420664206644, + "nutrition": 0.4131147540983607, + "global_facts": 0.30303030303030304, + "machine_learning": 0.27927927927927926, + "security_studies": 0.430327868852459, + "public_relations": 0.41284403669724773, + "professional_psychology": 0.37479541734860883, + "prehistory": 0.4055727554179567, + "anatomy": 0.43283582089552236, + "human_sexuality": 0.46153846153846156, + "college_medicine": 0.37790697674418605, + "high_school_government_and_politics": 0.4947916666666667, + "college_chemistry": 0.35353535353535354, + "logical_fallacies": 0.5308641975308642, + "high_school_geography": 0.5177664974619289, + "elementary_mathematics": 0.32625994694960214, + "human_aging": 0.45495495495495497, + "college_mathematics": 0.24242424242424243, + "high_school_psychology": 0.5551470588235294, + "formal_logic": 0.232, + "high_school_statistics": 0.3302325581395349, + "international_law": 0.5, + "high_school_mathematics": 0.26022304832713755, + "high_school_computer_science": 0.4444444444444444, + "conceptual_physics": 0.3504273504273504, + "miscellaneous": 0.5549872122762148, + "high_school_chemistry": 0.297029702970297, + "marketing": 0.5836909871244635, + "professional_law": 0.345075016307893, + "management": 0.5294117647058824, + "college_physics": 0.27722772277227725, + "jurisprudence": 0.411214953271028, + "world_religions": 0.5705882352941176, + "sociology": 0.51, + "us_foreign_policy": 0.5757575757575758, + "high_school_macroeconomics": 0.38817480719794345, + "computer_security": 0.40404040404040403, + "moral_scenarios": 0.2684563758389262, + "moral_disputes": 0.42028985507246375, + "electrical_engineering": 0.375, + "astronomy": 0.3973509933774834, + "college_biology": 0.4125874125874126 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4561664190193165 + }, + "prompt_2": { + "accuracy": 0.4658246656760773 + }, + "prompt_3": { + "accuracy": 0.44799405646359586 + }, + "prompt_4": { + "accuracy": 0.4197622585438336 + }, + "prompt_5": { + "accuracy": 0.45170876671619614 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4539227895392279, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.35714285714285715, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.35714285714285715, + "metrology_engineer": 0.5517241379310345, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.6923076923076923, + "middle_school_physics": 0.5416666666666666, + "middle_school_chemistry": 0.64, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.4, + "business_administration": 0.3157894736842105, + "marxism": 0.8333333333333334, + "mao_zedong_thought": 0.6206896551724138, + "education_science": 0.5, + "teacher_qualification": 0.6938775510204082, + "high_school_politics": 0.7083333333333334, + "high_school_geography": 0.5416666666666666, + "middle_school_politics": 0.7692307692307693, + "middle_school_geography": 0.7058823529411765, + "modern_chinese_history": 0.5357142857142857, + "ideological_and_moral_cultivation": 0.7083333333333334, + "logic": 0.5185185185185185, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.5, + "art_studies": 0.47368421052631576, + "professional_tour_guide": 0.5294117647058824, + "legal_professional": 0.4642857142857143, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.72, + "middle_school_history": 0.8148148148148148, + "civil_servant": 0.5, + "sports_science": 0.4583333333333333, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.375, + "clinical_medicine": 0.4444444444444444, + "urban_and_rural_planner": 0.47058823529411764, + "accountant": 0.42592592592592593, + "fire_engineer": 0.3888888888888889, + "environmental_impact_assessment_engineer": 0.3888888888888889, + "tax_accountant": 0.2777777777777778, + "physician": 0.46296296296296297 + } + }, + "prompt_2": { + "accuracy": 0.4489414694894147, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.20833333333333334, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.35714285714285715, + "college_physics": 0.25, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.375, + "high_school_biology": 0.4583333333333333, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.7692307692307693, + "middle_school_physics": 0.5416666666666666, + "middle_school_chemistry": 0.72, + "veterinary_medicine": 0.5, + "college_economics": 0.2833333333333333, + "business_administration": 0.3684210526315789, + "marxism": 0.5833333333333334, + "mao_zedong_thought": 0.6206896551724138, + "education_science": 0.5294117647058824, + "teacher_qualification": 0.6122448979591837, + "high_school_politics": 0.7083333333333334, + "high_school_geography": 0.5833333333333334, + "middle_school_politics": 0.7692307692307693, + "middle_school_geography": 0.5882352941176471, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.7083333333333334, + "logic": 0.4444444444444444, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.47368421052631576, + "professional_tour_guide": 0.6764705882352942, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.375, + "high_school_history": 0.64, + "middle_school_history": 0.7777777777777778, + "civil_servant": 0.36538461538461536, + "sports_science": 0.5833333333333334, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.5490196078431373, + "accountant": 0.42592592592592593, + "fire_engineer": 0.3888888888888889, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.2777777777777778, + "physician": 0.6481481481481481 + } + }, + "prompt_3": { + "accuracy": 0.42528019925280197, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.25, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.35714285714285715, + "college_physics": 0.4166666666666667, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.25, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.6923076923076923, + "middle_school_physics": 0.4583333333333333, + "middle_school_chemistry": 0.6, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.3333333333333333, + "business_administration": 0.2894736842105263, + "marxism": 0.6666666666666666, + "mao_zedong_thought": 0.6551724137931034, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.6530612244897959, + "high_school_politics": 0.6666666666666666, + "high_school_geography": 0.5416666666666666, + "middle_school_politics": 0.5769230769230769, + "middle_school_geography": 0.5294117647058824, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.48148148148148145, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.5, + "art_studies": 0.5263157894736842, + "professional_tour_guide": 0.5588235294117647, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.16666666666666666, + "high_school_history": 0.64, + "middle_school_history": 0.7037037037037037, + "civil_servant": 0.38461538461538464, + "sports_science": 0.4583333333333333, + "plant_protection": 0.4444444444444444, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.5185185185185185, + "urban_and_rural_planner": 0.43137254901960786, + "accountant": 0.42592592592592593, + "fire_engineer": 0.5277777777777778, + "environmental_impact_assessment_engineer": 0.5833333333333334, + "tax_accountant": 0.35185185185185186, + "physician": 0.42592592592592593 + } + }, + "prompt_4": { + "accuracy": 0.4178082191780822, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.125, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.30952380952380953, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.1724137931034483, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.38095238095238093, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.4827586206896552, + "high_school_mathematics": 0.13043478260869565, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.375, + "high_school_biology": 0.5, + "middle_school_mathematics": 0.125, + "middle_school_biology": 0.6923076923076923, + "middle_school_physics": 0.4583333333333333, + "middle_school_chemistry": 0.64, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.35, + "business_administration": 0.18421052631578946, + "marxism": 0.625, + "mao_zedong_thought": 0.6551724137931034, + "education_science": 0.5294117647058824, + "teacher_qualification": 0.6326530612244898, + "high_school_politics": 0.6666666666666666, + "high_school_geography": 0.5, + "middle_school_politics": 0.8461538461538461, + "middle_school_geography": 0.5882352941176471, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.48148148148148145, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.39285714285714285, + "art_studies": 0.5526315789473685, + "professional_tour_guide": 0.5, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.4166666666666667, + "high_school_history": 0.68, + "middle_school_history": 0.7037037037037037, + "civil_servant": 0.2692307692307692, + "sports_science": 0.375, + "plant_protection": 0.4444444444444444, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.4117647058823529, + "accountant": 0.46296296296296297, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.3148148148148148, + "physician": 0.5 + } + }, + "prompt_5": { + "accuracy": 0.43897882938978827, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.16666666666666666, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.30952380952380953, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.4482758620689655, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.13043478260869565, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.5172413793103449, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.4166666666666667, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.5833333333333334, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.8076923076923077, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.52, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.31666666666666665, + "business_administration": 0.2894736842105263, + "marxism": 0.75, + "mao_zedong_thought": 0.6896551724137931, + "education_science": 0.5294117647058824, + "teacher_qualification": 0.6938775510204082, + "high_school_politics": 0.7083333333333334, + "high_school_geography": 0.5416666666666666, + "middle_school_politics": 0.6923076923076923, + "middle_school_geography": 0.7647058823529411, + "modern_chinese_history": 0.5357142857142857, + "ideological_and_moral_cultivation": 0.7083333333333334, + "logic": 0.4444444444444444, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.5, + "art_studies": 0.47368421052631576, + "professional_tour_guide": 0.5, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.68, + "middle_school_history": 0.7777777777777778, + "civil_servant": 0.46153846153846156, + "sports_science": 0.3333333333333333, + "plant_protection": 0.5555555555555556, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.45098039215686275, + "accountant": 0.37037037037037035, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.35185185185185186, + "physician": 0.35185185185185186 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.44086021505376344 + }, + "prompt_2": { + "accuracy": 0.41935483870967744 + }, + "prompt_3": { + "accuracy": 0.4444444444444444 + }, + "prompt_4": { + "accuracy": 0.4444444444444444 + }, + "prompt_5": { + "accuracy": 0.4336917562724014 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.422465895354861, + "category_acc": { + "agronomy": 0.34911242603550297, + "anatomy": 0.28378378378378377, + "ancient_chinese": 0.23170731707317074, + "arts": 0.49375, + "astronomy": 0.3212121212121212, + "business_ethics": 0.3923444976076555, + "chinese_civil_service_exam": 0.39375, + "chinese_driving_rule": 0.5190839694656488, + "chinese_food_culture": 0.3897058823529412, + "chinese_foreign_policy": 0.5794392523364486, + "chinese_history": 0.5820433436532507, + "chinese_literature": 0.36764705882352944, + "chinese_teacher_qualification": 0.5642458100558659, + "clinical_knowledge": 0.3881856540084388, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.5420560747663551, + "college_engineering_hydrology": 0.29245283018867924, + "college_law": 0.39814814814814814, + "college_mathematics": 0.19047619047619047, + "college_medical_statistics": 0.37735849056603776, + "college_medicine": 0.3553113553113553, + "computer_science": 0.4117647058823529, + "computer_security": 0.4444444444444444, + "conceptual_physics": 0.42857142857142855, + "construction_project_management": 0.37410071942446044, + "economics": 0.44025157232704404, + "education": 0.44785276073619634, + "electrical_engineering": 0.3430232558139535, + "elementary_chinese": 0.36507936507936506, + "elementary_commonsense": 0.47474747474747475, + "elementary_information_and_technology": 0.5966386554621849, + "elementary_mathematics": 0.3, + "ethnology": 0.4740740740740741, + "food_science": 0.44755244755244755, + "genetics": 0.32386363636363635, + "global_facts": 0.47651006711409394, + "high_school_biology": 0.40236686390532544, + "high_school_chemistry": 0.3409090909090909, + "high_school_geography": 0.3983050847457627, + "high_school_mathematics": 0.24390243902439024, + "high_school_physics": 0.3181818181818182, + "high_school_politics": 0.4965034965034965, + "human_sexuality": 0.3412698412698413, + "international_law": 0.41621621621621624, + "journalism": 0.42441860465116277, + "jurisprudence": 0.4793187347931874, + "legal_and_moral_basis": 0.719626168224299, + "logical": 0.3333333333333333, + "machine_learning": 0.36065573770491804, + "management": 0.5142857142857142, + "marketing": 0.5333333333333333, + "marxist_theory": 0.5185185185185185, + "modern_chinese": 0.33620689655172414, + "nutrition": 0.36551724137931035, + "philosophy": 0.5142857142857142, + "professional_accounting": 0.4228571428571429, + "professional_law": 0.3222748815165877, + "professional_medicine": 0.35904255319148937, + "professional_psychology": 0.47844827586206895, + "public_relations": 0.4367816091954023, + "security_study": 0.5111111111111111, + "sociology": 0.42920353982300885, + "sports_science": 0.36363636363636365, + "traditional_chinese_medicine": 0.372972972972973, + "virology": 0.4556213017751479, + "world_history": 0.5527950310559007, + "world_religions": 0.525 + } + }, + "prompt_2": { + "accuracy": 0.4338628906924538, + "category_acc": { + "agronomy": 0.4260355029585799, + "anatomy": 0.2972972972972973, + "ancient_chinese": 0.2621951219512195, + "arts": 0.54375, + "astronomy": 0.28484848484848485, + "business_ethics": 0.4449760765550239, + "chinese_civil_service_exam": 0.41875, + "chinese_driving_rule": 0.5190839694656488, + "chinese_food_culture": 0.40441176470588236, + "chinese_foreign_policy": 0.5700934579439252, + "chinese_history": 0.5851393188854489, + "chinese_literature": 0.3431372549019608, + "chinese_teacher_qualification": 0.5921787709497207, + "clinical_knowledge": 0.3459915611814346, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.5981308411214953, + "college_engineering_hydrology": 0.39622641509433965, + "college_law": 0.3611111111111111, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.41509433962264153, + "college_medicine": 0.41025641025641024, + "computer_science": 0.4166666666666667, + "computer_security": 0.5555555555555556, + "conceptual_physics": 0.4421768707482993, + "construction_project_management": 0.37410071942446044, + "economics": 0.41509433962264153, + "education": 0.48466257668711654, + "electrical_engineering": 0.46511627906976744, + "elementary_chinese": 0.36904761904761907, + "elementary_commonsense": 0.5050505050505051, + "elementary_information_and_technology": 0.5882352941176471, + "elementary_mathematics": 0.30869565217391304, + "ethnology": 0.48148148148148145, + "food_science": 0.4195804195804196, + "genetics": 0.3693181818181818, + "global_facts": 0.5033557046979866, + "high_school_biology": 0.46745562130177515, + "high_school_chemistry": 0.2878787878787879, + "high_school_geography": 0.4491525423728814, + "high_school_mathematics": 0.21341463414634146, + "high_school_physics": 0.33636363636363636, + "high_school_politics": 0.46853146853146854, + "human_sexuality": 0.3968253968253968, + "international_law": 0.31351351351351353, + "journalism": 0.4476744186046512, + "jurisprudence": 0.48175182481751827, + "legal_and_moral_basis": 0.7476635514018691, + "logical": 0.34959349593495936, + "machine_learning": 0.38524590163934425, + "management": 0.49523809523809526, + "marketing": 0.4666666666666667, + "marxist_theory": 0.5396825396825397, + "modern_chinese": 0.3448275862068966, + "nutrition": 0.4206896551724138, + "philosophy": 0.5238095238095238, + "professional_accounting": 0.46285714285714286, + "professional_law": 0.3791469194312796, + "professional_medicine": 0.2898936170212766, + "professional_psychology": 0.4870689655172414, + "public_relations": 0.42528735632183906, + "security_study": 0.4666666666666667, + "sociology": 0.4823008849557522, + "sports_science": 0.3878787878787879, + "traditional_chinese_medicine": 0.33513513513513515, + "virology": 0.4319526627218935, + "world_history": 0.577639751552795, + "world_religions": 0.5125 + } + }, + "prompt_3": { + "accuracy": 0.40390260749438783, + "category_acc": { + "agronomy": 0.35502958579881655, + "anatomy": 0.32432432432432434, + "ancient_chinese": 0.25609756097560976, + "arts": 0.5125, + "astronomy": 0.3575757575757576, + "business_ethics": 0.40669856459330145, + "chinese_civil_service_exam": 0.4125, + "chinese_driving_rule": 0.42748091603053434, + "chinese_food_culture": 0.4485294117647059, + "chinese_foreign_policy": 0.5327102803738317, + "chinese_history": 0.5232198142414861, + "chinese_literature": 0.3088235294117647, + "chinese_teacher_qualification": 0.547486033519553, + "clinical_knowledge": 0.3755274261603376, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.5046728971962616, + "college_engineering_hydrology": 0.3490566037735849, + "college_law": 0.4074074074074074, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.36792452830188677, + "college_medicine": 0.3626373626373626, + "computer_science": 0.4166666666666667, + "computer_security": 0.5380116959064327, + "conceptual_physics": 0.43537414965986393, + "construction_project_management": 0.381294964028777, + "economics": 0.36477987421383645, + "education": 0.5276073619631901, + "electrical_engineering": 0.3546511627906977, + "elementary_chinese": 0.3134920634920635, + "elementary_commonsense": 0.4696969696969697, + "elementary_information_and_technology": 0.5756302521008403, + "elementary_mathematics": 0.25217391304347825, + "ethnology": 0.43703703703703706, + "food_science": 0.38461538461538464, + "genetics": 0.3522727272727273, + "global_facts": 0.4697986577181208, + "high_school_biology": 0.378698224852071, + "high_school_chemistry": 0.23484848484848486, + "high_school_geography": 0.3389830508474576, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.33636363636363636, + "high_school_politics": 0.44755244755244755, + "human_sexuality": 0.36507936507936506, + "international_law": 0.31891891891891894, + "journalism": 0.4127906976744186, + "jurisprudence": 0.41362530413625304, + "legal_and_moral_basis": 0.6401869158878505, + "logical": 0.34146341463414637, + "machine_learning": 0.3114754098360656, + "management": 0.49523809523809526, + "marketing": 0.4777777777777778, + "marxist_theory": 0.4708994708994709, + "modern_chinese": 0.31896551724137934, + "nutrition": 0.3793103448275862, + "philosophy": 0.5142857142857142, + "professional_accounting": 0.4, + "professional_law": 0.3175355450236967, + "professional_medicine": 0.27925531914893614, + "professional_psychology": 0.46120689655172414, + "public_relations": 0.43103448275862066, + "security_study": 0.48148148148148145, + "sociology": 0.37168141592920356, + "sports_science": 0.3939393939393939, + "traditional_chinese_medicine": 0.33513513513513515, + "virology": 0.42011834319526625, + "world_history": 0.5031055900621118, + "world_religions": 0.525 + } + }, + "prompt_4": { + "accuracy": 0.40649283370747713, + "category_acc": { + "agronomy": 0.41420118343195267, + "anatomy": 0.32432432432432434, + "ancient_chinese": 0.23170731707317074, + "arts": 0.4875, + "astronomy": 0.2909090909090909, + "business_ethics": 0.47368421052631576, + "chinese_civil_service_exam": 0.3, + "chinese_driving_rule": 0.48091603053435117, + "chinese_food_culture": 0.4338235294117647, + "chinese_foreign_policy": 0.5607476635514018, + "chinese_history": 0.4953560371517028, + "chinese_literature": 0.3431372549019608, + "chinese_teacher_qualification": 0.43575418994413406, + "clinical_knowledge": 0.35443037974683544, + "college_actuarial_science": 0.32075471698113206, + "college_education": 0.6261682242990654, + "college_engineering_hydrology": 0.37735849056603776, + "college_law": 0.32407407407407407, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.37735849056603776, + "college_medicine": 0.3626373626373626, + "computer_science": 0.3431372549019608, + "computer_security": 0.49122807017543857, + "conceptual_physics": 0.4557823129251701, + "construction_project_management": 0.3597122302158273, + "economics": 0.4339622641509434, + "education": 0.4171779141104294, + "electrical_engineering": 0.32558139534883723, + "elementary_chinese": 0.3492063492063492, + "elementary_commonsense": 0.46464646464646464, + "elementary_information_and_technology": 0.5546218487394958, + "elementary_mathematics": 0.30434782608695654, + "ethnology": 0.4444444444444444, + "food_science": 0.3776223776223776, + "genetics": 0.3409090909090909, + "global_facts": 0.4966442953020134, + "high_school_biology": 0.38461538461538464, + "high_school_chemistry": 0.3106060606060606, + "high_school_geography": 0.3474576271186441, + "high_school_mathematics": 0.2865853658536585, + "high_school_physics": 0.32727272727272727, + "high_school_politics": 0.44755244755244755, + "human_sexuality": 0.42063492063492064, + "international_law": 0.3945945945945946, + "journalism": 0.4186046511627907, + "jurisprudence": 0.40145985401459855, + "legal_and_moral_basis": 0.7149532710280374, + "logical": 0.42276422764227645, + "machine_learning": 0.32786885245901637, + "management": 0.44761904761904764, + "marketing": 0.45, + "marxist_theory": 0.5026455026455027, + "modern_chinese": 0.29310344827586204, + "nutrition": 0.32413793103448274, + "philosophy": 0.45714285714285713, + "professional_accounting": 0.4342857142857143, + "professional_law": 0.3080568720379147, + "professional_medicine": 0.2978723404255319, + "professional_psychology": 0.4698275862068966, + "public_relations": 0.4367816091954023, + "security_study": 0.4444444444444444, + "sociology": 0.4424778761061947, + "sports_science": 0.40606060606060607, + "traditional_chinese_medicine": 0.35135135135135137, + "virology": 0.46745562130177515, + "world_history": 0.515527950310559, + "world_religions": 0.4875 + } + }, + "prompt_5": { + "accuracy": 0.4209981005007771, + "category_acc": { + "agronomy": 0.40828402366863903, + "anatomy": 0.30405405405405406, + "ancient_chinese": 0.2926829268292683, + "arts": 0.525, + "astronomy": 0.3333333333333333, + "business_ethics": 0.3875598086124402, + "chinese_civil_service_exam": 0.39375, + "chinese_driving_rule": 0.5648854961832062, + "chinese_food_culture": 0.39705882352941174, + "chinese_foreign_policy": 0.5046728971962616, + "chinese_history": 0.5325077399380805, + "chinese_literature": 0.38235294117647056, + "chinese_teacher_qualification": 0.4972067039106145, + "clinical_knowledge": 0.3881856540084388, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.5700934579439252, + "college_engineering_hydrology": 0.37735849056603776, + "college_law": 0.32407407407407407, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.4056603773584906, + "college_medicine": 0.34798534798534797, + "computer_science": 0.4166666666666667, + "computer_security": 0.5029239766081871, + "conceptual_physics": 0.46938775510204084, + "construction_project_management": 0.3669064748201439, + "economics": 0.44654088050314467, + "education": 0.5276073619631901, + "electrical_engineering": 0.36046511627906974, + "elementary_chinese": 0.4007936507936508, + "elementary_commonsense": 0.46464646464646464, + "elementary_information_and_technology": 0.6092436974789915, + "elementary_mathematics": 0.26521739130434785, + "ethnology": 0.4222222222222222, + "food_science": 0.4755244755244755, + "genetics": 0.30113636363636365, + "global_facts": 0.44966442953020136, + "high_school_biology": 0.4319526627218935, + "high_school_chemistry": 0.20454545454545456, + "high_school_geography": 0.4491525423728814, + "high_school_mathematics": 0.2804878048780488, + "high_school_physics": 0.3181818181818182, + "high_school_politics": 0.4405594405594406, + "human_sexuality": 0.4603174603174603, + "international_law": 0.3837837837837838, + "journalism": 0.45930232558139533, + "jurisprudence": 0.45985401459854014, + "legal_and_moral_basis": 0.7336448598130841, + "logical": 0.34959349593495936, + "machine_learning": 0.3360655737704918, + "management": 0.4666666666666667, + "marketing": 0.4111111111111111, + "marxist_theory": 0.5238095238095238, + "modern_chinese": 0.3103448275862069, + "nutrition": 0.41379310344827586, + "philosophy": 0.5238095238095238, + "professional_accounting": 0.44, + "professional_law": 0.33175355450236965, + "professional_medicine": 0.3351063829787234, + "professional_psychology": 0.4353448275862069, + "public_relations": 0.41954022988505746, + "security_study": 0.5111111111111111, + "sociology": 0.4469026548672566, + "sports_science": 0.3878787878787879, + "traditional_chinese_medicine": 0.34594594594594597, + "virology": 0.4260355029585799, + "world_history": 0.4906832298136646, + "world_religions": 0.54375 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3333333333333333 + }, + "prompt_2": { + "accuracy": 0.2727272727272727 + }, + "prompt_3": { + "accuracy": 0.36363636363636365 + }, + "prompt_4": { + "accuracy": 0.30303030303030304 + }, + "prompt_5": { + "accuracy": 0.3333333333333333 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3 + }, + "prompt_2": { + "accuracy": 0.3340909090909091 + }, + "prompt_3": { + "accuracy": 0.275 + }, + "prompt_4": { + "accuracy": 0.33181818181818185 + }, + "prompt_5": { + "accuracy": 0.3477272727272727 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3783050847457627 + }, + "prompt_2": { + "accuracy": 0.3644067796610169 + }, + "prompt_3": { + "accuracy": 0.3644067796610169 + }, + "prompt_4": { + "accuracy": 0.36 + }, + "prompt_5": { + "accuracy": 0.3742372881355932 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6925953627524308 + }, + "prompt_2": { + "accuracy": 0.6963350785340314 + }, + "prompt_3": { + "accuracy": 0.6451009723261032 + }, + "prompt_4": { + "accuracy": 0.6914734480179506 + }, + "prompt_5": { + "accuracy": 0.6888556469708302 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7643312101910829 + }, + "prompt_2": { + "accuracy": 0.7966682998530132 + }, + "prompt_3": { + "accuracy": 0.7780499755022048 + }, + "prompt_4": { + "accuracy": 0.7824595786379226 + }, + "prompt_5": { + "accuracy": 0.7922586967172954 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.310984189342734, + "rouge2": 0.11277591319287593, + "rougeL": 0.2344649678227497, + "avg_rouge": 0.2194083567861199 + }, + "prompt_2": { + "rouge1": 0.3378689869851193, + "rouge2": 0.11983837287814116, + "rougeL": 0.2568386212267886, + "avg_rouge": 0.238181993696683 + }, + "prompt_3": { + "rouge1": 0.33510609136858355, + "rouge2": 0.12190646061433955, + "rougeL": 0.25356291166759737, + "avg_rouge": 0.23685848788350683 + }, + "prompt_4": { + "rouge1": 0.3182273201663158, + "rouge2": 0.11322582699522639, + "rougeL": 0.2390115581349066, + "avg_rouge": 0.22348823509881624 + }, + "prompt_5": { + "rouge1": 0.3220512127742751, + "rouge2": 0.11844647141620766, + "rougeL": 0.24463900073747138, + "avg_rouge": 0.22837889497598474 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2312965879250093, + "rouge2": 0.06300906409182609, + "rougeL": 0.1709453426416082, + "avg_rouge": 0.15508366488614786 + }, + "prompt_2": { + "rouge1": 0.23080698103993752, + "rouge2": 0.06298568156964322, + "rougeL": 0.170218758804644, + "avg_rouge": 0.15467047380474158 + }, + "prompt_3": { + "rouge1": 0.2296269587467571, + "rouge2": 0.06210138505787306, + "rougeL": 0.1690750420848352, + "avg_rouge": 0.15360112862982178 + }, + "prompt_4": { + "rouge1": 0.229663402098225, + "rouge2": 0.061754836084144796, + "rougeL": 0.16990711114664012, + "avg_rouge": 0.15377511644300332 + }, + "prompt_5": { + "rouge1": 0.22773196288175207, + "rouge2": 0.06142275970581499, + "rougeL": 0.1682093877229666, + "avg_rouge": 0.15245470343684456 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8681192660550459 + }, + "prompt_2": { + "accuracy": 0.8474770642201835 + }, + "prompt_3": { + "accuracy": 0.8211009174311926 + }, + "prompt_4": { + "accuracy": 0.8337155963302753 + }, + "prompt_5": { + "accuracy": 0.8658256880733946 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7171620325982742 + }, + "prompt_2": { + "accuracy": 0.6912751677852349 + }, + "prompt_3": { + "accuracy": 0.7200383509108341 + }, + "prompt_4": { + "accuracy": 0.7248322147651006 + }, + "prompt_5": { + "accuracy": 0.7152444870565676 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.703 + }, + "prompt_2": { + "accuracy": 0.6935 + }, + "prompt_3": { + "accuracy": 0.651 + }, + "prompt_4": { + "accuracy": 0.6975 + }, + "prompt_5": { + "accuracy": 0.698 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5825 + }, + "prompt_2": { + "accuracy": 0.5965 + }, + "prompt_3": { + "accuracy": 0.586 + }, + "prompt_4": { + "accuracy": 0.59 + }, + "prompt_5": { + "accuracy": 0.5695 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7365 + }, + "prompt_2": { + "accuracy": 0.7295 + }, + "prompt_3": { + "accuracy": 0.7485 + }, + "prompt_4": { + "accuracy": 0.7145 + }, + "prompt_5": { + "accuracy": 0.744 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4788732394366197 + }, + "prompt_2": { + "accuracy": 0.49295774647887325 + }, + "prompt_3": { + "accuracy": 0.5774647887323944 + }, + "prompt_4": { + "accuracy": 0.5070422535211268 + }, + "prompt_5": { + "accuracy": 0.5915492957746479 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6787003610108303 + }, + "prompt_2": { + "accuracy": 0.6787003610108303 + }, + "prompt_3": { + "accuracy": 0.703971119133574 + }, + "prompt_4": { + "accuracy": 0.6678700361010831 + }, + "prompt_5": { + "accuracy": 0.6750902527075813 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.571078431372549 + }, + "prompt_2": { + "accuracy": 0.6053921568627451 + }, + "prompt_3": { + "accuracy": 0.6495098039215687 + }, + "prompt_4": { + "accuracy": 0.5563725490196079 + }, + "prompt_5": { + "accuracy": 0.5906862745098039 + } } }, "five_shot": { @@ -10144,53 +85931,1733 @@ "model_link": "https://huggingface.co/lmsys/fastchat-t5-3b-v1.0", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3485714285714285, + "language_acc": { + "Malay": 0.32, + "English": 0.5333333333333333, + "Vietnamese": 0.24666666666666667, + "Spanish": 0.42, + "Indonesian": 0.35333333333333333, + "Filipino": 0.26, + "Chinese": 0.30666666666666664 + }, + "consistency_score_2": 0.4682539682539682, + "consistency_score_3": 0.2803809523809524, + "consistency_score_4": 0.1899047619047619, + "consistency_score_5": 0.13904761904761903, + "consistency_score_6": 0.10761904761904761, + "consistency_score_7": 0.08666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.49333333333333335, + "Malay,Vietnamese": 0.4866666666666667, + "Malay,Spanish": 0.5333333333333333, + "Malay,Indonesian": 0.58, + "Malay,Filipino": 0.4866666666666667, + "Malay,Chinese": 0.3, + "English,Vietnamese": 0.38, + "English,Spanish": 0.5666666666666667, + "English,Indonesian": 0.5, + "English,Filipino": 0.4666666666666667, + "English,Chinese": 0.34, + "Vietnamese,Spanish": 0.43333333333333335, + "Vietnamese,Indonesian": 0.52, + "Vietnamese,Filipino": 0.4533333333333333, + "Vietnamese,Chinese": 0.41333333333333333, + "Spanish,Indonesian": 0.6, + "Spanish,Filipino": 0.54, + "Spanish,Chinese": 0.37333333333333335, + "Indonesian,Filipino": 0.4866666666666667, + "Indonesian,Chinese": 0.4666666666666667, + "Filipino,Chinese": 0.41333333333333333 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.28, + "Malay,English,Spanish": 0.3466666666666667, + "Malay,English,Indonesian": 0.36, + "Malay,English,Filipino": 0.29333333333333333, + "Malay,English,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Spanish": 0.3, + "Malay,Vietnamese,Indonesian": 0.3466666666666667, + "Malay,Vietnamese,Filipino": 0.29333333333333333, + "Malay,Vietnamese,Chinese": 0.18666666666666668, + "Malay,Spanish,Indonesian": 0.41333333333333333, + "Malay,Spanish,Filipino": 0.3333333333333333, + "Malay,Spanish,Chinese": 0.19333333333333333, + "Malay,Indonesian,Filipino": 0.3466666666666667, + "Malay,Indonesian,Chinese": 0.24666666666666667, + "Malay,Filipino,Chinese": 0.19333333333333333, + "English,Vietnamese,Spanish": 0.25333333333333335, + "English,Vietnamese,Indonesian": 0.28, + "English,Vietnamese,Filipino": 0.24666666666666667, + "English,Vietnamese,Chinese": 0.18666666666666668, + "English,Spanish,Indonesian": 0.38666666666666666, + "English,Spanish,Filipino": 0.34, + "English,Spanish,Chinese": 0.24666666666666667, + "English,Indonesian,Filipino": 0.2866666666666667, + "English,Indonesian,Chinese": 0.24666666666666667, + "English,Filipino,Chinese": 0.21333333333333335, + "Vietnamese,Spanish,Indonesian": 0.3466666666666667, + "Vietnamese,Spanish,Filipino": 0.2733333333333333, + "Vietnamese,Spanish,Chinese": 0.20666666666666667, + "Vietnamese,Indonesian,Filipino": 0.31333333333333335, + "Vietnamese,Indonesian,Chinese": 0.28, + "Vietnamese,Filipino,Chinese": 0.22666666666666666, + "Spanish,Indonesian,Filipino": 0.3466666666666667, + "Spanish,Indonesian,Chinese": 0.3, + "Spanish,Filipino,Chinese": 0.25333333333333335, + "Indonesian,Filipino,Chinese": 0.26 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.20666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.22, + "Malay,English,Vietnamese,Filipino": 0.18666666666666668, + "Malay,English,Vietnamese,Chinese": 0.12666666666666668, + "Malay,English,Spanish,Indonesian": 0.29333333333333333, + "Malay,English,Spanish,Filipino": 0.24, + "Malay,English,Spanish,Chinese": 0.15333333333333332, + "Malay,English,Indonesian,Filipino": 0.22666666666666666, + "Malay,English,Indonesian,Chinese": 0.16666666666666666, + "Malay,English,Filipino,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Indonesian": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Filipino": 0.2, + "Malay,Vietnamese,Spanish,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "Malay,Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Filipino,Chinese": 0.13333333333333333, + "Malay,Spanish,Indonesian,Filipino": 0.26666666666666666, + "Malay,Spanish,Indonesian,Chinese": 0.18, + "Malay,Spanish,Filipino,Chinese": 0.15333333333333332, + "Malay,Indonesian,Filipino,Chinese": 0.16, + "English,Vietnamese,Spanish,Indonesian": 0.22, + "English,Vietnamese,Spanish,Filipino": 0.18666666666666668, + "English,Vietnamese,Spanish,Chinese": 0.14, + "English,Vietnamese,Indonesian,Filipino": 0.18666666666666668, + "English,Vietnamese,Indonesian,Chinese": 0.15333333333333332, + "English,Vietnamese,Filipino,Chinese": 0.14, + "English,Spanish,Indonesian,Filipino": 0.24666666666666667, + "English,Spanish,Indonesian,Chinese": 0.20666666666666667, + "English,Spanish,Filipino,Chinese": 0.18, + "English,Indonesian,Filipino,Chinese": 0.16, + "Vietnamese,Spanish,Indonesian,Filipino": 0.22, + "Vietnamese,Spanish,Indonesian,Chinese": 0.2, + "Vietnamese,Spanish,Filipino,Chinese": 0.15333333333333332, + "Vietnamese,Indonesian,Filipino,Chinese": 0.18666666666666668, + "Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.18, + "Malay,English,Vietnamese,Spanish,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Spanish,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.12, + "Malay,English,Vietnamese,Filipino,Chinese": 0.1, + "Malay,English,Spanish,Indonesian,Filipino": 0.2, + "Malay,English,Spanish,Indonesian,Chinese": 0.14, + "Malay,English,Spanish,Filipino,Chinese": 0.12666666666666668, + "Malay,English,Indonesian,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.14, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.16, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.13333333333333333, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + } + }, + "AC3_2": 0.39964466154676487, + "AC3_3": 0.3107796140372884, + "AC3_4": 0.24586184239205908, + "AC3_5": 0.19879464281637438, + "AC3_6": 0.16446167607489431, + "AC3_7": 0.13881838071208766 + }, + "prompt_2": { + "overall_acc": 0.3380952380952381, + "language_acc": { + "Malay": 0.3, + "English": 0.5533333333333333, + "Vietnamese": 0.23333333333333334, + "Spanish": 0.41333333333333333, + "Indonesian": 0.34, + "Filipino": 0.24, + "Chinese": 0.2866666666666667 + }, + "consistency_score_2": 0.47619047619047616, + "consistency_score_3": 0.28685714285714287, + "consistency_score_4": 0.1971428571428571, + "consistency_score_5": 0.14825396825396828, + "consistency_score_6": 0.11904761904761905, + "consistency_score_7": 0.1, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5, + "Malay,Vietnamese": 0.47333333333333333, + "Malay,Spanish": 0.58, + "Malay,Indonesian": 0.68, + "Malay,Filipino": 0.58, + "Malay,Chinese": 0.3466666666666667, + "English,Vietnamese": 0.38, + "English,Spanish": 0.5533333333333333, + "English,Indonesian": 0.4866666666666667, + "English,Filipino": 0.4533333333333333, + "English,Chinese": 0.34, + "Vietnamese,Spanish": 0.4, + "Vietnamese,Indonesian": 0.48, + "Vietnamese,Filipino": 0.46, + "Vietnamese,Chinese": 0.4266666666666667, + "Spanish,Indonesian": 0.5666666666666667, + "Spanish,Filipino": 0.5466666666666666, + "Spanish,Chinese": 0.37333333333333335, + "Indonesian,Filipino": 0.4866666666666667, + "Indonesian,Chinese": 0.48, + "Filipino,Chinese": 0.4066666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.26666666666666666, + "Malay,English,Spanish": 0.36, + "Malay,English,Indonesian": 0.37333333333333335, + "Malay,English,Filipino": 0.32666666666666666, + "Malay,English,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Spanish": 0.30666666666666664, + "Malay,Vietnamese,Indonesian": 0.37333333333333335, + "Malay,Vietnamese,Filipino": 0.32666666666666666, + "Malay,Vietnamese,Chinese": 0.22, + "Malay,Spanish,Indonesian": 0.44, + "Malay,Spanish,Filipino": 0.3933333333333333, + "Malay,Spanish,Chinese": 0.22666666666666666, + "Malay,Indonesian,Filipino": 0.4066666666666667, + "Malay,Indonesian,Chinese": 0.30666666666666664, + "Malay,Filipino,Chinese": 0.24666666666666667, + "English,Vietnamese,Spanish": 0.24, + "English,Vietnamese,Indonesian": 0.25333333333333335, + "English,Vietnamese,Filipino": 0.24666666666666667, + "English,Vietnamese,Chinese": 0.19333333333333333, + "English,Spanish,Indonesian": 0.35333333333333333, + "English,Spanish,Filipino": 0.32666666666666666, + "English,Spanish,Chinese": 0.22666666666666666, + "English,Indonesian,Filipino": 0.28, + "English,Indonesian,Chinese": 0.24, + "English,Filipino,Chinese": 0.21333333333333335, + "Vietnamese,Spanish,Indonesian": 0.3, + "Vietnamese,Spanish,Filipino": 0.26666666666666666, + "Vietnamese,Spanish,Chinese": 0.2, + "Vietnamese,Indonesian,Filipino": 0.29333333333333333, + "Vietnamese,Indonesian,Chinese": 0.26666666666666666, + "Vietnamese,Filipino,Chinese": 0.22666666666666666, + "Spanish,Indonesian,Filipino": 0.34, + "Spanish,Indonesian,Chinese": 0.28, + "Spanish,Filipino,Chinese": 0.25333333333333335, + "Indonesian,Filipino,Chinese": 0.26 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.20666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.21333333333333335, + "Malay,English,Vietnamese,Filipino": 0.19333333333333333, + "Malay,English,Vietnamese,Chinese": 0.14, + "Malay,English,Spanish,Indonesian": 0.30666666666666664, + "Malay,English,Spanish,Filipino": 0.25333333333333335, + "Malay,English,Spanish,Chinese": 0.16666666666666666, + "Malay,English,Indonesian,Filipino": 0.24, + "Malay,English,Indonesian,Chinese": 0.18, + "Malay,English,Filipino,Chinese": 0.16666666666666666, + "Malay,Vietnamese,Spanish,Indonesian": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Filipino": 0.22666666666666666, + "Malay,Vietnamese,Spanish,Chinese": 0.16, + "Malay,Vietnamese,Indonesian,Filipino": 0.26, + "Malay,Vietnamese,Indonesian,Chinese": 0.2, + "Malay,Vietnamese,Filipino,Chinese": 0.17333333333333334, + "Malay,Spanish,Indonesian,Filipino": 0.3, + "Malay,Spanish,Indonesian,Chinese": 0.21333333333333335, + "Malay,Spanish,Filipino,Chinese": 0.19333333333333333, + "Malay,Indonesian,Filipino,Chinese": 0.21333333333333335, + "English,Vietnamese,Spanish,Indonesian": 0.2, + "English,Vietnamese,Spanish,Filipino": 0.17333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.14, + "English,Vietnamese,Indonesian,Filipino": 0.17333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.14666666666666667, + "English,Spanish,Indonesian,Filipino": 0.23333333333333334, + "English,Spanish,Indonesian,Chinese": 0.17333333333333334, + "English,Spanish,Filipino,Chinese": 0.17333333333333334, + "English,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Vietnamese,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Vietnamese,Spanish,Indonesian,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Filipino,Chinese": 0.15333333333333332, + "Vietnamese,Indonesian,Filipino,Chinese": 0.18, + "Spanish,Indonesian,Filipino,Chinese": 0.2 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.12666666666666668, + "Malay,English,Vietnamese,Filipino,Chinese": 0.12, + "Malay,English,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Malay,English,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Filipino,Chinese": 0.14666666666666667, + "Malay,English,Indonesian,Filipino,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.2, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.16, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.18, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.12, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.14, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.14, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.12, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.1 + } + }, + "AC3_2": 0.3954330269634124, + "AC3_3": 0.3103757564705608, + "AC3_4": 0.24905948139729062, + "AC3_5": 0.2061233370209031, + "AC3_6": 0.1760912698027499, + "AC3_7": 0.15434782605172495 + }, + "prompt_3": { + "overall_acc": 0.34476190476190477, + "language_acc": { + "Malay": 0.3, + "English": 0.5266666666666666, + "Vietnamese": 0.23333333333333334, + "Spanish": 0.4533333333333333, + "Indonesian": 0.35333333333333333, + "Filipino": 0.26, + "Chinese": 0.2866666666666667 + }, + "consistency_score_2": 0.46158730158730166, + "consistency_score_3": 0.2706666666666666, + "consistency_score_4": 0.18304761904761904, + "consistency_score_5": 0.13650793650793652, + "consistency_score_6": 0.1076190476190476, + "consistency_score_7": 0.08666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.52, + "Malay,Vietnamese": 0.46, + "Malay,Spanish": 0.5266666666666666, + "Malay,Indonesian": 0.68, + "Malay,Filipino": 0.5266666666666666, + "Malay,Chinese": 0.36, + "English,Vietnamese": 0.3933333333333333, + "English,Spanish": 0.5733333333333334, + "English,Indonesian": 0.48, + "English,Filipino": 0.43333333333333335, + "English,Chinese": 0.32666666666666666, + "Vietnamese,Spanish": 0.3933333333333333, + "Vietnamese,Indonesian": 0.4866666666666667, + "Vietnamese,Filipino": 0.43333333333333335, + "Vietnamese,Chinese": 0.36666666666666664, + "Spanish,Indonesian": 0.5466666666666666, + "Spanish,Filipino": 0.5133333333333333, + "Spanish,Chinese": 0.38, + "Indonesian,Filipino": 0.48, + "Indonesian,Chinese": 0.43333333333333335, + "Filipino,Chinese": 0.38 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.28, + "Malay,English,Spanish": 0.36, + "Malay,English,Indonesian": 0.3933333333333333, + "Malay,English,Filipino": 0.3, + "Malay,English,Chinese": 0.20666666666666667, + "Malay,Vietnamese,Spanish": 0.2866666666666667, + "Malay,Vietnamese,Indonesian": 0.38, + "Malay,Vietnamese,Filipino": 0.29333333333333333, + "Malay,Vietnamese,Chinese": 0.2, + "Malay,Spanish,Indonesian": 0.41333333333333333, + "Malay,Spanish,Filipino": 0.34, + "Malay,Spanish,Chinese": 0.21333333333333335, + "Malay,Indonesian,Filipino": 0.38666666666666666, + "Malay,Indonesian,Chinese": 0.29333333333333333, + "Malay,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish": 0.25333333333333335, + "English,Vietnamese,Indonesian": 0.2733333333333333, + "English,Vietnamese,Filipino": 0.22666666666666666, + "English,Vietnamese,Chinese": 0.16666666666666666, + "English,Spanish,Indonesian": 0.3333333333333333, + "English,Spanish,Filipino": 0.30666666666666664, + "English,Spanish,Chinese": 0.22666666666666666, + "English,Indonesian,Filipino": 0.26666666666666666, + "English,Indonesian,Chinese": 0.22666666666666666, + "English,Filipino,Chinese": 0.18666666666666668, + "Vietnamese,Spanish,Indonesian": 0.3, + "Vietnamese,Spanish,Filipino": 0.24666666666666667, + "Vietnamese,Spanish,Chinese": 0.18, + "Vietnamese,Indonesian,Filipino": 0.2866666666666667, + "Vietnamese,Indonesian,Chinese": 0.22666666666666666, + "Vietnamese,Filipino,Chinese": 0.2, + "Spanish,Indonesian,Filipino": 0.30666666666666664, + "Spanish,Indonesian,Chinese": 0.25333333333333335, + "Spanish,Filipino,Chinese": 0.22666666666666666, + "Indonesian,Filipino,Chinese": 0.22666666666666666 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.20666666666666667, + "Malay,English,Vietnamese,Indonesian": 0.24, + "Malay,English,Vietnamese,Filipino": 0.18, + "Malay,English,Vietnamese,Chinese": 0.14, + "Malay,English,Spanish,Indonesian": 0.29333333333333333, + "Malay,English,Spanish,Filipino": 0.22666666666666666, + "Malay,English,Spanish,Chinese": 0.16666666666666666, + "Malay,English,Indonesian,Filipino": 0.23333333333333334, + "Malay,English,Indonesian,Chinese": 0.18666666666666668, + "Malay,English,Filipino,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Indonesian": 0.25333333333333335, + "Malay,Vietnamese,Spanish,Filipino": 0.20666666666666667, + "Malay,Vietnamese,Spanish,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Indonesian,Filipino": 0.24666666666666667, + "Malay,Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Filipino,Chinese": 0.14, + "Malay,Spanish,Indonesian,Filipino": 0.2733333333333333, + "Malay,Spanish,Indonesian,Chinese": 0.19333333333333333, + "Malay,Spanish,Filipino,Chinese": 0.16666666666666666, + "Malay,Indonesian,Filipino,Chinese": 0.17333333333333334, + "English,Vietnamese,Spanish,Indonesian": 0.2, + "English,Vietnamese,Spanish,Filipino": 0.17333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "English,Vietnamese,Indonesian,Filipino": 0.17333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.13333333333333333, + "English,Vietnamese,Filipino,Chinese": 0.11333333333333333, + "English,Spanish,Indonesian,Filipino": 0.21333333333333335, + "English,Spanish,Indonesian,Chinese": 0.17333333333333334, + "English,Spanish,Filipino,Chinese": 0.16, + "English,Indonesian,Filipino,Chinese": 0.14, + "Vietnamese,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Vietnamese,Spanish,Indonesian,Chinese": 0.16, + "Vietnamese,Spanish,Filipino,Chinese": 0.13333333333333333, + "Vietnamese,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Spanish,Indonesian,Filipino,Chinese": 0.17333333333333334 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino": 0.14666666666666667, + "Malay,English,Vietnamese,Spanish,Chinese": 0.12, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.16, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.12666666666666668, + "Malay,English,Vietnamese,Filipino,Chinese": 0.1, + "Malay,English,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Malay,English,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Filipino,Chinese": 0.13333333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.18, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.14, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.14, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.12 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.13333333333333333, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + } + }, + "AC3_2": 0.3947116609934193, + "AC3_3": 0.30325389451381146, + "AC3_4": 0.23913113707211728, + "AC3_5": 0.19557733379527129, + "AC3_6": 0.1640340851767724, + "AC3_7": 0.13851361291859324 + }, + "prompt_4": { + "overall_acc": 0.34285714285714286, + "language_acc": { + "Malay": 0.30666666666666664, + "English": 0.54, + "Vietnamese": 0.22666666666666666, + "Spanish": 0.4266666666666667, + "Indonesian": 0.36, + "Filipino": 0.24666666666666667, + "Chinese": 0.29333333333333333 + }, + "consistency_score_2": 0.4898412698412698, + "consistency_score_3": 0.30476190476190473, + "consistency_score_4": 0.21409523809523812, + "consistency_score_5": 0.16253968253968254, + "consistency_score_6": 0.12952380952380954, + "consistency_score_7": 0.10666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.5266666666666666, + "Malay,Vietnamese": 0.48, + "Malay,Spanish": 0.5733333333333334, + "Malay,Indonesian": 0.68, + "Malay,Filipino": 0.5466666666666666, + "Malay,Chinese": 0.4266666666666667, + "English,Vietnamese": 0.37333333333333335, + "English,Spanish": 0.58, + "English,Indonesian": 0.5, + "English,Filipino": 0.44, + "English,Chinese": 0.3333333333333333, + "Vietnamese,Spanish": 0.42, + "Vietnamese,Indonesian": 0.49333333333333335, + "Vietnamese,Filipino": 0.4533333333333333, + "Vietnamese,Chinese": 0.4266666666666667, + "Spanish,Indonesian": 0.5733333333333334, + "Spanish,Filipino": 0.5866666666666667, + "Spanish,Chinese": 0.44666666666666666, + "Indonesian,Filipino": 0.5066666666666667, + "Indonesian,Chinese": 0.49333333333333335, + "Filipino,Chinese": 0.4266666666666667 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.28, + "Malay,English,Spanish": 0.38, + "Malay,English,Indonesian": 0.4, + "Malay,English,Filipino": 0.32, + "Malay,English,Chinese": 0.24, + "Malay,Vietnamese,Spanish": 0.31333333333333335, + "Malay,Vietnamese,Indonesian": 0.38666666666666666, + "Malay,Vietnamese,Filipino": 0.31333333333333335, + "Malay,Vietnamese,Chinese": 0.26, + "Malay,Spanish,Indonesian": 0.4533333333333333, + "Malay,Spanish,Filipino": 0.3933333333333333, + "Malay,Spanish,Chinese": 0.29333333333333333, + "Malay,Indonesian,Filipino": 0.4066666666666667, + "Malay,Indonesian,Chinese": 0.35333333333333333, + "Malay,Filipino,Chinese": 0.2866666666666667, + "English,Vietnamese,Spanish": 0.24666666666666667, + "English,Vietnamese,Indonesian": 0.26666666666666666, + "English,Vietnamese,Filipino": 0.23333333333333334, + "English,Vietnamese,Chinese": 0.18666666666666668, + "English,Spanish,Indonesian": 0.37333333333333335, + "English,Spanish,Filipino": 0.32666666666666666, + "English,Spanish,Chinese": 0.26, + "English,Indonesian,Filipino": 0.29333333333333333, + "English,Indonesian,Chinese": 0.24, + "English,Filipino,Chinese": 0.20666666666666667, + "Vietnamese,Spanish,Indonesian": 0.3333333333333333, + "Vietnamese,Spanish,Filipino": 0.29333333333333333, + "Vietnamese,Spanish,Chinese": 0.23333333333333334, + "Vietnamese,Indonesian,Filipino": 0.3, + "Vietnamese,Indonesian,Chinese": 0.2866666666666667, + "Vietnamese,Filipino,Chinese": 0.23333333333333334, + "Spanish,Indonesian,Filipino": 0.36666666666666664, + "Spanish,Indonesian,Chinese": 0.32, + "Spanish,Filipino,Chinese": 0.30666666666666664, + "Indonesian,Filipino,Chinese": 0.28 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.21333333333333335, + "Malay,English,Vietnamese,Indonesian": 0.23333333333333334, + "Malay,English,Vietnamese,Filipino": 0.18666666666666668, + "Malay,English,Vietnamese,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Indonesian": 0.32666666666666666, + "Malay,English,Spanish,Filipino": 0.26, + "Malay,English,Spanish,Chinese": 0.20666666666666667, + "Malay,English,Indonesian,Filipino": 0.24666666666666667, + "Malay,English,Indonesian,Chinese": 0.21333333333333335, + "Malay,English,Filipino,Chinese": 0.17333333333333334, + "Malay,Vietnamese,Spanish,Indonesian": 0.2866666666666667, + "Malay,Vietnamese,Spanish,Filipino": 0.24, + "Malay,Vietnamese,Spanish,Chinese": 0.19333333333333333, + "Malay,Vietnamese,Indonesian,Filipino": 0.26, + "Malay,Vietnamese,Indonesian,Chinese": 0.22666666666666666, + "Malay,Vietnamese,Filipino,Chinese": 0.18, + "Malay,Spanish,Indonesian,Filipino": 0.31333333333333335, + "Malay,Spanish,Indonesian,Chinese": 0.26, + "Malay,Spanish,Filipino,Chinese": 0.24, + "Malay,Indonesian,Filipino,Chinese": 0.23333333333333334, + "English,Vietnamese,Spanish,Indonesian": 0.20666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.18, + "English,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "English,Vietnamese,Indonesian,Filipino": 0.18, + "English,Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "English,Vietnamese,Filipino,Chinese": 0.13333333333333333, + "English,Spanish,Indonesian,Filipino": 0.24, + "English,Spanish,Indonesian,Chinese": 0.20666666666666667, + "English,Spanish,Filipino,Chinese": 0.18666666666666668, + "English,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Indonesian,Filipino": 0.24, + "Vietnamese,Spanish,Indonesian,Chinese": 0.21333333333333335, + "Vietnamese,Spanish,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Indonesian,Filipino,Chinese": 0.19333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.22666666666666666 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.19333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino": 0.16, + "Malay,English,Vietnamese,Spanish,Chinese": 0.14, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.16, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.14, + "Malay,English,Vietnamese,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Spanish,Indonesian,Filipino": 0.22, + "Malay,English,Spanish,Indonesian,Chinese": 0.19333333333333333, + "Malay,English,Spanish,Filipino,Chinese": 0.16666666666666666, + "Malay,English,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.22, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.16666666666666666, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.15333333333333332, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.12, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "English,Spanish,Indonesian,Filipino,Chinese": 0.16, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.16 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.13333333333333333, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.11333333333333333, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.15333333333333332, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + } + }, + "AC3_2": 0.40337672235229854, + "AC3_3": 0.3226890755804251, + "AC3_4": 0.2635919483599423, + "AC3_5": 0.2205312275227682, + "AC3_6": 0.18801843313992125, + "AC3_7": 0.16271186437058316 + }, + "prompt_5": { + "overall_acc": 0.3428571428571429, + "language_acc": { + "Malay": 0.30666666666666664, + "English": 0.5333333333333333, + "Vietnamese": 0.24, + "Spanish": 0.3933333333333333, + "Indonesian": 0.38, + "Filipino": 0.26666666666666666, + "Chinese": 0.28 + }, + "consistency_score_2": 0.45428571428571424, + "consistency_score_3": 0.2636190476190476, + "consistency_score_4": 0.17657142857142857, + "consistency_score_5": 0.13047619047619047, + "consistency_score_6": 0.10380952380952381, + "consistency_score_7": 0.08666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Malay,English": 0.49333333333333335, + "Malay,Vietnamese": 0.4866666666666667, + "Malay,Spanish": 0.5733333333333334, + "Malay,Indonesian": 0.6533333333333333, + "Malay,Filipino": 0.54, + "Malay,Chinese": 0.32, + "English,Vietnamese": 0.34, + "English,Spanish": 0.5933333333333334, + "English,Indonesian": 0.5066666666666667, + "English,Filipino": 0.44, + "English,Chinese": 0.32666666666666666, + "Vietnamese,Spanish": 0.36666666666666664, + "Vietnamese,Indonesian": 0.46, + "Vietnamese,Filipino": 0.41333333333333333, + "Vietnamese,Chinese": 0.36, + "Spanish,Indonesian": 0.5733333333333334, + "Spanish,Filipino": 0.54, + "Spanish,Chinese": 0.36, + "Indonesian,Filipino": 0.46, + "Indonesian,Chinese": 0.4066666666666667, + "Filipino,Chinese": 0.32666666666666666 + }, + "3_combine": { + "Malay,English,Vietnamese": 0.25333333333333335, + "Malay,English,Spanish": 0.37333333333333335, + "Malay,English,Indonesian": 0.38666666666666666, + "Malay,English,Filipino": 0.3, + "Malay,English,Chinese": 0.18666666666666668, + "Malay,Vietnamese,Spanish": 0.29333333333333333, + "Malay,Vietnamese,Indonesian": 0.36666666666666664, + "Malay,Vietnamese,Filipino": 0.29333333333333333, + "Malay,Vietnamese,Chinese": 0.16666666666666666, + "Malay,Spanish,Indonesian": 0.44666666666666666, + "Malay,Spanish,Filipino": 0.38, + "Malay,Spanish,Chinese": 0.21333333333333335, + "Malay,Indonesian,Filipino": 0.38, + "Malay,Indonesian,Chinese": 0.25333333333333335, + "Malay,Filipino,Chinese": 0.18666666666666668, + "English,Vietnamese,Spanish": 0.23333333333333334, + "English,Vietnamese,Indonesian": 0.24, + "English,Vietnamese,Filipino": 0.19333333333333333, + "English,Vietnamese,Chinese": 0.14666666666666667, + "English,Spanish,Indonesian": 0.38, + "English,Spanish,Filipino": 0.34, + "English,Spanish,Chinese": 0.23333333333333334, + "English,Indonesian,Filipino": 0.26666666666666666, + "English,Indonesian,Chinese": 0.22, + "English,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Indonesian": 0.29333333333333333, + "Vietnamese,Spanish,Filipino": 0.25333333333333335, + "Vietnamese,Spanish,Chinese": 0.14666666666666667, + "Vietnamese,Indonesian,Filipino": 0.2733333333333333, + "Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "Vietnamese,Filipino,Chinese": 0.16, + "Spanish,Indonesian,Filipino": 0.32666666666666666, + "Spanish,Indonesian,Chinese": 0.25333333333333335, + "Spanish,Filipino,Chinese": 0.20666666666666667, + "Indonesian,Filipino,Chinese": 0.2 + }, + "4_combine": { + "Malay,English,Vietnamese,Spanish": 0.2, + "Malay,English,Vietnamese,Indonesian": 0.21333333333333335, + "Malay,English,Vietnamese,Filipino": 0.17333333333333334, + "Malay,English,Vietnamese,Chinese": 0.10666666666666667, + "Malay,English,Spanish,Indonesian": 0.32, + "Malay,English,Spanish,Filipino": 0.26, + "Malay,English,Spanish,Chinese": 0.17333333333333334, + "Malay,English,Indonesian,Filipino": 0.24, + "Malay,English,Indonesian,Chinese": 0.16666666666666666, + "Malay,English,Filipino,Chinese": 0.13333333333333333, + "Malay,Vietnamese,Spanish,Indonesian": 0.26666666666666666, + "Malay,Vietnamese,Spanish,Filipino": 0.21333333333333335, + "Malay,Vietnamese,Spanish,Chinese": 0.12, + "Malay,Vietnamese,Indonesian,Filipino": 0.24, + "Malay,Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "Malay,Vietnamese,Filipino,Chinese": 0.12, + "Malay,Spanish,Indonesian,Filipino": 0.2866666666666667, + "Malay,Spanish,Indonesian,Chinese": 0.18666666666666668, + "Malay,Spanish,Filipino,Chinese": 0.15333333333333332, + "Malay,Indonesian,Filipino,Chinese": 0.15333333333333332, + "English,Vietnamese,Spanish,Indonesian": 0.19333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.18, + "English,Vietnamese,Spanish,Chinese": 0.11333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.15333333333333332, + "English,Vietnamese,Indonesian,Chinese": 0.12, + "English,Vietnamese,Filipino,Chinese": 0.1, + "English,Spanish,Indonesian,Filipino": 0.22666666666666666, + "English,Spanish,Indonesian,Chinese": 0.18, + "English,Spanish,Filipino,Chinese": 0.15333333333333332, + "English,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Vietnamese,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Vietnamese,Spanish,Indonesian,Chinese": 0.12666666666666668, + "Vietnamese,Spanish,Filipino,Chinese": 0.12, + "Vietnamese,Indonesian,Filipino,Chinese": 0.13333333333333333, + "Spanish,Indonesian,Filipino,Chinese": 0.16 + }, + "5_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian": 0.18666666666666668, + "Malay,English,Vietnamese,Spanish,Filipino": 0.16, + "Malay,English,Vietnamese,Spanish,Chinese": 0.10666666666666667, + "Malay,English,Vietnamese,Indonesian,Filipino": 0.15333333333333332, + "Malay,English,Vietnamese,Indonesian,Chinese": 0.1, + "Malay,English,Vietnamese,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Spanish,Indonesian,Filipino": 0.21333333333333335, + "Malay,English,Spanish,Indonesian,Chinese": 0.15333333333333332, + "Malay,English,Spanish,Filipino,Chinese": 0.13333333333333333, + "Malay,English,Indonesian,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Filipino": 0.19333333333333333, + "Malay,Vietnamese,Spanish,Indonesian,Chinese": 0.11333333333333333, + "Malay,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "Malay,Vietnamese,Indonesian,Filipino,Chinese": 0.11333333333333333, + "Malay,Spanish,Indonesian,Filipino,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish,Indonesian,Filipino": 0.14666666666666667, + "English,Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "English,Vietnamese,Spanish,Filipino,Chinese": 0.1, + "English,Vietnamese,Indonesian,Filipino,Chinese": 0.08666666666666667, + "English,Spanish,Indonesian,Filipino,Chinese": 0.12666666666666668, + "Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.10666666666666667 + }, + "6_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino": 0.14666666666666667, + "Malay,English,Vietnamese,Spanish,Indonesian,Chinese": 0.1, + "Malay,English,Vietnamese,Spanish,Filipino,Chinese": 0.09333333333333334, + "Malay,English,Vietnamese,Indonesian,Filipino,Chinese": 0.08666666666666667, + "Malay,English,Spanish,Indonesian,Filipino,Chinese": 0.12, + "Malay,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + }, + "7_combine": { + "Malay,English,Vietnamese,Spanish,Indonesian,Filipino,Chinese": 0.08666666666666667 + } + }, + "AC3_2": 0.39078341008922585, + "AC3_3": 0.29806173720857065, + "AC3_4": 0.23309759542895733, + "AC3_5": 0.18901983324549307, + "AC3_6": 0.1593664331047413, + "AC3_7": 0.1383592017416237 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3409090909090909, + "language_acc": { + "English": 0.48295454545454547, + "Vietnamese": 0.2556818181818182, + "Chinese": 0.26704545454545453, + "Indonesian": 0.3522727272727273, + "Filipino": 0.26704545454545453, + "Spanish": 0.42045454545454547, + "Malay": 0.3409090909090909 + }, + "consistency_score_2": 0.40476190476190466, + "consistency_score_3": 0.2038961038961039, + "consistency_score_4": 0.1181818181818182, + "consistency_score_5": 0.07548701298701299, + "consistency_score_6": 0.05113636363636364, + "consistency_score_7": 0.03409090909090909, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.2556818181818182, + "English,Chinese": 0.2897727272727273, + "English,Indonesian": 0.4318181818181818, + "English,Filipino": 0.32954545454545453, + "English,Spanish": 0.5170454545454546, + "English,Malay": 0.4090909090909091, + "Vietnamese,Chinese": 0.42045454545454547, + "Vietnamese,Indonesian": 0.38636363636363635, + "Vietnamese,Filipino": 0.4147727272727273, + "Vietnamese,Spanish": 0.3181818181818182, + "Vietnamese,Malay": 0.4431818181818182, + "Chinese,Indonesian": 0.32954545454545453, + "Chinese,Filipino": 0.4034090909090909, + "Chinese,Spanish": 0.3352272727272727, + "Chinese,Malay": 0.375, + "Indonesian,Filipino": 0.5340909090909091, + "Indonesian,Spanish": 0.3977272727272727, + "Indonesian,Malay": 0.5625, + "Filipino,Spanish": 0.4034090909090909, + "Filipino,Malay": 0.5056818181818182, + "Spanish,Malay": 0.4375 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.13636363636363635, + "English,Vietnamese,Indonesian": 0.14204545454545456, + "English,Vietnamese,Filipino": 0.125, + "English,Vietnamese,Spanish": 0.1534090909090909, + "English,Vietnamese,Malay": 0.16477272727272727, + "English,Chinese,Indonesian": 0.14772727272727273, + "English,Chinese,Filipino": 0.14772727272727273, + "English,Chinese,Spanish": 0.17613636363636365, + "English,Chinese,Malay": 0.19318181818181818, + "English,Indonesian,Filipino": 0.23295454545454544, + "English,Indonesian,Spanish": 0.23863636363636365, + "English,Indonesian,Malay": 0.2727272727272727, + "English,Filipino,Spanish": 0.2159090909090909, + "English,Filipino,Malay": 0.2159090909090909, + "English,Spanish,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Indonesian": 0.1534090909090909, + "Vietnamese,Chinese,Filipino": 0.20454545454545456, + "Vietnamese,Chinese,Spanish": 0.17045454545454544, + "Vietnamese,Chinese,Malay": 0.21022727272727273, + "Vietnamese,Indonesian,Filipino": 0.25, + "Vietnamese,Indonesian,Spanish": 0.14204545454545456, + "Vietnamese,Indonesian,Malay": 0.2727272727272727, + "Vietnamese,Filipino,Spanish": 0.1590909090909091, + "Vietnamese,Filipino,Malay": 0.2727272727272727, + "Vietnamese,Spanish,Malay": 0.1875, + "Chinese,Indonesian,Filipino": 0.20454545454545456, + "Chinese,Indonesian,Spanish": 0.1590909090909091, + "Chinese,Indonesian,Malay": 0.2215909090909091, + "Chinese,Filipino,Spanish": 0.19318181818181818, + "Chinese,Filipino,Malay": 0.23295454545454544, + "Chinese,Spanish,Malay": 0.19318181818181818, + "Indonesian,Filipino,Spanish": 0.23863636363636365, + "Indonesian,Filipino,Malay": 0.3409090909090909, + "Indonesian,Spanish,Malay": 0.2556818181818182, + "Filipino,Spanish,Malay": 0.25 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino": 0.07386363636363637, + "English,Vietnamese,Chinese,Spanish": 0.09090909090909091, + "English,Vietnamese,Chinese,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Filipino": 0.09090909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.08522727272727272, + "English,Vietnamese,Indonesian,Malay": 0.11363636363636363, + "English,Vietnamese,Filipino,Spanish": 0.10227272727272728, + "English,Vietnamese,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Spanish,Malay": 0.10227272727272728, + "English,Chinese,Indonesian,Filipino": 0.10227272727272728, + "English,Chinese,Indonesian,Spanish": 0.09659090909090909, + "English,Chinese,Indonesian,Malay": 0.13636363636363635, + "English,Chinese,Filipino,Spanish": 0.10795454545454546, + "English,Chinese,Filipino,Malay": 0.125, + "English,Chinese,Spanish,Malay": 0.13636363636363635, + "English,Indonesian,Filipino,Spanish": 0.14204545454545456, + "English,Indonesian,Filipino,Malay": 0.17045454545454544, + "English,Indonesian,Spanish,Malay": 0.17045454545454544, + "English,Filipino,Spanish,Malay": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Filipino": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Filipino,Spanish": 0.10227272727272728, + "Vietnamese,Chinese,Filipino,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "Vietnamese,Indonesian,Filipino,Malay": 0.18181818181818182, + "Vietnamese,Indonesian,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Filipino,Spanish,Malay": 0.13068181818181818, + "Chinese,Indonesian,Filipino,Spanish": 0.125, + "Chinese,Indonesian,Filipino,Malay": 0.14772727272727273, + "Chinese,Indonesian,Spanish,Malay": 0.11931818181818182, + "Chinese,Filipino,Spanish,Malay": 0.13068181818181818, + "Indonesian,Filipino,Spanish,Malay": 0.17613636363636365 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.0625, + "English,Vietnamese,Chinese,Filipino,Malay": 0.0625, + "English,Vietnamese,Chinese,Spanish,Malay": 0.0625, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Vietnamese,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Indonesian,Filipino,Spanish": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino,Malay": 0.09659090909090909, + "English,Chinese,Indonesian,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.0625, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.03977272727272727, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03977272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0625, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + } + }, + "AC3_2": 0.3701015964670574, + "AC3_3": 0.2551739082855927, + "AC3_4": 0.17551755171694394, + "AC3_5": 0.12360446568004453, + "AC3_6": 0.08893280630142637, + "AC3_7": 0.06198347105785123 + }, + "prompt_2": { + "overall_acc": 0.34334415584415584, + "language_acc": { + "English": 0.4943181818181818, + "Vietnamese": 0.2727272727272727, + "Chinese": 0.2556818181818182, + "Indonesian": 0.3522727272727273, + "Filipino": 0.2897727272727273, + "Spanish": 0.4090909090909091, + "Malay": 0.32954545454545453 + }, + "consistency_score_2": 0.4099025974025975, + "consistency_score_3": 0.2076298701298701, + "consistency_score_4": 0.1198051948051948, + "consistency_score_5": 0.07575757575757576, + "consistency_score_6": 0.05032467532467533, + "consistency_score_7": 0.03409090909090909, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.2840909090909091, + "English,Chinese": 0.3352272727272727, + "English,Indonesian": 0.4659090909090909, + "English,Filipino": 0.3522727272727273, + "English,Spanish": 0.5568181818181818, + "English,Malay": 0.4147727272727273, + "Vietnamese,Chinese": 0.38636363636363635, + "Vietnamese,Indonesian": 0.3977272727272727, + "Vietnamese,Filipino": 0.38636363636363635, + "Vietnamese,Spanish": 0.32386363636363635, + "Vietnamese,Malay": 0.4147727272727273, + "Chinese,Indonesian": 0.3465909090909091, + "Chinese,Filipino": 0.4431818181818182, + "Chinese,Spanish": 0.29545454545454547, + "Chinese,Malay": 0.3522727272727273, + "Indonesian,Filipino": 0.5227272727272727, + "Indonesian,Spanish": 0.45454545454545453, + "Indonesian,Malay": 0.5738636363636364, + "Filipino,Spanish": 0.42613636363636365, + "Filipino,Malay": 0.4772727272727273, + "Spanish,Malay": 0.3977272727272727 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.13068181818181818, + "English,Vietnamese,Indonesian": 0.16477272727272727, + "English,Vietnamese,Filipino": 0.14204545454545456, + "English,Vietnamese,Spanish": 0.18181818181818182, + "English,Vietnamese,Malay": 0.1590909090909091, + "English,Chinese,Indonesian": 0.17613636363636365, + "English,Chinese,Filipino": 0.1875, + "English,Chinese,Spanish": 0.17045454545454544, + "English,Chinese,Malay": 0.18181818181818182, + "English,Indonesian,Filipino": 0.26136363636363635, + "English,Indonesian,Spanish": 0.3068181818181818, + "English,Indonesian,Malay": 0.2897727272727273, + "English,Filipino,Spanish": 0.25, + "English,Filipino,Malay": 0.22727272727272727, + "English,Spanish,Malay": 0.25, + "Vietnamese,Chinese,Indonesian": 0.17045454545454544, + "Vietnamese,Chinese,Filipino": 0.19318181818181818, + "Vietnamese,Chinese,Spanish": 0.13636363636363635, + "Vietnamese,Chinese,Malay": 0.17613636363636365, + "Vietnamese,Indonesian,Filipino": 0.23863636363636365, + "Vietnamese,Indonesian,Spanish": 0.17045454545454544, + "Vietnamese,Indonesian,Malay": 0.26136363636363635, + "Vietnamese,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Filipino,Malay": 0.23863636363636365, + "Vietnamese,Spanish,Malay": 0.17613636363636365, + "Chinese,Indonesian,Filipino": 0.23863636363636365, + "Chinese,Indonesian,Spanish": 0.1534090909090909, + "Chinese,Indonesian,Malay": 0.22727272727272727, + "Chinese,Filipino,Spanish": 0.18181818181818182, + "Chinese,Filipino,Malay": 0.22727272727272727, + "Chinese,Spanish,Malay": 0.14204545454545456, + "Indonesian,Filipino,Spanish": 0.2556818181818182, + "Indonesian,Filipino,Malay": 0.3352272727272727, + "Indonesian,Spanish,Malay": 0.26136363636363635, + "Filipino,Spanish,Malay": 0.23863636363636365 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.07954545454545454, + "English,Vietnamese,Chinese,Filipino": 0.07954545454545454, + "English,Vietnamese,Chinese,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino": 0.10795454545454546, + "English,Vietnamese,Indonesian,Spanish": 0.11363636363636363, + "English,Vietnamese,Indonesian,Malay": 0.09659090909090909, + "English,Vietnamese,Filipino,Spanish": 0.11931818181818182, + "English,Vietnamese,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino": 0.13636363636363635, + "English,Chinese,Indonesian,Spanish": 0.10227272727272728, + "English,Chinese,Indonesian,Malay": 0.14204545454545456, + "English,Chinese,Filipino,Spanish": 0.11931818181818182, + "English,Chinese,Filipino,Malay": 0.13636363636363635, + "English,Chinese,Spanish,Malay": 0.10795454545454546, + "English,Indonesian,Filipino,Spanish": 0.17613636363636365, + "English,Indonesian,Filipino,Malay": 0.19318181818181818, + "English,Indonesian,Spanish,Malay": 0.17613636363636365, + "English,Filipino,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Filipino": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Spanish": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Filipino,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Filipino,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Spanish,Malay": 0.07954545454545454, + "Vietnamese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "Vietnamese,Indonesian,Filipino,Malay": 0.16477272727272727, + "Vietnamese,Indonesian,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Filipino,Spanish,Malay": 0.11931818181818182, + "Chinese,Indonesian,Filipino,Spanish": 0.11931818181818182, + "Chinese,Indonesian,Filipino,Malay": 0.16477272727272727, + "Chinese,Indonesian,Spanish,Malay": 0.09659090909090909, + "Chinese,Filipino,Spanish,Malay": 0.10795454545454546, + "Indonesian,Filipino,Spanish,Malay": 0.17045454545454544 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.056818181818181816, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.07386363636363637, + "English,Vietnamese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "English,Chinese,Indonesian,Filipino,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Spanish,Malay": 0.07954545454545454, + "English,Chinese,Filipino,Spanish,Malay": 0.07954545454545454, + "English,Indonesian,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.07954545454545454, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.07386363636363637, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.07954545454545454 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.03977272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03409090909090909 + } + }, + "AC3_2": 0.3736827557721549, + "AC3_3": 0.2587726430435907, + "AC3_4": 0.1776291531428512, + "AC3_5": 0.12412700272876045, + "AC3_6": 0.0877828356986814, + "AC3_7": 0.06202346039412429 + }, + "prompt_3": { + "overall_acc": 0.3319805194805195, + "language_acc": { + "English": 0.48295454545454547, + "Vietnamese": 0.2784090909090909, + "Chinese": 0.25, + "Indonesian": 0.32954545454545453, + "Filipino": 0.26136363636363635, + "Spanish": 0.4034090909090909, + "Malay": 0.3181818181818182 + }, + "consistency_score_2": 0.3933982683982684, + "consistency_score_3": 0.1900974025974026, + "consistency_score_4": 0.10405844155844156, + "consistency_score_5": 0.06277056277056277, + "consistency_score_6": 0.04058441558441559, + "consistency_score_7": 0.028409090909090908, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.2897727272727273, + "English,Chinese": 0.3068181818181818, + "English,Indonesian": 0.45454545454545453, + "English,Filipino": 0.3465909090909091, + "English,Spanish": 0.5340909090909091, + "English,Malay": 0.4090909090909091, + "Vietnamese,Chinese": 0.3181818181818182, + "Vietnamese,Indonesian": 0.4034090909090909, + "Vietnamese,Filipino": 0.42613636363636365, + "Vietnamese,Spanish": 0.3181818181818182, + "Vietnamese,Malay": 0.42045454545454547, + "Chinese,Indonesian": 0.32386363636363635, + "Chinese,Filipino": 0.3522727272727273, + "Chinese,Spanish": 0.2784090909090909, + "Chinese,Malay": 0.32954545454545453, + "Indonesian,Filipino": 0.5340909090909091, + "Indonesian,Spanish": 0.4090909090909091, + "Indonesian,Malay": 0.5454545454545454, + "Filipino,Spanish": 0.39204545454545453, + "Filipino,Malay": 0.48863636363636365, + "Spanish,Malay": 0.3806818181818182 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.10795454545454546, + "English,Vietnamese,Indonesian": 0.1590909090909091, + "English,Vietnamese,Filipino": 0.14772727272727273, + "English,Vietnamese,Spanish": 0.17613636363636365, + "English,Vietnamese,Malay": 0.1534090909090909, + "English,Chinese,Indonesian": 0.17613636363636365, + "English,Chinese,Filipino": 0.14204545454545456, + "English,Chinese,Spanish": 0.1534090909090909, + "English,Chinese,Malay": 0.16477272727272727, + "English,Indonesian,Filipino": 0.2556818181818182, + "English,Indonesian,Spanish": 0.26704545454545453, + "English,Indonesian,Malay": 0.26704545454545453, + "English,Filipino,Spanish": 0.2159090909090909, + "English,Filipino,Malay": 0.2215909090909091, + "English,Spanish,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Indonesian": 0.14772727272727273, + "Vietnamese,Chinese,Filipino": 0.1590909090909091, + "Vietnamese,Chinese,Spanish": 0.11363636363636363, + "Vietnamese,Chinese,Malay": 0.14772727272727273, + "Vietnamese,Indonesian,Filipino": 0.26704545454545453, + "Vietnamese,Indonesian,Spanish": 0.1534090909090909, + "Vietnamese,Indonesian,Malay": 0.2556818181818182, + "Vietnamese,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Filipino,Malay": 0.26704545454545453, + "Vietnamese,Spanish,Malay": 0.16477272727272727, + "Chinese,Indonesian,Filipino": 0.1875, + "Chinese,Indonesian,Spanish": 0.14204545454545456, + "Chinese,Indonesian,Malay": 0.20454545454545456, + "Chinese,Filipino,Spanish": 0.125, + "Chinese,Filipino,Malay": 0.19318181818181818, + "Chinese,Spanish,Malay": 0.125, + "Indonesian,Filipino,Spanish": 0.2215909090909091, + "Indonesian,Filipino,Malay": 0.3352272727272727, + "Indonesian,Spanish,Malay": 0.2215909090909091, + "Filipino,Spanish,Malay": 0.2215909090909091 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino": 0.0625, + "English,Vietnamese,Chinese,Spanish": 0.06818181818181818, + "English,Vietnamese,Chinese,Malay": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino": 0.11363636363636363, + "English,Vietnamese,Indonesian,Spanish": 0.10795454545454546, + "English,Vietnamese,Indonesian,Malay": 0.09090909090909091, + "English,Vietnamese,Filipino,Spanish": 0.11363636363636363, + "English,Vietnamese,Filipino,Malay": 0.10227272727272728, + "English,Vietnamese,Spanish,Malay": 0.09659090909090909, + "English,Chinese,Indonesian,Filipino": 0.11363636363636363, + "English,Chinese,Indonesian,Spanish": 0.10227272727272728, + "English,Chinese,Indonesian,Malay": 0.13636363636363635, + "English,Chinese,Filipino,Spanish": 0.07954545454545454, + "English,Chinese,Filipino,Malay": 0.11363636363636363, + "English,Chinese,Spanish,Malay": 0.08522727272727272, + "English,Indonesian,Filipino,Spanish": 0.1534090909090909, + "English,Indonesian,Filipino,Malay": 0.18181818181818182, + "English,Indonesian,Spanish,Malay": 0.14772727272727273, + "English,Filipino,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Filipino": 0.09090909090909091, + "Vietnamese,Chinese,Indonesian,Spanish": 0.07386363636363637, + "Vietnamese,Chinese,Indonesian,Malay": 0.09659090909090909, + "Vietnamese,Chinese,Filipino,Spanish": 0.06818181818181818, + "Vietnamese,Chinese,Filipino,Malay": 0.10227272727272728, + "Vietnamese,Chinese,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "Vietnamese,Indonesian,Filipino,Malay": 0.17613636363636365, + "Vietnamese,Indonesian,Spanish,Malay": 0.09659090909090909, + "Vietnamese,Filipino,Spanish,Malay": 0.10795454545454546, + "Chinese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "Chinese,Indonesian,Filipino,Malay": 0.14204545454545456, + "Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "Chinese,Filipino,Spanish,Malay": 0.07386363636363637, + "Indonesian,Filipino,Spanish,Malay": 0.14772727272727273 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Filipino,Malay": 0.045454545454545456, + "English,Vietnamese,Chinese,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.09090909090909091, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino,Spanish": 0.0625, + "English,Chinese,Indonesian,Filipino,Malay": 0.10227272727272728, + "English,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Chinese,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Indonesian,Filipino,Spanish,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.05113636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.0625 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.028409090909090908, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.03409090909090909, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908 + } + }, + "AC3_2": 0.3600892765819701, + "AC3_3": 0.24175944542116068, + "AC3_4": 0.15845086592569627, + "AC3_5": 0.10557845167919071, + "AC3_6": 0.07232691054279144, + "AC3_7": 0.052339271074748124 + }, + "prompt_4": { + "overall_acc": 0.33685064935064934, + "language_acc": { + "English": 0.48863636363636365, + "Vietnamese": 0.2727272727272727, + "Chinese": 0.25, + "Indonesian": 0.3352272727272727, + "Filipino": 0.26136363636363635, + "Spanish": 0.42045454545454547, + "Malay": 0.32954545454545453 + }, + "consistency_score_2": 0.40882034632034625, + "consistency_score_3": 0.20616883116883117, + "consistency_score_4": 0.11883116883116883, + "consistency_score_5": 0.07683982683982686, + "consistency_score_6": 0.05438311688311688, + "consistency_score_7": 0.03977272727272727, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.2840909090909091, + "English,Chinese": 0.30113636363636365, + "English,Indonesian": 0.44886363636363635, + "English,Filipino": 0.3181818181818182, + "English,Spanish": 0.5909090909090909, + "English,Malay": 0.4090909090909091, + "Vietnamese,Chinese": 0.3522727272727273, + "Vietnamese,Indonesian": 0.39204545454545453, + "Vietnamese,Filipino": 0.4318181818181818, + "Vietnamese,Spanish": 0.3068181818181818, + "Vietnamese,Malay": 0.4090909090909091, + "Chinese,Indonesian": 0.3352272727272727, + "Chinese,Filipino": 0.3977272727272727, + "Chinese,Spanish": 0.2784090909090909, + "Chinese,Malay": 0.3522727272727273, + "Indonesian,Filipino": 0.5681818181818182, + "Indonesian,Spanish": 0.45454545454545453, + "Indonesian,Malay": 0.5852272727272727, + "Filipino,Spanish": 0.4090909090909091, + "Filipino,Malay": 0.5340909090909091, + "Spanish,Malay": 0.42613636363636365 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.125, + "English,Vietnamese,Indonesian": 0.16477272727272727, + "English,Vietnamese,Filipino": 0.14204545454545456, + "English,Vietnamese,Spanish": 0.18181818181818182, + "English,Vietnamese,Malay": 0.1534090909090909, + "English,Chinese,Indonesian": 0.1590909090909091, + "English,Chinese,Filipino": 0.1534090909090909, + "English,Chinese,Spanish": 0.18181818181818182, + "English,Chinese,Malay": 0.17613636363636365, + "English,Indonesian,Filipino": 0.24431818181818182, + "English,Indonesian,Spanish": 0.29545454545454547, + "English,Indonesian,Malay": 0.2840909090909091, + "English,Filipino,Spanish": 0.2159090909090909, + "English,Filipino,Malay": 0.2215909090909091, + "English,Spanish,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Indonesian": 0.1590909090909091, + "Vietnamese,Chinese,Filipino": 0.19318181818181818, + "Vietnamese,Chinese,Spanish": 0.10795454545454546, + "Vietnamese,Chinese,Malay": 0.1590909090909091, + "Vietnamese,Indonesian,Filipino": 0.26704545454545453, + "Vietnamese,Indonesian,Spanish": 0.1534090909090909, + "Vietnamese,Indonesian,Malay": 0.2727272727272727, + "Vietnamese,Filipino,Spanish": 0.1590909090909091, + "Vietnamese,Filipino,Malay": 0.2784090909090909, + "Vietnamese,Spanish,Malay": 0.16477272727272727, + "Chinese,Indonesian,Filipino": 0.2215909090909091, + "Chinese,Indonesian,Spanish": 0.14772727272727273, + "Chinese,Indonesian,Malay": 0.2215909090909091, + "Chinese,Filipino,Spanish": 0.16477272727272727, + "Chinese,Filipino,Malay": 0.23863636363636365, + "Chinese,Spanish,Malay": 0.1590909090909091, + "Indonesian,Filipino,Spanish": 0.26704545454545453, + "Indonesian,Filipino,Malay": 0.38636363636363635, + "Indonesian,Spanish,Malay": 0.2727272727272727, + "Filipino,Spanish,Malay": 0.26136363636363635 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.07386363636363637, + "English,Vietnamese,Chinese,Filipino": 0.06818181818181818, + "English,Vietnamese,Chinese,Spanish": 0.07954545454545454, + "English,Vietnamese,Chinese,Malay": 0.07386363636363637, + "English,Vietnamese,Indonesian,Filipino": 0.11363636363636363, + "English,Vietnamese,Indonesian,Spanish": 0.11363636363636363, + "English,Vietnamese,Indonesian,Malay": 0.11931818181818182, + "English,Vietnamese,Filipino,Spanish": 0.11363636363636363, + "English,Vietnamese,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Spanish,Malay": 0.11363636363636363, + "English,Chinese,Indonesian,Filipino": 0.11931818181818182, + "English,Chinese,Indonesian,Spanish": 0.10227272727272728, + "English,Chinese,Indonesian,Malay": 0.13636363636363635, + "English,Chinese,Filipino,Spanish": 0.10795454545454546, + "English,Chinese,Filipino,Malay": 0.125, + "English,Chinese,Spanish,Malay": 0.11931818181818182, + "English,Indonesian,Filipino,Spanish": 0.1534090909090909, + "English,Indonesian,Filipino,Malay": 0.1875, + "English,Indonesian,Spanish,Malay": 0.17613636363636365, + "English,Filipino,Spanish,Malay": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Filipino": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Spanish": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Malay": 0.11363636363636363, + "Vietnamese,Chinese,Filipino,Spanish": 0.07954545454545454, + "Vietnamese,Chinese,Filipino,Malay": 0.125, + "Vietnamese,Chinese,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Indonesian,Filipino,Spanish": 0.10795454545454546, + "Vietnamese,Indonesian,Filipino,Malay": 0.20454545454545456, + "Vietnamese,Indonesian,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Filipino,Spanish,Malay": 0.11931818181818182, + "Chinese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "Chinese,Indonesian,Filipino,Malay": 0.16477272727272727, + "Chinese,Indonesian,Spanish,Malay": 0.10227272727272728, + "Chinese,Filipino,Spanish,Malay": 0.11363636363636363, + "Indonesian,Filipino,Spanish,Malay": 0.19318181818181818 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.0625, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.0625, + "English,Vietnamese,Chinese,Filipino,Malay": 0.056818181818181816, + "English,Vietnamese,Chinese,Spanish,Malay": 0.056818181818181816, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.08522727272727272, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.09090909090909091, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Vietnamese,Filipino,Spanish,Malay": 0.09090909090909091, + "English,Chinese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "English,Chinese,Indonesian,Filipino,Malay": 0.10795454545454546, + "English,Chinese,Indonesian,Spanish,Malay": 0.08522727272727272, + "English,Chinese,Filipino,Spanish,Malay": 0.08522727272727272, + "English,Indonesian,Filipino,Spanish,Malay": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.0625, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09659090909090909, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.08522727272727272 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.045454545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.07386363636363637, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.05113636363636364 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.03977272727272727 + } + }, + "AC3_2": 0.3693623592892402, + "AC3_3": 0.25578494744187913, + "AC3_4": 0.1756855541649905, + "AC3_5": 0.12513483896986202, + "AC3_6": 0.09364727593661758, + "AC3_7": 0.07114518023189342 + }, + "prompt_5": { + "overall_acc": 0.32224025974025977, + "language_acc": { + "English": 0.44886363636363635, + "Vietnamese": 0.26136363636363635, + "Chinese": 0.25, + "Indonesian": 0.3522727272727273, + "Filipino": 0.25, + "Spanish": 0.375, + "Malay": 0.3181818181818182 + }, + "consistency_score_2": 0.3866341991341992, + "consistency_score_3": 0.18587662337662342, + "consistency_score_4": 0.10129870129870132, + "consistency_score_5": 0.05898268398268398, + "consistency_score_6": 0.03571428571428571, + "consistency_score_7": 0.022727272727272728, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.2840909090909091, + "English,Chinese": 0.3352272727272727, + "English,Indonesian": 0.45454545454545453, + "English,Filipino": 0.32954545454545453, + "English,Spanish": 0.5170454545454546, + "English,Malay": 0.4147727272727273, + "Vietnamese,Chinese": 0.2784090909090909, + "Vietnamese,Indonesian": 0.42045454545454547, + "Vietnamese,Filipino": 0.4318181818181818, + "Vietnamese,Spanish": 0.32954545454545453, + "Vietnamese,Malay": 0.42613636363636365, + "Chinese,Indonesian": 0.2784090909090909, + "Chinese,Filipino": 0.32386363636363635, + "Chinese,Spanish": 0.2840909090909091, + "Chinese,Malay": 0.29545454545454547, + "Indonesian,Filipino": 0.5113636363636364, + "Indonesian,Spanish": 0.4375, + "Indonesian,Malay": 0.5568181818181818, + "Filipino,Spanish": 0.35795454545454547, + "Filipino,Malay": 0.4715909090909091, + "Spanish,Malay": 0.3806818181818182 + }, + "3_combine": { + "English,Vietnamese,Chinese": 0.10227272727272728, + "English,Vietnamese,Indonesian": 0.16477272727272727, + "English,Vietnamese,Filipino": 0.14204545454545456, + "English,Vietnamese,Spanish": 0.18181818181818182, + "English,Vietnamese,Malay": 0.17045454545454544, + "English,Chinese,Indonesian": 0.16477272727272727, + "English,Chinese,Filipino": 0.13068181818181818, + "English,Chinese,Spanish": 0.17045454545454544, + "English,Chinese,Malay": 0.1590909090909091, + "English,Indonesian,Filipino": 0.23863636363636365, + "English,Indonesian,Spanish": 0.2727272727272727, + "English,Indonesian,Malay": 0.2784090909090909, + "English,Filipino,Spanish": 0.1875, + "English,Filipino,Malay": 0.2215909090909091, + "English,Spanish,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian": 0.13068181818181818, + "Vietnamese,Chinese,Filipino": 0.13068181818181818, + "Vietnamese,Chinese,Spanish": 0.10227272727272728, + "Vietnamese,Chinese,Malay": 0.13068181818181818, + "Vietnamese,Indonesian,Filipino": 0.23863636363636365, + "Vietnamese,Indonesian,Spanish": 0.19318181818181818, + "Vietnamese,Indonesian,Malay": 0.29545454545454547, + "Vietnamese,Filipino,Spanish": 0.16477272727272727, + "Vietnamese,Filipino,Malay": 0.25, + "Vietnamese,Spanish,Malay": 0.18181818181818182, + "Chinese,Indonesian,Filipino": 0.16477272727272727, + "Chinese,Indonesian,Spanish": 0.13636363636363635, + "Chinese,Indonesian,Malay": 0.17613636363636365, + "Chinese,Filipino,Spanish": 0.11363636363636363, + "Chinese,Filipino,Malay": 0.16477272727272727, + "Chinese,Spanish,Malay": 0.125, + "Indonesian,Filipino,Spanish": 0.2159090909090909, + "Indonesian,Filipino,Malay": 0.3352272727272727, + "Indonesian,Spanish,Malay": 0.24431818181818182, + "Filipino,Spanish,Malay": 0.2159090909090909 + }, + "4_combine": { + "English,Vietnamese,Chinese,Indonesian": 0.06818181818181818, + "English,Vietnamese,Chinese,Filipino": 0.056818181818181816, + "English,Vietnamese,Chinese,Spanish": 0.07386363636363637, + "English,Vietnamese,Chinese,Malay": 0.05113636363636364, + "English,Vietnamese,Indonesian,Filipino": 0.10227272727272728, + "English,Vietnamese,Indonesian,Spanish": 0.11931818181818182, + "English,Vietnamese,Indonesian,Malay": 0.11931818181818182, + "English,Vietnamese,Filipino,Spanish": 0.10227272727272728, + "English,Vietnamese,Filipino,Malay": 0.10795454545454546, + "English,Vietnamese,Spanish,Malay": 0.09659090909090909, + "English,Chinese,Indonesian,Filipino": 0.10227272727272728, + "English,Chinese,Indonesian,Spanish": 0.09659090909090909, + "English,Chinese,Indonesian,Malay": 0.11931818181818182, + "English,Chinese,Filipino,Spanish": 0.07386363636363637, + "English,Chinese,Filipino,Malay": 0.09090909090909091, + "English,Chinese,Spanish,Malay": 0.07954545454545454, + "English,Indonesian,Filipino,Spanish": 0.13636363636363635, + "English,Indonesian,Filipino,Malay": 0.1875, + "English,Indonesian,Spanish,Malay": 0.1534090909090909, + "English,Filipino,Spanish,Malay": 0.125, + "Vietnamese,Chinese,Indonesian,Filipino": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Spanish": 0.06818181818181818, + "Vietnamese,Chinese,Indonesian,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Filipino,Spanish": 0.056818181818181816, + "Vietnamese,Chinese,Filipino,Malay": 0.08522727272727272, + "Vietnamese,Chinese,Spanish,Malay": 0.056818181818181816, + "Vietnamese,Indonesian,Filipino,Spanish": 0.11363636363636363, + "Vietnamese,Indonesian,Filipino,Malay": 0.17613636363636365, + "Vietnamese,Indonesian,Spanish,Malay": 0.14204545454545456, + "Vietnamese,Filipino,Spanish,Malay": 0.11363636363636363, + "Chinese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "Chinese,Indonesian,Filipino,Malay": 0.125, + "Chinese,Indonesian,Spanish,Malay": 0.09090909090909091, + "Chinese,Filipino,Spanish,Malay": 0.06818181818181818, + "Indonesian,Filipino,Spanish,Malay": 0.1534090909090909 + }, + "5_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino": 0.03977272727272727, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.05113636363636364, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.03977272727272727, + "English,Vietnamese,Chinese,Filipino,Spanish": 0.045454545454545456, + "English,Vietnamese,Chinese,Filipino,Malay": 0.03409090909090909, + "English,Vietnamese,Chinese,Spanish,Malay": 0.028409090909090908, + "English,Vietnamese,Indonesian,Filipino,Spanish": 0.07954545454545454, + "English,Vietnamese,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Vietnamese,Indonesian,Spanish,Malay": 0.07954545454545454, + "English,Vietnamese,Filipino,Spanish,Malay": 0.06818181818181818, + "English,Chinese,Indonesian,Filipino,Spanish": 0.056818181818181816, + "English,Chinese,Indonesian,Filipino,Malay": 0.08522727272727272, + "English,Chinese,Indonesian,Spanish,Malay": 0.06818181818181818, + "English,Chinese,Filipino,Spanish,Malay": 0.045454545454545456, + "English,Indonesian,Filipino,Spanish,Malay": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03977272727272727, + "Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.056818181818181816, + "Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Filipino,Spanish,Malay": 0.03409090909090909, + "Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.09090909090909091, + "Chinese,Indonesian,Filipino,Spanish,Malay": 0.056818181818181816 + }, + "6_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish": 0.03409090909090909, + "English,Vietnamese,Chinese,Indonesian,Filipino,Malay": 0.028409090909090908, + "English,Vietnamese,Chinese,Indonesian,Spanish,Malay": 0.028409090909090908, + "English,Vietnamese,Chinese,Filipino,Spanish,Malay": 0.022727272727272728, + "English,Vietnamese,Indonesian,Filipino,Spanish,Malay": 0.0625, + "English,Chinese,Indonesian,Filipino,Spanish,Malay": 0.045454545454545456, + "Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.028409090909090908 + }, + "7_combine": { + "English,Vietnamese,Chinese,Indonesian,Filipino,Spanish,Malay": 0.022727272727272728 + } + }, + "AC3_2": 0.35151246649150514, + "AC3_3": 0.23576044557826406, + "AC3_4": 0.1541417570183568, + "AC3_5": 0.09971380639453442, + "AC3_6": 0.06430191122272304, + "AC3_7": 0.04245989303581997 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5922330097087378 + }, + "prompt_2": { + "accuracy": 0.6116504854368932 + }, + "prompt_3": { + "accuracy": 0.5533980582524272 + }, + "prompt_4": { + "accuracy": 0.5339805825242718 + }, + "prompt_5": { + "accuracy": 0.5631067961165048 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2857142857142857 + }, + "prompt_2": { + "accuracy": 0.2857142857142857 + }, + "prompt_3": { + "accuracy": 0.24761904761904763 + }, + "prompt_4": { + "accuracy": 0.23809523809523808 + }, + "prompt_5": { + "accuracy": 0.29523809523809524 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4953271028037383 + }, + "prompt_2": { + "accuracy": 0.4953271028037383 + }, + "prompt_3": { + "accuracy": 0.514018691588785 + }, + "prompt_4": { + "accuracy": 0.5046728971962616 + }, + "prompt_5": { + "accuracy": 0.5046728971962616 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.5, + "history": 0.2, + "literature": 0.2, + "politics": 0.1, + "culture": 0.5, + "film": 0.3, + "law": 0.2, + "geography": 0.5 + } + }, + "prompt_2": { + "accuracy": 0.32, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.5, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.1, + "culture": 0.5, + "film": 0.3, + "law": 0.2, + "geography": 0.5 + } + }, + "prompt_3": { + "accuracy": 0.31, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.4, + "history": 0.2, + "literature": 0.2, + "politics": 0.3, + "culture": 0.4, + "film": 0.3, + "law": 0.2, + "geography": 0.4 + } + }, + "prompt_4": { + "accuracy": 0.3, + "category_acc": { + "brand": 0.5, + "demographics": 0.4, + "biology": 0.4, + "history": 0.06666666666666667, + "literature": 0.3, + "politics": 0.2, + "culture": 0.4, + "film": 0.2, + "law": 0.2, + "geography": 0.5 + } + }, + "prompt_5": { + "accuracy": 0.36, + "category_acc": { + "brand": 0.5, + "demographics": 0.4, + "biology": 0.4, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.4, + "culture": 0.4, + "film": 0.3, + "law": 0.2, + "geography": 0.5 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.21680102789276903 + }, + "prompt_2": { + "bleu_score": 0.22474100334958524 + }, + "prompt_3": { + "bleu_score": 0.22863227830963867 + }, + "prompt_4": { + "bleu_score": 0.21751844487735492 + }, + "prompt_5": { + "bleu_score": 0.195875615668177 + } }, "indommlu": { "prompt_1": -1, @@ -10200,179 +87667,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.19435695954955048 + }, + "prompt_2": { + "bleu_score": 0.19271434563357726 + }, + "prompt_3": { + "bleu_score": 0.1937286841495566 + }, + "prompt_4": { + "bleu_score": 0.19534114434885336 + }, + "prompt_5": { + "bleu_score": 0.18479992882358867 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.044136482882861636 + }, + "prompt_2": { + "bleu_score": 0.04550813456968202 + }, + "prompt_3": { + "bleu_score": 0.04462087450082222 + }, + "prompt_4": { + "bleu_score": 0.04615136701937437 + }, + "prompt_5": { + "bleu_score": 0.042612704267182486 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.011833411568617648 + }, + "prompt_2": { + "bleu_score": 0.014115858404550851 + }, + "prompt_3": { + "bleu_score": 0.0149157747810875 + }, + "prompt_4": { + "bleu_score": 0.013499481132607686 + }, + "prompt_5": { + "bleu_score": 0.015705791397095634 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.18129165684239323 + }, + "prompt_2": { + "bleu_score": 0.17712650436858488 + }, + "prompt_3": { + "bleu_score": 0.18002211964244563 + }, + "prompt_4": { + "bleu_score": 0.18204787643821446 + }, + "prompt_5": { + "bleu_score": 0.17027738014358365 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.456242707117853 + }, + "prompt_2": { + "accuracy": 0.4749124854142357 + }, + "prompt_3": { + "accuracy": 0.47024504084014 + }, + "prompt_4": { + "accuracy": 0.47724620770128356 + }, + "prompt_5": { + "accuracy": 0.46091015169194866 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4592062924562031, + "category_acc": { + "high_school_european_history": 0.5792682926829268, + "business_ethics": 0.6363636363636364, + "clinical_knowledge": 0.48484848484848486, + "medical_genetics": 0.48484848484848486, + "high_school_us_history": 0.541871921182266, + "high_school_physics": 0.26666666666666666, + "high_school_world_history": 0.5211864406779662, + "virology": 0.41818181818181815, + "high_school_microeconomics": 0.459915611814346, + "econometrics": 0.2920353982300885, + "college_computer_science": 0.37373737373737376, + "high_school_biology": 0.4854368932038835, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.3701067615658363, + "philosophy": 0.4935483870967742, + "professional_medicine": 0.44280442804428044, + "nutrition": 0.4885245901639344, + "global_facts": 0.30303030303030304, + "machine_learning": 0.3783783783783784, + "security_studies": 0.4713114754098361, + "public_relations": 0.5963302752293578, + "professional_psychology": 0.4451718494271686, + "prehistory": 0.4551083591331269, + "anatomy": 0.4552238805970149, + "human_sexuality": 0.5461538461538461, + "college_medicine": 0.4186046511627907, + "high_school_government_and_politics": 0.65625, + "college_chemistry": 0.37373737373737376, + "logical_fallacies": 0.6358024691358025, + "high_school_geography": 0.6091370558375635, + "elementary_mathematics": 0.3156498673740053, + "human_aging": 0.5, + "college_mathematics": 0.2222222222222222, + "high_school_psychology": 0.6691176470588235, + "formal_logic": 0.36, + "high_school_statistics": 0.30697674418604654, + "international_law": 0.6833333333333333, + "high_school_mathematics": 0.25650557620817843, + "high_school_computer_science": 0.46464646464646464, + "conceptual_physics": 0.42735042735042733, + "miscellaneous": 0.6214833759590793, + "high_school_chemistry": 0.3316831683168317, + "marketing": 0.7167381974248928, + "professional_law": 0.3796477495107632, + "management": 0.5784313725490197, + "college_physics": 0.33663366336633666, + "jurisprudence": 0.5327102803738317, + "world_religions": 0.5411764705882353, + "sociology": 0.63, + "us_foreign_policy": 0.6262626262626263, + "high_school_macroeconomics": 0.4832904884318766, + "computer_security": 0.5959595959595959, + "moral_scenarios": 0.24496644295302014, + "moral_disputes": 0.5333333333333333, + "electrical_engineering": 0.4444444444444444, + "astronomy": 0.47019867549668876, + "college_biology": 0.46853146853146854 + } + }, + "prompt_2": { + "accuracy": 0.47129066857347157, + "category_acc": { + "high_school_european_history": 0.6951219512195121, + "business_ethics": 0.6565656565656566, + "clinical_knowledge": 0.553030303030303, + "medical_genetics": 0.45454545454545453, + "high_school_us_history": 0.5911330049261084, + "high_school_physics": 0.26666666666666666, + "high_school_world_history": 0.6313559322033898, + "virology": 0.40606060606060607, + "high_school_microeconomics": 0.4641350210970464, + "econometrics": 0.30973451327433627, + "college_computer_science": 0.36363636363636365, + "high_school_biology": 0.5339805825242718, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.3807829181494662, + "philosophy": 0.5064516129032258, + "professional_medicine": 0.45018450184501846, + "nutrition": 0.4918032786885246, + "global_facts": 0.2727272727272727, + "machine_learning": 0.43243243243243246, + "security_studies": 0.5327868852459017, + "public_relations": 0.5596330275229358, + "professional_psychology": 0.45171849427168576, + "prehistory": 0.44272445820433437, + "anatomy": 0.43283582089552236, + "human_sexuality": 0.5615384615384615, + "college_medicine": 0.4418604651162791, + "high_school_government_and_politics": 0.671875, + "college_chemistry": 0.4444444444444444, + "logical_fallacies": 0.6172839506172839, + "high_school_geography": 0.6192893401015228, + "elementary_mathematics": 0.3156498673740053, + "human_aging": 0.5180180180180181, + "college_mathematics": 0.21212121212121213, + "high_school_psychology": 0.6893382352941176, + "formal_logic": 0.368, + "high_school_statistics": 0.3674418604651163, + "international_law": 0.675, + "high_school_mathematics": 0.25650557620817843, + "high_school_computer_science": 0.494949494949495, + "conceptual_physics": 0.41025641025641024, + "miscellaneous": 0.6317135549872123, + "high_school_chemistry": 0.3217821782178218, + "marketing": 0.7639484978540773, + "professional_law": 0.3796477495107632, + "management": 0.5784313725490197, + "college_physics": 0.297029702970297, + "jurisprudence": 0.5887850467289719, + "world_religions": 0.5117647058823529, + "sociology": 0.64, + "us_foreign_policy": 0.6262626262626263, + "high_school_macroeconomics": 0.5167095115681234, + "computer_security": 0.5959595959595959, + "moral_scenarios": 0.24049217002237136, + "moral_disputes": 0.518840579710145, + "electrical_engineering": 0.4375, + "astronomy": 0.4768211920529801, + "college_biology": 0.45454545454545453 + } + }, + "prompt_3": { + "accuracy": 0.4759385055416518, + "category_acc": { + "high_school_european_history": 0.6463414634146342, + "business_ethics": 0.6161616161616161, + "clinical_knowledge": 0.5416666666666666, + "medical_genetics": 0.46464646464646464, + "high_school_us_history": 0.6157635467980296, + "high_school_physics": 0.3, + "high_school_world_history": 0.6186440677966102, + "virology": 0.4121212121212121, + "high_school_microeconomics": 0.459915611814346, + "econometrics": 0.2920353982300885, + "college_computer_science": 0.37373737373737376, + "high_school_biology": 0.5339805825242718, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.3807829181494662, + "philosophy": 0.532258064516129, + "professional_medicine": 0.44280442804428044, + "nutrition": 0.4918032786885246, + "global_facts": 0.2828282828282828, + "machine_learning": 0.40540540540540543, + "security_studies": 0.5532786885245902, + "public_relations": 0.5963302752293578, + "professional_psychology": 0.46317512274959083, + "prehistory": 0.4551083591331269, + "anatomy": 0.44776119402985076, + "human_sexuality": 0.5461538461538461, + "college_medicine": 0.43023255813953487, + "high_school_government_and_politics": 0.6614583333333334, + "college_chemistry": 0.40404040404040403, + "logical_fallacies": 0.6296296296296297, + "high_school_geography": 0.6192893401015228, + "elementary_mathematics": 0.35013262599469497, + "human_aging": 0.5315315315315315, + "college_mathematics": 0.23232323232323232, + "high_school_psychology": 0.6911764705882353, + "formal_logic": 0.368, + "high_school_statistics": 0.3116279069767442, + "international_law": 0.6833333333333333, + "high_school_mathematics": 0.2862453531598513, + "high_school_computer_science": 0.47474747474747475, + "conceptual_physics": 0.405982905982906, + "miscellaneous": 0.6381074168797954, + "high_school_chemistry": 0.35148514851485146, + "marketing": 0.7553648068669528, + "professional_law": 0.38290932811480755, + "management": 0.6078431372549019, + "college_physics": 0.31683168316831684, + "jurisprudence": 0.6074766355140186, + "world_religions": 0.5352941176470588, + "sociology": 0.65, + "us_foreign_policy": 0.6565656565656566, + "high_school_macroeconomics": 0.519280205655527, + "computer_security": 0.5656565656565656, + "moral_scenarios": 0.23937360178970918, + "moral_disputes": 0.5333333333333333, + "electrical_engineering": 0.4513888888888889, + "astronomy": 0.5033112582781457, + "college_biology": 0.46853146853146854 + } + }, + "prompt_4": { + "accuracy": 0.4783696818019306, + "category_acc": { + "high_school_european_history": 0.6707317073170732, + "business_ethics": 0.6464646464646465, + "clinical_knowledge": 0.5492424242424242, + "medical_genetics": 0.494949494949495, + "high_school_us_history": 0.6206896551724138, + "high_school_physics": 0.24666666666666667, + "high_school_world_history": 0.6694915254237288, + "virology": 0.4303030303030303, + "high_school_microeconomics": 0.4767932489451477, + "econometrics": 0.2831858407079646, + "college_computer_science": 0.35353535353535354, + "high_school_biology": 0.5469255663430421, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.38434163701067614, + "philosophy": 0.5225806451612903, + "professional_medicine": 0.43911439114391143, + "nutrition": 0.49508196721311476, + "global_facts": 0.29292929292929293, + "machine_learning": 0.3783783783783784, + "security_studies": 0.569672131147541, + "public_relations": 0.5504587155963303, + "professional_psychology": 0.45662847790507366, + "prehistory": 0.43962848297213625, + "anatomy": 0.4701492537313433, + "human_sexuality": 0.5615384615384615, + "college_medicine": 0.46511627906976744, + "high_school_government_and_politics": 0.6822916666666666, + "college_chemistry": 0.41414141414141414, + "logical_fallacies": 0.6234567901234568, + "high_school_geography": 0.6345177664974619, + "elementary_mathematics": 0.32891246684350134, + "human_aging": 0.5135135135135135, + "college_mathematics": 0.2222222222222222, + "high_school_psychology": 0.6875, + "formal_logic": 0.376, + "high_school_statistics": 0.3581395348837209, + "international_law": 0.675, + "high_school_mathematics": 0.27137546468401486, + "high_school_computer_science": 0.48484848484848486, + "conceptual_physics": 0.41025641025641024, + "miscellaneous": 0.6368286445012787, + "high_school_chemistry": 0.33663366336633666, + "marketing": 0.7811158798283262, + "professional_law": 0.3770384866275277, + "management": 0.5882352941176471, + "college_physics": 0.32673267326732675, + "jurisprudence": 0.5981308411214953, + "world_religions": 0.5294117647058824, + "sociology": 0.685, + "us_foreign_policy": 0.6464646464646465, + "high_school_macroeconomics": 0.5218508997429306, + "computer_security": 0.6060606060606061, + "moral_scenarios": 0.24272930648769575, + "moral_disputes": 0.5565217391304348, + "electrical_engineering": 0.4583333333333333, + "astronomy": 0.5033112582781457, + "college_biology": 0.46153846153846156 + } + }, + "prompt_5": { + "accuracy": 0.4657847693957812, + "category_acc": { + "high_school_european_history": 0.6097560975609756, + "business_ethics": 0.6565656565656566, + "clinical_knowledge": 0.5340909090909091, + "medical_genetics": 0.47474747474747475, + "high_school_us_history": 0.5812807881773399, + "high_school_physics": 0.26666666666666666, + "high_school_world_history": 0.5720338983050848, + "virology": 0.40606060606060607, + "high_school_microeconomics": 0.43037974683544306, + "econometrics": 0.3274336283185841, + "college_computer_science": 0.3838383838383838, + "high_school_biology": 0.5210355987055016, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.37722419928825623, + "philosophy": 0.4774193548387097, + "professional_medicine": 0.45387453874538747, + "nutrition": 0.46885245901639344, + "global_facts": 0.2828282828282828, + "machine_learning": 0.38738738738738737, + "security_studies": 0.4672131147540984, + "public_relations": 0.5504587155963303, + "professional_psychology": 0.45171849427168576, + "prehistory": 0.4674922600619195, + "anatomy": 0.44029850746268656, + "human_sexuality": 0.5307692307692308, + "college_medicine": 0.436046511627907, + "high_school_government_and_politics": 0.65625, + "college_chemistry": 0.40404040404040403, + "logical_fallacies": 0.6296296296296297, + "high_school_geography": 0.6040609137055838, + "elementary_mathematics": 0.3209549071618037, + "human_aging": 0.5315315315315315, + "college_mathematics": 0.25252525252525254, + "high_school_psychology": 0.6764705882352942, + "formal_logic": 0.352, + "high_school_statistics": 0.3116279069767442, + "international_law": 0.675, + "high_school_mathematics": 0.27137546468401486, + "high_school_computer_science": 0.47474747474747475, + "conceptual_physics": 0.41025641025641024, + "miscellaneous": 0.6445012787723785, + "high_school_chemistry": 0.3415841584158416, + "marketing": 0.7253218884120172, + "professional_law": 0.3724722765818656, + "management": 0.5980392156862745, + "college_physics": 0.33663366336633666, + "jurisprudence": 0.5794392523364486, + "world_religions": 0.5588235294117647, + "sociology": 0.66, + "us_foreign_policy": 0.6464646464646465, + "high_school_macroeconomics": 0.5012853470437018, + "computer_security": 0.5555555555555556, + "moral_scenarios": 0.24161073825503357, + "moral_disputes": 0.5362318840579711, + "electrical_engineering": 0.4513888888888889, + "astronomy": 0.4900662251655629, + "college_biology": 0.4755244755244755 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2310549777117385 + }, + "prompt_2": { + "accuracy": 0.2236255572065379 + }, + "prompt_3": { + "accuracy": 0.24071322436849926 + }, + "prompt_4": { + "accuracy": 0.24591381872213966 + }, + "prompt_5": { + "accuracy": 0.2540861812778603 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2521793275217933, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.16666666666666666, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.2857142857142857, + "college_physics": 0.25, + "college_chemistry": 0.1724137931034483, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.38095238095238093, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.1724137931034483, + "high_school_mathematics": 0.08695652173913043, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.041666666666666664, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.21666666666666667, + "business_administration": 0.2894736842105263, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.20689655172413793, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.2653061224489796, + "high_school_politics": 0.25, + "high_school_geography": 0.16666666666666666, + "middle_school_politics": 0.23076923076923078, + "middle_school_geography": 0.058823529411764705, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.3333333333333333, + "logic": 0.2222222222222222, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.25, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.10714285714285714, + "high_school_chinese": 0.375, + "high_school_history": 0.32, + "middle_school_history": 0.14814814814814814, + "civil_servant": 0.2692307692307692, + "sports_science": 0.3333333333333333, + "plant_protection": 0.1111111111111111, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.18518518518518517, + "urban_and_rural_planner": 0.37254901960784315, + "accountant": 0.3148148148148148, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.19444444444444445, + "tax_accountant": 0.16666666666666666, + "physician": 0.3148148148148148 + } + }, + "prompt_2": { + "accuracy": 0.25093399750934, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.20833333333333334, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.42857142857142855, + "college_physics": 0.08333333333333333, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.42857142857142855, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.1724137931034483, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.2692307692307692, + "middle_school_physics": 0.25, + "middle_school_chemistry": 0.24, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.23333333333333334, + "business_administration": 0.21052631578947367, + "marxism": 0.25, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.29411764705882354, + "teacher_qualification": 0.32653061224489793, + "high_school_politics": 0.125, + "high_school_geography": 0.08333333333333333, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.2222222222222222, + "law": 0.1724137931034483, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.14285714285714285, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.2, + "middle_school_history": 0.14814814814814814, + "civil_servant": 0.15384615384615385, + "sports_science": 0.125, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.3333333333333333, + "accountant": 0.2962962962962963, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.2777777777777778, + "physician": 0.25925925925925924 + } + }, + "prompt_3": { + "accuracy": 0.23163138231631383, + "category_acc": { + "computer_network": 0.125, + "operating_system": 0.375, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.3333333333333333, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.1724137931034483, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.25, + "high_school_chemistry": 0.08333333333333333, + "high_school_biology": 0.16666666666666666, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.15384615384615385, + "middle_school_physics": 0.16666666666666666, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.2, + "business_administration": 0.3157894736842105, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.13793103448275862, + "education_science": 0.20588235294117646, + "teacher_qualification": 0.24489795918367346, + "high_school_politics": 0.25, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.19230769230769232, + "middle_school_geography": 0.11764705882352941, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.3333333333333333, + "logic": 0.14814814814814814, + "law": 0.41379310344827586, + "chinese_language_and_literature": 0.21428571428571427, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.14285714285714285, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.28, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.21153846153846154, + "sports_science": 0.20833333333333334, + "plant_protection": 0.14814814814814814, + "basic_medicine": 0.041666666666666664, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.23529411764705882, + "accountant": 0.2222222222222222, + "fire_engineer": 0.19444444444444445, + "environmental_impact_assessment_engineer": 0.1388888888888889, + "tax_accountant": 0.2222222222222222, + "physician": 0.25925925925925924 + } + }, + "prompt_4": { + "accuracy": 0.2403486924034869, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.25, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.23809523809523808, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.13793103448275862, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.42857142857142855, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.10344827586206896, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.19230769230769232, + "middle_school_physics": 0.16666666666666666, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.10714285714285714, + "college_economics": 0.2, + "business_administration": 0.13157894736842105, + "marxism": 0.16666666666666666, + "mao_zedong_thought": 0.27586206896551724, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.16326530612244897, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.125, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.2962962962962963, + "law": 0.3793103448275862, + "chinese_language_and_literature": 0.25, + "art_studies": 0.21052631578947367, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.14285714285714285, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.32, + "middle_school_history": 0.1111111111111111, + "civil_servant": 0.2692307692307692, + "sports_science": 0.20833333333333334, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.16666666666666666, + "clinical_medicine": 0.18518518518518517, + "urban_and_rural_planner": 0.19607843137254902, + "accountant": 0.2037037037037037, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.08333333333333333, + "tax_accountant": 0.3888888888888889, + "physician": 0.24074074074074073 + } + }, + "prompt_5": { + "accuracy": 0.2521793275217933, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.30952380952380953, + "college_physics": 0.08333333333333333, + "college_chemistry": 0.13793103448275862, + "advanced_mathematics": 0.4166666666666667, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.2413793103448276, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.25, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.125, + "middle_school_biology": 0.15384615384615385, + "middle_school_physics": 0.16666666666666666, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.25, + "college_economics": 0.2833333333333333, + "business_administration": 0.23684210526315788, + "marxism": 0.25, + "mao_zedong_thought": 0.06896551724137931, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.22448979591836735, + "high_school_politics": 0.20833333333333334, + "high_school_geography": 0.25, + "middle_school_politics": 0.23076923076923078, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.25, + "logic": 0.2222222222222222, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.25, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.5, + "high_school_history": 0.28, + "middle_school_history": 0.14814814814814814, + "civil_servant": 0.3076923076923077, + "sports_science": 0.2916666666666667, + "plant_protection": 0.18518518518518517, + "basic_medicine": 0.16666666666666666, + "clinical_medicine": 0.14814814814814814, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.2037037037037037, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.12962962962962962, + "physician": 0.37037037037037035 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.23655913978494625 + }, + "prompt_2": { + "accuracy": 0.2616487455197133 + }, + "prompt_3": { + "accuracy": 0.2903225806451613 + }, + "prompt_4": { + "accuracy": 0.27598566308243727 + }, + "prompt_5": { + "accuracy": 0.2903225806451613 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2548782593679848, + "category_acc": { + "agronomy": 0.26627218934911245, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.21341463414634146, + "arts": 0.2375, + "astronomy": 0.2606060606060606, + "business_ethics": 0.2822966507177033, + "chinese_civil_service_exam": 0.31875, + "chinese_driving_rule": 0.2366412213740458, + "chinese_food_culture": 0.22794117647058823, + "chinese_foreign_policy": 0.22429906542056074, + "chinese_history": 0.23839009287925697, + "chinese_literature": 0.24509803921568626, + "chinese_teacher_qualification": 0.21787709497206703, + "clinical_knowledge": 0.26582278481012656, + "college_actuarial_science": 0.29245283018867924, + "college_education": 0.21495327102803738, + "college_engineering_hydrology": 0.2641509433962264, + "college_law": 0.2222222222222222, + "college_mathematics": 0.29523809523809524, + "college_medical_statistics": 0.2830188679245283, + "college_medicine": 0.23809523809523808, + "computer_science": 0.2549019607843137, + "computer_security": 0.19298245614035087, + "conceptual_physics": 0.2653061224489796, + "construction_project_management": 0.2446043165467626, + "economics": 0.25157232704402516, + "education": 0.294478527607362, + "electrical_engineering": 0.26744186046511625, + "elementary_chinese": 0.28174603174603174, + "elementary_commonsense": 0.23232323232323232, + "elementary_information_and_technology": 0.24369747899159663, + "elementary_mathematics": 0.2391304347826087, + "ethnology": 0.25925925925925924, + "food_science": 0.26573426573426573, + "genetics": 0.2215909090909091, + "global_facts": 0.20134228187919462, + "high_school_biology": 0.24260355029585798, + "high_school_chemistry": 0.23484848484848486, + "high_school_geography": 0.2796610169491525, + "high_school_mathematics": 0.2865853658536585, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.23076923076923078, + "human_sexuality": 0.2777777777777778, + "international_law": 0.23783783783783785, + "journalism": 0.313953488372093, + "jurisprudence": 0.2116788321167883, + "legal_and_moral_basis": 0.2336448598130841, + "logical": 0.2845528455284553, + "machine_learning": 0.23770491803278687, + "management": 0.2571428571428571, + "marketing": 0.2388888888888889, + "marxist_theory": 0.25396825396825395, + "modern_chinese": 0.3017241379310345, + "nutrition": 0.2413793103448276, + "philosophy": 0.3142857142857143, + "professional_accounting": 0.2342857142857143, + "professional_law": 0.2843601895734597, + "professional_medicine": 0.30319148936170215, + "professional_psychology": 0.27155172413793105, + "public_relations": 0.2988505747126437, + "security_study": 0.25925925925925924, + "sociology": 0.24778761061946902, + "sports_science": 0.24242424242424243, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.26627218934911245, + "world_history": 0.2732919254658385, + "world_religions": 0.23125 + } + }, + "prompt_2": { + "accuracy": 0.25677775859091695, + "category_acc": { + "agronomy": 0.2603550295857988, + "anatomy": 0.25, + "ancient_chinese": 0.23170731707317074, + "arts": 0.24375, + "astronomy": 0.24242424242424243, + "business_ethics": 0.24880382775119617, + "chinese_civil_service_exam": 0.225, + "chinese_driving_rule": 0.3053435114503817, + "chinese_food_culture": 0.23529411764705882, + "chinese_foreign_policy": 0.21495327102803738, + "chinese_history": 0.2755417956656347, + "chinese_literature": 0.2696078431372549, + "chinese_teacher_qualification": 0.27932960893854747, + "clinical_knowledge": 0.270042194092827, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.2616822429906542, + "college_engineering_hydrology": 0.29245283018867924, + "college_law": 0.17592592592592593, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.22641509433962265, + "college_medicine": 0.2673992673992674, + "computer_science": 0.31862745098039214, + "computer_security": 0.21052631578947367, + "conceptual_physics": 0.2585034013605442, + "construction_project_management": 0.31654676258992803, + "economics": 0.24528301886792453, + "education": 0.26993865030674846, + "electrical_engineering": 0.23255813953488372, + "elementary_chinese": 0.26587301587301587, + "elementary_commonsense": 0.23737373737373738, + "elementary_information_and_technology": 0.27310924369747897, + "elementary_mathematics": 0.23478260869565218, + "ethnology": 0.2814814814814815, + "food_science": 0.27972027972027974, + "genetics": 0.19886363636363635, + "global_facts": 0.24161073825503357, + "high_school_biology": 0.26627218934911245, + "high_school_chemistry": 0.2803030303030303, + "high_school_geography": 0.2288135593220339, + "high_school_mathematics": 0.2926829268292683, + "high_school_physics": 0.3090909090909091, + "high_school_politics": 0.22377622377622378, + "human_sexuality": 0.2698412698412698, + "international_law": 0.24864864864864866, + "journalism": 0.28488372093023256, + "jurisprudence": 0.22871046228710462, + "legal_and_moral_basis": 0.24766355140186916, + "logical": 0.2682926829268293, + "machine_learning": 0.2786885245901639, + "management": 0.23809523809523808, + "marketing": 0.22777777777777777, + "marxist_theory": 0.24338624338624337, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.2896551724137931, + "philosophy": 0.29523809523809524, + "professional_accounting": 0.24571428571428572, + "professional_law": 0.24170616113744076, + "professional_medicine": 0.26595744680851063, + "professional_psychology": 0.2413793103448276, + "public_relations": 0.27586206896551724, + "security_study": 0.3111111111111111, + "sociology": 0.22566371681415928, + "sports_science": 0.22424242424242424, + "traditional_chinese_medicine": 0.25405405405405407, + "virology": 0.33727810650887574, + "world_history": 0.2670807453416149, + "world_religions": 0.2375 + } + }, + "prompt_3": { + "accuracy": 0.2528060783975134, + "category_acc": { + "agronomy": 0.22485207100591717, + "anatomy": 0.22972972972972974, + "ancient_chinese": 0.25, + "arts": 0.26875, + "astronomy": 0.24242424242424243, + "business_ethics": 0.22488038277511962, + "chinese_civil_service_exam": 0.24375, + "chinese_driving_rule": 0.2595419847328244, + "chinese_food_culture": 0.23529411764705882, + "chinese_foreign_policy": 0.2336448598130841, + "chinese_history": 0.2786377708978328, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.20670391061452514, + "clinical_knowledge": 0.270042194092827, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.3644859813084112, + "college_engineering_hydrology": 0.29245283018867924, + "college_law": 0.16666666666666666, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.24528301886792453, + "college_medicine": 0.23076923076923078, + "computer_science": 0.22549019607843138, + "computer_security": 0.1871345029239766, + "conceptual_physics": 0.23129251700680273, + "construction_project_management": 0.19424460431654678, + "economics": 0.2641509433962264, + "education": 0.25766871165644173, + "electrical_engineering": 0.2616279069767442, + "elementary_chinese": 0.24603174603174602, + "elementary_commonsense": 0.2727272727272727, + "elementary_information_and_technology": 0.29831932773109243, + "elementary_mathematics": 0.26521739130434785, + "ethnology": 0.2074074074074074, + "food_science": 0.2727272727272727, + "genetics": 0.2784090909090909, + "global_facts": 0.2550335570469799, + "high_school_biology": 0.2603550295857988, + "high_school_chemistry": 0.25, + "high_school_geography": 0.23728813559322035, + "high_school_mathematics": 0.2926829268292683, + "high_school_physics": 0.22727272727272727, + "high_school_politics": 0.20279720279720279, + "human_sexuality": 0.2857142857142857, + "international_law": 0.23243243243243245, + "journalism": 0.22093023255813954, + "jurisprudence": 0.2360097323600973, + "legal_and_moral_basis": 0.2336448598130841, + "logical": 0.2764227642276423, + "machine_learning": 0.16393442622950818, + "management": 0.29523809523809524, + "marketing": 0.26666666666666666, + "marxist_theory": 0.26455026455026454, + "modern_chinese": 0.23275862068965517, + "nutrition": 0.27586206896551724, + "philosophy": 0.2571428571428571, + "professional_accounting": 0.26857142857142857, + "professional_law": 0.24644549763033174, + "professional_medicine": 0.2526595744680851, + "professional_psychology": 0.28448275862068967, + "public_relations": 0.22988505747126436, + "security_study": 0.2740740740740741, + "sociology": 0.22566371681415928, + "sports_science": 0.2909090909090909, + "traditional_chinese_medicine": 0.31891891891891894, + "virology": 0.28994082840236685, + "world_history": 0.2919254658385093, + "world_religions": 0.24375 + } + }, + "prompt_4": { + "accuracy": 0.25513728198929375, + "category_acc": { + "agronomy": 0.20710059171597633, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.29878048780487804, + "arts": 0.26875, + "astronomy": 0.26666666666666666, + "business_ethics": 0.2583732057416268, + "chinese_civil_service_exam": 0.31875, + "chinese_driving_rule": 0.2366412213740458, + "chinese_food_culture": 0.25735294117647056, + "chinese_foreign_policy": 0.18691588785046728, + "chinese_history": 0.24148606811145512, + "chinese_literature": 0.25980392156862747, + "chinese_teacher_qualification": 0.25139664804469275, + "clinical_knowledge": 0.270042194092827, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.34579439252336447, + "college_engineering_hydrology": 0.32075471698113206, + "college_law": 0.16666666666666666, + "college_mathematics": 0.18095238095238095, + "college_medical_statistics": 0.24528301886792453, + "college_medicine": 0.2783882783882784, + "computer_science": 0.24509803921568626, + "computer_security": 0.17543859649122806, + "conceptual_physics": 0.25170068027210885, + "construction_project_management": 0.2302158273381295, + "economics": 0.2641509433962264, + "education": 0.24539877300613497, + "electrical_engineering": 0.20930232558139536, + "elementary_chinese": 0.2698412698412698, + "elementary_commonsense": 0.25757575757575757, + "elementary_information_and_technology": 0.19747899159663865, + "elementary_mathematics": 0.2956521739130435, + "ethnology": 0.2740740740740741, + "food_science": 0.2517482517482518, + "genetics": 0.23295454545454544, + "global_facts": 0.2348993288590604, + "high_school_biology": 0.25443786982248523, + "high_school_chemistry": 0.20454545454545456, + "high_school_geography": 0.2966101694915254, + "high_school_mathematics": 0.2682926829268293, + "high_school_physics": 0.2818181818181818, + "high_school_politics": 0.23776223776223776, + "human_sexuality": 0.2222222222222222, + "international_law": 0.2594594594594595, + "journalism": 0.28488372093023256, + "jurisprudence": 0.2749391727493917, + "legal_and_moral_basis": 0.2383177570093458, + "logical": 0.24390243902439024, + "machine_learning": 0.21311475409836064, + "management": 0.22380952380952382, + "marketing": 0.23333333333333334, + "marxist_theory": 0.26455026455026454, + "modern_chinese": 0.21551724137931033, + "nutrition": 0.23448275862068965, + "philosophy": 0.2857142857142857, + "professional_accounting": 0.2857142857142857, + "professional_law": 0.23696682464454977, + "professional_medicine": 0.2579787234042553, + "professional_psychology": 0.33189655172413796, + "public_relations": 0.27011494252873564, + "security_study": 0.2962962962962963, + "sociology": 0.27876106194690264, + "sports_science": 0.21818181818181817, + "traditional_chinese_medicine": 0.31891891891891894, + "virology": 0.23076923076923078, + "world_history": 0.2670807453416149, + "world_religions": 0.25 + } + }, + "prompt_5": { + "accuracy": 0.2508202383008116, + "category_acc": { + "agronomy": 0.21893491124260356, + "anatomy": 0.22972972972972974, + "ancient_chinese": 0.22560975609756098, + "arts": 0.26875, + "astronomy": 0.20606060606060606, + "business_ethics": 0.215311004784689, + "chinese_civil_service_exam": 0.26875, + "chinese_driving_rule": 0.24427480916030533, + "chinese_food_culture": 0.20588235294117646, + "chinese_foreign_policy": 0.205607476635514, + "chinese_history": 0.2693498452012384, + "chinese_literature": 0.24019607843137256, + "chinese_teacher_qualification": 0.2569832402234637, + "clinical_knowledge": 0.24472573839662448, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.205607476635514, + "college_engineering_hydrology": 0.32075471698113206, + "college_law": 0.28703703703703703, + "college_mathematics": 0.20952380952380953, + "college_medical_statistics": 0.29245283018867924, + "college_medicine": 0.2271062271062271, + "computer_science": 0.19607843137254902, + "computer_security": 0.21637426900584794, + "conceptual_physics": 0.2108843537414966, + "construction_project_management": 0.2517985611510791, + "economics": 0.22012578616352202, + "education": 0.294478527607362, + "electrical_engineering": 0.27325581395348836, + "elementary_chinese": 0.24206349206349206, + "elementary_commonsense": 0.2474747474747475, + "elementary_information_and_technology": 0.2773109243697479, + "elementary_mathematics": 0.2608695652173913, + "ethnology": 0.21481481481481482, + "food_science": 0.2937062937062937, + "genetics": 0.23863636363636365, + "global_facts": 0.2214765100671141, + "high_school_biology": 0.2781065088757396, + "high_school_chemistry": 0.23484848484848486, + "high_school_geography": 0.2457627118644068, + "high_school_mathematics": 0.3170731707317073, + "high_school_physics": 0.2636363636363636, + "high_school_politics": 0.22377622377622378, + "human_sexuality": 0.23015873015873015, + "international_law": 0.2756756756756757, + "journalism": 0.26744186046511625, + "jurisprudence": 0.24574209245742093, + "legal_and_moral_basis": 0.26635514018691586, + "logical": 0.2682926829268293, + "machine_learning": 0.22950819672131148, + "management": 0.2571428571428571, + "marketing": 0.23333333333333334, + "marxist_theory": 0.2698412698412698, + "modern_chinese": 0.25862068965517243, + "nutrition": 0.2, + "philosophy": 0.22857142857142856, + "professional_accounting": 0.3028571428571429, + "professional_law": 0.23696682464454977, + "professional_medicine": 0.28191489361702127, + "professional_psychology": 0.2974137931034483, + "public_relations": 0.22988505747126436, + "security_study": 0.24444444444444444, + "sociology": 0.2079646017699115, + "sports_science": 0.30303030303030304, + "traditional_chinese_medicine": 0.2972972972972973, + "virology": 0.30177514792899407, + "world_history": 0.2546583850931677, + "world_religions": 0.225 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.18181818181818182 + }, + "prompt_2": { + "accuracy": 0.24242424242424243 + }, + "prompt_3": { + "accuracy": 0.18181818181818182 + }, + "prompt_4": { + "accuracy": 0.2727272727272727 + }, + "prompt_5": { + "accuracy": 0.18181818181818182 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3340909090909091 + }, + "prompt_2": { + "accuracy": 0.37272727272727274 + }, + "prompt_3": { + "accuracy": 0.37272727272727274 + }, + "prompt_4": { + "accuracy": 0.36363636363636365 + }, + "prompt_5": { + "accuracy": 0.3704545454545455 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.34 + }, + "prompt_2": { + "accuracy": 0.3430508474576271 + }, + "prompt_3": { + "accuracy": 0.33322033898305087 + }, + "prompt_4": { + "accuracy": 0.3416949152542373 + }, + "prompt_5": { + "accuracy": 0.331864406779661 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2744951383694839 + }, + "prompt_2": { + "accuracy": 0.2643979057591623 + }, + "prompt_3": { + "accuracy": 0.2744951383694839 + }, + "prompt_4": { + "accuracy": 0.2868362004487659 + }, + "prompt_5": { + "accuracy": 0.2696335078534031 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.9069083782459578 + }, + "prompt_2": { + "accuracy": 0.9000489955903969 + }, + "prompt_3": { + "accuracy": 0.9059284664380206 + }, + "prompt_4": { + "accuracy": 0.8716315531602156 + }, + "prompt_5": { + "accuracy": 0.8780009799118079 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.4788363680763906, + "rouge2": 0.22481737990763914, + "rougeL": 0.3799540117513252, + "avg_rouge": 0.3612025865784516 + }, + "prompt_2": { + "rouge1": 0.47528709100771566, + "rouge2": 0.2180754598747659, + "rougeL": 0.37394185523463974, + "avg_rouge": 0.3557681353723738 + }, + "prompt_3": { + "rouge1": 0.4816733382080628, + "rouge2": 0.22617795363343557, + "rougeL": 0.38221994005171656, + "avg_rouge": 0.3633570772977383 + }, + "prompt_4": { + "rouge1": 0.47508455033843777, + "rouge2": 0.21980134765172188, + "rougeL": 0.3743895491109724, + "avg_rouge": 0.3564251490337107 + }, + "prompt_5": { + "rouge1": 0.4813094609737152, + "rouge2": 0.22393372046328713, + "rougeL": 0.3805873270301869, + "avg_rouge": 0.36194350282239646 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.24529425370147764, + "rouge2": 0.056229595319873586, + "rougeL": 0.1975089344363329, + "avg_rouge": 0.16634426115256137 + }, + "prompt_2": { + "rouge1": 0.23375700045154926, + "rouge2": 0.05116835458859676, + "rougeL": 0.19262777986148805, + "avg_rouge": 0.1591843783005447 + }, + "prompt_3": { + "rouge1": 0.2619620961149525, + "rouge2": 0.05839804124867907, + "rougeL": 0.20534342932202432, + "avg_rouge": 0.175234522228552 + }, + "prompt_4": { + "rouge1": 0.23490349548727313, + "rouge2": 0.04996672938697106, + "rougeL": 0.19132152682344702, + "avg_rouge": 0.15873058389923042 + }, + "prompt_5": { + "rouge1": 0.25133924718407163, + "rouge2": 0.05456183915416433, + "rougeL": 0.19977202221366255, + "avg_rouge": 0.16855770285063285 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.9518348623853211 + }, + "prompt_2": { + "accuracy": 0.9495412844036697 + }, + "prompt_3": { + "accuracy": 0.948394495412844 + }, + "prompt_4": { + "accuracy": 0.9541284403669725 + }, + "prompt_5": { + "accuracy": 0.9426605504587156 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8245445829338447 + }, + "prompt_2": { + "accuracy": 0.8245445829338447 + }, + "prompt_3": { + "accuracy": 0.8178331735378715 + }, + "prompt_4": { + "accuracy": 0.8159156279961649 + }, + "prompt_5": { + "accuracy": 0.8159156279961649 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.823 + }, + "prompt_2": { + "accuracy": 0.82 + }, + "prompt_3": { + "accuracy": 0.807 + }, + "prompt_4": { + "accuracy": 0.818 + }, + "prompt_5": { + "accuracy": 0.8155 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.876 + }, + "prompt_2": { + "accuracy": 0.872 + }, + "prompt_3": { + "accuracy": 0.878 + }, + "prompt_4": { + "accuracy": 0.863 + }, + "prompt_5": { + "accuracy": 0.8645 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8915 + }, + "prompt_2": { + "accuracy": 0.853 + }, + "prompt_3": { + "accuracy": 0.8865 + }, + "prompt_4": { + "accuracy": 0.92 + }, + "prompt_5": { + "accuracy": 0.9005 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7605633802816901 + }, + "prompt_2": { + "accuracy": 0.7887323943661971 + }, + "prompt_3": { + "accuracy": 0.7605633802816901 + }, + "prompt_4": { + "accuracy": 0.7323943661971831 + }, + "prompt_5": { + "accuracy": 0.7464788732394366 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8844765342960289 + }, + "prompt_2": { + "accuracy": 0.8772563176895307 + }, + "prompt_3": { + "accuracy": 0.8736462093862816 + }, + "prompt_4": { + "accuracy": 0.855595667870036 + }, + "prompt_5": { + "accuracy": 0.8736462093862816 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7965686274509803 + }, + "prompt_2": { + "accuracy": 0.8235294117647058 + }, + "prompt_3": { + "accuracy": 0.8063725490196079 + }, + "prompt_4": { + "accuracy": 0.6519607843137255 + }, + "prompt_5": { + "accuracy": 0.8235294117647058 + } } }, "five_shot": { @@ -10482,53 +89139,1733 @@ "model_link": "https://huggingface.co/mistralai/Mistral-7B-v0.1", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.5133333333333333, + "language_acc": { + "English": 0.6466666666666666, + "Vietnamese": 0.43333333333333335, + "Malay": 0.43333333333333335, + "Indonesian": 0.49333333333333335, + "Spanish": 0.6066666666666667, + "Chinese": 0.49333333333333335, + "Filipino": 0.4866666666666667 + }, + "consistency_score_2": 0.607936507936508, + "consistency_score_3": 0.4474285714285714, + "consistency_score_4": 0.3516190476190476, + "consistency_score_5": 0.2850793650793651, + "consistency_score_6": 0.23428571428571426, + "consistency_score_7": 0.19333333333333333, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.56, + "English,Malay": 0.5333333333333333, + "English,Indonesian": 0.6, + "English,Spanish": 0.72, + "English,Chinese": 0.6133333333333333, + "English,Filipino": 0.6066666666666667, + "Vietnamese,Malay": 0.58, + "Vietnamese,Indonesian": 0.62, + "Vietnamese,Spanish": 0.6, + "Vietnamese,Chinese": 0.6466666666666666, + "Vietnamese,Filipino": 0.5466666666666666, + "Malay,Indonesian": 0.6933333333333334, + "Malay,Spanish": 0.6066666666666667, + "Malay,Chinese": 0.5666666666666667, + "Malay,Filipino": 0.6066666666666667, + "Indonesian,Spanish": 0.66, + "Indonesian,Chinese": 0.58, + "Indonesian,Filipino": 0.58, + "Spanish,Chinese": 0.66, + "Spanish,Filipino": 0.6133333333333333, + "Chinese,Filipino": 0.5733333333333334 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.38, + "English,Vietnamese,Indonesian": 0.43333333333333335, + "English,Vietnamese,Spanish": 0.48, + "English,Vietnamese,Chinese": 0.4666666666666667, + "English,Vietnamese,Filipino": 0.4, + "English,Malay,Indonesian": 0.44666666666666666, + "English,Malay,Spanish": 0.4533333333333333, + "English,Malay,Chinese": 0.4, + "English,Malay,Filipino": 0.4066666666666667, + "English,Indonesian,Spanish": 0.5133333333333333, + "English,Indonesian,Chinese": 0.43333333333333335, + "English,Indonesian,Filipino": 0.4266666666666667, + "English,Spanish,Chinese": 0.52, + "English,Spanish,Filipino": 0.4866666666666667, + "English,Chinese,Filipino": 0.4266666666666667, + "Vietnamese,Malay,Indonesian": 0.48, + "Vietnamese,Malay,Spanish": 0.43333333333333335, + "Vietnamese,Malay,Chinese": 0.4266666666666667, + "Vietnamese,Malay,Filipino": 0.3933333333333333, + "Vietnamese,Indonesian,Spanish": 0.4866666666666667, + "Vietnamese,Indonesian,Chinese": 0.4666666666666667, + "Vietnamese,Indonesian,Filipino": 0.42, + "Vietnamese,Spanish,Chinese": 0.4866666666666667, + "Vietnamese,Spanish,Filipino": 0.41333333333333333, + "Vietnamese,Chinese,Filipino": 0.43333333333333335, + "Malay,Indonesian,Spanish": 0.5066666666666667, + "Malay,Indonesian,Chinese": 0.44, + "Malay,Indonesian,Filipino": 0.4666666666666667, + "Malay,Spanish,Chinese": 0.44666666666666666, + "Malay,Spanish,Filipino": 0.44666666666666666, + "Malay,Chinese,Filipino": 0.42, + "Indonesian,Spanish,Chinese": 0.4866666666666667, + "Indonesian,Spanish,Filipino": 0.44666666666666666, + "Indonesian,Chinese,Filipino": 0.41333333333333333, + "Spanish,Chinese,Filipino": 0.47333333333333333 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.34, + "English,Vietnamese,Malay,Spanish": 0.3466666666666667, + "English,Vietnamese,Malay,Chinese": 0.32666666666666666, + "English,Vietnamese,Malay,Filipino": 0.2866666666666667, + "English,Vietnamese,Indonesian,Spanish": 0.3933333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.36666666666666664, + "English,Vietnamese,Indonesian,Filipino": 0.32666666666666666, + "English,Vietnamese,Spanish,Chinese": 0.4066666666666667, + "English,Vietnamese,Spanish,Filipino": 0.34, + "English,Vietnamese,Chinese,Filipino": 0.3466666666666667, + "English,Malay,Indonesian,Spanish": 0.38, + "English,Malay,Indonesian,Chinese": 0.32666666666666666, + "English,Malay,Indonesian,Filipino": 0.32666666666666666, + "English,Malay,Spanish,Chinese": 0.35333333333333333, + "English,Malay,Spanish,Filipino": 0.34, + "English,Malay,Chinese,Filipino": 0.32, + "English,Indonesian,Spanish,Chinese": 0.38666666666666666, + "English,Indonesian,Spanish,Filipino": 0.35333333333333333, + "English,Indonesian,Chinese,Filipino": 0.31333333333333335, + "English,Spanish,Chinese,Filipino": 0.37333333333333335, + "Vietnamese,Malay,Indonesian,Spanish": 0.4, + "Vietnamese,Malay,Indonesian,Chinese": 0.36666666666666664, + "Vietnamese,Malay,Indonesian,Filipino": 0.34, + "Vietnamese,Malay,Spanish,Chinese": 0.35333333333333333, + "Vietnamese,Malay,Spanish,Filipino": 0.32, + "Vietnamese,Malay,Chinese,Filipino": 0.31333333333333335, + "Vietnamese,Indonesian,Spanish,Chinese": 0.4, + "Vietnamese,Indonesian,Spanish,Filipino": 0.35333333333333333, + "Vietnamese,Indonesian,Chinese,Filipino": 0.3466666666666667, + "Vietnamese,Spanish,Chinese,Filipino": 0.36, + "Malay,Indonesian,Spanish,Chinese": 0.37333333333333335, + "Malay,Indonesian,Spanish,Filipino": 0.36666666666666664, + "Malay,Indonesian,Chinese,Filipino": 0.3466666666666667, + "Malay,Spanish,Chinese,Filipino": 0.36, + "Indonesian,Spanish,Chinese,Filipino": 0.35333333333333333 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.31333333333333335, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.2866666666666667, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.25333333333333335, + "English,Vietnamese,Malay,Spanish,Chinese": 0.29333333333333333, + "English,Vietnamese,Malay,Spanish,Filipino": 0.25333333333333335, + "English,Vietnamese,Malay,Chinese,Filipino": 0.25333333333333335, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.3333333333333333, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.2866666666666667, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.28, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.3, + "English,Malay,Indonesian,Spanish,Chinese": 0.2866666666666667, + "English,Malay,Indonesian,Spanish,Filipino": 0.2733333333333333, + "English,Malay,Indonesian,Chinese,Filipino": 0.25333333333333335, + "English,Malay,Spanish,Chinese,Filipino": 0.28, + "English,Indonesian,Spanish,Chinese,Filipino": 0.2733333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.32, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.29333333333333333, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.28, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.2733333333333333, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.3, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.3 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.26, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.22666666666666666, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.22, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.22, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.24666666666666667, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.22, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.24666666666666667 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.19333333333333333 + } + }, + "AC3_2": 0.5566440165603975, + "AC3_3": 0.4781205392047951, + "AC3_4": 0.41735887831921453, + "AC3_5": 0.3665791914716478, + "AC3_6": 0.32173248403339877, + "AC3_7": 0.2808805031049068 + }, + "prompt_2": { + "overall_acc": 0.4885714285714286, + "language_acc": { + "English": 0.5733333333333334, + "Vietnamese": 0.4533333333333333, + "Malay": 0.4, + "Indonesian": 0.46, + "Spanish": 0.56, + "Chinese": 0.4666666666666667, + "Filipino": 0.5066666666666667 + }, + "consistency_score_2": 0.5901587301587302, + "consistency_score_3": 0.43676190476190474, + "consistency_score_4": 0.3533333333333332, + "consistency_score_5": 0.29809523809523814, + "consistency_score_6": 0.2580952380952381, + "consistency_score_7": 0.22666666666666666, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5733333333333334, + "English,Malay": 0.58, + "English,Indonesian": 0.6, + "English,Spanish": 0.7266666666666667, + "English,Chinese": 0.62, + "English,Filipino": 0.58, + "Vietnamese,Malay": 0.5933333333333334, + "Vietnamese,Indonesian": 0.5866666666666667, + "Vietnamese,Spanish": 0.5733333333333334, + "Vietnamese,Chinese": 0.48, + "Vietnamese,Filipino": 0.5933333333333334, + "Malay,Indonesian": 0.6666666666666666, + "Malay,Spanish": 0.6533333333333333, + "Malay,Chinese": 0.52, + "Malay,Filipino": 0.5866666666666667, + "Indonesian,Spanish": 0.6, + "Indonesian,Chinese": 0.52, + "Indonesian,Filipino": 0.5733333333333334, + "Spanish,Chinese": 0.6, + "Spanish,Filipino": 0.6066666666666667, + "Chinese,Filipino": 0.56 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.43333333333333335, + "English,Vietnamese,Indonesian": 0.44, + "English,Vietnamese,Spanish": 0.47333333333333333, + "English,Vietnamese,Chinese": 0.38, + "English,Vietnamese,Filipino": 0.4066666666666667, + "English,Malay,Indonesian": 0.46, + "English,Malay,Spanish": 0.5133333333333333, + "English,Malay,Chinese": 0.4066666666666667, + "English,Malay,Filipino": 0.43333333333333335, + "English,Indonesian,Spanish": 0.5, + "English,Indonesian,Chinese": 0.4266666666666667, + "English,Indonesian,Filipino": 0.4266666666666667, + "English,Spanish,Chinese": 0.49333333333333335, + "English,Spanish,Filipino": 0.4866666666666667, + "English,Chinese,Filipino": 0.43333333333333335, + "Vietnamese,Malay,Indonesian": 0.4666666666666667, + "Vietnamese,Malay,Spanish": 0.4666666666666667, + "Vietnamese,Malay,Chinese": 0.35333333333333333, + "Vietnamese,Malay,Filipino": 0.4266666666666667, + "Vietnamese,Indonesian,Spanish": 0.4533333333333333, + "Vietnamese,Indonesian,Chinese": 0.38, + "Vietnamese,Indonesian,Filipino": 0.43333333333333335, + "Vietnamese,Spanish,Chinese": 0.38, + "Vietnamese,Spanish,Filipino": 0.4266666666666667, + "Vietnamese,Chinese,Filipino": 0.4, + "Malay,Indonesian,Spanish": 0.5, + "Malay,Indonesian,Chinese": 0.4066666666666667, + "Malay,Indonesian,Filipino": 0.46, + "Malay,Spanish,Chinese": 0.4266666666666667, + "Malay,Spanish,Filipino": 0.47333333333333333, + "Malay,Chinese,Filipino": 0.4066666666666667, + "Indonesian,Spanish,Chinese": 0.4266666666666667, + "Indonesian,Spanish,Filipino": 0.44666666666666666, + "Indonesian,Chinese,Filipino": 0.4066666666666667, + "Spanish,Chinese,Filipino": 0.43333333333333335 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.37333333333333335, + "English,Vietnamese,Malay,Spanish": 0.4066666666666667, + "English,Vietnamese,Malay,Chinese": 0.3, + "English,Vietnamese,Malay,Filipino": 0.3333333333333333, + "English,Vietnamese,Indonesian,Spanish": 0.38666666666666666, + "English,Vietnamese,Indonesian,Chinese": 0.32, + "English,Vietnamese,Indonesian,Filipino": 0.3333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.3333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.36, + "English,Vietnamese,Chinese,Filipino": 0.32, + "English,Malay,Indonesian,Spanish": 0.42, + "English,Malay,Indonesian,Chinese": 0.34, + "English,Malay,Indonesian,Filipino": 0.36666666666666664, + "English,Malay,Spanish,Chinese": 0.37333333333333335, + "English,Malay,Spanish,Filipino": 0.4, + "English,Malay,Chinese,Filipino": 0.35333333333333333, + "English,Indonesian,Spanish,Chinese": 0.37333333333333335, + "English,Indonesian,Spanish,Filipino": 0.38666666666666666, + "English,Indonesian,Chinese,Filipino": 0.3466666666666667, + "English,Spanish,Chinese,Filipino": 0.37333333333333335, + "Vietnamese,Malay,Indonesian,Spanish": 0.3933333333333333, + "Vietnamese,Malay,Indonesian,Chinese": 0.30666666666666664, + "Vietnamese,Malay,Indonesian,Filipino": 0.36, + "Vietnamese,Malay,Spanish,Chinese": 0.31333333333333335, + "Vietnamese,Malay,Spanish,Filipino": 0.36666666666666664, + "Vietnamese,Malay,Chinese,Filipino": 0.30666666666666664, + "Vietnamese,Indonesian,Spanish,Chinese": 0.32666666666666666, + "Vietnamese,Indonesian,Spanish,Filipino": 0.35333333333333333, + "Vietnamese,Indonesian,Chinese,Filipino": 0.32666666666666666, + "Vietnamese,Spanish,Chinese,Filipino": 0.32666666666666666, + "Malay,Indonesian,Spanish,Chinese": 0.36, + "Malay,Indonesian,Spanish,Filipino": 0.3933333333333333, + "Malay,Indonesian,Chinese,Filipino": 0.3466666666666667, + "Malay,Spanish,Chinese,Filipino": 0.3466666666666667, + "Indonesian,Spanish,Chinese,Filipino": 0.34 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.3466666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.26666666666666666, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.29333333333333333, + "English,Vietnamese,Malay,Spanish,Chinese": 0.2866666666666667, + "English,Vietnamese,Malay,Spanish,Filipino": 0.32, + "English,Vietnamese,Malay,Chinese,Filipino": 0.26666666666666666, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.2866666666666667, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.30666666666666664, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.2733333333333333, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.2866666666666667, + "English,Malay,Indonesian,Spanish,Chinese": 0.32, + "English,Malay,Indonesian,Spanish,Filipino": 0.3466666666666667, + "English,Malay,Indonesian,Chinese,Filipino": 0.30666666666666664, + "English,Malay,Spanish,Chinese,Filipino": 0.32, + "English,Indonesian,Spanish,Chinese,Filipino": 0.30666666666666664, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.28, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.31333333333333335, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.2733333333333333, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.2733333333333333, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.28, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.30666666666666664 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.25333333333333335, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.28, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.24, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.25333333333333335, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.24666666666666667, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.2866666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.24666666666666667 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.22666666666666666 + } + }, + "AC3_2": 0.5345816866569095, + "AC3_3": 0.46121625590498827, + "AC3_4": 0.41009049768884664, + "AC3_5": 0.3702732618000431, + "AC3_6": 0.3377623906253179, + "AC3_7": 0.3096671104760119 + }, + "prompt_3": { + "overall_acc": 0.4961904761904762, + "language_acc": { + "English": 0.6, + "Vietnamese": 0.46, + "Malay": 0.36666666666666664, + "Indonesian": 0.5133333333333333, + "Spanish": 0.5733333333333334, + "Chinese": 0.4533333333333333, + "Filipino": 0.5066666666666667 + }, + "consistency_score_2": 0.5993650793650793, + "consistency_score_3": 0.44533333333333325, + "consistency_score_4": 0.36190476190476184, + "consistency_score_5": 0.3085714285714286, + "consistency_score_6": 0.2704761904761905, + "consistency_score_7": 0.24, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.6333333333333333, + "English,Malay": 0.5333333333333333, + "English,Indonesian": 0.64, + "English,Spanish": 0.72, + "English,Chinese": 0.6133333333333333, + "English,Filipino": 0.64, + "Vietnamese,Malay": 0.56, + "Vietnamese,Indonesian": 0.6133333333333333, + "Vietnamese,Spanish": 0.5733333333333334, + "Vietnamese,Chinese": 0.5266666666666666, + "Vietnamese,Filipino": 0.6266666666666667, + "Malay,Indonesian": 0.68, + "Malay,Spanish": 0.5933333333333334, + "Malay,Chinese": 0.5533333333333333, + "Malay,Filipino": 0.5133333333333333, + "Indonesian,Spanish": 0.6266666666666667, + "Indonesian,Chinese": 0.5666666666666667, + "Indonesian,Filipino": 0.5933333333333334, + "Spanish,Chinese": 0.62, + "Spanish,Filipino": 0.6133333333333333, + "Chinese,Filipino": 0.5466666666666666 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.41333333333333333, + "English,Vietnamese,Indonesian": 0.49333333333333335, + "English,Vietnamese,Spanish": 0.5066666666666667, + "English,Vietnamese,Chinese": 0.44, + "English,Vietnamese,Filipino": 0.5, + "English,Malay,Indonesian": 0.4533333333333333, + "English,Malay,Spanish": 0.46, + "English,Malay,Chinese": 0.3933333333333333, + "English,Malay,Filipino": 0.38666666666666666, + "English,Indonesian,Spanish": 0.5266666666666666, + "English,Indonesian,Chinese": 0.4666666666666667, + "English,Indonesian,Filipino": 0.47333333333333333, + "English,Spanish,Chinese": 0.5066666666666667, + "English,Spanish,Filipino": 0.5266666666666666, + "English,Chinese,Filipino": 0.44, + "Vietnamese,Malay,Indonesian": 0.47333333333333333, + "Vietnamese,Malay,Spanish": 0.4266666666666667, + "Vietnamese,Malay,Chinese": 0.38, + "Vietnamese,Malay,Filipino": 0.4066666666666667, + "Vietnamese,Indonesian,Spanish": 0.47333333333333333, + "Vietnamese,Indonesian,Chinese": 0.4266666666666667, + "Vietnamese,Indonesian,Filipino": 0.46, + "Vietnamese,Spanish,Chinese": 0.42, + "Vietnamese,Spanish,Filipino": 0.4533333333333333, + "Vietnamese,Chinese,Filipino": 0.4266666666666667, + "Malay,Indonesian,Spanish": 0.48, + "Malay,Indonesian,Chinese": 0.4266666666666667, + "Malay,Indonesian,Filipino": 0.43333333333333335, + "Malay,Spanish,Chinese": 0.4066666666666667, + "Malay,Spanish,Filipino": 0.41333333333333333, + "Malay,Chinese,Filipino": 0.36, + "Indonesian,Spanish,Chinese": 0.44, + "Indonesian,Spanish,Filipino": 0.4533333333333333, + "Indonesian,Chinese,Filipino": 0.4066666666666667, + "Spanish,Chinese,Filipino": 0.43333333333333335 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.37333333333333335, + "English,Vietnamese,Malay,Spanish": 0.38, + "English,Vietnamese,Malay,Chinese": 0.32, + "English,Vietnamese,Malay,Filipino": 0.34, + "English,Vietnamese,Indonesian,Spanish": 0.43333333333333335, + "English,Vietnamese,Indonesian,Chinese": 0.38666666666666666, + "English,Vietnamese,Indonesian,Filipino": 0.4, + "English,Vietnamese,Spanish,Chinese": 0.38666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.42, + "English,Vietnamese,Chinese,Filipino": 0.38, + "English,Malay,Indonesian,Spanish": 0.3933333333333333, + "English,Malay,Indonesian,Chinese": 0.3333333333333333, + "English,Malay,Indonesian,Filipino": 0.3466666666666667, + "English,Malay,Spanish,Chinese": 0.34, + "English,Malay,Spanish,Filipino": 0.36666666666666664, + "English,Malay,Chinese,Filipino": 0.3, + "English,Indonesian,Spanish,Chinese": 0.3933333333333333, + "English,Indonesian,Spanish,Filipino": 0.41333333333333333, + "English,Indonesian,Chinese,Filipino": 0.37333333333333335, + "English,Spanish,Chinese,Filipino": 0.4, + "Vietnamese,Malay,Indonesian,Spanish": 0.38, + "Vietnamese,Malay,Indonesian,Chinese": 0.32666666666666666, + "Vietnamese,Malay,Indonesian,Filipino": 0.36, + "Vietnamese,Malay,Spanish,Chinese": 0.32666666666666666, + "Vietnamese,Malay,Spanish,Filipino": 0.34, + "Vietnamese,Malay,Chinese,Filipino": 0.31333333333333335, + "Vietnamese,Indonesian,Spanish,Chinese": 0.36, + "Vietnamese,Indonesian,Spanish,Filipino": 0.38, + "Vietnamese,Indonesian,Chinese,Filipino": 0.36666666666666664, + "Vietnamese,Spanish,Chinese,Filipino": 0.36666666666666664, + "Malay,Indonesian,Spanish,Chinese": 0.34, + "Malay,Indonesian,Spanish,Filipino": 0.36, + "Malay,Indonesian,Chinese,Filipino": 0.30666666666666664, + "Malay,Spanish,Chinese,Filipino": 0.31333333333333335, + "Indonesian,Spanish,Chinese,Filipino": 0.3466666666666667 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.34, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.2866666666666667, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.30666666666666664, + "English,Vietnamese,Malay,Spanish,Chinese": 0.3, + "English,Vietnamese,Malay,Spanish,Filipino": 0.32, + "English,Vietnamese,Malay,Chinese,Filipino": 0.28, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.34, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.36, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.34, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.3466666666666667, + "English,Malay,Indonesian,Spanish,Chinese": 0.29333333333333333, + "English,Malay,Indonesian,Spanish,Filipino": 0.32666666666666666, + "English,Malay,Indonesian,Chinese,Filipino": 0.2733333333333333, + "English,Malay,Spanish,Chinese,Filipino": 0.2866666666666667, + "English,Indonesian,Spanish,Chinese,Filipino": 0.3333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.2866666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.30666666666666664, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.28, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.28, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.32, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.2733333333333333 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.26666666666666666, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.2866666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.25333333333333335, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.26666666666666666, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.30666666666666664, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.26, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.25333333333333335 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.24 + } + }, + "AC3_2": 0.5429195125830828, + "AC3_3": 0.4693883605948285, + "AC3_4": 0.4185402462384186, + "AC3_5": 0.38051056630943786, + "AC3_6": 0.35010706886885623, + "AC3_7": 0.32351875804143665 + }, + "prompt_4": { + "overall_acc": 0.5219047619047619, + "language_acc": { + "English": 0.66, + "Vietnamese": 0.47333333333333333, + "Malay": 0.42, + "Indonesian": 0.5, + "Spanish": 0.6066666666666667, + "Chinese": 0.47333333333333333, + "Filipino": 0.52 + }, + "consistency_score_2": 0.6253968253968254, + "consistency_score_3": 0.47828571428571426, + "consistency_score_4": 0.3906666666666666, + "consistency_score_5": 0.3292063492063492, + "consistency_score_6": 0.28285714285714286, + "consistency_score_7": 0.24666666666666667, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.6066666666666667, + "English,Malay": 0.6, + "English,Indonesian": 0.68, + "English,Spanish": 0.7533333333333333, + "English,Chinese": 0.5933333333333334, + "English,Filipino": 0.6266666666666667, + "Vietnamese,Malay": 0.6266666666666667, + "Vietnamese,Indonesian": 0.6533333333333333, + "Vietnamese,Spanish": 0.6333333333333333, + "Vietnamese,Chinese": 0.6133333333333333, + "Vietnamese,Filipino": 0.5866666666666667, + "Malay,Indonesian": 0.7066666666666667, + "Malay,Spanish": 0.6133333333333333, + "Malay,Chinese": 0.54, + "Malay,Filipino": 0.5666666666666667, + "Indonesian,Spanish": 0.66, + "Indonesian,Chinese": 0.58, + "Indonesian,Filipino": 0.6266666666666667, + "Spanish,Chinese": 0.66, + "Spanish,Filipino": 0.6133333333333333, + "Chinese,Filipino": 0.5933333333333334 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.4533333333333333, + "English,Vietnamese,Indonesian": 0.5133333333333333, + "English,Vietnamese,Spanish": 0.5333333333333333, + "English,Vietnamese,Chinese": 0.47333333333333333, + "English,Vietnamese,Filipino": 0.4533333333333333, + "English,Malay,Indonesian": 0.52, + "English,Malay,Spanish": 0.5133333333333333, + "English,Malay,Chinese": 0.41333333333333333, + "English,Malay,Filipino": 0.44, + "English,Indonesian,Spanish": 0.5666666666666667, + "English,Indonesian,Chinese": 0.46, + "English,Indonesian,Filipino": 0.5133333333333333, + "English,Spanish,Chinese": 0.52, + "English,Spanish,Filipino": 0.5133333333333333, + "English,Chinese,Filipino": 0.44666666666666666, + "Vietnamese,Malay,Indonesian": 0.5333333333333333, + "Vietnamese,Malay,Spanish": 0.4866666666666667, + "Vietnamese,Malay,Chinese": 0.4666666666666667, + "Vietnamese,Malay,Filipino": 0.43333333333333335, + "Vietnamese,Indonesian,Spanish": 0.5133333333333333, + "Vietnamese,Indonesian,Chinese": 0.47333333333333333, + "Vietnamese,Indonesian,Filipino": 0.47333333333333333, + "Vietnamese,Spanish,Chinese": 0.5, + "Vietnamese,Spanish,Filipino": 0.4533333333333333, + "Vietnamese,Chinese,Filipino": 0.44, + "Malay,Indonesian,Spanish": 0.5133333333333333, + "Malay,Indonesian,Chinese": 0.4533333333333333, + "Malay,Indonesian,Filipino": 0.48, + "Malay,Spanish,Chinese": 0.44666666666666666, + "Malay,Spanish,Filipino": 0.44, + "Malay,Chinese,Filipino": 0.3933333333333333, + "Indonesian,Spanish,Chinese": 0.4866666666666667, + "Indonesian,Spanish,Filipino": 0.49333333333333335, + "Indonesian,Chinese,Filipino": 0.44666666666666666, + "Spanish,Chinese,Filipino": 0.48 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.41333333333333333, + "English,Vietnamese,Malay,Spanish": 0.42, + "English,Vietnamese,Malay,Chinese": 0.36666666666666664, + "English,Vietnamese,Malay,Filipino": 0.34, + "English,Vietnamese,Indonesian,Spanish": 0.4533333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.4, + "English,Vietnamese,Indonesian,Filipino": 0.4, + "English,Vietnamese,Spanish,Chinese": 0.44, + "English,Vietnamese,Spanish,Filipino": 0.4, + "English,Vietnamese,Chinese,Filipino": 0.36, + "English,Malay,Indonesian,Spanish": 0.44666666666666666, + "English,Malay,Indonesian,Chinese": 0.36666666666666664, + "English,Malay,Indonesian,Filipino": 0.38666666666666666, + "English,Malay,Spanish,Chinese": 0.38, + "English,Malay,Spanish,Filipino": 0.38666666666666666, + "English,Malay,Chinese,Filipino": 0.32, + "English,Indonesian,Spanish,Chinese": 0.4066666666666667, + "English,Indonesian,Spanish,Filipino": 0.43333333333333335, + "English,Indonesian,Chinese,Filipino": 0.37333333333333335, + "English,Spanish,Chinese,Filipino": 0.4, + "Vietnamese,Malay,Indonesian,Spanish": 0.44, + "Vietnamese,Malay,Indonesian,Chinese": 0.4, + "Vietnamese,Malay,Indonesian,Filipino": 0.38666666666666666, + "Vietnamese,Malay,Spanish,Chinese": 0.4066666666666667, + "Vietnamese,Malay,Spanish,Filipino": 0.36666666666666664, + "Vietnamese,Malay,Chinese,Filipino": 0.3466666666666667, + "Vietnamese,Indonesian,Spanish,Chinese": 0.41333333333333333, + "Vietnamese,Indonesian,Spanish,Filipino": 0.3933333333333333, + "Vietnamese,Indonesian,Chinese,Filipino": 0.37333333333333335, + "Vietnamese,Spanish,Chinese,Filipino": 0.38, + "Malay,Indonesian,Spanish,Chinese": 0.38666666666666666, + "Malay,Indonesian,Spanish,Filipino": 0.3933333333333333, + "Malay,Indonesian,Chinese,Filipino": 0.3466666666666667, + "Malay,Spanish,Chinese,Filipino": 0.35333333333333333, + "Indonesian,Spanish,Chinese,Filipino": 0.3933333333333333 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.38, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.32666666666666666, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.31333333333333335, + "English,Vietnamese,Malay,Spanish,Chinese": 0.35333333333333333, + "English,Vietnamese,Malay,Spanish,Filipino": 0.32, + "English,Vietnamese,Malay,Chinese,Filipino": 0.28, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.36666666666666664, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.35333333333333333, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.32, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.3333333333333333, + "English,Malay,Indonesian,Spanish,Chinese": 0.3333333333333333, + "English,Malay,Indonesian,Spanish,Filipino": 0.3466666666666667, + "English,Malay,Indonesian,Chinese,Filipino": 0.2866666666666667, + "English,Malay,Spanish,Chinese,Filipino": 0.3, + "English,Indonesian,Spanish,Chinese,Filipino": 0.3333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.36, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.3333333333333333, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.30666666666666664, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.32, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.3333333333333333, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.31333333333333335 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.31333333333333335, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.29333333333333333, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.25333333333333335, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.2733333333333333, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.29333333333333333, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.26666666666666666, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.2866666666666667 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.24666666666666667 + } + }, + "AC3_2": 0.5689830552643973, + "AC3_3": 0.4991441085552587, + "AC3_4": 0.4468489528488285, + "AC3_5": 0.40374131893446386, + "AC3_6": 0.3668774302164573, + "AC3_7": 0.33500206521870063 + }, + "prompt_5": { + "overall_acc": 0.5228571428571429, + "language_acc": { + "English": 0.64, + "Vietnamese": 0.4666666666666667, + "Malay": 0.44, + "Indonesian": 0.5133333333333333, + "Spanish": 0.5933333333333334, + "Chinese": 0.5, + "Filipino": 0.5066666666666667 + }, + "consistency_score_2": 0.6333333333333333, + "consistency_score_3": 0.4855238095238096, + "consistency_score_4": 0.40114285714285713, + "consistency_score_5": 0.3453968253968255, + "consistency_score_6": 0.30666666666666664, + "consistency_score_7": 0.28, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.6133333333333333, + "English,Malay": 0.6266666666666667, + "English,Indonesian": 0.66, + "English,Spanish": 0.7666666666666667, + "English,Chinese": 0.6066666666666667, + "English,Filipino": 0.66, + "Vietnamese,Malay": 0.6, + "Vietnamese,Indonesian": 0.6266666666666667, + "Vietnamese,Spanish": 0.6466666666666666, + "Vietnamese,Chinese": 0.6, + "Vietnamese,Filipino": 0.6066666666666667, + "Malay,Indonesian": 0.7066666666666667, + "Malay,Spanish": 0.6266666666666667, + "Malay,Chinese": 0.5533333333333333, + "Malay,Filipino": 0.6066666666666667, + "Indonesian,Spanish": 0.6666666666666666, + "Indonesian,Chinese": 0.58, + "Indonesian,Filipino": 0.6133333333333333, + "Spanish,Chinese": 0.7, + "Spanish,Filipino": 0.66, + "Chinese,Filipino": 0.5733333333333334 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.4533333333333333, + "English,Vietnamese,Indonesian": 0.48, + "English,Vietnamese,Spanish": 0.5333333333333333, + "English,Vietnamese,Chinese": 0.4666666666666667, + "English,Vietnamese,Filipino": 0.4666666666666667, + "English,Malay,Indonesian": 0.5333333333333333, + "English,Malay,Spanish": 0.5266666666666666, + "English,Malay,Chinese": 0.42, + "English,Malay,Filipino": 0.47333333333333333, + "English,Indonesian,Spanish": 0.5666666666666667, + "English,Indonesian,Chinese": 0.46, + "English,Indonesian,Filipino": 0.5, + "English,Spanish,Chinese": 0.5533333333333333, + "English,Spanish,Filipino": 0.5666666666666667, + "English,Chinese,Filipino": 0.4533333333333333, + "Vietnamese,Malay,Indonesian": 0.5, + "Vietnamese,Malay,Spanish": 0.4666666666666667, + "Vietnamese,Malay,Chinese": 0.44, + "Vietnamese,Malay,Filipino": 0.44666666666666666, + "Vietnamese,Indonesian,Spanish": 0.5, + "Vietnamese,Indonesian,Chinese": 0.46, + "Vietnamese,Indonesian,Filipino": 0.4666666666666667, + "Vietnamese,Spanish,Chinese": 0.5066666666666667, + "Vietnamese,Spanish,Filipino": 0.48, + "Vietnamese,Chinese,Filipino": 0.44666666666666666, + "Malay,Indonesian,Spanish": 0.5266666666666666, + "Malay,Indonesian,Chinese": 0.46, + "Malay,Indonesian,Filipino": 0.49333333333333335, + "Malay,Spanish,Chinese": 0.4666666666666667, + "Malay,Spanish,Filipino": 0.4866666666666667, + "Malay,Chinese,Filipino": 0.41333333333333333, + "Indonesian,Spanish,Chinese": 0.5133333333333333, + "Indonesian,Spanish,Filipino": 0.5066666666666667, + "Indonesian,Chinese,Filipino": 0.44666666666666666, + "Spanish,Chinese,Filipino": 0.5133333333333333 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.41333333333333333, + "English,Vietnamese,Malay,Spanish": 0.41333333333333333, + "English,Vietnamese,Malay,Chinese": 0.36, + "English,Vietnamese,Malay,Filipino": 0.36666666666666664, + "English,Vietnamese,Indonesian,Spanish": 0.44, + "English,Vietnamese,Indonesian,Chinese": 0.38, + "English,Vietnamese,Indonesian,Filipino": 0.38666666666666666, + "English,Vietnamese,Spanish,Chinese": 0.44666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.42, + "English,Vietnamese,Chinese,Filipino": 0.38, + "English,Malay,Indonesian,Spanish": 0.46, + "English,Malay,Indonesian,Chinese": 0.36666666666666664, + "English,Malay,Indonesian,Filipino": 0.41333333333333333, + "English,Malay,Spanish,Chinese": 0.4, + "English,Malay,Spanish,Filipino": 0.43333333333333335, + "English,Malay,Chinese,Filipino": 0.34, + "English,Indonesian,Spanish,Chinese": 0.44, + "English,Indonesian,Spanish,Filipino": 0.44666666666666666, + "English,Indonesian,Chinese,Filipino": 0.37333333333333335, + "English,Spanish,Chinese,Filipino": 0.43333333333333335, + "Vietnamese,Malay,Indonesian,Spanish": 0.4266666666666667, + "Vietnamese,Malay,Indonesian,Chinese": 0.38666666666666666, + "Vietnamese,Malay,Indonesian,Filipino": 0.3933333333333333, + "Vietnamese,Malay,Spanish,Chinese": 0.3933333333333333, + "Vietnamese,Malay,Spanish,Filipino": 0.36666666666666664, + "Vietnamese,Malay,Chinese,Filipino": 0.3466666666666667, + "Vietnamese,Indonesian,Spanish,Chinese": 0.41333333333333333, + "Vietnamese,Indonesian,Spanish,Filipino": 0.3933333333333333, + "Vietnamese,Indonesian,Chinese,Filipino": 0.37333333333333335, + "Vietnamese,Spanish,Chinese,Filipino": 0.41333333333333333, + "Malay,Indonesian,Spanish,Chinese": 0.41333333333333333, + "Malay,Indonesian,Spanish,Filipino": 0.4266666666666667, + "Malay,Indonesian,Chinese,Filipino": 0.36666666666666664, + "Malay,Spanish,Chinese,Filipino": 0.3933333333333333, + "Indonesian,Spanish,Chinese,Filipino": 0.42 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.38666666666666666, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.32666666666666666, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.34, + "English,Vietnamese,Malay,Spanish,Chinese": 0.35333333333333333, + "English,Vietnamese,Malay,Spanish,Filipino": 0.34, + "English,Vietnamese,Malay,Chinese,Filipino": 0.3, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.36666666666666664, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.35333333333333333, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.32, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.36666666666666664, + "English,Malay,Indonesian,Spanish,Chinese": 0.36, + "English,Malay,Indonesian,Spanish,Filipino": 0.38, + "English,Malay,Indonesian,Chinese,Filipino": 0.30666666666666664, + "English,Malay,Spanish,Chinese,Filipino": 0.34, + "English,Indonesian,Spanish,Chinese,Filipino": 0.36, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.36, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.3466666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.32, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.32666666666666666, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.3466666666666667, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.35333333333333333 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.32666666666666666, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.32, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.28, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.3, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.30666666666666664, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.30666666666666664, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.30666666666666664 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.28 + } + }, + "AC3_2": 0.5728171333936196, + "AC3_3": 0.5034993793013557, + "AC3_4": 0.45398356740382284, + "AC3_5": 0.41599164267864924, + "AC3_6": 0.386590126245015, + "AC3_7": 0.36469750885137225 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.4025974025974026, + "language_acc": { + "Vietnamese": 0.39204545454545453, + "Indonesian": 0.42045454545454547, + "Malay": 0.36363636363636365, + "English": 0.48863636363636365, + "Spanish": 0.4147727272727273, + "Filipino": 0.3068181818181818, + "Chinese": 0.4318181818181818 + }, + "consistency_score_2": 0.49377705627705626, + "consistency_score_3": 0.30649350649350643, + "consistency_score_4": 0.21461038961038956, + "consistency_score_5": 0.1636904761904762, + "consistency_score_6": 0.13311688311688313, + "consistency_score_7": 0.11363636363636363, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.4943181818181818, + "Vietnamese,Malay": 0.4772727272727273, + "Vietnamese,English": 0.44886363636363635, + "Vietnamese,Spanish": 0.5, + "Vietnamese,Filipino": 0.3977272727272727, + "Vietnamese,Chinese": 0.4375, + "Indonesian,Malay": 0.6306818181818182, + "Indonesian,English": 0.5170454545454546, + "Indonesian,Spanish": 0.5454545454545454, + "Indonesian,Filipino": 0.4772727272727273, + "Indonesian,Chinese": 0.48295454545454547, + "Malay,English": 0.5056818181818182, + "Malay,Spanish": 0.5568181818181818, + "Malay,Filipino": 0.5056818181818182, + "Malay,Chinese": 0.48295454545454547, + "English,Spanish": 0.5625, + "English,Filipino": 0.48295454545454547, + "English,Chinese": 0.5227272727272727, + "Spanish,Filipino": 0.4090909090909091, + "Spanish,Chinese": 0.5170454545454546, + "Filipino,Chinese": 0.4147727272727273 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.35795454545454547, + "Vietnamese,Indonesian,English": 0.30113636363636365, + "Vietnamese,Indonesian,Spanish": 0.32386363636363635, + "Vietnamese,Indonesian,Filipino": 0.25, + "Vietnamese,Indonesian,Chinese": 0.2840909090909091, + "Vietnamese,Malay,English": 0.2897727272727273, + "Vietnamese,Malay,Spanish": 0.3352272727272727, + "Vietnamese,Malay,Filipino": 0.2556818181818182, + "Vietnamese,Malay,Chinese": 0.2784090909090909, + "Vietnamese,English,Spanish": 0.3181818181818182, + "Vietnamese,English,Filipino": 0.23295454545454544, + "Vietnamese,English,Chinese": 0.2784090909090909, + "Vietnamese,Spanish,Filipino": 0.23295454545454544, + "Vietnamese,Spanish,Chinese": 0.2897727272727273, + "Vietnamese,Filipino,Chinese": 0.2159090909090909, + "Indonesian,Malay,English": 0.375, + "Indonesian,Malay,Spanish": 0.4090909090909091, + "Indonesian,Malay,Filipino": 0.36363636363636365, + "Indonesian,Malay,Chinese": 0.36363636363636365, + "Indonesian,English,Spanish": 0.35795454545454547, + "Indonesian,English,Filipino": 0.3068181818181818, + "Indonesian,English,Chinese": 0.32386363636363635, + "Indonesian,Spanish,Filipino": 0.2784090909090909, + "Indonesian,Spanish,Chinese": 0.3465909090909091, + "Indonesian,Filipino,Chinese": 0.2727272727272727, + "Malay,English,Spanish": 0.35795454545454547, + "Malay,English,Filipino": 0.30113636363636365, + "Malay,English,Chinese": 0.32386363636363635, + "Malay,Spanish,Filipino": 0.30113636363636365, + "Malay,Spanish,Chinese": 0.3465909090909091, + "Malay,Filipino,Chinese": 0.2897727272727273, + "English,Spanish,Filipino": 0.2897727272727273, + "English,Spanish,Chinese": 0.35795454545454547, + "English,Filipino,Chinese": 0.2784090909090909, + "Spanish,Filipino,Chinese": 0.23863636363636365 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.23863636363636365, + "Vietnamese,Indonesian,Malay,Spanish": 0.2556818181818182, + "Vietnamese,Indonesian,Malay,Filipino": 0.2159090909090909, + "Vietnamese,Indonesian,Malay,Chinese": 0.23863636363636365, + "Vietnamese,Indonesian,English,Spanish": 0.23295454545454544, + "Vietnamese,Indonesian,English,Filipino": 0.17045454545454544, + "Vietnamese,Indonesian,English,Chinese": 0.20454545454545456, + "Vietnamese,Indonesian,Spanish,Filipino": 0.17613636363636365, + "Vietnamese,Indonesian,Spanish,Chinese": 0.2159090909090909, + "Vietnamese,Indonesian,Filipino,Chinese": 0.17045454545454544, + "Vietnamese,Malay,English,Spanish": 0.24431818181818182, + "Vietnamese,Malay,English,Filipino": 0.17045454545454544, + "Vietnamese,Malay,English,Chinese": 0.19886363636363635, + "Vietnamese,Malay,Spanish,Filipino": 0.18181818181818182, + "Vietnamese,Malay,Spanish,Chinese": 0.2215909090909091, + "Vietnamese,Malay,Filipino,Chinese": 0.16477272727272727, + "Vietnamese,English,Spanish,Filipino": 0.17613636363636365, + "Vietnamese,English,Spanish,Chinese": 0.2159090909090909, + "Vietnamese,English,Filipino,Chinese": 0.1590909090909091, + "Vietnamese,Spanish,Filipino,Chinese": 0.1534090909090909, + "Indonesian,Malay,English,Spanish": 0.2840909090909091, + "Indonesian,Malay,English,Filipino": 0.25, + "Indonesian,Malay,English,Chinese": 0.26704545454545453, + "Indonesian,Malay,Spanish,Filipino": 0.23863636363636365, + "Indonesian,Malay,Spanish,Chinese": 0.2784090909090909, + "Indonesian,Malay,Filipino,Chinese": 0.23863636363636365, + "Indonesian,English,Spanish,Filipino": 0.2159090909090909, + "Indonesian,English,Spanish,Chinese": 0.26136363636363635, + "Indonesian,English,Filipino,Chinese": 0.20454545454545456, + "Indonesian,Spanish,Filipino,Chinese": 0.19886363636363635, + "Malay,English,Spanish,Filipino": 0.2215909090909091, + "Malay,English,Spanish,Chinese": 0.24431818181818182, + "Malay,English,Filipino,Chinese": 0.21022727272727273, + "Malay,Spanish,Filipino,Chinese": 0.19886363636363635, + "English,Spanish,Filipino,Chinese": 0.19318181818181818 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.19886363636363635, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.1590909090909091, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.18181818181818182, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.1534090909090909, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.1875, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.1590909090909091, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.14204545454545456, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.13636363636363635, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.13636363636363635, + "Vietnamese,Malay,English,Spanish,Filipino": 0.14772727272727273, + "Vietnamese,Malay,English,Spanish,Chinese": 0.17045454545454544, + "Vietnamese,Malay,English,Filipino,Chinese": 0.13068181818181818, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.13068181818181818, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.13068181818181818, + "Indonesian,Malay,English,Spanish,Filipino": 0.19318181818181818, + "Indonesian,Malay,English,Spanish,Chinese": 0.2159090909090909, + "Indonesian,Malay,English,Filipino,Chinese": 0.1875, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.18181818181818182, + "Indonesian,English,Spanish,Filipino,Chinese": 0.16477272727272727, + "Malay,English,Spanish,Filipino,Chinese": 0.1590909090909091 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.13636363636363635, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.1534090909090909, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.125, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.11931818181818182, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.11363636363636363, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.1534090909090909 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.11363636363636363 + } + }, + "AC3_2": 0.44354980963374085, + "AC3_3": 0.3480329194124093, + "AC3_4": 0.279975679193863, + "AC3_5": 0.2327486178006848, + "AC3_6": 0.20007870913227024, + "AC3_7": 0.17724413947395637 + }, + "prompt_2": { + "overall_acc": 0.4383116883116883, + "language_acc": { + "Vietnamese": 0.4034090909090909, + "Indonesian": 0.4715909090909091, + "Malay": 0.4090909090909091, + "English": 0.5170454545454546, + "Spanish": 0.48863636363636365, + "Filipino": 0.32386363636363635, + "Chinese": 0.45454545454545453 + }, + "consistency_score_2": 0.5373376623376623, + "consistency_score_3": 0.3564935064935064, + "consistency_score_4": 0.25762987012987015, + "consistency_score_5": 0.19453463203463203, + "consistency_score_6": 0.15097402597402598, + "consistency_score_7": 0.11931818181818182, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.5284090909090909, + "Vietnamese,Malay": 0.5056818181818182, + "Vietnamese,English": 0.5454545454545454, + "Vietnamese,Spanish": 0.5795454545454546, + "Vietnamese,Filipino": 0.5056818181818182, + "Vietnamese,Chinese": 0.4943181818181818, + "Indonesian,Malay": 0.6477272727272727, + "Indonesian,English": 0.5625, + "Indonesian,Spanish": 0.6306818181818182, + "Indonesian,Filipino": 0.5568181818181818, + "Indonesian,Chinese": 0.48295454545454547, + "Malay,English": 0.5454545454545454, + "Malay,Spanish": 0.5625, + "Malay,Filipino": 0.5454545454545454, + "Malay,Chinese": 0.5170454545454546, + "English,Spanish": 0.6875, + "English,Filipino": 0.4375, + "English,Chinese": 0.5284090909090909, + "Spanish,Filipino": 0.48295454545454547, + "Spanish,Chinese": 0.5340909090909091, + "Filipino,Chinese": 0.4034090909090909 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.38636363636363635, + "Vietnamese,Indonesian,English": 0.35795454545454547, + "Vietnamese,Indonesian,Spanish": 0.4034090909090909, + "Vietnamese,Indonesian,Filipino": 0.3409090909090909, + "Vietnamese,Indonesian,Chinese": 0.3125, + "Vietnamese,Malay,English": 0.35795454545454547, + "Vietnamese,Malay,Spanish": 0.3806818181818182, + "Vietnamese,Malay,Filipino": 0.32386363636363635, + "Vietnamese,Malay,Chinese": 0.3125, + "Vietnamese,English,Spanish": 0.4318181818181818, + "Vietnamese,English,Filipino": 0.30113636363636365, + "Vietnamese,English,Chinese": 0.3352272727272727, + "Vietnamese,Spanish,Filipino": 0.32954545454545453, + "Vietnamese,Spanish,Chinese": 0.35795454545454547, + "Vietnamese,Filipino,Chinese": 0.26704545454545453, + "Indonesian,Malay,English": 0.42613636363636365, + "Indonesian,Malay,Spanish": 0.45454545454545453, + "Indonesian,Malay,Filipino": 0.4090909090909091, + "Indonesian,Malay,Chinese": 0.38636363636363635, + "Indonesian,English,Spanish": 0.4715909090909091, + "Indonesian,English,Filipino": 0.3352272727272727, + "Indonesian,English,Chinese": 0.32954545454545453, + "Indonesian,Spanish,Filipino": 0.3693181818181818, + "Indonesian,Spanish,Chinese": 0.3522727272727273, + "Indonesian,Filipino,Chinese": 0.30113636363636365, + "Malay,English,Spanish": 0.4431818181818182, + "Malay,English,Filipino": 0.32386363636363635, + "Malay,English,Chinese": 0.35795454545454547, + "Malay,Spanish,Filipino": 0.3465909090909091, + "Malay,Spanish,Chinese": 0.36363636363636365, + "Malay,Filipino,Chinese": 0.3068181818181818, + "English,Spanish,Filipino": 0.3352272727272727, + "English,Spanish,Chinese": 0.42045454545454547, + "English,Filipino,Chinese": 0.26704545454545453, + "Spanish,Filipino,Chinese": 0.2784090909090909 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.29545454545454547, + "Vietnamese,Indonesian,Malay,Spanish": 0.3181818181818182, + "Vietnamese,Indonesian,Malay,Filipino": 0.26136363636363635, + "Vietnamese,Indonesian,Malay,Chinese": 0.2556818181818182, + "Vietnamese,Indonesian,English,Spanish": 0.3125, + "Vietnamese,Indonesian,English,Filipino": 0.23295454545454544, + "Vietnamese,Indonesian,English,Chinese": 0.22727272727272727, + "Vietnamese,Indonesian,Spanish,Filipino": 0.26704545454545453, + "Vietnamese,Indonesian,Spanish,Chinese": 0.2556818181818182, + "Vietnamese,Indonesian,Filipino,Chinese": 0.21022727272727273, + "Vietnamese,Malay,English,Spanish": 0.3125, + "Vietnamese,Malay,English,Filipino": 0.2215909090909091, + "Vietnamese,Malay,English,Chinese": 0.23863636363636365, + "Vietnamese,Malay,Spanish,Filipino": 0.23863636363636365, + "Vietnamese,Malay,Spanish,Chinese": 0.2556818181818182, + "Vietnamese,Malay,Filipino,Chinese": 0.21022727272727273, + "Vietnamese,English,Spanish,Filipino": 0.23863636363636365, + "Vietnamese,English,Spanish,Chinese": 0.29545454545454547, + "Vietnamese,English,Filipino,Chinese": 0.18181818181818182, + "Vietnamese,Spanish,Filipino,Chinese": 0.19886363636363635, + "Indonesian,Malay,English,Spanish": 0.3693181818181818, + "Indonesian,Malay,English,Filipino": 0.2840909090909091, + "Indonesian,Malay,English,Chinese": 0.2784090909090909, + "Indonesian,Malay,Spanish,Filipino": 0.2897727272727273, + "Indonesian,Malay,Spanish,Chinese": 0.29545454545454547, + "Indonesian,Malay,Filipino,Chinese": 0.2556818181818182, + "Indonesian,English,Spanish,Filipino": 0.26136363636363635, + "Indonesian,English,Spanish,Chinese": 0.2897727272727273, + "Indonesian,English,Filipino,Chinese": 0.20454545454545456, + "Indonesian,Spanish,Filipino,Chinese": 0.2215909090909091, + "Malay,English,Spanish,Filipino": 0.26136363636363635, + "Malay,English,Spanish,Chinese": 0.3125, + "Malay,English,Filipino,Chinese": 0.2159090909090909, + "Malay,Spanish,Filipino,Chinese": 0.22727272727272727, + "English,Spanish,Filipino,Chinese": 0.2215909090909091 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.26704545454545453, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.19886363636363635, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.19886363636363635, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.21022727272727273, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.2215909090909091, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.18181818181818182, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.19318181818181818, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.21022727272727273, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.17045454545454544, + "Vietnamese,Malay,English,Spanish,Filipino": 0.1875, + "Vietnamese,Malay,English,Spanish,Chinese": 0.2215909090909091, + "Vietnamese,Malay,English,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.16477272727272727, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.1590909090909091, + "Indonesian,Malay,English,Spanish,Filipino": 0.22727272727272727, + "Indonesian,Malay,English,Spanish,Chinese": 0.25, + "Indonesian,Malay,English,Filipino,Chinese": 0.18181818181818182, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.19318181818181818, + "Indonesian,English,Spanish,Filipino,Chinese": 0.17045454545454544, + "Malay,English,Spanish,Filipino,Chinese": 0.1875 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.17045454545454544, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.1875, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.13068181818181818, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.13636363636363635, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.1534090909090909 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.11931818181818182 + } + }, + "AC3_2": 0.4827992306698789, + "AC3_3": 0.39319136740660315, + "AC3_4": 0.3245162814325628, + "AC3_5": 0.2694708027758065, + "AC3_6": 0.22458946008853808, + "AC3_7": 0.18757443426568735 + }, + "prompt_3": { + "overall_acc": 0.42775974025974023, + "language_acc": { + "Vietnamese": 0.3977272727272727, + "Indonesian": 0.4318181818181818, + "Malay": 0.39204545454545453, + "English": 0.5056818181818182, + "Spanish": 0.48295454545454547, + "Filipino": 0.32386363636363635, + "Chinese": 0.4602272727272727 + }, + "consistency_score_2": 0.5073051948051948, + "consistency_score_3": 0.32094155844155847, + "consistency_score_4": 0.21964285714285714, + "consistency_score_5": 0.15584415584415584, + "consistency_score_6": 0.11363636363636366, + "consistency_score_7": 0.08522727272727272, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.5568181818181818, + "Vietnamese,Malay": 0.5056818181818182, + "Vietnamese,English": 0.4715909090909091, + "Vietnamese,Spanish": 0.5454545454545454, + "Vietnamese,Filipino": 0.4318181818181818, + "Vietnamese,Chinese": 0.5, + "Indonesian,Malay": 0.625, + "Indonesian,English": 0.5284090909090909, + "Indonesian,Spanish": 0.5965909090909091, + "Indonesian,Filipino": 0.5284090909090909, + "Indonesian,Chinese": 0.4375, + "Malay,English": 0.5227272727272727, + "Malay,Spanish": 0.5625, + "Malay,Filipino": 0.5227272727272727, + "Malay,Chinese": 0.4943181818181818, + "English,Spanish": 0.6193181818181818, + "English,Filipino": 0.4034090909090909, + "English,Chinese": 0.4772727272727273, + "Spanish,Filipino": 0.4715909090909091, + "Spanish,Chinese": 0.4943181818181818, + "Filipino,Chinese": 0.35795454545454547 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.39204545454545453, + "Vietnamese,Indonesian,English": 0.32386363636363635, + "Vietnamese,Indonesian,Spanish": 0.38636363636363635, + "Vietnamese,Indonesian,Filipino": 0.3068181818181818, + "Vietnamese,Indonesian,Chinese": 0.3181818181818182, + "Vietnamese,Malay,English": 0.3125, + "Vietnamese,Malay,Spanish": 0.35795454545454547, + "Vietnamese,Malay,Filipino": 0.2897727272727273, + "Vietnamese,Malay,Chinese": 0.3125, + "Vietnamese,English,Spanish": 0.36363636363636365, + "Vietnamese,English,Filipino": 0.23863636363636365, + "Vietnamese,English,Chinese": 0.30113636363636365, + "Vietnamese,Spanish,Filipino": 0.2784090909090909, + "Vietnamese,Spanish,Chinese": 0.3409090909090909, + "Vietnamese,Filipino,Chinese": 0.22727272727272727, + "Indonesian,Malay,English": 0.3806818181818182, + "Indonesian,Malay,Spanish": 0.42613636363636365, + "Indonesian,Malay,Filipino": 0.38636363636363635, + "Indonesian,Malay,Chinese": 0.3352272727272727, + "Indonesian,English,Spanish": 0.4090909090909091, + "Indonesian,English,Filipino": 0.29545454545454547, + "Indonesian,English,Chinese": 0.2784090909090909, + "Indonesian,Spanish,Filipino": 0.3522727272727273, + "Indonesian,Spanish,Chinese": 0.3068181818181818, + "Indonesian,Filipino,Chinese": 0.25, + "Malay,English,Spanish": 0.4147727272727273, + "Malay,English,Filipino": 0.2897727272727273, + "Malay,English,Chinese": 0.30113636363636365, + "Malay,Spanish,Filipino": 0.3409090909090909, + "Malay,Spanish,Chinese": 0.32954545454545453, + "Malay,Filipino,Chinese": 0.2727272727272727, + "English,Spanish,Filipino": 0.30113636363636365, + "English,Spanish,Chinese": 0.35795454545454547, + "English,Filipino,Chinese": 0.2159090909090909, + "Spanish,Filipino,Chinese": 0.23863636363636365 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.24431818181818182, + "Vietnamese,Indonesian,Malay,Spanish": 0.2840909090909091, + "Vietnamese,Indonesian,Malay,Filipino": 0.23295454545454544, + "Vietnamese,Indonesian,Malay,Chinese": 0.23863636363636365, + "Vietnamese,Indonesian,English,Spanish": 0.2556818181818182, + "Vietnamese,Indonesian,English,Filipino": 0.18181818181818182, + "Vietnamese,Indonesian,English,Chinese": 0.21022727272727273, + "Vietnamese,Indonesian,Spanish,Filipino": 0.23295454545454544, + "Vietnamese,Indonesian,Spanish,Chinese": 0.23863636363636365, + "Vietnamese,Indonesian,Filipino,Chinese": 0.18181818181818182, + "Vietnamese,Malay,English,Spanish": 0.2556818181818182, + "Vietnamese,Malay,English,Filipino": 0.18181818181818182, + "Vietnamese,Malay,English,Chinese": 0.19886363636363635, + "Vietnamese,Malay,Spanish,Filipino": 0.2159090909090909, + "Vietnamese,Malay,Spanish,Chinese": 0.23863636363636365, + "Vietnamese,Malay,Filipino,Chinese": 0.18181818181818182, + "Vietnamese,English,Spanish,Filipino": 0.1875, + "Vietnamese,English,Spanish,Chinese": 0.25, + "Vietnamese,English,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Spanish,Filipino,Chinese": 0.16477272727272727, + "Indonesian,Malay,English,Spanish": 0.3181818181818182, + "Indonesian,Malay,English,Filipino": 0.22727272727272727, + "Indonesian,Malay,English,Chinese": 0.2215909090909091, + "Indonesian,Malay,Spanish,Filipino": 0.2784090909090909, + "Indonesian,Malay,Spanish,Chinese": 0.25, + "Indonesian,Malay,Filipino,Chinese": 0.20454545454545456, + "Indonesian,English,Spanish,Filipino": 0.23295454545454544, + "Indonesian,English,Spanish,Chinese": 0.23863636363636365, + "Indonesian,English,Filipino,Chinese": 0.1590909090909091, + "Indonesian,Spanish,Filipino,Chinese": 0.18181818181818182, + "Malay,English,Spanish,Filipino": 0.23863636363636365, + "Malay,English,Spanish,Chinese": 0.26136363636363635, + "Malay,English,Filipino,Chinese": 0.17045454545454544, + "Malay,Spanish,Filipino,Chinese": 0.19318181818181818, + "English,Spanish,Filipino,Chinese": 0.1875 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.19886363636363635, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.1590909090909091, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.1875, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.19318181818181818, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.14772727272727273, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.17613636363636365, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.11363636363636363, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.13636363636363635, + "Vietnamese,Malay,English,Spanish,Filipino": 0.1534090909090909, + "Vietnamese,Malay,English,Spanish,Chinese": 0.17613636363636365, + "Vietnamese,Malay,English,Filipino,Chinese": 0.11363636363636363, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.13636363636363635, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.125, + "Indonesian,Malay,English,Spanish,Filipino": 0.19318181818181818, + "Indonesian,Malay,English,Spanish,Chinese": 0.19886363636363635, + "Indonesian,Malay,English,Filipino,Chinese": 0.13068181818181818, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.1534090909090909, + "Indonesian,English,Spanish,Filipino,Chinese": 0.13636363636363635, + "Malay,English,Spanish,Filipino,Chinese": 0.1534090909090909 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.125, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.09090909090909091, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.11931818181818182, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.09659090909090909, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.10227272727272728, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.11931818181818182 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.08522727272727272 + } + }, + "AC3_2": 0.46414902366969807, + "AC3_3": 0.36673070528211343, + "AC3_4": 0.29025021488567276, + "AC3_5": 0.22845582786035393, + "AC3_6": 0.17956930622279513, + "AC3_7": 0.1421353567041683 + }, + "prompt_4": { + "overall_acc": 0.4383116883116883, + "language_acc": { + "Vietnamese": 0.4375, + "Indonesian": 0.4318181818181818, + "Malay": 0.42613636363636365, + "English": 0.4772727272727273, + "Spanish": 0.48863636363636365, + "Filipino": 0.32954545454545453, + "Chinese": 0.4772727272727273 + }, + "consistency_score_2": 0.5238095238095238, + "consistency_score_3": 0.3392857142857143, + "consistency_score_4": 0.24204545454545448, + "consistency_score_5": 0.18317099567099565, + "consistency_score_6": 0.1452922077922078, + "consistency_score_7": 0.11931818181818182, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.5284090909090909, + "Vietnamese,Malay": 0.4772727272727273, + "Vietnamese,English": 0.45454545454545453, + "Vietnamese,Spanish": 0.5284090909090909, + "Vietnamese,Filipino": 0.5056818181818182, + "Vietnamese,Chinese": 0.44886363636363635, + "Indonesian,Malay": 0.6363636363636364, + "Indonesian,English": 0.4659090909090909, + "Indonesian,Spanish": 0.5795454545454546, + "Indonesian,Filipino": 0.5056818181818182, + "Indonesian,Chinese": 0.5397727272727273, + "Malay,English": 0.5340909090909091, + "Malay,Spanish": 0.6193181818181818, + "Malay,Filipino": 0.5113636363636364, + "Malay,Chinese": 0.5056818181818182, + "English,Spanish": 0.6647727272727273, + "English,Filipino": 0.4772727272727273, + "English,Chinese": 0.5568181818181818, + "Spanish,Filipino": 0.45454545454545453, + "Spanish,Chinese": 0.5625, + "Filipino,Chinese": 0.4431818181818182 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.3693181818181818, + "Vietnamese,Indonesian,English": 0.26704545454545453, + "Vietnamese,Indonesian,Spanish": 0.35795454545454547, + "Vietnamese,Indonesian,Filipino": 0.32386363636363635, + "Vietnamese,Indonesian,Chinese": 0.3068181818181818, + "Vietnamese,Malay,English": 0.2840909090909091, + "Vietnamese,Malay,Spanish": 0.35795454545454547, + "Vietnamese,Malay,Filipino": 0.30113636363636365, + "Vietnamese,Malay,Chinese": 0.2784090909090909, + "Vietnamese,English,Spanish": 0.3693181818181818, + "Vietnamese,English,Filipino": 0.2784090909090909, + "Vietnamese,English,Chinese": 0.2897727272727273, + "Vietnamese,Spanish,Filipino": 0.30113636363636365, + "Vietnamese,Spanish,Chinese": 0.3352272727272727, + "Vietnamese,Filipino,Chinese": 0.2556818181818182, + "Indonesian,Malay,English": 0.35795454545454547, + "Indonesian,Malay,Spanish": 0.4431818181818182, + "Indonesian,Malay,Filipino": 0.3806818181818182, + "Indonesian,Malay,Chinese": 0.3977272727272727, + "Indonesian,English,Spanish": 0.3806818181818182, + "Indonesian,English,Filipino": 0.3125, + "Indonesian,English,Chinese": 0.3465909090909091, + "Indonesian,Spanish,Filipino": 0.3181818181818182, + "Indonesian,Spanish,Chinese": 0.39204545454545453, + "Indonesian,Filipino,Chinese": 0.3125, + "Malay,English,Spanish": 0.4431818181818182, + "Malay,English,Filipino": 0.32386363636363635, + "Malay,English,Chinese": 0.3522727272727273, + "Malay,Spanish,Filipino": 0.3409090909090909, + "Malay,Spanish,Chinese": 0.3977272727272727, + "Malay,Filipino,Chinese": 0.3068181818181818, + "English,Spanish,Filipino": 0.35795454545454547, + "English,Spanish,Chinese": 0.42613636363636365, + "English,Filipino,Chinese": 0.3068181818181818, + "Spanish,Filipino,Chinese": 0.30113636363636365 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.20454545454545456, + "Vietnamese,Indonesian,Malay,Spanish": 0.2840909090909091, + "Vietnamese,Indonesian,Malay,Filipino": 0.25, + "Vietnamese,Indonesian,Malay,Chinese": 0.24431818181818182, + "Vietnamese,Indonesian,English,Spanish": 0.22727272727272727, + "Vietnamese,Indonesian,English,Filipino": 0.18181818181818182, + "Vietnamese,Indonesian,English,Chinese": 0.20454545454545456, + "Vietnamese,Indonesian,Spanish,Filipino": 0.2159090909090909, + "Vietnamese,Indonesian,Spanish,Chinese": 0.2556818181818182, + "Vietnamese,Indonesian,Filipino,Chinese": 0.20454545454545456, + "Vietnamese,Malay,English,Spanish": 0.2556818181818182, + "Vietnamese,Malay,English,Filipino": 0.1875, + "Vietnamese,Malay,English,Chinese": 0.1875, + "Vietnamese,Malay,Spanish,Filipino": 0.22727272727272727, + "Vietnamese,Malay,Spanish,Chinese": 0.25, + "Vietnamese,Malay,Filipino,Chinese": 0.1875, + "Vietnamese,English,Spanish,Filipino": 0.23295454545454544, + "Vietnamese,English,Spanish,Chinese": 0.25, + "Vietnamese,English,Filipino,Chinese": 0.18181818181818182, + "Vietnamese,Spanish,Filipino,Chinese": 0.21022727272727273, + "Indonesian,Malay,English,Spanish": 0.30113636363636365, + "Indonesian,Malay,English,Filipino": 0.26136363636363635, + "Indonesian,Malay,English,Chinese": 0.2784090909090909, + "Indonesian,Malay,Spanish,Filipino": 0.2784090909090909, + "Indonesian,Malay,Spanish,Chinese": 0.32954545454545453, + "Indonesian,Malay,Filipino,Chinese": 0.2727272727272727, + "Indonesian,English,Spanish,Filipino": 0.23863636363636365, + "Indonesian,English,Spanish,Chinese": 0.29545454545454547, + "Indonesian,English,Filipino,Chinese": 0.23863636363636365, + "Indonesian,Spanish,Filipino,Chinese": 0.23863636363636365, + "Malay,English,Spanish,Filipino": 0.26704545454545453, + "Malay,English,Spanish,Chinese": 0.3068181818181818, + "Malay,English,Filipino,Chinese": 0.23863636363636365, + "Malay,Spanish,Filipino,Chinese": 0.23863636363636365, + "English,Spanish,Filipino,Chinese": 0.24431818181818182 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.18181818181818182, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.1534090909090909, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.1590909090909091, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.19318181818181818, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.2215909090909091, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.17613636363636365, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.1534090909090909, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.1875, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.17045454545454544, + "Vietnamese,Malay,English,Spanish,Filipino": 0.17045454545454544, + "Vietnamese,Malay,English,Spanish,Chinese": 0.18181818181818182, + "Vietnamese,Malay,English,Filipino,Chinese": 0.13636363636363635, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.17045454545454544, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.16477272727272727, + "Indonesian,Malay,English,Spanish,Filipino": 0.21022727272727273, + "Indonesian,Malay,English,Spanish,Chinese": 0.24431818181818182, + "Indonesian,Malay,English,Filipino,Chinese": 0.2159090909090909, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.2215909090909091, + "Indonesian,English,Spanish,Filipino,Chinese": 0.19318181818181818, + "Malay,English,Spanish,Filipino,Chinese": 0.19886363636363635 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.13636363636363635, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.1534090909090909, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.125, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.1590909090909091, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.13068181818181818, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.13068181818181818, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.18181818181818182 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.11931818181818182 + } + }, + "AC3_2": 0.47726177080046717, + "AC3_3": 0.38249328954222156, + "AC3_4": 0.31186959063549097, + "AC3_5": 0.2583691884538671, + "AC3_6": 0.2182414247408861, + "AC3_7": 0.18757443426568735 + }, + "prompt_5": { + "overall_acc": 0.4342532467532467, + "language_acc": { + "Vietnamese": 0.4375, + "Indonesian": 0.3977272727272727, + "Malay": 0.4147727272727273, + "English": 0.5284090909090909, + "Spanish": 0.45454545454545453, + "Filipino": 0.375, + "Chinese": 0.4318181818181818 + }, + "consistency_score_2": 0.5127164502164502, + "consistency_score_3": 0.3230519480519481, + "consistency_score_4": 0.226948051948052, + "consistency_score_5": 0.1718073593073593, + "consistency_score_6": 0.13879870129870128, + "consistency_score_7": 0.11931818181818182, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.5625, + "Vietnamese,Malay": 0.5284090909090909, + "Vietnamese,English": 0.48295454545454547, + "Vietnamese,Spanish": 0.5056818181818182, + "Vietnamese,Filipino": 0.4943181818181818, + "Vietnamese,Chinese": 0.4659090909090909, + "Indonesian,Malay": 0.6477272727272727, + "Indonesian,English": 0.48295454545454547, + "Indonesian,Spanish": 0.5170454545454546, + "Indonesian,Filipino": 0.4943181818181818, + "Indonesian,Chinese": 0.4943181818181818, + "Malay,English": 0.48863636363636365, + "Malay,Spanish": 0.5568181818181818, + "Malay,Filipino": 0.5056818181818182, + "Malay,Chinese": 0.48295454545454547, + "English,Spanish": 0.6647727272727273, + "English,Filipino": 0.45454545454545453, + "English,Chinese": 0.5397727272727273, + "Spanish,Filipino": 0.4375, + "Spanish,Chinese": 0.5397727272727273, + "Filipino,Chinese": 0.42045454545454547 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.42045454545454547, + "Vietnamese,Indonesian,English": 0.3409090909090909, + "Vietnamese,Indonesian,Spanish": 0.3409090909090909, + "Vietnamese,Indonesian,Filipino": 0.32386363636363635, + "Vietnamese,Indonesian,Chinese": 0.32954545454545453, + "Vietnamese,Malay,English": 0.29545454545454547, + "Vietnamese,Malay,Spanish": 0.3409090909090909, + "Vietnamese,Malay,Filipino": 0.3068181818181818, + "Vietnamese,Malay,Chinese": 0.2784090909090909, + "Vietnamese,English,Spanish": 0.3806818181818182, + "Vietnamese,English,Filipino": 0.2897727272727273, + "Vietnamese,English,Chinese": 0.30113636363636365, + "Vietnamese,Spanish,Filipino": 0.2784090909090909, + "Vietnamese,Spanish,Chinese": 0.3181818181818182, + "Vietnamese,Filipino,Chinese": 0.2556818181818182, + "Indonesian,Malay,English": 0.3465909090909091, + "Indonesian,Malay,Spanish": 0.38636363636363635, + "Indonesian,Malay,Filipino": 0.35795454545454547, + "Indonesian,Malay,Chinese": 0.36363636363636365, + "Indonesian,English,Spanish": 0.375, + "Indonesian,English,Filipino": 0.30113636363636365, + "Indonesian,English,Chinese": 0.3068181818181818, + "Indonesian,Spanish,Filipino": 0.2784090909090909, + "Indonesian,Spanish,Chinese": 0.32954545454545453, + "Indonesian,Filipino,Chinese": 0.2727272727272727, + "Malay,English,Spanish": 0.3977272727272727, + "Malay,English,Filipino": 0.2784090909090909, + "Malay,English,Chinese": 0.3068181818181818, + "Malay,Spanish,Filipino": 0.30113636363636365, + "Malay,Spanish,Chinese": 0.3465909090909091, + "Malay,Filipino,Chinese": 0.26704545454545453, + "English,Spanish,Filipino": 0.3352272727272727, + "English,Spanish,Chinese": 0.4090909090909091, + "English,Filipino,Chinese": 0.2727272727272727, + "Spanish,Filipino,Chinese": 0.2727272727272727 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.25, + "Vietnamese,Indonesian,Malay,Spanish": 0.2727272727272727, + "Vietnamese,Indonesian,Malay,Filipino": 0.26136363636363635, + "Vietnamese,Indonesian,Malay,Chinese": 0.25, + "Vietnamese,Indonesian,English,Spanish": 0.2784090909090909, + "Vietnamese,Indonesian,English,Filipino": 0.22727272727272727, + "Vietnamese,Indonesian,English,Chinese": 0.22727272727272727, + "Vietnamese,Indonesian,Spanish,Filipino": 0.2159090909090909, + "Vietnamese,Indonesian,Spanish,Chinese": 0.24431818181818182, + "Vietnamese,Indonesian,Filipino,Chinese": 0.20454545454545456, + "Vietnamese,Malay,English,Spanish": 0.2556818181818182, + "Vietnamese,Malay,English,Filipino": 0.19318181818181818, + "Vietnamese,Malay,English,Chinese": 0.1875, + "Vietnamese,Malay,Spanish,Filipino": 0.21022727272727273, + "Vietnamese,Malay,Spanish,Chinese": 0.2159090909090909, + "Vietnamese,Malay,Filipino,Chinese": 0.18181818181818182, + "Vietnamese,English,Spanish,Filipino": 0.22727272727272727, + "Vietnamese,English,Spanish,Chinese": 0.26136363636363635, + "Vietnamese,English,Filipino,Chinese": 0.1875, + "Vietnamese,Spanish,Filipino,Chinese": 0.1875, + "Indonesian,Malay,English,Spanish": 0.2897727272727273, + "Indonesian,Malay,English,Filipino": 0.2159090909090909, + "Indonesian,Malay,English,Chinese": 0.23295454545454544, + "Indonesian,Malay,Spanish,Filipino": 0.22727272727272727, + "Indonesian,Malay,Spanish,Chinese": 0.25, + "Indonesian,Malay,Filipino,Chinese": 0.2159090909090909, + "Indonesian,English,Spanish,Filipino": 0.2215909090909091, + "Indonesian,English,Spanish,Chinese": 0.2556818181818182, + "Indonesian,English,Filipino,Chinese": 0.19886363636363635, + "Indonesian,Spanish,Filipino,Chinese": 0.19318181818181818, + "Malay,English,Spanish,Filipino": 0.23863636363636365, + "Malay,English,Spanish,Chinese": 0.2556818181818182, + "Malay,English,Filipino,Chinese": 0.19318181818181818, + "Malay,Spanish,Filipino,Chinese": 0.19318181818181818, + "English,Spanish,Filipino,Chinese": 0.2215909090909091 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.2159090909090909, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.17613636363636365, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.18181818181818182, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.1875, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.18181818181818182, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.20454545454545456, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.1590909090909091, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.1590909090909091, + "Vietnamese,Malay,English,Spanish,Filipino": 0.17045454545454544, + "Vietnamese,Malay,English,Spanish,Chinese": 0.17045454545454544, + "Vietnamese,Malay,English,Filipino,Chinese": 0.13636363636363635, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.14204545454545456, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.16477272727272727, + "Indonesian,Malay,English,Spanish,Filipino": 0.18181818181818182, + "Indonesian,Malay,English,Spanish,Chinese": 0.19318181818181818, + "Indonesian,Malay,English,Filipino,Chinese": 0.1590909090909091, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.1534090909090909, + "Indonesian,English,Spanish,Filipino,Chinese": 0.16477272727272727, + "Malay,English,Spanish,Filipino,Chinese": 0.16477272727272727 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.1534090909090909, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.1534090909090909, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.14204545454545456, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.125, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.13636363636363635 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.11931818181818182 + } + }, + "AC3_2": 0.47023423000600184, + "AC3_3": 0.3704883005036873, + "AC3_4": 0.29810264614251086, + "AC3_5": 0.24620608182394002, + "AC3_6": 0.21036063790891493, + "AC3_7": 0.18720007994485555 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5436893203883495 + }, + "prompt_2": { + "accuracy": 0.39805825242718446 + }, + "prompt_3": { + "accuracy": 0.47572815533980584 + }, + "prompt_4": { + "accuracy": 0.3786407766990291 + }, + "prompt_5": { + "accuracy": 0.5825242718446602 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4 + }, + "prompt_2": { + "accuracy": 0.37142857142857144 + }, + "prompt_3": { + "accuracy": 0.3523809523809524 + }, + "prompt_4": { + "accuracy": 0.3523809523809524 + }, + "prompt_5": { + "accuracy": 0.3333333333333333 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6448598130841121 + }, + "prompt_2": { + "accuracy": 0.45794392523364486 + }, + "prompt_3": { + "accuracy": 0.3925233644859813 + }, + "prompt_4": { + "accuracy": 0.3925233644859813 + }, + "prompt_5": { + "accuracy": 0.6635514018691588 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.46, + "category_acc": { + "brand": 0.5, + "demographics": 0.0, + "biology": 0.3, + "history": 0.4, + "literature": 0.4, + "politics": 0.7, + "culture": 0.7, + "film": 0.5, + "law": 0.3, + "geography": 0.6 + } + }, + "prompt_2": { + "accuracy": 0.42, + "category_acc": { + "brand": 0.7, + "demographics": 0.0, + "biology": 0.2, + "history": 0.4, + "literature": 0.4, + "politics": 0.6, + "culture": 0.3, + "film": 0.4, + "law": 0.4, + "geography": 0.6 + } + }, + "prompt_3": { + "accuracy": 0.44, + "category_acc": { + "brand": 0.7, + "demographics": 0.0, + "biology": 0.3, + "history": 0.3333333333333333, + "literature": 0.3, + "politics": 0.8, + "culture": 0.4, + "film": 0.5, + "law": 0.4, + "geography": 0.5 + } + }, + "prompt_4": { + "accuracy": 0.39, + "category_acc": { + "brand": 0.5, + "demographics": 0.0, + "biology": 0.2, + "history": 0.3333333333333333, + "literature": 0.3, + "politics": 0.6, + "culture": 0.5, + "film": 0.5, + "law": 0.3, + "geography": 0.5 + } + }, + "prompt_5": { + "accuracy": 0.47, + "category_acc": { + "brand": 0.5, + "demographics": 0.0, + "biology": 0.3, + "history": 0.4, + "literature": 0.4, + "politics": 0.7, + "culture": 0.7, + "film": 0.5, + "law": 0.3, + "geography": 0.7 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.2564603954817348 + }, + "prompt_2": { + "bleu_score": 0.20158609704888023 + }, + "prompt_3": { + "bleu_score": 0.2049108421332479 + }, + "prompt_4": { + "bleu_score": 0.2024575112956784 + }, + "prompt_5": { + "bleu_score": 0.07405076289541718 + } }, "indommlu": { "prompt_1": -1, @@ -10538,244 +90875,1821 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.09409563912961216 + }, + "prompt_2": { + "bleu_score": 0.08712572583981154 + }, + "prompt_3": { + "bleu_score": 0.10277302624821422 + }, + "prompt_4": { + "bleu_score": 0.13274001693855825 + }, + "prompt_5": { + "bleu_score": 0.22253903136873882 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.0721103343022267 + }, + "prompt_2": { + "bleu_score": 0.07669846743707878 + }, + "prompt_3": { + "bleu_score": 0.08168813666377463 + }, + "prompt_4": { + "bleu_score": 0.10069594874403878 + }, + "prompt_5": { + "bleu_score": 0.12366793959625925 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.06432487966861467 + }, + "prompt_2": { + "bleu_score": 0.06944165933317413 + }, + "prompt_3": { + "bleu_score": 0.07136427083190557 + }, + "prompt_4": { + "bleu_score": 0.09066382620444256 + }, + "prompt_5": { + "bleu_score": 0.12905119203589832 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.09423984210327586 + }, + "prompt_2": { + "bleu_score": 0.08826596027590301 + }, + "prompt_3": { + "bleu_score": 0.10504262743430895 + }, + "prompt_4": { + "bleu_score": 0.12388548361953901 + }, + "prompt_5": { + "bleu_score": 0.21384755856116386 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5787631271878646 + }, + "prompt_2": { + "accuracy": 0.5344224037339557 + }, + "prompt_3": { + "accuracy": 0.5507584597432905 + }, + "prompt_4": { + "accuracy": 0.6079346557759626 + }, + "prompt_5": { + "accuracy": 0.6196032672112018 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5683947086163746, + "category_acc": { + "high_school_european_history": 0.7134146341463414, + "business_ethics": 0.5252525252525253, + "clinical_knowledge": 0.5946969696969697, + "medical_genetics": 0.5858585858585859, + "high_school_us_history": 0.7586206896551724, + "high_school_physics": 0.35333333333333333, + "high_school_world_history": 0.711864406779661, + "virology": 0.4727272727272727, + "high_school_microeconomics": 0.5527426160337553, + "econometrics": 0.4247787610619469, + "college_computer_science": 0.48484848484848486, + "high_school_biology": 0.7022653721682848, + "abstract_algebra": 0.3434343434343434, + "professional_accounting": 0.4092526690391459, + "philosophy": 0.6290322580645161, + "professional_medicine": 0.7011070110701108, + "nutrition": 0.7311475409836066, + "global_facts": 0.36363636363636365, + "machine_learning": 0.43243243243243246, + "security_studies": 0.5778688524590164, + "public_relations": 0.5596330275229358, + "professional_psychology": 0.5793780687397708, + "prehistory": 0.6749226006191951, + "anatomy": 0.5746268656716418, + "human_sexuality": 0.7230769230769231, + "college_medicine": 0.5755813953488372, + "high_school_government_and_politics": 0.7395833333333334, + "college_chemistry": 0.4444444444444444, + "logical_fallacies": 0.6975308641975309, + "high_school_geography": 0.751269035532995, + "elementary_mathematics": 0.4270557029177719, + "human_aging": 0.6396396396396397, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.7352941176470589, + "formal_logic": 0.392, + "high_school_statistics": 0.4325581395348837, + "international_law": 0.725, + "high_school_mathematics": 0.29739776951672864, + "high_school_computer_science": 0.5858585858585859, + "conceptual_physics": 0.5213675213675214, + "miscellaneous": 0.782608695652174, + "high_school_chemistry": 0.47029702970297027, + "marketing": 0.7811158798283262, + "professional_law": 0.4390084801043705, + "management": 0.6862745098039216, + "college_physics": 0.4158415841584158, + "jurisprudence": 0.6261682242990654, + "world_religions": 0.8058823529411765, + "sociology": 0.77, + "us_foreign_policy": 0.7474747474747475, + "high_school_macroeconomics": 0.5218508997429306, + "computer_security": 0.7373737373737373, + "moral_scenarios": 0.24608501118568232, + "moral_disputes": 0.6695652173913044, + "electrical_engineering": 0.5694444444444444, + "astronomy": 0.6158940397350994, + "college_biology": 0.7062937062937062 + } + }, + "prompt_2": { + "accuracy": 0.4933142652842331, + "category_acc": { + "high_school_european_history": 0.7378048780487805, + "business_ethics": 0.5252525252525253, + "clinical_knowledge": 0.5606060606060606, + "medical_genetics": 0.5656565656565656, + "high_school_us_history": 0.7635467980295566, + "high_school_physics": 0.29333333333333333, + "high_school_world_history": 0.7288135593220338, + "virology": 0.37575757575757573, + "high_school_microeconomics": 0.5907172995780591, + "econometrics": 0.4247787610619469, + "college_computer_science": 0.494949494949495, + "high_school_biology": 0.5857605177993528, + "abstract_algebra": 0.1919191919191919, + "professional_accounting": 0.45907473309608543, + "philosophy": 0.5483870967741935, + "professional_medicine": 0.6346863468634686, + "nutrition": 0.6098360655737705, + "global_facts": 0.29292929292929293, + "machine_learning": 0.44144144144144143, + "security_studies": 0.6229508196721312, + "public_relations": 0.43119266055045874, + "professional_psychology": 0.513911620294599, + "prehistory": 0.544891640866873, + "anatomy": 0.3880597014925373, + "human_sexuality": 0.5461538461538461, + "college_medicine": 0.5406976744186046, + "high_school_government_and_politics": 0.5989583333333334, + "college_chemistry": 0.36363636363636365, + "logical_fallacies": 0.6234567901234568, + "high_school_geography": 0.6040609137055838, + "elementary_mathematics": 0.3952254641909814, + "human_aging": 0.5045045045045045, + "college_mathematics": 0.36363636363636365, + "high_school_psychology": 0.5716911764705882, + "formal_logic": 0.392, + "high_school_statistics": 0.4604651162790698, + "international_law": 0.6083333333333333, + "high_school_mathematics": 0.31970260223048325, + "high_school_computer_science": 0.6060606060606061, + "conceptual_physics": 0.41025641025641024, + "miscellaneous": 0.42710997442455245, + "high_school_chemistry": 0.3712871287128713, + "marketing": 0.8197424892703863, + "professional_law": 0.42204827136333983, + "management": 0.5392156862745098, + "college_physics": 0.31683168316831684, + "jurisprudence": 0.6355140186915887, + "world_religions": 0.4588235294117647, + "sociology": 0.7, + "us_foreign_policy": 0.5454545454545454, + "high_school_macroeconomics": 0.5115681233933161, + "computer_security": 0.6363636363636364, + "moral_scenarios": 0.22818791946308725, + "moral_disputes": 0.6173913043478261, + "electrical_engineering": 0.4652777777777778, + "astronomy": 0.5364238410596026, + "college_biology": 0.4825174825174825 + } + }, + "prompt_3": { + "accuracy": 0.5530210940293171, + "category_acc": { + "high_school_european_history": 0.725609756097561, + "business_ethics": 0.5656565656565656, + "clinical_knowledge": 0.625, + "medical_genetics": 0.6565656565656566, + "high_school_us_history": 0.7783251231527094, + "high_school_physics": 0.32, + "high_school_world_history": 0.7330508474576272, + "virology": 0.46060606060606063, + "high_school_microeconomics": 0.5864978902953587, + "econometrics": 0.39823008849557523, + "college_computer_science": 0.5050505050505051, + "high_school_biology": 0.6731391585760518, + "abstract_algebra": 0.25252525252525254, + "professional_accounting": 0.44483985765124556, + "philosophy": 0.6290322580645161, + "professional_medicine": 0.6826568265682657, + "nutrition": 0.6852459016393443, + "global_facts": 0.43434343434343436, + "machine_learning": 0.4594594594594595, + "security_studies": 0.6311475409836066, + "public_relations": 0.5596330275229358, + "professional_psychology": 0.5695581014729951, + "prehistory": 0.631578947368421, + "anatomy": 0.5970149253731343, + "human_sexuality": 0.676923076923077, + "college_medicine": 0.5755813953488372, + "high_school_government_and_politics": 0.71875, + "college_chemistry": 0.45454545454545453, + "logical_fallacies": 0.6481481481481481, + "high_school_geography": 0.7157360406091371, + "elementary_mathematics": 0.38992042440318303, + "human_aging": 0.545045045045045, + "college_mathematics": 0.3333333333333333, + "high_school_psychology": 0.6672794117647058, + "formal_logic": 0.4, + "high_school_statistics": 0.4418604651162791, + "international_law": 0.6583333333333333, + "high_school_mathematics": 0.31226765799256506, + "high_school_computer_science": 0.5757575757575758, + "conceptual_physics": 0.5, + "miscellaneous": 0.7122762148337596, + "high_school_chemistry": 0.44554455445544555, + "marketing": 0.8111587982832618, + "professional_law": 0.43770384866275275, + "management": 0.6862745098039216, + "college_physics": 0.37623762376237624, + "jurisprudence": 0.6915887850467289, + "world_religions": 0.788235294117647, + "sociology": 0.675, + "us_foreign_policy": 0.7474747474747475, + "high_school_macroeconomics": 0.5398457583547558, + "computer_security": 0.7070707070707071, + "moral_scenarios": 0.2360178970917226, + "moral_disputes": 0.591304347826087, + "electrical_engineering": 0.5277777777777778, + "astronomy": 0.5960264900662252, + "college_biology": 0.6783216783216783 + } + }, + "prompt_4": { + "accuracy": 0.5917053986414015, + "category_acc": { + "high_school_european_history": 0.7317073170731707, + "business_ethics": 0.5757575757575758, + "clinical_knowledge": 0.6401515151515151, + "medical_genetics": 0.6767676767676768, + "high_school_us_history": 0.7487684729064039, + "high_school_physics": 0.35333333333333333, + "high_school_world_history": 0.7245762711864406, + "virology": 0.4727272727272727, + "high_school_microeconomics": 0.6244725738396625, + "econometrics": 0.45132743362831856, + "college_computer_science": 0.5151515151515151, + "high_school_biology": 0.7475728155339806, + "abstract_algebra": 0.36363636363636365, + "professional_accounting": 0.46619217081850534, + "philosophy": 0.6935483870967742, + "professional_medicine": 0.7084870848708487, + "nutrition": 0.7245901639344262, + "global_facts": 0.3939393939393939, + "machine_learning": 0.44144144144144143, + "security_studies": 0.680327868852459, + "public_relations": 0.6146788990825688, + "professional_psychology": 0.6317512274959084, + "prehistory": 0.6965944272445821, + "anatomy": 0.6119402985074627, + "human_sexuality": 0.7461538461538462, + "college_medicine": 0.627906976744186, + "high_school_government_and_politics": 0.828125, + "college_chemistry": 0.45454545454545453, + "logical_fallacies": 0.7469135802469136, + "high_school_geography": 0.766497461928934, + "elementary_mathematics": 0.40318302387267907, + "human_aging": 0.6486486486486487, + "college_mathematics": 0.32323232323232326, + "high_school_psychology": 0.7683823529411765, + "formal_logic": 0.448, + "high_school_statistics": 0.4511627906976744, + "international_law": 0.7583333333333333, + "high_school_mathematics": 0.3382899628252788, + "high_school_computer_science": 0.6363636363636364, + "conceptual_physics": 0.5170940170940171, + "miscellaneous": 0.7902813299232737, + "high_school_chemistry": 0.46534653465346537, + "marketing": 0.8454935622317596, + "professional_law": 0.4370515329419439, + "management": 0.7450980392156863, + "college_physics": 0.38613861386138615, + "jurisprudence": 0.7476635514018691, + "world_religions": 0.8058823529411765, + "sociology": 0.795, + "us_foreign_policy": 0.8282828282828283, + "high_school_macroeconomics": 0.5758354755784062, + "computer_security": 0.7171717171717171, + "moral_scenarios": 0.25279642058165547, + "moral_disputes": 0.6405797101449275, + "electrical_engineering": 0.5833333333333334, + "astronomy": 0.6026490066225165, + "college_biology": 0.7202797202797203 + } + }, + "prompt_5": { + "accuracy": 0.5921344297461566, + "category_acc": { + "high_school_european_history": 0.7317073170731707, + "business_ethics": 0.5555555555555556, + "clinical_knowledge": 0.6553030303030303, + "medical_genetics": 0.6767676767676768, + "high_school_us_history": 0.7635467980295566, + "high_school_physics": 0.32666666666666666, + "high_school_world_history": 0.7203389830508474, + "virology": 0.4909090909090909, + "high_school_microeconomics": 0.6329113924050633, + "econometrics": 0.4336283185840708, + "college_computer_science": 0.5252525252525253, + "high_school_biology": 0.7184466019417476, + "abstract_algebra": 0.3333333333333333, + "professional_accounting": 0.48398576512455516, + "philosophy": 0.6870967741935484, + "professional_medicine": 0.6863468634686347, + "nutrition": 0.7147540983606557, + "global_facts": 0.40404040404040403, + "machine_learning": 0.45045045045045046, + "security_studies": 0.6844262295081968, + "public_relations": 0.6422018348623854, + "professional_psychology": 0.6268412438625205, + "prehistory": 0.7213622291021672, + "anatomy": 0.5522388059701493, + "human_sexuality": 0.7769230769230769, + "college_medicine": 0.5930232558139535, + "high_school_government_and_politics": 0.8385416666666666, + "college_chemistry": 0.45454545454545453, + "logical_fallacies": 0.7160493827160493, + "high_school_geography": 0.7715736040609137, + "elementary_mathematics": 0.38992042440318303, + "human_aging": 0.6666666666666666, + "college_mathematics": 0.30303030303030304, + "high_school_psychology": 0.7849264705882353, + "formal_logic": 0.424, + "high_school_statistics": 0.46511627906976744, + "international_law": 0.7583333333333333, + "high_school_mathematics": 0.32342007434944237, + "high_school_computer_science": 0.5757575757575758, + "conceptual_physics": 0.5085470085470085, + "miscellaneous": 0.789002557544757, + "high_school_chemistry": 0.5, + "marketing": 0.8755364806866953, + "professional_law": 0.4324853228962818, + "management": 0.7647058823529411, + "college_physics": 0.40594059405940597, + "jurisprudence": 0.7570093457943925, + "world_religions": 0.8176470588235294, + "sociology": 0.805, + "us_foreign_policy": 0.8484848484848485, + "high_school_macroeconomics": 0.5886889460154242, + "computer_security": 0.7171717171717171, + "moral_scenarios": 0.24384787472035793, + "moral_disputes": 0.6956521739130435, + "electrical_engineering": 0.5763888888888888, + "astronomy": 0.6158940397350994, + "college_biology": 0.6713286713286714 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4145616641901932 + }, + "prompt_2": { + "accuracy": 0.38781575037147104 + }, + "prompt_3": { + "accuracy": 0.3922734026745914 + }, + "prompt_4": { + "accuracy": 0.4086181277860327 + }, + "prompt_5": { + "accuracy": 0.35438335809806837 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.40597758405977585, + "category_acc": { + "computer_network": 0.5416666666666666, + "operating_system": 0.5, + "computer_architecture": 0.5, + "college_programming": 0.47619047619047616, + "college_physics": 0.25, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.47619047619047616, + "electrical_engineer": 0.5238095238095238, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.13043478260869565, + "high_school_physics": 0.5, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.46153846153846156, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.35714285714285715, + "college_economics": 0.55, + "business_administration": 0.34210526315789475, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.5862068965517241, + "education_science": 0.47058823529411764, + "teacher_qualification": 0.46938775510204084, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.5384615384615384, + "middle_school_geography": 0.17647058823529413, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.2222222222222222, + "law": 0.3793103448275862, + "chinese_language_and_literature": 0.4642857142857143, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.6, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.4423076923076923, + "sports_science": 0.3333333333333333, + "plant_protection": 0.5185185185185185, + "basic_medicine": 0.5416666666666666, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.37037037037037035, + "fire_engineer": 0.4722222222222222, + "environmental_impact_assessment_engineer": 0.3888888888888889, + "tax_accountant": 0.25925925925925924, + "physician": 0.37037037037037035 + } + }, + "prompt_2": { + "accuracy": 0.38418430884184307, + "category_acc": { + "computer_network": 0.5, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.6153846153846154, + "college_programming": 0.5, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.391304347826087, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.40476190476190477, + "metrology_engineer": 0.3103448275862069, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.16666666666666666, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.5384615384615384, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.35714285714285715, + "college_economics": 0.4666666666666667, + "business_administration": 0.5263157894736842, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.4482758620689655, + "education_science": 0.38235294117647056, + "teacher_qualification": 0.4489795918367347, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.46153846153846156, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.4074074074074074, + "law": 0.3793103448275862, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.64, + "middle_school_history": 0.48148148148148145, + "civil_servant": 0.5, + "sports_science": 0.16666666666666666, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.3333333333333333, + "accountant": 0.4074074074074074, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.25925925925925924, + "physician": 0.3333333333333333 + } + }, + "prompt_3": { + "accuracy": 0.3866749688667497, + "category_acc": { + "computer_network": 0.5, + "operating_system": 0.5, + "computer_architecture": 0.6153846153846154, + "college_programming": 0.40476190476190477, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.3448275862068966, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.25, + "high_school_chemistry": 0.125, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.5769230769230769, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.43333333333333335, + "business_administration": 0.5, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.5172413793103449, + "education_science": 0.4411764705882353, + "teacher_qualification": 0.4897959183673469, + "high_school_politics": 0.5, + "high_school_geography": 0.5, + "middle_school_politics": 0.46153846153846156, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.4444444444444444, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.2894736842105263, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.25, + "high_school_history": 0.6, + "middle_school_history": 0.4074074074074074, + "civil_servant": 0.40384615384615385, + "sports_science": 0.16666666666666666, + "plant_protection": 0.5555555555555556, + "basic_medicine": 0.5, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.5, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.3888888888888889, + "tax_accountant": 0.2222222222222222, + "physician": 0.3148148148148148 + } + }, + "prompt_4": { + "accuracy": 0.41594022415940224, + "category_acc": { + "computer_network": 0.5, + "operating_system": 0.625, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.5238095238095238, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.4166666666666667, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.4523809523809524, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.08695652173913043, + "high_school_physics": 0.4583333333333333, + "high_school_chemistry": 0.16666666666666666, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.6153846153846154, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.48333333333333334, + "business_administration": 0.42105263157894735, + "marxism": 0.375, + "mao_zedong_thought": 0.5172413793103449, + "education_science": 0.5, + "teacher_qualification": 0.46938775510204084, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.5416666666666666, + "middle_school_politics": 0.5384615384615384, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.5185185185185185, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.4642857142857143, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.47058823529411764, + "legal_professional": 0.4642857142857143, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.56, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.38461538461538464, + "sports_science": 0.2916666666666667, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.5, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.3137254901960784, + "accountant": 0.42592592592592593, + "fire_engineer": 0.4166666666666667, + "environmental_impact_assessment_engineer": 0.3888888888888889, + "tax_accountant": 0.3148148148148148, + "physician": 0.3888888888888889 + } + }, + "prompt_5": { + "accuracy": 0.36612702366127026, + "category_acc": { + "computer_network": 0.5, + "operating_system": 0.375, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.40476190476190477, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.4166666666666667, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.3103448275862069, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.125, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.46153846153846156, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.25, + "college_economics": 0.4666666666666667, + "business_administration": 0.34210526315789475, + "marxism": 0.375, + "mao_zedong_thought": 0.4482758620689655, + "education_science": 0.5294117647058824, + "teacher_qualification": 0.4489795918367347, + "high_school_politics": 0.5, + "high_school_geography": 0.375, + "middle_school_politics": 0.4230769230769231, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.4444444444444444, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.4117647058823529, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.25, + "high_school_history": 0.68, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.28846153846153844, + "sports_science": 0.25, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.3137254901960784, + "accountant": 0.4074074074074074, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.5, + "tax_accountant": 0.25925925925925924, + "physician": 0.3148148148148148 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3655913978494624 + }, + "prompt_2": { + "accuracy": 0.3835125448028674 + }, + "prompt_3": { + "accuracy": 0.3870967741935484 + }, + "prompt_4": { + "accuracy": 0.4121863799283154 + }, + "prompt_5": { + "accuracy": 0.4050179211469534 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.39846313244690035, + "category_acc": { + "agronomy": 0.3076923076923077, + "anatomy": 0.30405405405405406, + "ancient_chinese": 0.23780487804878048, + "arts": 0.4125, + "astronomy": 0.2909090909090909, + "business_ethics": 0.430622009569378, + "chinese_civil_service_exam": 0.3625, + "chinese_driving_rule": 0.5267175572519084, + "chinese_food_culture": 0.3602941176470588, + "chinese_foreign_policy": 0.4766355140186916, + "chinese_history": 0.43962848297213625, + "chinese_literature": 0.27941176470588236, + "chinese_teacher_qualification": 0.4581005586592179, + "clinical_knowledge": 0.31223628691983124, + "college_actuarial_science": 0.3113207547169811, + "college_education": 0.4205607476635514, + "college_engineering_hydrology": 0.39622641509433965, + "college_law": 0.3333333333333333, + "college_mathematics": 0.3523809523809524, + "college_medical_statistics": 0.4528301886792453, + "college_medicine": 0.34798534798534797, + "computer_science": 0.4852941176470588, + "computer_security": 0.5555555555555556, + "conceptual_physics": 0.3877551020408163, + "construction_project_management": 0.38848920863309355, + "economics": 0.4528301886792453, + "education": 0.37423312883435583, + "electrical_engineering": 0.4069767441860465, + "elementary_chinese": 0.3055555555555556, + "elementary_commonsense": 0.3838383838383838, + "elementary_information_and_technology": 0.6386554621848739, + "elementary_mathematics": 0.28695652173913044, + "ethnology": 0.3851851851851852, + "food_science": 0.46153846153846156, + "genetics": 0.3522727272727273, + "global_facts": 0.40939597315436244, + "high_school_biology": 0.3431952662721893, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.3474576271186441, + "high_school_mathematics": 0.2804878048780488, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.3986013986013986, + "human_sexuality": 0.3888888888888889, + "international_law": 0.34594594594594597, + "journalism": 0.4476744186046512, + "jurisprudence": 0.43552311435523117, + "legal_and_moral_basis": 0.6495327102803738, + "logical": 0.4878048780487805, + "machine_learning": 0.4344262295081967, + "management": 0.45714285714285713, + "marketing": 0.5222222222222223, + "marxist_theory": 0.5238095238095238, + "modern_chinese": 0.3620689655172414, + "nutrition": 0.41379310344827586, + "philosophy": 0.3904761904761905, + "professional_accounting": 0.38285714285714284, + "professional_law": 0.33175355450236965, + "professional_medicine": 0.32978723404255317, + "professional_psychology": 0.39655172413793105, + "public_relations": 0.4482758620689655, + "security_study": 0.4, + "sociology": 0.45132743362831856, + "sports_science": 0.43636363636363634, + "traditional_chinese_medicine": 0.31351351351351353, + "virology": 0.40828402366863903, + "world_history": 0.42857142857142855, + "world_religions": 0.41875 + } + }, + "prompt_2": { + "accuracy": 0.3874115006043861, + "category_acc": { + "agronomy": 0.3431952662721893, + "anatomy": 0.31756756756756754, + "ancient_chinese": 0.1951219512195122, + "arts": 0.4125, + "astronomy": 0.2727272727272727, + "business_ethics": 0.430622009569378, + "chinese_civil_service_exam": 0.3875, + "chinese_driving_rule": 0.48091603053435117, + "chinese_food_culture": 0.35294117647058826, + "chinese_foreign_policy": 0.45794392523364486, + "chinese_history": 0.4117647058823529, + "chinese_literature": 0.3382352941176471, + "chinese_teacher_qualification": 0.441340782122905, + "clinical_knowledge": 0.350210970464135, + "college_actuarial_science": 0.32075471698113206, + "college_education": 0.4672897196261682, + "college_engineering_hydrology": 0.3584905660377358, + "college_law": 0.3333333333333333, + "college_mathematics": 0.29523809523809524, + "college_medical_statistics": 0.41509433962264153, + "college_medicine": 0.2967032967032967, + "computer_science": 0.4411764705882353, + "computer_security": 0.5672514619883041, + "conceptual_physics": 0.3469387755102041, + "construction_project_management": 0.2805755395683453, + "economics": 0.4528301886792453, + "education": 0.44785276073619634, + "electrical_engineering": 0.4186046511627907, + "elementary_chinese": 0.27380952380952384, + "elementary_commonsense": 0.3434343434343434, + "elementary_information_and_technology": 0.5168067226890757, + "elementary_mathematics": 0.28695652173913044, + "ethnology": 0.34074074074074073, + "food_science": 0.3916083916083916, + "genetics": 0.3068181818181818, + "global_facts": 0.4563758389261745, + "high_school_biology": 0.34911242603550297, + "high_school_chemistry": 0.2803030303030303, + "high_school_geography": 0.4067796610169492, + "high_school_mathematics": 0.29878048780487804, + "high_school_physics": 0.3090909090909091, + "high_school_politics": 0.4125874125874126, + "human_sexuality": 0.35714285714285715, + "international_law": 0.31891891891891894, + "journalism": 0.48255813953488375, + "jurisprudence": 0.46472019464720193, + "legal_and_moral_basis": 0.677570093457944, + "logical": 0.3821138211382114, + "machine_learning": 0.4344262295081967, + "management": 0.4380952380952381, + "marketing": 0.5, + "marxist_theory": 0.49206349206349204, + "modern_chinese": 0.3620689655172414, + "nutrition": 0.3793103448275862, + "philosophy": 0.3523809523809524, + "professional_accounting": 0.4114285714285714, + "professional_law": 0.33175355450236965, + "professional_medicine": 0.3191489361702128, + "professional_psychology": 0.3706896551724138, + "public_relations": 0.42528735632183906, + "security_study": 0.37037037037037035, + "sociology": 0.4424778761061947, + "sports_science": 0.36363636363636365, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.42011834319526625, + "world_history": 0.4472049689440994, + "world_religions": 0.3875 + } + }, + "prompt_3": { + "accuracy": 0.3853393196339147, + "category_acc": { + "agronomy": 0.3136094674556213, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.21341463414634146, + "arts": 0.3875, + "astronomy": 0.2606060606060606, + "business_ethics": 0.4019138755980861, + "chinese_civil_service_exam": 0.3875, + "chinese_driving_rule": 0.5114503816793893, + "chinese_food_culture": 0.3235294117647059, + "chinese_foreign_policy": 0.42990654205607476, + "chinese_history": 0.4086687306501548, + "chinese_literature": 0.31862745098039214, + "chinese_teacher_qualification": 0.4581005586592179, + "clinical_knowledge": 0.37130801687763715, + "college_actuarial_science": 0.3113207547169811, + "college_education": 0.45794392523364486, + "college_engineering_hydrology": 0.37735849056603776, + "college_law": 0.3425925925925926, + "college_mathematics": 0.2761904761904762, + "college_medical_statistics": 0.37735849056603776, + "college_medicine": 0.336996336996337, + "computer_science": 0.47549019607843135, + "computer_security": 0.5672514619883041, + "conceptual_physics": 0.3401360544217687, + "construction_project_management": 0.2733812949640288, + "economics": 0.41509433962264153, + "education": 0.44171779141104295, + "electrical_engineering": 0.4127906976744186, + "elementary_chinese": 0.32142857142857145, + "elementary_commonsense": 0.3686868686868687, + "elementary_information_and_technology": 0.5, + "elementary_mathematics": 0.29130434782608694, + "ethnology": 0.32592592592592595, + "food_science": 0.40559440559440557, + "genetics": 0.3181818181818182, + "global_facts": 0.4228187919463087, + "high_school_biology": 0.35502958579881655, + "high_school_chemistry": 0.26515151515151514, + "high_school_geography": 0.3644067796610169, + "high_school_mathematics": 0.2804878048780488, + "high_school_physics": 0.34545454545454546, + "high_school_politics": 0.34265734265734266, + "human_sexuality": 0.3968253968253968, + "international_law": 0.2972972972972973, + "journalism": 0.47093023255813954, + "jurisprudence": 0.44525547445255476, + "legal_and_moral_basis": 0.6448598130841121, + "logical": 0.36585365853658536, + "machine_learning": 0.45081967213114754, + "management": 0.41904761904761906, + "marketing": 0.48333333333333334, + "marxist_theory": 0.4603174603174603, + "modern_chinese": 0.31896551724137934, + "nutrition": 0.3724137931034483, + "philosophy": 0.4380952380952381, + "professional_accounting": 0.42857142857142855, + "professional_law": 0.32701421800947866, + "professional_medicine": 0.31648936170212766, + "professional_psychology": 0.4051724137931034, + "public_relations": 0.41379310344827586, + "security_study": 0.4148148148148148, + "sociology": 0.4690265486725664, + "sports_science": 0.37575757575757573, + "traditional_chinese_medicine": 0.2756756756756757, + "virology": 0.4260355029585799, + "world_history": 0.4409937888198758, + "world_religions": 0.39375 + } + }, + "prompt_4": { + "accuracy": 0.39984458642721465, + "category_acc": { + "agronomy": 0.3431952662721893, + "anatomy": 0.3108108108108108, + "ancient_chinese": 0.20121951219512196, + "arts": 0.4, + "astronomy": 0.2787878787878788, + "business_ethics": 0.4258373205741627, + "chinese_civil_service_exam": 0.33125, + "chinese_driving_rule": 0.48854961832061067, + "chinese_food_culture": 0.29411764705882354, + "chinese_foreign_policy": 0.4485981308411215, + "chinese_history": 0.4241486068111455, + "chinese_literature": 0.3382352941176471, + "chinese_teacher_qualification": 0.4581005586592179, + "clinical_knowledge": 0.32489451476793246, + "college_actuarial_science": 0.32075471698113206, + "college_education": 0.514018691588785, + "college_engineering_hydrology": 0.42452830188679247, + "college_law": 0.3148148148148148, + "college_mathematics": 0.34285714285714286, + "college_medical_statistics": 0.41509433962264153, + "college_medicine": 0.358974358974359, + "computer_science": 0.5196078431372549, + "computer_security": 0.5555555555555556, + "conceptual_physics": 0.3877551020408163, + "construction_project_management": 0.37410071942446044, + "economics": 0.48427672955974843, + "education": 0.4294478527607362, + "electrical_engineering": 0.4186046511627907, + "elementary_chinese": 0.3253968253968254, + "elementary_commonsense": 0.398989898989899, + "elementary_information_and_technology": 0.6428571428571429, + "elementary_mathematics": 0.3217391304347826, + "ethnology": 0.3851851851851852, + "food_science": 0.38461538461538464, + "genetics": 0.32954545454545453, + "global_facts": 0.44966442953020136, + "high_school_biology": 0.3254437869822485, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.3813559322033898, + "high_school_mathematics": 0.27439024390243905, + "high_school_physics": 0.3090909090909091, + "high_school_politics": 0.3986013986013986, + "human_sexuality": 0.42857142857142855, + "international_law": 0.3837837837837838, + "journalism": 0.45348837209302323, + "jurisprudence": 0.4209245742092457, + "legal_and_moral_basis": 0.6495327102803738, + "logical": 0.44715447154471544, + "machine_learning": 0.4180327868852459, + "management": 0.49523809523809526, + "marketing": 0.5222222222222223, + "marxist_theory": 0.5185185185185185, + "modern_chinese": 0.31896551724137934, + "nutrition": 0.45517241379310347, + "philosophy": 0.3904761904761905, + "professional_accounting": 0.44, + "professional_law": 0.35071090047393366, + "professional_medicine": 0.2898936170212766, + "professional_psychology": 0.3706896551724138, + "public_relations": 0.4885057471264368, + "security_study": 0.4444444444444444, + "sociology": 0.42920353982300885, + "sports_science": 0.37575757575757573, + "traditional_chinese_medicine": 0.2594594594594595, + "virology": 0.4319526627218935, + "world_history": 0.39751552795031053, + "world_religions": 0.4125 + } + }, + "prompt_5": { + "accuracy": 0.36271801070626836, + "category_acc": { + "agronomy": 0.2958579881656805, + "anatomy": 0.3581081081081081, + "ancient_chinese": 0.2682926829268293, + "arts": 0.31875, + "astronomy": 0.296969696969697, + "business_ethics": 0.40669856459330145, + "chinese_civil_service_exam": 0.29375, + "chinese_driving_rule": 0.44274809160305345, + "chinese_food_culture": 0.3014705882352941, + "chinese_foreign_policy": 0.4205607476635514, + "chinese_history": 0.39009287925696595, + "chinese_literature": 0.3088235294117647, + "chinese_teacher_qualification": 0.4134078212290503, + "clinical_knowledge": 0.2911392405063291, + "college_actuarial_science": 0.3113207547169811, + "college_education": 0.5233644859813084, + "college_engineering_hydrology": 0.32075471698113206, + "college_law": 0.32407407407407407, + "college_mathematics": 0.2761904761904762, + "college_medical_statistics": 0.37735849056603776, + "college_medicine": 0.34798534798534797, + "computer_science": 0.4362745098039216, + "computer_security": 0.4678362573099415, + "conceptual_physics": 0.3197278911564626, + "construction_project_management": 0.2517985611510791, + "economics": 0.37735849056603776, + "education": 0.37423312883435583, + "electrical_engineering": 0.4011627906976744, + "elementary_chinese": 0.29365079365079366, + "elementary_commonsense": 0.29797979797979796, + "elementary_information_and_technology": 0.47058823529411764, + "elementary_mathematics": 0.3217391304347826, + "ethnology": 0.34814814814814815, + "food_science": 0.4195804195804196, + "genetics": 0.32954545454545453, + "global_facts": 0.436241610738255, + "high_school_biology": 0.3254437869822485, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.3474576271186441, + "high_school_mathematics": 0.2865853658536585, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.3706293706293706, + "human_sexuality": 0.3492063492063492, + "international_law": 0.34594594594594597, + "journalism": 0.4069767441860465, + "jurisprudence": 0.38929440389294406, + "legal_and_moral_basis": 0.5887850467289719, + "logical": 0.43902439024390244, + "machine_learning": 0.4344262295081967, + "management": 0.4142857142857143, + "marketing": 0.4222222222222222, + "marxist_theory": 0.43386243386243384, + "modern_chinese": 0.25862068965517243, + "nutrition": 0.36551724137931035, + "philosophy": 0.3904761904761905, + "professional_accounting": 0.35428571428571426, + "professional_law": 0.2890995260663507, + "professional_medicine": 0.2925531914893617, + "professional_psychology": 0.375, + "public_relations": 0.41954022988505746, + "security_study": 0.37777777777777777, + "sociology": 0.37168141592920356, + "sports_science": 0.3696969696969697, + "traditional_chinese_medicine": 0.2918918918918919, + "virology": 0.4319526627218935, + "world_history": 0.36645962732919257, + "world_religions": 0.39375 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.36363636363636365 + }, + "prompt_2": { + "accuracy": 0.3939393939393939 + }, + "prompt_3": { + "accuracy": 0.36363636363636365 + }, + "prompt_4": { + "accuracy": 0.42424242424242425 + }, + "prompt_5": { + "accuracy": 0.2727272727272727 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.32954545454545453 + }, + "prompt_2": { + "accuracy": 0.34545454545454546 + }, + "prompt_3": { + "accuracy": 0.32045454545454544 + }, + "prompt_4": { + "accuracy": 0.5727272727272728 + }, + "prompt_5": { + "accuracy": 0.5590909090909091 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.30745762711864405 + }, + "prompt_2": { + "accuracy": 0.3505084745762712 + }, + "prompt_3": { + "accuracy": 0.308135593220339 + }, + "prompt_4": { + "accuracy": 0.3335593220338983 + }, + "prompt_5": { + "accuracy": 0.30474576271186443 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6353777112939416 + }, + "prompt_2": { + "accuracy": 0.6540762902019447 + }, + "prompt_3": { + "accuracy": 0.6492146596858639 + }, + "prompt_4": { + "accuracy": 0.6548242333582648 + }, + "prompt_5": { + "accuracy": 0.6574420344053852 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8417442430181283 + }, + "prompt_2": { + "accuracy": 0.802547770700637 + }, + "prompt_3": { + "accuracy": 0.850563449289564 + }, + "prompt_4": { + "accuracy": 0.8378245957863792 + }, + "prompt_5": { + "accuracy": 0.8427241548260657 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2522128782822429, + "rouge2": 0.08657121472194133, + "rougeL": 0.19388734585525721, + "avg_rouge": 0.17755714628648048 + }, + "prompt_2": { + "rouge1": 0.28425004323157466, + "rouge2": 0.11309389923069282, + "rougeL": 0.22104788191703095, + "avg_rouge": 0.2061306081264328 + }, + "prompt_3": { + "rouge1": 0.2762487784299151, + "rouge2": 0.11154785778508489, + "rougeL": 0.21847237192238034, + "avg_rouge": 0.20208966937912676 + }, + "prompt_4": { + "rouge1": 0.2762706784061377, + "rouge2": 0.10265565313604795, + "rougeL": 0.21110850137567955, + "avg_rouge": 0.19667827763928844 + }, + "prompt_5": { + "rouge1": 0.29040479616881754, + "rouge2": 0.11608515455129263, + "rougeL": 0.2260516051369098, + "avg_rouge": 0.21084718528567334 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2343556160591197, + "rouge2": 0.06461213437026242, + "rougeL": 0.172947114326383, + "avg_rouge": 0.15730495491858837 + }, + "prompt_2": { + "rouge1": 0.2303695137440053, + "rouge2": 0.06337369488399912, + "rougeL": 0.1695810613263265, + "avg_rouge": 0.1544414233181103 + }, + "prompt_3": { + "rouge1": 0.23381894597230257, + "rouge2": 0.06453135473257171, + "rougeL": 0.17237878110422106, + "avg_rouge": 0.1569096939363651 + }, + "prompt_4": { + "rouge1": 0.23384963509698004, + "rouge2": 0.0642863441570493, + "rougeL": 0.17240553761719177, + "avg_rouge": 0.15684717229040704 + }, + "prompt_5": { + "rouge1": 0.22570958532950347, + "rouge2": 0.062361067605806224, + "rougeL": 0.1659066757810212, + "avg_rouge": 0.15132577623877697 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8681192660550459 + }, + "prompt_2": { + "accuracy": 0.7488532110091743 + }, + "prompt_3": { + "accuracy": 0.7477064220183486 + }, + "prompt_4": { + "accuracy": 0.8715596330275229 + }, + "prompt_5": { + "accuracy": 0.7809633027522935 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5647171620325983 + }, + "prompt_2": { + "accuracy": 0.49760306807286675 + }, + "prompt_3": { + "accuracy": 0.5397890699904123 + }, + "prompt_4": { + "accuracy": 0.5570469798657718 + }, + "prompt_5": { + "accuracy": 0.50143815915628 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5745 + }, + "prompt_2": { + "accuracy": 0.58 + }, + "prompt_3": { + "accuracy": 0.541 + }, + "prompt_4": { + "accuracy": 0.599 + }, + "prompt_5": { + "accuracy": 0.62 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3945 + }, + "prompt_2": { + "accuracy": 0.3845 + }, + "prompt_3": { + "accuracy": 0.3955 + }, + "prompt_4": { + "accuracy": 0.367 + }, + "prompt_5": { + "accuracy": 0.375 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.516 + }, + "prompt_2": { + "accuracy": 0.4955 + }, + "prompt_3": { + "accuracy": 0.525 + }, + "prompt_4": { + "accuracy": 0.5565 + }, + "prompt_5": { + "accuracy": 0.5335 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5633802816901409 + }, + "prompt_2": { + "accuracy": 0.6338028169014085 + }, + "prompt_3": { + "accuracy": 0.5070422535211268 + }, + "prompt_4": { + "accuracy": 0.4507042253521127 + }, + "prompt_5": { + "accuracy": 0.5633802816901409 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5379061371841155 + }, + "prompt_2": { + "accuracy": 0.5451263537906137 + }, + "prompt_3": { + "accuracy": 0.5270758122743683 + }, + "prompt_4": { + "accuracy": 0.5451263537906137 + }, + "prompt_5": { + "accuracy": 0.4693140794223827 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.46568627450980393 + }, + "prompt_2": { + "accuracy": 0.6519607843137255 + }, + "prompt_3": { + "accuracy": 0.6397058823529411 + }, + "prompt_4": { + "accuracy": 0.5269607843137255 + }, + "prompt_5": { + "accuracy": 0.33578431372549017 + } } }, "five_shot": { "cross_mmlu": { - "prompt_1": -1 + "prompt_1": { + "overall_acc": 0.42285714285714276, + "language_acc": { + "English": 0.52, + "Vietnamese": 0.3933333333333333, + "Malay": 0.3466666666666667, + "Indonesian": 0.44666666666666666, + "Spanish": 0.44, + "Chinese": 0.41333333333333333, + "Filipino": 0.4 + }, + "consistency_score_2": 0.4057142857142856, + "consistency_score_3": 0.19142857142857145, + "consistency_score_4": 0.09942857142857144, + "consistency_score_5": 0.0565079365079365, + "consistency_score_6": 0.036190476190476197, + "consistency_score_7": 0.02666666666666667, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4066666666666667, + "English,Malay": 0.37333333333333335, + "English,Indonesian": 0.4066666666666667, + "English,Spanish": 0.4666666666666667, + "English,Chinese": 0.44, + "English,Filipino": 0.38, + "Vietnamese,Malay": 0.36, + "Vietnamese,Indonesian": 0.41333333333333333, + "Vietnamese,Spanish": 0.41333333333333333, + "Vietnamese,Chinese": 0.36, + "Vietnamese,Filipino": 0.42, + "Malay,Indonesian": 0.46, + "Malay,Spanish": 0.41333333333333333, + "Malay,Chinese": 0.36, + "Malay,Filipino": 0.36666666666666664, + "Indonesian,Spanish": 0.4066666666666667, + "Indonesian,Chinese": 0.36666666666666664, + "Indonesian,Filipino": 0.4266666666666667, + "Spanish,Chinese": 0.4866666666666667, + "Spanish,Filipino": 0.36, + "Chinese,Filipino": 0.43333333333333335 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.15333333333333332, + "English,Vietnamese,Indonesian": 0.21333333333333335, + "English,Vietnamese,Spanish": 0.22, + "English,Vietnamese,Chinese": 0.17333333333333334, + "English,Vietnamese,Filipino": 0.2, + "English,Malay,Indonesian": 0.2, + "English,Malay,Spanish": 0.20666666666666667, + "English,Malay,Chinese": 0.16, + "English,Malay,Filipino": 0.14666666666666667, + "English,Indonesian,Spanish": 0.22666666666666666, + "English,Indonesian,Chinese": 0.17333333333333334, + "English,Indonesian,Filipino": 0.21333333333333335, + "English,Spanish,Chinese": 0.23333333333333334, + "English,Spanish,Filipino": 0.18666666666666668, + "English,Chinese,Filipino": 0.21333333333333335, + "Vietnamese,Malay,Indonesian": 0.18666666666666668, + "Vietnamese,Malay,Spanish": 0.18666666666666668, + "Vietnamese,Malay,Chinese": 0.13333333333333333, + "Vietnamese,Malay,Filipino": 0.18, + "Vietnamese,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Indonesian,Chinese": 0.16666666666666666, + "Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "Vietnamese,Spanish,Chinese": 0.19333333333333333, + "Vietnamese,Spanish,Filipino": 0.19333333333333333, + "Vietnamese,Chinese,Filipino": 0.2, + "Malay,Indonesian,Spanish": 0.22, + "Malay,Indonesian,Chinese": 0.18, + "Malay,Indonesian,Filipino": 0.20666666666666667, + "Malay,Spanish,Chinese": 0.18666666666666668, + "Malay,Spanish,Filipino": 0.16, + "Malay,Chinese,Filipino": 0.16, + "Indonesian,Spanish,Chinese": 0.20666666666666667, + "Indonesian,Spanish,Filipino": 0.17333333333333334, + "Indonesian,Chinese,Filipino": 0.19333333333333333, + "Spanish,Chinese,Filipino": 0.20666666666666667 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.1, + "English,Vietnamese,Malay,Spanish": 0.09333333333333334, + "English,Vietnamese,Malay,Chinese": 0.07333333333333333, + "English,Vietnamese,Malay,Filipino": 0.08666666666666667, + "English,Vietnamese,Indonesian,Spanish": 0.14666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.08666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.12, + "English,Vietnamese,Spanish,Chinese": 0.09333333333333334, + "English,Vietnamese,Spanish,Filipino": 0.11333333333333333, + "English,Vietnamese,Chinese,Filipino": 0.09333333333333334, + "English,Malay,Indonesian,Spanish": 0.12, + "English,Malay,Indonesian,Chinese": 0.1, + "English,Malay,Indonesian,Filipino": 0.11333333333333333, + "English,Malay,Spanish,Chinese": 0.09333333333333334, + "English,Malay,Spanish,Filipino": 0.09333333333333334, + "English,Malay,Chinese,Filipino": 0.08, + "English,Indonesian,Spanish,Chinese": 0.09333333333333334, + "English,Indonesian,Spanish,Filipino": 0.11333333333333333, + "English,Indonesian,Chinese,Filipino": 0.1, + "English,Spanish,Chinese,Filipino": 0.10666666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.10666666666666667, + "Vietnamese,Malay,Indonesian,Chinese": 0.08, + "Vietnamese,Malay,Indonesian,Filipino": 0.12, + "Vietnamese,Malay,Spanish,Chinese": 0.08666666666666667, + "Vietnamese,Malay,Spanish,Filipino": 0.08666666666666667, + "Vietnamese,Malay,Chinese,Filipino": 0.08, + "Vietnamese,Indonesian,Spanish,Chinese": 0.08666666666666667, + "Vietnamese,Indonesian,Spanish,Filipino": 0.11333333333333333, + "Vietnamese,Indonesian,Chinese,Filipino": 0.10666666666666667, + "Vietnamese,Spanish,Chinese,Filipino": 0.1, + "Malay,Indonesian,Spanish,Chinese": 0.1, + "Malay,Indonesian,Spanish,Filipino": 0.11333333333333333, + "Malay,Indonesian,Chinese,Filipino": 0.10666666666666667, + "Malay,Spanish,Chinese,Filipino": 0.08666666666666667, + "Indonesian,Spanish,Chinese,Filipino": 0.08666666666666667 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.07333333333333333, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.06, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.06666666666666667, + "English,Vietnamese,Malay,Spanish,Chinese": 0.04666666666666667, + "English,Vietnamese,Malay,Spanish,Filipino": 0.05333333333333334, + "English,Vietnamese,Malay,Chinese,Filipino": 0.04, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.04666666666666667, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.08, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.05333333333333334, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.04666666666666667, + "English,Malay,Indonesian,Spanish,Chinese": 0.05333333333333334, + "English,Malay,Indonesian,Spanish,Filipino": 0.07333333333333333, + "English,Malay,Indonesian,Chinese,Filipino": 0.06666666666666667, + "English,Malay,Spanish,Chinese,Filipino": 0.05333333333333334, + "English,Indonesian,Spanish,Chinese,Filipino": 0.04666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.04666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.06666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.06, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.04666666666666667, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.04666666666666667, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.06 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.04, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.04666666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.04, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.02666666666666667, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.02666666666666667, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.04, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.03333333333333333 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.02666666666666667 + } + }, + "AC3_2": 0.41410837433425773, + "AC3_3": 0.26354817271457187, + "AC3_4": 0.16100031256686065, + "AC3_5": 0.09969347207002607, + "AC3_6": 0.06667457022851044, + "AC3_7": 0.05016949151426314 + } }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.2912621359223301 + } }, "cn_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.3523809523809524 + } }, "us_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.4205607476635514 + } }, "ph_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.35, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.4, + "history": 0.2, + "literature": 0.4, + "politics": 0.4, + "culture": 0.4, + "film": 0.6, + "law": 0.2, + "geography": 0.4 + } + } }, "sing2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.09107462741491763 + } }, "indommlu": { "prompt_1": -1 }, "flores_ind2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.1133400440084646 + } }, "flores_vie2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.11249108964772524 + } }, "flores_zho2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.10632436627330219 + } }, "flores_zsm2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.11775922454062634 + } }, "mmlu": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.44457409568261375 + } }, "mmlu_full": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.4449052556310332, + "category_acc": { + "high_school_european_history": 0.5914634146341463, + "business_ethics": 0.40404040404040403, + "clinical_knowledge": 0.4696969696969697, + "medical_genetics": 0.43434343434343436, + "high_school_us_history": 0.6108374384236454, + "high_school_physics": 0.31333333333333335, + "high_school_world_history": 0.5805084745762712, + "virology": 0.37575757575757573, + "high_school_microeconomics": 0.4810126582278481, + "econometrics": 0.40707964601769914, + "college_computer_science": 0.30303030303030304, + "high_school_biology": 0.511326860841424, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.398576512455516, + "philosophy": 0.4806451612903226, + "professional_medicine": 0.5608856088560885, + "nutrition": 0.4819672131147541, + "global_facts": 0.23232323232323232, + "machine_learning": 0.35135135135135137, + "security_studies": 0.5573770491803278, + "public_relations": 0.43119266055045874, + "professional_psychology": 0.48936170212765956, + "prehistory": 0.43653250773993807, + "anatomy": 0.3880597014925373, + "human_sexuality": 0.5076923076923077, + "college_medicine": 0.45930232558139533, + "high_school_government_and_politics": 0.5260416666666666, + "college_chemistry": 0.3333333333333333, + "logical_fallacies": 0.5246913580246914, + "high_school_geography": 0.48223350253807107, + "elementary_mathematics": 0.33421750663129973, + "human_aging": 0.4774774774774775, + "college_mathematics": 0.23232323232323232, + "high_school_psychology": 0.5202205882352942, + "formal_logic": 0.328, + "high_school_statistics": 0.3674418604651163, + "international_law": 0.525, + "high_school_mathematics": 0.29739776951672864, + "high_school_computer_science": 0.5151515151515151, + "conceptual_physics": 0.3888888888888889, + "miscellaneous": 0.5127877237851662, + "high_school_chemistry": 0.3910891089108911, + "marketing": 0.5879828326180258, + "professional_law": 0.40769732550554466, + "management": 0.43137254901960786, + "college_physics": 0.3069306930693069, + "jurisprudence": 0.5327102803738317, + "world_religions": 0.5176470588235295, + "sociology": 0.58, + "us_foreign_policy": 0.5959595959595959, + "high_school_macroeconomics": 0.4832904884318766, + "computer_security": 0.45454545454545453, + "moral_scenarios": 0.24720357941834453, + "moral_disputes": 0.5304347826086957, + "electrical_engineering": 0.4513888888888889, + "astronomy": 0.4370860927152318, + "college_biology": 0.5034965034965035 + } + } }, "c_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.3655274888558692 + } }, "c_eval_full": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.37546699875467, + "category_acc": { + "computer_network": 0.5416666666666666, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.40476190476190477, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.3448275862068966, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.4583333333333333, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.4230769230769231, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.35714285714285715, + "college_economics": 0.3333333333333333, + "business_administration": 0.3157894736842105, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.5172413793103449, + "education_science": 0.3235294117647059, + "teacher_qualification": 0.46938775510204084, + "high_school_politics": 0.5, + "high_school_geography": 0.5416666666666666, + "middle_school_politics": 0.5384615384615384, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.4583333333333333, + "logic": 0.37037037037037035, + "law": 0.3793103448275862, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.5882352941176471, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.25, + "high_school_history": 0.68, + "middle_school_history": 0.4444444444444444, + "civil_servant": 0.23076923076923078, + "sports_science": 0.25, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.43137254901960786, + "accountant": 0.35185185185185186, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.4722222222222222, + "tax_accountant": 0.18518518518518517, + "physician": 0.35185185185185186 + } + } }, "cmmlu": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.4229390681003584 + } }, "cmmlu_full": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.39777240545674325, + "category_acc": { + "agronomy": 0.33727810650887574, + "anatomy": 0.28378378378378377, + "ancient_chinese": 0.2682926829268293, + "arts": 0.38125, + "astronomy": 0.296969696969697, + "business_ethics": 0.45454545454545453, + "chinese_civil_service_exam": 0.35, + "chinese_driving_rule": 0.5114503816793893, + "chinese_food_culture": 0.3602941176470588, + "chinese_foreign_policy": 0.4485981308411215, + "chinese_history": 0.4086687306501548, + "chinese_literature": 0.3480392156862745, + "chinese_teacher_qualification": 0.44692737430167595, + "clinical_knowledge": 0.350210970464135, + "college_actuarial_science": 0.33962264150943394, + "college_education": 0.4953271028037383, + "college_engineering_hydrology": 0.4056603773584906, + "college_law": 0.25, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.42452830188679247, + "college_medicine": 0.326007326007326, + "computer_science": 0.46078431372549017, + "computer_security": 0.52046783625731, + "conceptual_physics": 0.35374149659863946, + "construction_project_management": 0.381294964028777, + "economics": 0.4591194968553459, + "education": 0.4049079754601227, + "electrical_engineering": 0.42441860465116277, + "elementary_chinese": 0.30158730158730157, + "elementary_commonsense": 0.4494949494949495, + "elementary_information_and_technology": 0.5966386554621849, + "elementary_mathematics": 0.3, + "ethnology": 0.362962962962963, + "food_science": 0.43356643356643354, + "genetics": 0.3693181818181818, + "global_facts": 0.4966442953020134, + "high_school_biology": 0.31952662721893493, + "high_school_chemistry": 0.32575757575757575, + "high_school_geography": 0.3983050847457627, + "high_school_mathematics": 0.3170731707317073, + "high_school_physics": 0.3, + "high_school_politics": 0.27972027972027974, + "human_sexuality": 0.5079365079365079, + "international_law": 0.34054054054054056, + "journalism": 0.436046511627907, + "jurisprudence": 0.39902676399026765, + "legal_and_moral_basis": 0.602803738317757, + "logical": 0.43089430894308944, + "machine_learning": 0.4262295081967213, + "management": 0.4666666666666667, + "marketing": 0.5444444444444444, + "marxist_theory": 0.4497354497354497, + "modern_chinese": 0.3275862068965517, + "nutrition": 0.42758620689655175, + "philosophy": 0.38095238095238093, + "professional_accounting": 0.44, + "professional_law": 0.33649289099526064, + "professional_medicine": 0.29521276595744683, + "professional_psychology": 0.44396551724137934, + "public_relations": 0.4942528735632184, + "security_study": 0.43703703703703706, + "sociology": 0.4469026548672566, + "sports_science": 0.4121212121212121, + "traditional_chinese_medicine": 0.3027027027027027, + "virology": 0.378698224852071, + "world_history": 0.5093167701863354, + "world_religions": 0.36875 + } + } }, "zbench": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.36363636363636365 + } }, "ind_emotion": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.4727272727272727 + } }, "ocnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.3786440677966102 + } }, "c3": { "prompt_1": -1 @@ -10784,34 +92698,60 @@ "prompt_1": -1 }, "samsum": { - "prompt_1": -1 + "prompt_1": { + "rouge1": 0.20815276001544852, + "rouge2": 0.08549294523834353, + "rougeL": 0.1630905244870767, + "avg_rouge": 0.15224540991362293 + } }, "dialogsum": { - "prompt_1": -1 + "prompt_1": { + "rouge1": 0.1844099561435674, + "rouge2": 0.06439198191982298, + "rougeL": 0.1440043840764991, + "avg_rouge": 0.1309354407132965 + } }, "sst2": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.5080275229357798 + } }, "cola": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.3700862895493768 + } }, "qqp": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.595 + } }, "mnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.4225 + } }, "qnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.4925 + } }, "wnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.5352112676056338 + } }, "rte": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.5342960288808665 + } }, "mrpc": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.4632352941176471 + } } } }, @@ -10820,53 +92760,1733 @@ "model_link": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.39047619047619053, + "language_acc": { + "English": 0.6333333333333333, + "Vietnamese": 0.32666666666666666, + "Malay": 0.2733333333333333, + "Indonesian": 0.34, + "Spanish": 0.46, + "Chinese": 0.36666666666666664, + "Filipino": 0.3333333333333333 + }, + "consistency_score_2": 0.38380952380952377, + "consistency_score_3": 0.19580952380952377, + "consistency_score_4": 0.12495238095238093, + "consistency_score_5": 0.09365079365079365, + "consistency_score_6": 0.07714285714285715, + "consistency_score_7": 0.06666666666666667, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.38, + "English,Malay": 0.34, + "English,Indonesian": 0.3466666666666667, + "English,Spanish": 0.52, + "English,Chinese": 0.42, + "English,Filipino": 0.42, + "Vietnamese,Malay": 0.35333333333333333, + "Vietnamese,Indonesian": 0.36, + "Vietnamese,Spanish": 0.4, + "Vietnamese,Chinese": 0.3466666666666667, + "Vietnamese,Filipino": 0.34, + "Malay,Indonesian": 0.34, + "Malay,Spanish": 0.38, + "Malay,Chinese": 0.36666666666666664, + "Malay,Filipino": 0.38, + "Indonesian,Spanish": 0.3933333333333333, + "Indonesian,Chinese": 0.38, + "Indonesian,Filipino": 0.38666666666666666, + "Spanish,Chinese": 0.46, + "Spanish,Filipino": 0.3933333333333333, + "Chinese,Filipino": 0.35333333333333333 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.15333333333333332, + "English,Vietnamese,Indonesian": 0.17333333333333334, + "English,Vietnamese,Spanish": 0.21333333333333335, + "English,Vietnamese,Chinese": 0.19333333333333333, + "English,Vietnamese,Filipino": 0.21333333333333335, + "English,Malay,Indonesian": 0.14666666666666667, + "English,Malay,Spanish": 0.22, + "English,Malay,Chinese": 0.16, + "English,Malay,Filipino": 0.18666666666666668, + "English,Indonesian,Spanish": 0.22, + "English,Indonesian,Chinese": 0.19333333333333333, + "English,Indonesian,Filipino": 0.20666666666666667, + "English,Spanish,Chinese": 0.28, + "English,Spanish,Filipino": 0.2866666666666667, + "English,Chinese,Filipino": 0.2, + "Vietnamese,Malay,Indonesian": 0.15333333333333332, + "Vietnamese,Malay,Spanish": 0.16666666666666666, + "Vietnamese,Malay,Chinese": 0.16666666666666666, + "Vietnamese,Malay,Filipino": 0.17333333333333334, + "Vietnamese,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,Indonesian,Chinese": 0.19333333333333333, + "Vietnamese,Indonesian,Filipino": 0.18666666666666668, + "Vietnamese,Spanish,Chinese": 0.23333333333333334, + "Vietnamese,Spanish,Filipino": 0.22, + "Vietnamese,Chinese,Filipino": 0.17333333333333334, + "Malay,Indonesian,Spanish": 0.18, + "Malay,Indonesian,Chinese": 0.17333333333333334, + "Malay,Indonesian,Filipino": 0.18666666666666668, + "Malay,Spanish,Chinese": 0.22, + "Malay,Spanish,Filipino": 0.18, + "Malay,Chinese,Filipino": 0.16666666666666666, + "Indonesian,Spanish,Chinese": 0.24, + "Indonesian,Spanish,Filipino": 0.21333333333333335, + "Indonesian,Chinese,Filipino": 0.2, + "Spanish,Chinese,Filipino": 0.19333333333333333 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.08666666666666667, + "English,Vietnamese,Malay,Spanish": 0.1, + "English,Vietnamese,Malay,Chinese": 0.09333333333333334, + "English,Vietnamese,Malay,Filipino": 0.11333333333333333, + "English,Vietnamese,Indonesian,Spanish": 0.10666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.13333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.15333333333333332, + "English,Vietnamese,Spanish,Filipino": 0.14666666666666667, + "English,Vietnamese,Chinese,Filipino": 0.13333333333333333, + "English,Malay,Indonesian,Spanish": 0.12, + "English,Malay,Indonesian,Chinese": 0.08666666666666667, + "English,Malay,Indonesian,Filipino": 0.12666666666666668, + "English,Malay,Spanish,Chinese": 0.13333333333333333, + "English,Malay,Spanish,Filipino": 0.12666666666666668, + "English,Malay,Chinese,Filipino": 0.1, + "English,Indonesian,Spanish,Chinese": 0.14, + "English,Indonesian,Spanish,Filipino": 0.16, + "English,Indonesian,Chinese,Filipino": 0.13333333333333333, + "English,Spanish,Chinese,Filipino": 0.16, + "Vietnamese,Malay,Indonesian,Spanish": 0.11333333333333333, + "Vietnamese,Malay,Indonesian,Chinese": 0.10666666666666667, + "Vietnamese,Malay,Indonesian,Filipino": 0.11333333333333333, + "Vietnamese,Malay,Spanish,Chinese": 0.11333333333333333, + "Vietnamese,Malay,Spanish,Filipino": 0.12666666666666668, + "Vietnamese,Malay,Chinese,Filipino": 0.1, + "Vietnamese,Indonesian,Spanish,Chinese": 0.16666666666666666, + "Vietnamese,Indonesian,Spanish,Filipino": 0.13333333333333333, + "Vietnamese,Indonesian,Chinese,Filipino": 0.14666666666666667, + "Vietnamese,Spanish,Chinese,Filipino": 0.14666666666666667, + "Malay,Indonesian,Spanish,Chinese": 0.13333333333333333, + "Malay,Indonesian,Spanish,Filipino": 0.12666666666666668, + "Malay,Indonesian,Chinese,Filipino": 0.12, + "Malay,Spanish,Chinese,Filipino": 0.11333333333333333, + "Indonesian,Spanish,Chinese,Filipino": 0.14666666666666667 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.07333333333333333, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.06666666666666667, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.08666666666666667, + "English,Vietnamese,Malay,Spanish,Chinese": 0.08, + "English,Vietnamese,Malay,Spanish,Filipino": 0.08, + "English,Vietnamese,Malay,Chinese,Filipino": 0.07333333333333333, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.1, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.1, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.10666666666666667, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.11333333333333333, + "English,Malay,Indonesian,Spanish,Chinese": 0.08, + "English,Malay,Indonesian,Spanish,Filipino": 0.1, + "English,Malay,Indonesian,Chinese,Filipino": 0.08666666666666667, + "English,Malay,Spanish,Chinese,Filipino": 0.08666666666666667, + "English,Indonesian,Spanish,Chinese,Filipino": 0.11333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.1, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.1, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.09333333333333334, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.09333333333333334, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.12666666666666668, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.10666666666666667 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.06666666666666667, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.07333333333333333, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.06666666666666667, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.06666666666666667, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.09333333333333334, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.08, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.09333333333333334 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.06666666666666667 + } + }, + "AC3_2": 0.38711415680585803, + "AC3_3": 0.2608249017160866, + "AC3_4": 0.18932178928505836, + "AC3_5": 0.15106947693991182, + "AC3_6": 0.1288332848138806, + "AC3_7": 0.11388888886397569 + }, + "prompt_2": { + "overall_acc": 0.3742857142857143, + "language_acc": { + "English": 0.62, + "Vietnamese": 0.32666666666666666, + "Malay": 0.32, + "Indonesian": 0.29333333333333333, + "Spanish": 0.43333333333333335, + "Chinese": 0.34, + "Filipino": 0.2866666666666667 + }, + "consistency_score_2": 0.36253968253968255, + "consistency_score_3": 0.168952380952381, + "consistency_score_4": 0.09238095238095241, + "consistency_score_5": 0.055238095238095246, + "consistency_score_6": 0.03428571428571429, + "consistency_score_7": 0.02, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.30666666666666664, + "English,Malay": 0.4266666666666667, + "English,Indonesian": 0.38, + "English,Spanish": 0.5, + "English,Chinese": 0.4666666666666667, + "English,Filipino": 0.41333333333333333, + "Vietnamese,Malay": 0.32, + "Vietnamese,Indonesian": 0.32666666666666666, + "Vietnamese,Spanish": 0.38, + "Vietnamese,Chinese": 0.31333333333333335, + "Vietnamese,Filipino": 0.29333333333333333, + "Malay,Indonesian": 0.31333333333333335, + "Malay,Spanish": 0.3933333333333333, + "Malay,Chinese": 0.35333333333333333, + "Malay,Filipino": 0.3466666666666667, + "Indonesian,Spanish": 0.3, + "Indonesian,Chinese": 0.36, + "Indonesian,Filipino": 0.31333333333333335, + "Spanish,Chinese": 0.38, + "Spanish,Filipino": 0.37333333333333335, + "Chinese,Filipino": 0.35333333333333333 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.15333333333333332, + "English,Vietnamese,Indonesian": 0.14666666666666667, + "English,Vietnamese,Spanish": 0.21333333333333335, + "English,Vietnamese,Chinese": 0.16, + "English,Vietnamese,Filipino": 0.14666666666666667, + "English,Malay,Indonesian": 0.17333333333333334, + "English,Malay,Spanish": 0.26, + "English,Malay,Chinese": 0.22, + "English,Malay,Filipino": 0.22, + "English,Indonesian,Spanish": 0.22, + "English,Indonesian,Chinese": 0.22, + "English,Indonesian,Filipino": 0.17333333333333334, + "English,Spanish,Chinese": 0.26666666666666666, + "English,Spanish,Filipino": 0.22666666666666666, + "English,Chinese,Filipino": 0.22, + "Vietnamese,Malay,Indonesian": 0.12, + "Vietnamese,Malay,Spanish": 0.16666666666666666, + "Vietnamese,Malay,Chinese": 0.14, + "Vietnamese,Malay,Filipino": 0.10666666666666667, + "Vietnamese,Indonesian,Spanish": 0.13333333333333333, + "Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "Vietnamese,Indonesian,Filipino": 0.11333333333333333, + "Vietnamese,Spanish,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Filipino": 0.16, + "Vietnamese,Chinese,Filipino": 0.11333333333333333, + "Malay,Indonesian,Spanish": 0.12666666666666668, + "Malay,Indonesian,Chinese": 0.13333333333333333, + "Malay,Indonesian,Filipino": 0.13333333333333333, + "Malay,Spanish,Chinese": 0.19333333333333333, + "Malay,Spanish,Filipino": 0.16666666666666666, + "Malay,Chinese,Filipino": 0.15333333333333332, + "Indonesian,Spanish,Chinese": 0.17333333333333334, + "Indonesian,Spanish,Filipino": 0.12, + "Indonesian,Chinese,Filipino": 0.14, + "Spanish,Chinese,Filipino": 0.18666666666666668 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.08, + "English,Vietnamese,Malay,Spanish": 0.10666666666666667, + "English,Vietnamese,Malay,Chinese": 0.08666666666666667, + "English,Vietnamese,Malay,Filipino": 0.06666666666666667, + "English,Vietnamese,Indonesian,Spanish": 0.11333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.1, + "English,Vietnamese,Indonesian,Filipino": 0.08666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "English,Vietnamese,Spanish,Filipino": 0.11333333333333333, + "English,Vietnamese,Chinese,Filipino": 0.08, + "English,Malay,Indonesian,Spanish": 0.1, + "English,Malay,Indonesian,Chinese": 0.09333333333333334, + "English,Malay,Indonesian,Filipino": 0.10666666666666667, + "English,Malay,Spanish,Chinese": 0.14666666666666667, + "English,Malay,Spanish,Filipino": 0.12666666666666668, + "English,Malay,Chinese,Filipino": 0.12666666666666668, + "English,Indonesian,Spanish,Chinese": 0.14, + "English,Indonesian,Spanish,Filipino": 0.09333333333333334, + "English,Indonesian,Chinese,Filipino": 0.11333333333333333, + "English,Spanish,Chinese,Filipino": 0.14, + "Vietnamese,Malay,Indonesian,Spanish": 0.07333333333333333, + "Vietnamese,Malay,Indonesian,Chinese": 0.06666666666666667, + "Vietnamese,Malay,Indonesian,Filipino": 0.04666666666666667, + "Vietnamese,Malay,Spanish,Chinese": 0.09333333333333334, + "Vietnamese,Malay,Spanish,Filipino": 0.08, + "Vietnamese,Malay,Chinese,Filipino": 0.04666666666666667, + "Vietnamese,Indonesian,Spanish,Chinese": 0.11333333333333333, + "Vietnamese,Indonesian,Spanish,Filipino": 0.06, + "Vietnamese,Indonesian,Chinese,Filipino": 0.06, + "Vietnamese,Spanish,Chinese,Filipino": 0.07333333333333333, + "Malay,Indonesian,Spanish,Chinese": 0.07333333333333333, + "Malay,Indonesian,Spanish,Filipino": 0.06666666666666667, + "Malay,Indonesian,Chinese,Filipino": 0.06666666666666667, + "Malay,Spanish,Chinese,Filipino": 0.09333333333333334, + "Indonesian,Spanish,Chinese,Filipino": 0.07333333333333333 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.06, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.05333333333333334, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.04666666666666667, + "English,Vietnamese,Malay,Spanish,Chinese": 0.06666666666666667, + "English,Vietnamese,Malay,Spanish,Filipino": 0.05333333333333334, + "English,Vietnamese,Malay,Chinese,Filipino": 0.04, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.09333333333333334, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.06, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.05333333333333334, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.06666666666666667, + "English,Malay,Indonesian,Spanish,Chinese": 0.06, + "English,Malay,Indonesian,Spanish,Filipino": 0.06, + "English,Malay,Indonesian,Chinese,Filipino": 0.06, + "English,Malay,Spanish,Chinese,Filipino": 0.08, + "English,Indonesian,Spanish,Chinese,Filipino": 0.06666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.06, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.03333333333333333, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.02666666666666667, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.04, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.04666666666666667, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.03333333333333333 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.04666666666666667, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.03333333333333333, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.02666666666666667, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.03333333333333333, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.04666666666666667, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.03333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.02 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.02 + } + }, + "AC3_2": 0.36831907424065097, + "AC3_3": 0.23281306347146927, + "AC3_4": 0.14818658888952857, + "AC3_5": 0.09626860941692829, + "AC3_6": 0.06281718280180798, + "AC3_7": 0.03797101448312329 + }, + "prompt_3": { + "overall_acc": 0.41238095238095235, + "language_acc": { + "English": 0.56, + "Vietnamese": 0.35333333333333333, + "Malay": 0.31333333333333335, + "Indonesian": 0.3933333333333333, + "Spanish": 0.5066666666666667, + "Chinese": 0.3933333333333333, + "Filipino": 0.36666666666666664 + }, + "consistency_score_2": 0.39047619047619053, + "consistency_score_3": 0.19542857142857142, + "consistency_score_4": 0.11238095238095235, + "consistency_score_5": 0.0692063492063492, + "consistency_score_6": 0.04380952380952381, + "consistency_score_7": 0.02666666666666667, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.36, + "English,Malay": 0.3933333333333333, + "English,Indonesian": 0.4666666666666667, + "English,Spanish": 0.6133333333333333, + "English,Chinese": 0.46, + "English,Filipino": 0.37333333333333335, + "Vietnamese,Malay": 0.31333333333333335, + "Vietnamese,Indonesian": 0.36666666666666664, + "Vietnamese,Spanish": 0.32666666666666666, + "Vietnamese,Chinese": 0.37333333333333335, + "Vietnamese,Filipino": 0.32, + "Malay,Indonesian": 0.43333333333333335, + "Malay,Spanish": 0.35333333333333333, + "Malay,Chinese": 0.32666666666666666, + "Malay,Filipino": 0.34, + "Indonesian,Spanish": 0.43333333333333335, + "Indonesian,Chinese": 0.4266666666666667, + "Indonesian,Filipino": 0.41333333333333333, + "Spanish,Chinese": 0.4533333333333333, + "Spanish,Filipino": 0.36, + "Chinese,Filipino": 0.29333333333333333 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.16, + "English,Vietnamese,Indonesian": 0.20666666666666667, + "English,Vietnamese,Spanish": 0.22666666666666666, + "English,Vietnamese,Chinese": 0.18, + "English,Vietnamese,Filipino": 0.18, + "English,Malay,Indonesian": 0.22666666666666666, + "English,Malay,Spanish": 0.25333333333333335, + "English,Malay,Chinese": 0.17333333333333334, + "English,Malay,Filipino": 0.2, + "English,Indonesian,Spanish": 0.32666666666666666, + "English,Indonesian,Chinese": 0.26666666666666666, + "English,Indonesian,Filipino": 0.24, + "English,Spanish,Chinese": 0.31333333333333335, + "English,Spanish,Filipino": 0.2733333333333333, + "English,Chinese,Filipino": 0.2, + "Vietnamese,Malay,Indonesian": 0.18, + "Vietnamese,Malay,Spanish": 0.12, + "Vietnamese,Malay,Chinese": 0.11333333333333333, + "Vietnamese,Malay,Filipino": 0.11333333333333333, + "Vietnamese,Indonesian,Spanish": 0.18, + "Vietnamese,Indonesian,Chinese": 0.18, + "Vietnamese,Indonesian,Filipino": 0.18, + "Vietnamese,Spanish,Chinese": 0.16666666666666666, + "Vietnamese,Spanish,Filipino": 0.16, + "Vietnamese,Chinese,Filipino": 0.14666666666666667, + "Malay,Indonesian,Spanish": 0.20666666666666667, + "Malay,Indonesian,Chinese": 0.20666666666666667, + "Malay,Indonesian,Filipino": 0.20666666666666667, + "Malay,Spanish,Chinese": 0.16, + "Malay,Spanish,Filipino": 0.16, + "Malay,Chinese,Filipino": 0.11333333333333333, + "Indonesian,Spanish,Chinese": 0.25333333333333335, + "Indonesian,Spanish,Filipino": 0.21333333333333335, + "Indonesian,Chinese,Filipino": 0.18, + "Spanish,Chinese,Filipino": 0.17333333333333334 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.11333333333333333, + "English,Vietnamese,Malay,Spanish": 0.09333333333333334, + "English,Vietnamese,Malay,Chinese": 0.07333333333333333, + "English,Vietnamese,Malay,Filipino": 0.08666666666666667, + "English,Vietnamese,Indonesian,Spanish": 0.12666666666666668, + "English,Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.12666666666666668, + "English,Vietnamese,Spanish,Chinese": 0.12, + "English,Vietnamese,Spanish,Filipino": 0.13333333333333333, + "English,Vietnamese,Chinese,Filipino": 0.11333333333333333, + "English,Malay,Indonesian,Spanish": 0.16, + "English,Malay,Indonesian,Chinese": 0.12, + "English,Malay,Indonesian,Filipino": 0.14666666666666667, + "English,Malay,Spanish,Chinese": 0.12, + "English,Malay,Spanish,Filipino": 0.14666666666666667, + "English,Malay,Chinese,Filipino": 0.09333333333333334, + "English,Indonesian,Spanish,Chinese": 0.19333333333333333, + "English,Indonesian,Spanish,Filipino": 0.18666666666666668, + "English,Indonesian,Chinese,Filipino": 0.15333333333333332, + "English,Spanish,Chinese,Filipino": 0.14666666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.08, + "Vietnamese,Malay,Indonesian,Chinese": 0.09333333333333334, + "Vietnamese,Malay,Indonesian,Filipino": 0.08, + "Vietnamese,Malay,Spanish,Chinese": 0.05333333333333334, + "Vietnamese,Malay,Spanish,Filipino": 0.06, + "Vietnamese,Malay,Chinese,Filipino": 0.05333333333333334, + "Vietnamese,Indonesian,Spanish,Chinese": 0.10666666666666667, + "Vietnamese,Indonesian,Spanish,Filipino": 0.10666666666666667, + "Vietnamese,Indonesian,Chinese,Filipino": 0.1, + "Vietnamese,Spanish,Chinese,Filipino": 0.10666666666666667, + "Malay,Indonesian,Spanish,Chinese": 0.11333333333333333, + "Malay,Indonesian,Spanish,Filipino": 0.12, + "Malay,Indonesian,Chinese,Filipino": 0.1, + "Malay,Spanish,Chinese,Filipino": 0.06666666666666667, + "Indonesian,Spanish,Chinese,Filipino": 0.12666666666666668 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.06666666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.06, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.06666666666666667, + "English,Vietnamese,Malay,Spanish,Chinese": 0.04666666666666667, + "English,Vietnamese,Malay,Spanish,Filipino": 0.06, + "English,Vietnamese,Malay,Chinese,Filipino": 0.04666666666666667, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.07333333333333333, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.09333333333333334, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.08, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.08666666666666667, + "English,Malay,Indonesian,Spanish,Chinese": 0.08666666666666667, + "English,Malay,Indonesian,Spanish,Filipino": 0.11333333333333333, + "English,Malay,Indonesian,Chinese,Filipino": 0.08666666666666667, + "English,Malay,Spanish,Chinese,Filipino": 0.06666666666666667, + "English,Indonesian,Spanish,Chinese,Filipino": 0.11333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.04666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.04666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.04666666666666667, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.03333333333333333, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.07333333333333333, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.06 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.04, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.04666666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.04, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.03333333333333333, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.06, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.06, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.02666666666666667 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.02666666666666667 + } + }, + "AC3_2": 0.40112975196946893, + "AC3_3": 0.2651851188174528, + "AC3_4": 0.17662777630373655, + "AC3_5": 0.11852214581460828, + "AC3_6": 0.0792046922979773, + "AC3_7": 0.05009399854245871 + }, + "prompt_4": { + "overall_acc": 0.4123809523809524, + "language_acc": { + "English": 0.5733333333333334, + "Vietnamese": 0.42, + "Malay": 0.31333333333333335, + "Indonesian": 0.3466666666666667, + "Spanish": 0.49333333333333335, + "Chinese": 0.37333333333333335, + "Filipino": 0.36666666666666664 + }, + "consistency_score_2": 0.3577777777777778, + "consistency_score_3": 0.168, + "consistency_score_4": 0.0931428571428572, + "consistency_score_5": 0.056825396825396834, + "consistency_score_6": 0.03523809523809524, + "consistency_score_7": 0.02, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.36666666666666664, + "English,Malay": 0.36666666666666664, + "English,Indonesian": 0.36666666666666664, + "English,Spanish": 0.4866666666666667, + "English,Chinese": 0.43333333333333335, + "English,Filipino": 0.3933333333333333, + "Vietnamese,Malay": 0.30666666666666664, + "Vietnamese,Indonesian": 0.34, + "Vietnamese,Spanish": 0.36666666666666664, + "Vietnamese,Chinese": 0.38666666666666666, + "Vietnamese,Filipino": 0.3, + "Malay,Indonesian": 0.3333333333333333, + "Malay,Spanish": 0.2866666666666667, + "Malay,Chinese": 0.32, + "Malay,Filipino": 0.3933333333333333, + "Indonesian,Spanish": 0.36, + "Indonesian,Chinese": 0.36666666666666664, + "Indonesian,Filipino": 0.3, + "Spanish,Chinese": 0.36, + "Spanish,Filipino": 0.4066666666666667, + "Chinese,Filipino": 0.2733333333333333 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.15333333333333332, + "English,Vietnamese,Indonesian": 0.14666666666666667, + "English,Vietnamese,Spanish": 0.22, + "English,Vietnamese,Chinese": 0.20666666666666667, + "English,Vietnamese,Filipino": 0.18, + "English,Malay,Indonesian": 0.15333333333333332, + "English,Malay,Spanish": 0.20666666666666667, + "English,Malay,Chinese": 0.16666666666666666, + "English,Malay,Filipino": 0.22666666666666666, + "English,Indonesian,Spanish": 0.20666666666666667, + "English,Indonesian,Chinese": 0.20666666666666667, + "English,Indonesian,Filipino": 0.16, + "English,Spanish,Chinese": 0.22, + "English,Spanish,Filipino": 0.25333333333333335, + "English,Chinese,Filipino": 0.18666666666666668, + "Vietnamese,Malay,Indonesian": 0.15333333333333332, + "Vietnamese,Malay,Spanish": 0.13333333333333333, + "Vietnamese,Malay,Chinese": 0.14666666666666667, + "Vietnamese,Malay,Filipino": 0.14666666666666667, + "Vietnamese,Indonesian,Spanish": 0.16666666666666666, + "Vietnamese,Indonesian,Chinese": 0.18666666666666668, + "Vietnamese,Indonesian,Filipino": 0.13333333333333333, + "Vietnamese,Spanish,Chinese": 0.17333333333333334, + "Vietnamese,Spanish,Filipino": 0.16666666666666666, + "Vietnamese,Chinese,Filipino": 0.12666666666666668, + "Malay,Indonesian,Spanish": 0.13333333333333333, + "Malay,Indonesian,Chinese": 0.14, + "Malay,Indonesian,Filipino": 0.14666666666666667, + "Malay,Spanish,Chinese": 0.11333333333333333, + "Malay,Spanish,Filipino": 0.18, + "Malay,Chinese,Filipino": 0.13333333333333333, + "Indonesian,Spanish,Chinese": 0.17333333333333334, + "Indonesian,Spanish,Filipino": 0.14666666666666667, + "Indonesian,Chinese,Filipino": 0.12666666666666668, + "Spanish,Chinese,Filipino": 0.16 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.08, + "English,Vietnamese,Malay,Spanish": 0.09333333333333334, + "English,Vietnamese,Malay,Chinese": 0.08666666666666667, + "English,Vietnamese,Malay,Filipino": 0.10666666666666667, + "English,Vietnamese,Indonesian,Spanish": 0.09333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.12, + "English,Vietnamese,Indonesian,Filipino": 0.09333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.13333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.12666666666666668, + "English,Vietnamese,Chinese,Filipino": 0.10666666666666667, + "English,Malay,Indonesian,Spanish": 0.1, + "English,Malay,Indonesian,Chinese": 0.09333333333333334, + "English,Malay,Indonesian,Filipino": 0.1, + "English,Malay,Spanish,Chinese": 0.08666666666666667, + "English,Malay,Spanish,Filipino": 0.14666666666666667, + "English,Malay,Chinese,Filipino": 0.11333333333333333, + "English,Indonesian,Spanish,Chinese": 0.1, + "English,Indonesian,Spanish,Filipino": 0.1, + "English,Indonesian,Chinese,Filipino": 0.10666666666666667, + "English,Spanish,Chinese,Filipino": 0.12, + "Vietnamese,Malay,Indonesian,Spanish": 0.07333333333333333, + "Vietnamese,Malay,Indonesian,Chinese": 0.08666666666666667, + "Vietnamese,Malay,Indonesian,Filipino": 0.08, + "Vietnamese,Malay,Spanish,Chinese": 0.06666666666666667, + "Vietnamese,Malay,Spanish,Filipino": 0.08, + "Vietnamese,Malay,Chinese,Filipino": 0.06, + "Vietnamese,Indonesian,Spanish,Chinese": 0.12, + "Vietnamese,Indonesian,Spanish,Filipino": 0.08, + "Vietnamese,Indonesian,Chinese,Filipino": 0.08, + "Vietnamese,Spanish,Chinese,Filipino": 0.08666666666666667, + "Malay,Indonesian,Spanish,Chinese": 0.06666666666666667, + "Malay,Indonesian,Spanish,Filipino": 0.06666666666666667, + "Malay,Indonesian,Chinese,Filipino": 0.06666666666666667, + "Malay,Spanish,Chinese,Filipino": 0.06666666666666667, + "Indonesian,Spanish,Chinese,Filipino": 0.07333333333333333 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.04666666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.06666666666666667, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.05333333333333334, + "English,Vietnamese,Malay,Spanish,Chinese": 0.05333333333333334, + "English,Vietnamese,Malay,Spanish,Filipino": 0.06666666666666667, + "English,Vietnamese,Malay,Chinese,Filipino": 0.05333333333333334, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.08, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.06, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.07333333333333333, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.08, + "English,Malay,Indonesian,Spanish,Chinese": 0.04666666666666667, + "English,Malay,Indonesian,Spanish,Filipino": 0.06, + "English,Malay,Indonesian,Chinese,Filipino": 0.06, + "English,Malay,Spanish,Chinese,Filipino": 0.06, + "English,Indonesian,Spanish,Chinese,Filipino": 0.06666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.05333333333333334, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.03333333333333333, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.04666666666666667, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.04, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.06, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.03333333333333333 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.04, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.02666666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.04, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.03333333333333333, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.05333333333333334, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.02666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.02666666666666667 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.02 + } + }, + "AC3_2": 0.38314372075266956, + "AC3_3": 0.23873974396936712, + "AC3_4": 0.1519625363032371, + "AC3_5": 0.09988659060955121, + "AC3_6": 0.06492806482845326, + "AC3_7": 0.03814977972685964 + }, + "prompt_5": { + "overall_acc": 0.3780952380952381, + "language_acc": { + "English": 0.5533333333333333, + "Vietnamese": 0.35333333333333333, + "Malay": 0.3, + "Indonesian": 0.3466666666666667, + "Spanish": 0.4066666666666667, + "Chinese": 0.30666666666666664, + "Filipino": 0.38 + }, + "consistency_score_2": 0.3853968253968254, + "consistency_score_3": 0.19733333333333333, + "consistency_score_4": 0.11980952380952381, + "consistency_score_5": 0.07936507936507937, + "consistency_score_6": 0.05523809523809524, + "consistency_score_7": 0.04, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.44, + "English,Malay": 0.37333333333333335, + "English,Indonesian": 0.44666666666666666, + "English,Spanish": 0.5333333333333333, + "English,Chinese": 0.36666666666666664, + "English,Filipino": 0.4533333333333333, + "Vietnamese,Malay": 0.32, + "Vietnamese,Indonesian": 0.4066666666666667, + "Vietnamese,Spanish": 0.35333333333333333, + "Vietnamese,Chinese": 0.43333333333333335, + "Vietnamese,Filipino": 0.36, + "Malay,Indonesian": 0.37333333333333335, + "Malay,Spanish": 0.30666666666666664, + "Malay,Chinese": 0.35333333333333333, + "Malay,Filipino": 0.3, + "Indonesian,Spanish": 0.4, + "Indonesian,Chinese": 0.4266666666666667, + "Indonesian,Filipino": 0.36666666666666664, + "Spanish,Chinese": 0.3933333333333333, + "Spanish,Filipino": 0.3466666666666667, + "Chinese,Filipino": 0.34 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.17333333333333334, + "English,Vietnamese,Indonesian": 0.26666666666666666, + "English,Vietnamese,Spanish": 0.24, + "English,Vietnamese,Chinese": 0.22, + "English,Vietnamese,Filipino": 0.22666666666666666, + "English,Malay,Indonesian": 0.2, + "English,Malay,Spanish": 0.21333333333333335, + "English,Malay,Chinese": 0.15333333333333332, + "English,Malay,Filipino": 0.19333333333333333, + "English,Indonesian,Spanish": 0.28, + "English,Indonesian,Chinese": 0.24, + "English,Indonesian,Filipino": 0.23333333333333334, + "English,Spanish,Chinese": 0.24666666666666667, + "English,Spanish,Filipino": 0.2733333333333333, + "English,Chinese,Filipino": 0.18666666666666668, + "Vietnamese,Malay,Indonesian": 0.18666666666666668, + "Vietnamese,Malay,Spanish": 0.14, + "Vietnamese,Malay,Chinese": 0.18, + "Vietnamese,Malay,Filipino": 0.14, + "Vietnamese,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,Indonesian,Chinese": 0.25333333333333335, + "Vietnamese,Indonesian,Filipino": 0.19333333333333333, + "Vietnamese,Spanish,Chinese": 0.2, + "Vietnamese,Spanish,Filipino": 0.14666666666666667, + "Vietnamese,Chinese,Filipino": 0.19333333333333333, + "Malay,Indonesian,Spanish": 0.18666666666666668, + "Malay,Indonesian,Chinese": 0.2, + "Malay,Indonesian,Filipino": 0.14666666666666667, + "Malay,Spanish,Chinese": 0.15333333333333332, + "Malay,Spanish,Filipino": 0.13333333333333333, + "Malay,Chinese,Filipino": 0.13333333333333333, + "Indonesian,Spanish,Chinese": 0.21333333333333335, + "Indonesian,Spanish,Filipino": 0.19333333333333333, + "Indonesian,Chinese,Filipino": 0.2, + "Spanish,Chinese,Filipino": 0.18 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.12666666666666668, + "English,Vietnamese,Malay,Spanish": 0.12, + "English,Vietnamese,Malay,Chinese": 0.09333333333333334, + "English,Vietnamese,Malay,Filipino": 0.08666666666666667, + "English,Vietnamese,Indonesian,Spanish": 0.17333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "English,Vietnamese,Indonesian,Filipino": 0.14, + "English,Vietnamese,Spanish,Chinese": 0.16, + "English,Vietnamese,Spanish,Filipino": 0.13333333333333333, + "English,Vietnamese,Chinese,Filipino": 0.12, + "English,Malay,Indonesian,Spanish": 0.14666666666666667, + "English,Malay,Indonesian,Chinese": 0.12666666666666668, + "English,Malay,Indonesian,Filipino": 0.11333333333333333, + "English,Malay,Spanish,Chinese": 0.11333333333333333, + "English,Malay,Spanish,Filipino": 0.12, + "English,Malay,Chinese,Filipino": 0.08, + "English,Indonesian,Spanish,Chinese": 0.17333333333333334, + "English,Indonesian,Spanish,Filipino": 0.16666666666666666, + "English,Indonesian,Chinese,Filipino": 0.14, + "English,Spanish,Chinese,Filipino": 0.14666666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.11333333333333333, + "Vietnamese,Malay,Indonesian,Chinese": 0.12666666666666668, + "Vietnamese,Malay,Indonesian,Filipino": 0.08, + "Vietnamese,Malay,Spanish,Chinese": 0.09333333333333334, + "Vietnamese,Malay,Spanish,Filipino": 0.06666666666666667, + "Vietnamese,Malay,Chinese,Filipino": 0.08, + "Vietnamese,Indonesian,Spanish,Chinese": 0.13333333333333333, + "Vietnamese,Indonesian,Spanish,Filipino": 0.1, + "Vietnamese,Indonesian,Chinese,Filipino": 0.12666666666666668, + "Vietnamese,Spanish,Chinese,Filipino": 0.10666666666666667, + "Malay,Indonesian,Spanish,Chinese": 0.12, + "Malay,Indonesian,Spanish,Filipino": 0.08666666666666667, + "Malay,Indonesian,Chinese,Filipino": 0.1, + "Malay,Spanish,Chinese,Filipino": 0.08, + "Indonesian,Spanish,Chinese,Filipino": 0.12666666666666668 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.1, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.08666666666666667, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.06, + "English,Vietnamese,Malay,Spanish,Chinese": 0.08, + "English,Vietnamese,Malay,Spanish,Filipino": 0.06, + "English,Vietnamese,Malay,Chinese,Filipino": 0.04666666666666667, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.12666666666666668, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.1, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.1, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.09333333333333334, + "English,Malay,Indonesian,Spanish,Chinese": 0.1, + "English,Malay,Indonesian,Spanish,Filipino": 0.08, + "English,Malay,Indonesian,Chinese,Filipino": 0.07333333333333333, + "English,Malay,Spanish,Chinese,Filipino": 0.06666666666666667, + "English,Indonesian,Spanish,Chinese,Filipino": 0.11333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.08, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.04666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.06, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.04666666666666667, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.08, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.06666666666666667 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.07333333333333333, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.04666666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.04666666666666667, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.04, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.08, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.06, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.04 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.04 + } + }, + "AC3_2": 0.3817111176611223, + "AC3_3": 0.25932252009175405, + "AC3_4": 0.18196014132680965, + "AC3_5": 0.13119196322435364, + "AC3_6": 0.09639351122840945, + "AC3_7": 0.072346241440555 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3482142857142857, + "language_acc": { + "Vietnamese": 0.2784090909090909, + "Indonesian": 0.3125, + "Malay": 0.3068181818181818, + "English": 0.4602272727272727, + "Spanish": 0.42045454545454547, + "Filipino": 0.25, + "Chinese": 0.4090909090909091 + }, + "consistency_score_2": 0.3571428571428572, + "consistency_score_3": 0.16363636363636364, + "consistency_score_4": 0.08944805194805196, + "consistency_score_5": 0.056818181818181816, + "consistency_score_6": 0.03977272727272727, + "consistency_score_7": 0.028409090909090908, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.3125, + "Vietnamese,Malay": 0.29545454545454547, + "Vietnamese,English": 0.2784090909090909, + "Vietnamese,Spanish": 0.3352272727272727, + "Vietnamese,Filipino": 0.32954545454545453, + "Vietnamese,Chinese": 0.3181818181818182, + "Indonesian,Malay": 0.3806818181818182, + "Indonesian,English": 0.375, + "Indonesian,Spanish": 0.35795454545454547, + "Indonesian,Filipino": 0.36363636363636365, + "Indonesian,Chinese": 0.38636363636363635, + "Malay,English": 0.32386363636363635, + "Malay,Spanish": 0.32386363636363635, + "Malay,Filipino": 0.3693181818181818, + "Malay,Chinese": 0.35795454545454547, + "English,Spanish": 0.5397727272727273, + "English,Filipino": 0.3522727272727273, + "English,Chinese": 0.4659090909090909, + "Spanish,Filipino": 0.29545454545454547, + "Spanish,Chinese": 0.44886363636363635, + "Filipino,Chinese": 0.2897727272727273 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.11931818181818182, + "Vietnamese,Indonesian,English": 0.11363636363636363, + "Vietnamese,Indonesian,Spanish": 0.13636363636363635, + "Vietnamese,Indonesian,Filipino": 0.13636363636363635, + "Vietnamese,Indonesian,Chinese": 0.14772727272727273, + "Vietnamese,Malay,English": 0.10227272727272728, + "Vietnamese,Malay,Spanish": 0.13068181818181818, + "Vietnamese,Malay,Filipino": 0.1534090909090909, + "Vietnamese,Malay,Chinese": 0.11931818181818182, + "Vietnamese,English,Spanish": 0.1875, + "Vietnamese,English,Filipino": 0.125, + "Vietnamese,English,Chinese": 0.17045454545454544, + "Vietnamese,Spanish,Filipino": 0.125, + "Vietnamese,Spanish,Chinese": 0.18181818181818182, + "Vietnamese,Filipino,Chinese": 0.14772727272727273, + "Indonesian,Malay,English": 0.14772727272727273, + "Indonesian,Malay,Spanish": 0.14204545454545456, + "Indonesian,Malay,Filipino": 0.18181818181818182, + "Indonesian,Malay,Chinese": 0.1590909090909091, + "Indonesian,English,Spanish": 0.25, + "Indonesian,English,Filipino": 0.17045454545454544, + "Indonesian,English,Chinese": 0.19318181818181818, + "Indonesian,Spanish,Filipino": 0.1590909090909091, + "Indonesian,Spanish,Chinese": 0.19886363636363635, + "Indonesian,Filipino,Chinese": 0.1590909090909091, + "Malay,English,Spanish": 0.21022727272727273, + "Malay,English,Filipino": 0.14772727272727273, + "Malay,English,Chinese": 0.18181818181818182, + "Malay,Spanish,Filipino": 0.13636363636363635, + "Malay,Spanish,Chinese": 0.1875, + "Malay,Filipino,Chinese": 0.1534090909090909, + "English,Spanish,Filipino": 0.19318181818181818, + "English,Spanish,Chinese": 0.3125, + "English,Filipino,Chinese": 0.18181818181818182, + "Spanish,Filipino,Chinese": 0.16477272727272727 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.045454545454545456, + "Vietnamese,Indonesian,Malay,Spanish": 0.05113636363636364, + "Vietnamese,Indonesian,Malay,Filipino": 0.09090909090909091, + "Vietnamese,Indonesian,Malay,Chinese": 0.056818181818181816, + "Vietnamese,Indonesian,English,Spanish": 0.08522727272727272, + "Vietnamese,Indonesian,English,Filipino": 0.056818181818181816, + "Vietnamese,Indonesian,English,Chinese": 0.07386363636363637, + "Vietnamese,Indonesian,Spanish,Filipino": 0.06818181818181818, + "Vietnamese,Indonesian,Spanish,Chinese": 0.09090909090909091, + "Vietnamese,Indonesian,Filipino,Chinese": 0.07954545454545454, + "Vietnamese,Malay,English,Spanish": 0.09659090909090909, + "Vietnamese,Malay,English,Filipino": 0.07386363636363637, + "Vietnamese,Malay,English,Chinese": 0.07954545454545454, + "Vietnamese,Malay,Spanish,Filipino": 0.08522727272727272, + "Vietnamese,Malay,Spanish,Chinese": 0.09090909090909091, + "Vietnamese,Malay,Filipino,Chinese": 0.09090909090909091, + "Vietnamese,English,Spanish,Filipino": 0.08522727272727272, + "Vietnamese,English,Spanish,Chinese": 0.13068181818181818, + "Vietnamese,English,Filipino,Chinese": 0.09659090909090909, + "Vietnamese,Spanish,Filipino,Chinese": 0.09659090909090909, + "Indonesian,Malay,English,Spanish": 0.10795454545454546, + "Indonesian,Malay,English,Filipino": 0.07954545454545454, + "Indonesian,Malay,English,Chinese": 0.07954545454545454, + "Indonesian,Malay,Spanish,Filipino": 0.07386363636363637, + "Indonesian,Malay,Spanish,Chinese": 0.08522727272727272, + "Indonesian,Malay,Filipino,Chinese": 0.07954545454545454, + "Indonesian,English,Spanish,Filipino": 0.10795454545454546, + "Indonesian,English,Spanish,Chinese": 0.14772727272727273, + "Indonesian,English,Filipino,Chinese": 0.09659090909090909, + "Indonesian,Spanish,Filipino,Chinese": 0.09090909090909091, + "Malay,English,Spanish,Filipino": 0.09659090909090909, + "Malay,English,Spanish,Chinese": 0.13636363636363635, + "Malay,English,Filipino,Chinese": 0.09659090909090909, + "Malay,Spanish,Filipino,Chinese": 0.10227272727272728, + "English,Spanish,Filipino,Chinese": 0.125 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.03977272727272727, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.03977272727272727, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.03977272727272727, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.045454545454545456, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.03977272727272727, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.05113636363636364, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.045454545454545456, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.0625, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.045454545454545456, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.05113636363636364, + "Vietnamese,Malay,English,Spanish,Filipino": 0.06818181818181818, + "Vietnamese,Malay,English,Spanish,Chinese": 0.07386363636363637, + "Vietnamese,Malay,English,Filipino,Chinese": 0.06818181818181818, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.07386363636363637, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.07386363636363637, + "Indonesian,Malay,English,Spanish,Filipino": 0.056818181818181816, + "Indonesian,Malay,English,Spanish,Chinese": 0.06818181818181818, + "Indonesian,Malay,English,Filipino,Chinese": 0.05113636363636364, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.05113636363636364, + "Indonesian,English,Spanish,Filipino,Chinese": 0.06818181818181818, + "Malay,English,Spanish,Filipino,Chinese": 0.07954545454545454 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.03409090909090909, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.03409090909090909, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.03409090909090909, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.03409090909090909, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.03409090909090909, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.0625, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.045454545454545456 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.028409090909090908 + } + }, + "AC3_2": 0.35262206143282904, + "AC3_3": 0.22264509986135445, + "AC3_4": 0.14233388084920817, + "AC3_5": 0.09769539075744274, + "AC3_6": 0.07139121337072092, + "AC3_7": 0.05253232757225866 + }, + "prompt_2": { + "overall_acc": 0.35064935064935066, + "language_acc": { + "Vietnamese": 0.26704545454545453, + "Indonesian": 0.30113636363636365, + "Malay": 0.35795454545454547, + "English": 0.5056818181818182, + "Spanish": 0.4090909090909091, + "Filipino": 0.26704545454545453, + "Chinese": 0.3465909090909091 + }, + "consistency_score_2": 0.36038961038961037, + "consistency_score_3": 0.17159090909090904, + "consistency_score_4": 0.09935064935064934, + "consistency_score_5": 0.06358225108225109, + "consistency_score_6": 0.04220779220779221, + "consistency_score_7": 0.028409090909090908, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.3693181818181818, + "Vietnamese,Malay": 0.3068181818181818, + "Vietnamese,English": 0.36363636363636365, + "Vietnamese,Spanish": 0.3409090909090909, + "Vietnamese,Filipino": 0.2784090909090909, + "Vietnamese,Chinese": 0.3465909090909091, + "Indonesian,Malay": 0.3125, + "Indonesian,English": 0.3806818181818182, + "Indonesian,Spanish": 0.42045454545454547, + "Indonesian,Filipino": 0.3125, + "Indonesian,Chinese": 0.3806818181818182, + "Malay,English": 0.3465909090909091, + "Malay,Spanish": 0.3352272727272727, + "Malay,Filipino": 0.3465909090909091, + "Malay,Chinese": 0.38636363636363635, + "English,Spanish": 0.5, + "English,Filipino": 0.3125, + "English,Chinese": 0.4147727272727273, + "Spanish,Filipino": 0.3352272727272727, + "Spanish,Chinese": 0.4375, + "Filipino,Chinese": 0.3409090909090909 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.14204545454545456, + "Vietnamese,Indonesian,English": 0.19886363636363635, + "Vietnamese,Indonesian,Spanish": 0.20454545454545456, + "Vietnamese,Indonesian,Filipino": 0.11931818181818182, + "Vietnamese,Indonesian,Chinese": 0.19318181818181818, + "Vietnamese,Malay,English": 0.14204545454545456, + "Vietnamese,Malay,Spanish": 0.14772727272727273, + "Vietnamese,Malay,Filipino": 0.10227272727272728, + "Vietnamese,Malay,Chinese": 0.17613636363636365, + "Vietnamese,English,Spanish": 0.22727272727272727, + "Vietnamese,English,Filipino": 0.10795454545454546, + "Vietnamese,English,Chinese": 0.18181818181818182, + "Vietnamese,Spanish,Filipino": 0.11931818181818182, + "Vietnamese,Spanish,Chinese": 0.1875, + "Vietnamese,Filipino,Chinese": 0.13636363636363635, + "Indonesian,Malay,English": 0.17613636363636365, + "Indonesian,Malay,Spanish": 0.18181818181818182, + "Indonesian,Malay,Filipino": 0.10227272727272728, + "Indonesian,Malay,Chinese": 0.16477272727272727, + "Indonesian,English,Spanish": 0.2556818181818182, + "Indonesian,English,Filipino": 0.14204545454545456, + "Indonesian,English,Chinese": 0.2159090909090909, + "Indonesian,Spanish,Filipino": 0.16477272727272727, + "Indonesian,Spanish,Chinese": 0.23295454545454544, + "Indonesian,Filipino,Chinese": 0.14204545454545456, + "Malay,English,Spanish": 0.20454545454545456, + "Malay,English,Filipino": 0.11931818181818182, + "Malay,English,Chinese": 0.18181818181818182, + "Malay,Spanish,Filipino": 0.125, + "Malay,Spanish,Chinese": 0.20454545454545456, + "Malay,Filipino,Chinese": 0.1590909090909091, + "English,Spanish,Filipino": 0.19318181818181818, + "English,Spanish,Chinese": 0.2784090909090909, + "English,Filipino,Chinese": 0.16477272727272727, + "Spanish,Filipino,Chinese": 0.21022727272727273 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.09659090909090909, + "Vietnamese,Indonesian,Malay,Spanish": 0.09659090909090909, + "Vietnamese,Indonesian,Malay,Filipino": 0.03409090909090909, + "Vietnamese,Indonesian,Malay,Chinese": 0.10795454545454546, + "Vietnamese,Indonesian,English,Spanish": 0.1534090909090909, + "Vietnamese,Indonesian,English,Filipino": 0.07386363636363637, + "Vietnamese,Indonesian,English,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,Spanish,Filipino": 0.07954545454545454, + "Vietnamese,Indonesian,Spanish,Chinese": 0.13636363636363635, + "Vietnamese,Indonesian,Filipino,Chinese": 0.07954545454545454, + "Vietnamese,Malay,English,Spanish": 0.11931818181818182, + "Vietnamese,Malay,English,Filipino": 0.045454545454545456, + "Vietnamese,Malay,English,Chinese": 0.10795454545454546, + "Vietnamese,Malay,Spanish,Filipino": 0.0625, + "Vietnamese,Malay,Spanish,Chinese": 0.11931818181818182, + "Vietnamese,Malay,Filipino,Chinese": 0.07386363636363637, + "Vietnamese,English,Spanish,Filipino": 0.08522727272727272, + "Vietnamese,English,Spanish,Chinese": 0.13636363636363635, + "Vietnamese,English,Filipino,Chinese": 0.07386363636363637, + "Vietnamese,Spanish,Filipino,Chinese": 0.09090909090909091, + "Indonesian,Malay,English,Spanish": 0.13636363636363635, + "Indonesian,Malay,English,Filipino": 0.0625, + "Indonesian,Malay,English,Chinese": 0.11931818181818182, + "Indonesian,Malay,Spanish,Filipino": 0.07386363636363637, + "Indonesian,Malay,Spanish,Chinese": 0.125, + "Indonesian,Malay,Filipino,Chinese": 0.056818181818181816, + "Indonesian,English,Spanish,Filipino": 0.10795454545454546, + "Indonesian,English,Spanish,Chinese": 0.17613636363636365, + "Indonesian,English,Filipino,Chinese": 0.08522727272727272, + "Indonesian,Spanish,Filipino,Chinese": 0.11931818181818182, + "Malay,English,Spanish,Filipino": 0.07386363636363637, + "Malay,English,Spanish,Chinese": 0.13636363636363635, + "Malay,English,Filipino,Chinese": 0.07386363636363637, + "Malay,Spanish,Filipino,Chinese": 0.09659090909090909, + "English,Spanish,Filipino,Chinese": 0.13068181818181818 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.08522727272727272, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.028409090909090908, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.08522727272727272, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.03409090909090909, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.08522727272727272, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.03409090909090909, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.0625, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.11363636363636363, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.056818181818181816, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.06818181818181818, + "Vietnamese,Malay,English,Spanish,Filipino": 0.045454545454545456, + "Vietnamese,Malay,English,Spanish,Chinese": 0.09090909090909091, + "Vietnamese,Malay,English,Filipino,Chinese": 0.03977272727272727, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.056818181818181816, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.0625, + "Indonesian,Malay,English,Spanish,Filipino": 0.05113636363636364, + "Indonesian,Malay,English,Spanish,Chinese": 0.10227272727272728, + "Indonesian,Malay,English,Filipino,Chinese": 0.03977272727272727, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.056818181818181816, + "Indonesian,English,Spanish,Filipino,Chinese": 0.07954545454545454, + "Malay,English,Spanish,Filipino,Chinese": 0.056818181818181816 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.028409090909090908, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.07386363636363637, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.028409090909090908, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.03409090909090909, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.05113636363636364, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.03977272727272727, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.03977272727272727 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.028409090909090908 + } + }, + "AC3_2": 0.3554527663616799, + "AC3_3": 0.2304236018435767, + "AC3_4": 0.1548321807717996, + "AC3_5": 0.10764545706427764, + "AC3_6": 0.07534614144266492, + "AC3_7": 0.05255985982650786 + }, + "prompt_3": { + "overall_acc": 0.36850649350649345, + "language_acc": { + "Vietnamese": 0.2784090909090909, + "Indonesian": 0.35795454545454547, + "Malay": 0.375, + "English": 0.5056818181818182, + "Spanish": 0.44886363636363635, + "Filipino": 0.24431818181818182, + "Chinese": 0.3693181818181818 + }, + "consistency_score_2": 0.38392857142857134, + "consistency_score_3": 0.18652597402597407, + "consistency_score_4": 0.10438311688311691, + "consistency_score_5": 0.061688311688311674, + "consistency_score_6": 0.03571428571428571, + "consistency_score_7": 0.017045454545454544, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.4090909090909091, + "Vietnamese,Malay": 0.3352272727272727, + "Vietnamese,English": 0.3522727272727273, + "Vietnamese,Spanish": 0.4431818181818182, + "Vietnamese,Filipino": 0.30113636363636365, + "Vietnamese,Chinese": 0.3693181818181818, + "Indonesian,Malay": 0.4034090909090909, + "Indonesian,English": 0.3352272727272727, + "Indonesian,Spanish": 0.42613636363636365, + "Indonesian,Filipino": 0.3409090909090909, + "Indonesian,Chinese": 0.3806818181818182, + "Malay,English": 0.3806818181818182, + "Malay,Spanish": 0.39204545454545453, + "Malay,Filipino": 0.3522727272727273, + "Malay,Chinese": 0.3806818181818182, + "English,Spanish": 0.5568181818181818, + "English,Filipino": 0.3352272727272727, + "English,Chinese": 0.4602272727272727, + "Spanish,Filipino": 0.3068181818181818, + "Spanish,Chinese": 0.4318181818181818, + "Filipino,Chinese": 0.3693181818181818 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.16477272727272727, + "Vietnamese,Indonesian,English": 0.17045454545454544, + "Vietnamese,Indonesian,Spanish": 0.22727272727272727, + "Vietnamese,Indonesian,Filipino": 0.13068181818181818, + "Vietnamese,Indonesian,Chinese": 0.18181818181818182, + "Vietnamese,Malay,English": 0.1534090909090909, + "Vietnamese,Malay,Spanish": 0.19886363636363635, + "Vietnamese,Malay,Filipino": 0.13068181818181818, + "Vietnamese,Malay,Chinese": 0.14204545454545456, + "Vietnamese,English,Spanish": 0.2727272727272727, + "Vietnamese,English,Filipino": 0.13636363636363635, + "Vietnamese,English,Chinese": 0.21022727272727273, + "Vietnamese,Spanish,Filipino": 0.1534090909090909, + "Vietnamese,Spanish,Chinese": 0.23863636363636365, + "Vietnamese,Filipino,Chinese": 0.14204545454545456, + "Indonesian,Malay,English": 0.17045454545454544, + "Indonesian,Malay,Spanish": 0.19318181818181818, + "Indonesian,Malay,Filipino": 0.17045454545454544, + "Indonesian,Malay,Chinese": 0.1875, + "Indonesian,English,Spanish": 0.25, + "Indonesian,English,Filipino": 0.13068181818181818, + "Indonesian,English,Chinese": 0.20454545454545456, + "Indonesian,Spanish,Filipino": 0.1590909090909091, + "Indonesian,Spanish,Chinese": 0.2159090909090909, + "Indonesian,Filipino,Chinese": 0.17045454545454544, + "Malay,English,Spanish": 0.25, + "Malay,English,Filipino": 0.17045454545454544, + "Malay,English,Chinese": 0.2215909090909091, + "Malay,Spanish,Filipino": 0.17045454545454544, + "Malay,Spanish,Chinese": 0.19886363636363635, + "Malay,Filipino,Chinese": 0.1875, + "English,Spanish,Filipino": 0.19318181818181818, + "English,Spanish,Chinese": 0.29545454545454547, + "English,Filipino,Chinese": 0.17613636363636365, + "Spanish,Filipino,Chinese": 0.1590909090909091 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.09090909090909091, + "Vietnamese,Indonesian,Malay,Spanish": 0.10227272727272728, + "Vietnamese,Indonesian,Malay,Filipino": 0.06818181818181818, + "Vietnamese,Indonesian,Malay,Chinese": 0.07386363636363637, + "Vietnamese,Indonesian,English,Spanish": 0.13636363636363635, + "Vietnamese,Indonesian,English,Filipino": 0.0625, + "Vietnamese,Indonesian,English,Chinese": 0.11363636363636363, + "Vietnamese,Indonesian,Spanish,Filipino": 0.08522727272727272, + "Vietnamese,Indonesian,Spanish,Chinese": 0.125, + "Vietnamese,Indonesian,Filipino,Chinese": 0.07954545454545454, + "Vietnamese,Malay,English,Spanish": 0.13636363636363635, + "Vietnamese,Malay,English,Filipino": 0.07386363636363637, + "Vietnamese,Malay,English,Chinese": 0.09659090909090909, + "Vietnamese,Malay,Spanish,Filipino": 0.09090909090909091, + "Vietnamese,Malay,Spanish,Chinese": 0.11363636363636363, + "Vietnamese,Malay,Filipino,Chinese": 0.07386363636363637, + "Vietnamese,English,Spanish,Filipino": 0.10227272727272728, + "Vietnamese,English,Spanish,Chinese": 0.16477272727272727, + "Vietnamese,English,Filipino,Chinese": 0.07954545454545454, + "Vietnamese,Spanish,Filipino,Chinese": 0.10227272727272728, + "Indonesian,Malay,English,Spanish": 0.14204545454545456, + "Indonesian,Malay,English,Filipino": 0.07954545454545454, + "Indonesian,Malay,English,Chinese": 0.11363636363636363, + "Indonesian,Malay,Spanish,Filipino": 0.09659090909090909, + "Indonesian,Malay,Spanish,Chinese": 0.11931818181818182, + "Indonesian,Malay,Filipino,Chinese": 0.10227272727272728, + "Indonesian,English,Spanish,Filipino": 0.09659090909090909, + "Indonesian,English,Spanish,Chinese": 0.1590909090909091, + "Indonesian,English,Filipino,Chinese": 0.08522727272727272, + "Indonesian,Spanish,Filipino,Chinese": 0.10795454545454546, + "Malay,English,Spanish,Filipino": 0.10795454545454546, + "Malay,English,Spanish,Chinese": 0.14772727272727273, + "Malay,English,Filipino,Chinese": 0.11363636363636363, + "Malay,Spanish,Filipino,Chinese": 0.09659090909090909, + "English,Spanish,Filipino,Chinese": 0.11363636363636363 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.07954545454545454, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.03977272727272727, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.056818181818181816, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.045454545454545456, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.056818181818181816, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.03409090909090909, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.05113636363636364, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.08522727272727272, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.045454545454545456, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.0625, + "Vietnamese,Malay,English,Spanish,Filipino": 0.0625, + "Vietnamese,Malay,English,Spanish,Chinese": 0.08522727272727272, + "Vietnamese,Malay,English,Filipino,Chinese": 0.045454545454545456, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.056818181818181816, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.06818181818181818, + "Indonesian,Malay,English,Spanish,Filipino": 0.06818181818181818, + "Indonesian,Malay,English,Spanish,Chinese": 0.09090909090909091, + "Indonesian,Malay,English,Filipino,Chinese": 0.056818181818181816, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.06818181818181818, + "Indonesian,English,Spanish,Filipino,Chinese": 0.06818181818181818, + "Malay,English,Spanish,Filipino,Chinese": 0.06818181818181818 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.03409090909090909, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.045454545454545456, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.022727272727272728, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.028409090909090908, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.03409090909090909, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.03977272727272727, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.045454545454545456 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.017045454545454544 + } + }, + "AC3_2": 0.37605948523280713, + "AC3_3": 0.2476829253949545, + "AC3_4": 0.16268429473022217, + "AC3_5": 0.10568488113201249, + "AC3_6": 0.06511761329427497, + "AC3_7": 0.03258373204896508 + }, + "prompt_4": { + "overall_acc": 0.3701298701298702, + "language_acc": { + "Vietnamese": 0.29545454545454547, + "Indonesian": 0.3465909090909091, + "Malay": 0.39204545454545453, + "English": 0.4715909090909091, + "Spanish": 0.4147727272727273, + "Filipino": 0.3125, + "Chinese": 0.35795454545454547 + }, + "consistency_score_2": 0.3349567099567099, + "consistency_score_3": 0.13425324675324676, + "consistency_score_4": 0.06055194805194804, + "consistency_score_5": 0.02922077922077922, + "consistency_score_6": 0.013798701298701298, + "consistency_score_7": 0.005681818181818182, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.3693181818181818, + "Vietnamese,Malay": 0.39204545454545453, + "Vietnamese,English": 0.3125, + "Vietnamese,Spanish": 0.3409090909090909, + "Vietnamese,Filipino": 0.26704545454545453, + "Vietnamese,Chinese": 0.29545454545454547, + "Indonesian,Malay": 0.35795454545454547, + "Indonesian,English": 0.35795454545454547, + "Indonesian,Spanish": 0.38636363636363635, + "Indonesian,Filipino": 0.3352272727272727, + "Indonesian,Chinese": 0.29545454545454547, + "Malay,English": 0.3465909090909091, + "Malay,Spanish": 0.3522727272727273, + "Malay,Filipino": 0.30113636363636365, + "Malay,Chinese": 0.30113636363636365, + "English,Spanish": 0.39204545454545453, + "English,Filipino": 0.2897727272727273, + "English,Chinese": 0.36363636363636365, + "Spanish,Filipino": 0.29545454545454547, + "Spanish,Chinese": 0.36363636363636365, + "Filipino,Chinese": 0.3181818181818182 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.1875, + "Vietnamese,Indonesian,English": 0.1534090909090909, + "Vietnamese,Indonesian,Spanish": 0.17613636363636365, + "Vietnamese,Indonesian,Filipino": 0.13068181818181818, + "Vietnamese,Indonesian,Chinese": 0.10795454545454546, + "Vietnamese,Malay,English": 0.1590909090909091, + "Vietnamese,Malay,Spanish": 0.13636363636363635, + "Vietnamese,Malay,Filipino": 0.125, + "Vietnamese,Malay,Chinese": 0.14204545454545456, + "Vietnamese,English,Spanish": 0.14772727272727273, + "Vietnamese,English,Filipino": 0.09090909090909091, + "Vietnamese,English,Chinese": 0.09659090909090909, + "Vietnamese,Spanish,Filipino": 0.09090909090909091, + "Vietnamese,Spanish,Chinese": 0.125, + "Vietnamese,Filipino,Chinese": 0.09090909090909091, + "Indonesian,Malay,English": 0.1590909090909091, + "Indonesian,Malay,Spanish": 0.16477272727272727, + "Indonesian,Malay,Filipino": 0.14204545454545456, + "Indonesian,Malay,Chinese": 0.13068181818181818, + "Indonesian,English,Spanish": 0.17613636363636365, + "Indonesian,English,Filipino": 0.13636363636363635, + "Indonesian,English,Chinese": 0.13636363636363635, + "Indonesian,Spanish,Filipino": 0.14772727272727273, + "Indonesian,Spanish,Chinese": 0.14204545454545456, + "Indonesian,Filipino,Chinese": 0.10227272727272728, + "Malay,English,Spanish": 0.17613636363636365, + "Malay,English,Filipino": 0.11363636363636363, + "Malay,English,Chinese": 0.11363636363636363, + "Malay,Spanish,Filipino": 0.13068181818181818, + "Malay,Spanish,Chinese": 0.14204545454545456, + "Malay,Filipino,Chinese": 0.10795454545454546, + "English,Spanish,Filipino": 0.11363636363636363, + "English,Spanish,Chinese": 0.16477272727272727, + "English,Filipino,Chinese": 0.125, + "Spanish,Filipino,Chinese": 0.11363636363636363 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.09090909090909091, + "Vietnamese,Indonesian,Malay,Spanish": 0.07954545454545454, + "Vietnamese,Indonesian,Malay,Filipino": 0.07954545454545454, + "Vietnamese,Indonesian,Malay,Chinese": 0.056818181818181816, + "Vietnamese,Indonesian,English,Spanish": 0.09090909090909091, + "Vietnamese,Indonesian,English,Filipino": 0.06818181818181818, + "Vietnamese,Indonesian,English,Chinese": 0.03977272727272727, + "Vietnamese,Indonesian,Spanish,Filipino": 0.056818181818181816, + "Vietnamese,Indonesian,Spanish,Chinese": 0.056818181818181816, + "Vietnamese,Indonesian,Filipino,Chinese": 0.03977272727272727, + "Vietnamese,Malay,English,Spanish": 0.07386363636363637, + "Vietnamese,Malay,English,Filipino": 0.05113636363636364, + "Vietnamese,Malay,English,Chinese": 0.05113636363636364, + "Vietnamese,Malay,Spanish,Filipino": 0.05113636363636364, + "Vietnamese,Malay,Spanish,Chinese": 0.06818181818181818, + "Vietnamese,Malay,Filipino,Chinese": 0.05113636363636364, + "Vietnamese,English,Spanish,Filipino": 0.05113636363636364, + "Vietnamese,English,Spanish,Chinese": 0.056818181818181816, + "Vietnamese,English,Filipino,Chinese": 0.028409090909090908, + "Vietnamese,Spanish,Filipino,Chinese": 0.03409090909090909, + "Indonesian,Malay,English,Spanish": 0.10227272727272728, + "Indonesian,Malay,English,Filipino": 0.06818181818181818, + "Indonesian,Malay,English,Chinese": 0.0625, + "Indonesian,Malay,Spanish,Filipino": 0.07386363636363637, + "Indonesian,Malay,Spanish,Chinese": 0.06818181818181818, + "Indonesian,Malay,Filipino,Chinese": 0.03977272727272727, + "Indonesian,English,Spanish,Filipino": 0.08522727272727272, + "Indonesian,English,Spanish,Chinese": 0.07386363636363637, + "Indonesian,English,Filipino,Chinese": 0.05113636363636364, + "Indonesian,Spanish,Filipino,Chinese": 0.0625, + "Malay,English,Spanish,Filipino": 0.056818181818181816, + "Malay,English,Spanish,Chinese": 0.0625, + "Malay,English,Filipino,Chinese": 0.03409090909090909, + "Malay,Spanish,Filipino,Chinese": 0.056818181818181816, + "English,Spanish,Filipino,Chinese": 0.045454545454545456 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.05113636363636364, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.045454545454545456, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.022727272727272728, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.03409090909090909, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.022727272727272728, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.022727272727272728, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.045454545454545456, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.03409090909090909, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.017045454545454544, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.017045454545454544, + "Vietnamese,Malay,English,Spanish,Filipino": 0.028409090909090908, + "Vietnamese,Malay,English,Spanish,Chinese": 0.028409090909090908, + "Vietnamese,Malay,English,Filipino,Chinese": 0.011363636363636364, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.022727272727272728, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.017045454545454544, + "Indonesian,Malay,English,Spanish,Filipino": 0.05113636363636364, + "Indonesian,Malay,English,Spanish,Chinese": 0.045454545454545456, + "Indonesian,Malay,English,Filipino,Chinese": 0.017045454545454544, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.022727272727272728, + "Indonesian,English,Spanish,Filipino,Chinese": 0.03977272727272727, + "Malay,English,Spanish,Filipino,Chinese": 0.017045454545454544 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.028409090909090908, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.017045454545454544, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.005681818181818182, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.005681818181818182, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.017045454545454544, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.005681818181818182, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.017045454545454544 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.005681818181818182 + } + }, + "AC3_2": 0.35166598553782913, + "AC3_3": 0.19703727231201487, + "AC3_4": 0.10407722693282001, + "AC3_5": 0.05416534683471032, + "AC3_6": 0.02660552976984732, + "AC3_7": 0.011191831923195136 + }, + "prompt_5": { + "overall_acc": 0.3709415584415584, + "language_acc": { + "Vietnamese": 0.3125, + "Indonesian": 0.3409090909090909, + "Malay": 0.30113636363636365, + "English": 0.4602272727272727, + "Spanish": 0.4431818181818182, + "Filipino": 0.32954545454545453, + "Chinese": 0.4090909090909091 + }, + "consistency_score_2": 0.37608225108225113, + "consistency_score_3": 0.18344155844155846, + "consistency_score_4": 0.10698051948051948, + "consistency_score_5": 0.07142857142857144, + "consistency_score_6": 0.05357142857142858, + "consistency_score_7": 0.045454545454545456, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.3806818181818182, + "Vietnamese,Malay": 0.3125, + "Vietnamese,English": 0.38636363636363635, + "Vietnamese,Spanish": 0.3352272727272727, + "Vietnamese,Filipino": 0.3522727272727273, + "Vietnamese,Chinese": 0.3352272727272727, + "Indonesian,Malay": 0.3522727272727273, + "Indonesian,English": 0.3352272727272727, + "Indonesian,Spanish": 0.4090909090909091, + "Indonesian,Filipino": 0.3352272727272727, + "Indonesian,Chinese": 0.42613636363636365, + "Malay,English": 0.3693181818181818, + "Malay,Spanish": 0.38636363636363635, + "Malay,Filipino": 0.36363636363636365, + "Malay,Chinese": 0.36363636363636365, + "English,Spanish": 0.4659090909090909, + "English,Filipino": 0.38636363636363635, + "English,Chinese": 0.4147727272727273, + "Spanish,Filipino": 0.38636363636363635, + "Spanish,Chinese": 0.42613636363636365, + "Filipino,Chinese": 0.375 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.14772727272727273, + "Vietnamese,Indonesian,English": 0.1875, + "Vietnamese,Indonesian,Spanish": 0.18181818181818182, + "Vietnamese,Indonesian,Filipino": 0.16477272727272727, + "Vietnamese,Indonesian,Chinese": 0.17045454545454544, + "Vietnamese,Malay,English": 0.18181818181818182, + "Vietnamese,Malay,Spanish": 0.14772727272727273, + "Vietnamese,Malay,Filipino": 0.1590909090909091, + "Vietnamese,Malay,Chinese": 0.14204545454545456, + "Vietnamese,English,Spanish": 0.20454545454545456, + "Vietnamese,English,Filipino": 0.19886363636363635, + "Vietnamese,English,Chinese": 0.18181818181818182, + "Vietnamese,Spanish,Filipino": 0.16477272727272727, + "Vietnamese,Spanish,Chinese": 0.17045454545454544, + "Vietnamese,Filipino,Chinese": 0.17045454545454544, + "Indonesian,Malay,English": 0.1875, + "Indonesian,Malay,Spanish": 0.1875, + "Indonesian,Malay,Filipino": 0.14772727272727273, + "Indonesian,Malay,Chinese": 0.17613636363636365, + "Indonesian,English,Spanish": 0.19886363636363635, + "Indonesian,English,Filipino": 0.1590909090909091, + "Indonesian,English,Chinese": 0.20454545454545456, + "Indonesian,Spanish,Filipino": 0.19318181818181818, + "Indonesian,Spanish,Chinese": 0.23295454545454544, + "Indonesian,Filipino,Chinese": 0.18181818181818182, + "Malay,English,Spanish": 0.23863636363636365, + "Malay,English,Filipino": 0.1875, + "Malay,English,Chinese": 0.20454545454545456, + "Malay,Spanish,Filipino": 0.16477272727272727, + "Malay,Spanish,Chinese": 0.2159090909090909, + "Malay,Filipino,Chinese": 0.13636363636363635, + "English,Spanish,Filipino": 0.19886363636363635, + "English,Spanish,Chinese": 0.23295454545454544, + "English,Filipino,Chinese": 0.1875, + "Spanish,Filipino,Chinese": 0.21022727272727273 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.11363636363636363, + "Vietnamese,Indonesian,Malay,Spanish": 0.09090909090909091, + "Vietnamese,Indonesian,Malay,Filipino": 0.08522727272727272, + "Vietnamese,Indonesian,Malay,Chinese": 0.08522727272727272, + "Vietnamese,Indonesian,English,Spanish": 0.11363636363636363, + "Vietnamese,Indonesian,English,Filipino": 0.11363636363636363, + "Vietnamese,Indonesian,English,Chinese": 0.11931818181818182, + "Vietnamese,Indonesian,Spanish,Filipino": 0.10227272727272728, + "Vietnamese,Indonesian,Spanish,Chinese": 0.11931818181818182, + "Vietnamese,Indonesian,Filipino,Chinese": 0.10227272727272728, + "Vietnamese,Malay,English,Spanish": 0.11363636363636363, + "Vietnamese,Malay,English,Filipino": 0.11363636363636363, + "Vietnamese,Malay,English,Chinese": 0.09659090909090909, + "Vietnamese,Malay,Spanish,Filipino": 0.07386363636363637, + "Vietnamese,Malay,Spanish,Chinese": 0.09090909090909091, + "Vietnamese,Malay,Filipino,Chinese": 0.07954545454545454, + "Vietnamese,English,Spanish,Filipino": 0.11931818181818182, + "Vietnamese,English,Spanish,Chinese": 0.11363636363636363, + "Vietnamese,English,Filipino,Chinese": 0.10795454545454546, + "Vietnamese,Spanish,Filipino,Chinese": 0.10227272727272728, + "Indonesian,Malay,English,Spanish": 0.13068181818181818, + "Indonesian,Malay,English,Filipino": 0.09659090909090909, + "Indonesian,Malay,English,Chinese": 0.11363636363636363, + "Indonesian,Malay,Spanish,Filipino": 0.09659090909090909, + "Indonesian,Malay,Spanish,Chinese": 0.13068181818181818, + "Indonesian,Malay,Filipino,Chinese": 0.07386363636363637, + "Indonesian,English,Spanish,Filipino": 0.10227272727272728, + "Indonesian,English,Spanish,Chinese": 0.13068181818181818, + "Indonesian,English,Filipino,Chinese": 0.10227272727272728, + "Indonesian,Spanish,Filipino,Chinese": 0.11931818181818182, + "Malay,English,Spanish,Filipino": 0.11931818181818182, + "Malay,English,Spanish,Chinese": 0.1534090909090909, + "Malay,English,Filipino,Chinese": 0.09659090909090909, + "Malay,Spanish,Filipino,Chinese": 0.10795454545454546, + "English,Spanish,Filipino,Chinese": 0.11363636363636363 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.07954545454545454, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.06818181818181818, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.06818181818181818, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.056818181818181816, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.07386363636363637, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.056818181818181816, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.07386363636363637, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.08522727272727272, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.07954545454545454, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.07386363636363637, + "Vietnamese,Malay,English,Spanish,Filipino": 0.06818181818181818, + "Vietnamese,Malay,English,Spanish,Chinese": 0.07954545454545454, + "Vietnamese,Malay,English,Filipino,Chinese": 0.0625, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.056818181818181816, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.07386363636363637, + "Indonesian,Malay,English,Spanish,Filipino": 0.07386363636363637, + "Indonesian,Malay,English,Spanish,Chinese": 0.09659090909090909, + "Indonesian,Malay,English,Filipino,Chinese": 0.056818181818181816, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.06818181818181818, + "Indonesian,English,Spanish,Filipino,Chinese": 0.06818181818181818, + "Malay,English,Spanish,Filipino,Chinese": 0.07954545454545454 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.05113636363636364, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.0625, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.045454545454545456, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.05113636363636364, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.056818181818181816, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.05113636363636364, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.056818181818181816 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.045454545454545456 + } + }, + "AC3_2": 0.3734942167075295, + "AC3_3": 0.24548401813373472, + "AC3_4": 0.16606690690342282, + "AC3_5": 0.11979030141459839, + "AC3_6": 0.09362196119072944, + "AC3_7": 0.08098529149215819 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.46601941747572817 + }, + "prompt_2": { + "accuracy": 0.4563106796116505 + }, + "prompt_3": { + "accuracy": 0.4563106796116505 + }, + "prompt_4": { + "accuracy": 0.46601941747572817 + }, + "prompt_5": { + "accuracy": 0.47572815533980584 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.29523809523809524 + }, + "prompt_2": { + "accuracy": 0.34285714285714286 + }, + "prompt_3": { + "accuracy": 0.37142857142857144 + }, + "prompt_4": { + "accuracy": 0.3238095238095238 + }, + "prompt_5": { + "accuracy": 0.2571428571428571 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5514018691588785 + }, + "prompt_2": { + "accuracy": 0.5514018691588785 + }, + "prompt_3": { + "accuracy": 0.5327102803738317 + }, + "prompt_4": { + "accuracy": 0.5514018691588785 + }, + "prompt_5": { + "accuracy": 0.6074766355140186 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.34, + "category_acc": { + "brand": 0.2, + "demographics": 0.4, + "biology": 0.3, + "history": 0.13333333333333333, + "literature": 0.1, + "politics": 0.6, + "culture": 0.6, + "film": 0.2, + "law": 0.2, + "geography": 0.8 + } + }, + "prompt_2": { + "accuracy": 0.4, + "category_acc": { + "brand": 0.7, + "demographics": 0.2, + "biology": 0.3, + "history": 0.2, + "literature": 0.3, + "politics": 0.7, + "culture": 0.5, + "film": 0.5, + "law": 0.3, + "geography": 0.3 + } + }, + "prompt_3": { + "accuracy": 0.35, + "category_acc": { + "brand": 0.1, + "demographics": 0.4, + "biology": 0.3, + "history": 0.2, + "literature": 0.2, + "politics": 0.4, + "culture": 0.6, + "film": 0.5, + "law": 0.3, + "geography": 0.6 + } + }, + "prompt_4": { + "accuracy": 0.45, + "category_acc": { + "brand": 0.4, + "demographics": 0.2, + "biology": 0.7, + "history": 0.26666666666666666, + "literature": 0.3, + "politics": 0.7, + "culture": 0.6, + "film": 0.6, + "law": 0.2, + "geography": 0.5 + } + }, + "prompt_5": { + "accuracy": 0.44, + "category_acc": { + "brand": 0.5, + "demographics": 0.6, + "biology": 0.5, + "history": 0.2, + "literature": 0.5, + "politics": 0.5, + "culture": 0.5, + "film": 0.4, + "law": 0.4, + "geography": 0.5 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.17934743041539958 + }, + "prompt_2": { + "bleu_score": 0.1964644813240733 + }, + "prompt_3": { + "bleu_score": 0.19879004044129303 + }, + "prompt_4": { + "bleu_score": 0.1814133410337293 + }, + "prompt_5": { + "bleu_score": 0.1560597423355046 + } }, "indommlu": { "prompt_1": -1, @@ -10876,179 +94496,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.21723786662416072 + }, + "prompt_2": { + "bleu_score": 0.2281311103259181 + }, + "prompt_3": { + "bleu_score": 0.23100503176374243 + }, + "prompt_4": { + "bleu_score": 0.23219748303898446 + }, + "prompt_5": { + "bleu_score": 0.19680938705105205 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.12991382521248743 + }, + "prompt_2": { + "bleu_score": 0.13678470510696256 + }, + "prompt_3": { + "bleu_score": 0.13737499670562459 + }, + "prompt_4": { + "bleu_score": 0.13701669350019102 + }, + "prompt_5": { + "bleu_score": 0.11770314745969822 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.15090607263426564 + }, + "prompt_2": { + "bleu_score": 0.15495827962254066 + }, + "prompt_3": { + "bleu_score": 0.15642916217616992 + }, + "prompt_4": { + "bleu_score": 0.1527398480246314 + }, + "prompt_5": { + "bleu_score": 0.14085792693899302 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.18963573586671778 + }, + "prompt_2": { + "bleu_score": 0.21158196156519182 + }, + "prompt_3": { + "bleu_score": 0.20248020992143118 + }, + "prompt_4": { + "bleu_score": 0.20245091691403666 + }, + "prompt_5": { + "bleu_score": 0.17551660905981642 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49241540256709454 + }, + "prompt_2": { + "accuracy": 0.46557759626604434 + }, + "prompt_3": { + "accuracy": 0.5052508751458576 + }, + "prompt_4": { + "accuracy": 0.49941656942823803 + }, + "prompt_5": { + "accuracy": 0.49941656942823803 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.482445477297104, + "category_acc": { + "high_school_european_history": 0.5853658536585366, + "business_ethics": 0.6060606060606061, + "clinical_knowledge": 0.5075757575757576, + "medical_genetics": 0.48484848484848486, + "high_school_us_history": 0.6305418719211823, + "high_school_physics": 0.32, + "high_school_world_history": 0.6186440677966102, + "virology": 0.3878787878787879, + "high_school_microeconomics": 0.48523206751054854, + "econometrics": 0.34513274336283184, + "college_computer_science": 0.36363636363636365, + "high_school_biology": 0.5922330097087378, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.3594306049822064, + "philosophy": 0.4870967741935484, + "professional_medicine": 0.47601476014760147, + "nutrition": 0.4885245901639344, + "global_facts": 0.32323232323232326, + "machine_learning": 0.42342342342342343, + "security_studies": 0.4713114754098361, + "public_relations": 0.5229357798165137, + "professional_psychology": 0.48117839607201307, + "prehistory": 0.5572755417956656, + "anatomy": 0.417910447761194, + "human_sexuality": 0.5307692307692308, + "college_medicine": 0.46511627906976744, + "high_school_government_and_politics": 0.703125, + "college_chemistry": 0.32323232323232326, + "logical_fallacies": 0.6419753086419753, + "high_school_geography": 0.649746192893401, + "elementary_mathematics": 0.3952254641909814, + "human_aging": 0.5900900900900901, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.7242647058823529, + "formal_logic": 0.352, + "high_school_statistics": 0.2930232558139535, + "international_law": 0.6416666666666667, + "high_school_mathematics": 0.26394052044609667, + "high_school_computer_science": 0.45454545454545453, + "conceptual_physics": 0.4230769230769231, + "miscellaneous": 0.7199488491048593, + "high_school_chemistry": 0.3613861386138614, + "marketing": 0.721030042918455, + "professional_law": 0.3515981735159817, + "management": 0.6862745098039216, + "college_physics": 0.31683168316831684, + "jurisprudence": 0.5514018691588785, + "world_religions": 0.6882352941176471, + "sociology": 0.7, + "us_foreign_policy": 0.6666666666666666, + "high_school_macroeconomics": 0.390745501285347, + "computer_security": 0.5656565656565656, + "moral_scenarios": 0.2785234899328859, + "moral_disputes": 0.4782608695652174, + "electrical_engineering": 0.3680555555555556, + "astronomy": 0.543046357615894, + "college_biology": 0.5594405594405595 + } + }, + "prompt_2": { + "accuracy": 0.4678584197354308, + "category_acc": { + "high_school_european_history": 0.6036585365853658, + "business_ethics": 0.5555555555555556, + "clinical_knowledge": 0.49242424242424243, + "medical_genetics": 0.5050505050505051, + "high_school_us_history": 0.6305418719211823, + "high_school_physics": 0.36, + "high_school_world_history": 0.6101694915254238, + "virology": 0.37575757575757573, + "high_school_microeconomics": 0.45147679324894513, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.41414141414141414, + "high_school_biology": 0.5566343042071198, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.35587188612099646, + "philosophy": 0.5161290322580645, + "professional_medicine": 0.5129151291512916, + "nutrition": 0.4852459016393443, + "global_facts": 0.3333333333333333, + "machine_learning": 0.32432432432432434, + "security_studies": 0.47540983606557374, + "public_relations": 0.46788990825688076, + "professional_psychology": 0.486088379705401, + "prehistory": 0.5356037151702786, + "anatomy": 0.44776119402985076, + "human_sexuality": 0.5076923076923077, + "college_medicine": 0.4127906976744186, + "high_school_government_and_politics": 0.6197916666666666, + "college_chemistry": 0.42424242424242425, + "logical_fallacies": 0.6049382716049383, + "high_school_geography": 0.5685279187817259, + "elementary_mathematics": 0.32891246684350134, + "human_aging": 0.5495495495495496, + "college_mathematics": 0.30303030303030304, + "high_school_psychology": 0.6764705882352942, + "formal_logic": 0.344, + "high_school_statistics": 0.3209302325581395, + "international_law": 0.6166666666666667, + "high_school_mathematics": 0.2342007434944238, + "high_school_computer_science": 0.45454545454545453, + "conceptual_physics": 0.4230769230769231, + "miscellaneous": 0.6982097186700768, + "high_school_chemistry": 0.3415841584158416, + "marketing": 0.721030042918455, + "professional_law": 0.3385518590998043, + "management": 0.6470588235294118, + "college_physics": 0.297029702970297, + "jurisprudence": 0.5514018691588785, + "world_religions": 0.6705882352941176, + "sociology": 0.685, + "us_foreign_policy": 0.6565656565656566, + "high_school_macroeconomics": 0.4498714652956298, + "computer_security": 0.5858585858585859, + "moral_scenarios": 0.2539149888143177, + "moral_disputes": 0.43768115942028984, + "electrical_engineering": 0.4236111111111111, + "astronomy": 0.48344370860927155, + "college_biology": 0.5244755244755245 + } + }, + "prompt_3": { + "accuracy": 0.4927422238112263, + "category_acc": { + "high_school_european_history": 0.6280487804878049, + "business_ethics": 0.5757575757575758, + "clinical_knowledge": 0.5303030303030303, + "medical_genetics": 0.6060606060606061, + "high_school_us_history": 0.6354679802955665, + "high_school_physics": 0.32, + "high_school_world_history": 0.673728813559322, + "virology": 0.3878787878787879, + "high_school_microeconomics": 0.5316455696202531, + "econometrics": 0.3185840707964602, + "college_computer_science": 0.43434343434343436, + "high_school_biology": 0.5728155339805825, + "abstract_algebra": 0.1919191919191919, + "professional_accounting": 0.3701067615658363, + "philosophy": 0.5612903225806452, + "professional_medicine": 0.5018450184501845, + "nutrition": 0.5442622950819672, + "global_facts": 0.32323232323232326, + "machine_learning": 0.42342342342342343, + "security_studies": 0.5450819672131147, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.486088379705401, + "prehistory": 0.5294117647058824, + "anatomy": 0.4701492537313433, + "human_sexuality": 0.6230769230769231, + "college_medicine": 0.45930232558139533, + "high_school_government_and_politics": 0.703125, + "college_chemistry": 0.3434343434343434, + "logical_fallacies": 0.6604938271604939, + "high_school_geography": 0.6548223350253807, + "elementary_mathematics": 0.3448275862068966, + "human_aging": 0.5495495495495496, + "college_mathematics": 0.29292929292929293, + "high_school_psychology": 0.71875, + "formal_logic": 0.392, + "high_school_statistics": 0.3395348837209302, + "international_law": 0.6166666666666667, + "high_school_mathematics": 0.25650557620817843, + "high_school_computer_science": 0.5050505050505051, + "conceptual_physics": 0.36752136752136755, + "miscellaneous": 0.7276214833759591, + "high_school_chemistry": 0.3465346534653465, + "marketing": 0.7939914163090128, + "professional_law": 0.3522504892367906, + "management": 0.6862745098039216, + "college_physics": 0.2376237623762376, + "jurisprudence": 0.5887850467289719, + "world_religions": 0.7058823529411765, + "sociology": 0.725, + "us_foreign_policy": 0.6868686868686869, + "high_school_macroeconomics": 0.455012853470437, + "computer_security": 0.6161616161616161, + "moral_scenarios": 0.23042505592841164, + "moral_disputes": 0.527536231884058, + "electrical_engineering": 0.4513888888888889, + "astronomy": 0.543046357615894, + "college_biology": 0.5594405594405595 + } + }, + "prompt_4": { + "accuracy": 0.48773686092241686, + "category_acc": { + "high_school_european_history": 0.5975609756097561, + "business_ethics": 0.5959595959595959, + "clinical_knowledge": 0.5378787878787878, + "medical_genetics": 0.5151515151515151, + "high_school_us_history": 0.6551724137931034, + "high_school_physics": 0.3, + "high_school_world_history": 0.6483050847457628, + "virology": 0.40606060606060607, + "high_school_microeconomics": 0.4978902953586498, + "econometrics": 0.336283185840708, + "college_computer_science": 0.43434343434343436, + "high_school_biology": 0.5889967637540453, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.30604982206405695, + "philosophy": 0.5129032258064516, + "professional_medicine": 0.5202952029520295, + "nutrition": 0.5311475409836065, + "global_facts": 0.32323232323232326, + "machine_learning": 0.44144144144144143, + "security_studies": 0.45901639344262296, + "public_relations": 0.5596330275229358, + "professional_psychology": 0.49263502454991814, + "prehistory": 0.5603715170278638, + "anatomy": 0.4552238805970149, + "human_sexuality": 0.6, + "college_medicine": 0.45930232558139533, + "high_school_government_and_politics": 0.671875, + "college_chemistry": 0.37373737373737376, + "logical_fallacies": 0.6728395061728395, + "high_school_geography": 0.6395939086294417, + "elementary_mathematics": 0.35013262599469497, + "human_aging": 0.5945945945945946, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.7022058823529411, + "formal_logic": 0.352, + "high_school_statistics": 0.3395348837209302, + "international_law": 0.6333333333333333, + "high_school_mathematics": 0.21933085501858737, + "high_school_computer_science": 0.5252525252525253, + "conceptual_physics": 0.43162393162393164, + "miscellaneous": 0.7097186700767263, + "high_school_chemistry": 0.33663366336633666, + "marketing": 0.7424892703862661, + "professional_law": 0.36007827788649704, + "management": 0.6862745098039216, + "college_physics": 0.3069306930693069, + "jurisprudence": 0.616822429906542, + "world_religions": 0.6588235294117647, + "sociology": 0.73, + "us_foreign_policy": 0.7171717171717171, + "high_school_macroeconomics": 0.442159383033419, + "computer_security": 0.6161616161616161, + "moral_scenarios": 0.2348993288590604, + "moral_disputes": 0.4927536231884058, + "electrical_engineering": 0.3958333333333333, + "astronomy": 0.5364238410596026, + "college_biology": 0.5664335664335665 + } + }, + "prompt_5": { + "accuracy": 0.4700750804433321, + "category_acc": { + "high_school_european_history": 0.573170731707317, + "business_ethics": 0.5757575757575758, + "clinical_knowledge": 0.5, + "medical_genetics": 0.5151515151515151, + "high_school_us_history": 0.6403940886699507, + "high_school_physics": 0.2733333333333333, + "high_school_world_history": 0.6313559322033898, + "virology": 0.32727272727272727, + "high_school_microeconomics": 0.459915611814346, + "econometrics": 0.2831858407079646, + "college_computer_science": 0.3939393939393939, + "high_school_biology": 0.5501618122977346, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.3665480427046263, + "philosophy": 0.5096774193548387, + "professional_medicine": 0.5202952029520295, + "nutrition": 0.5245901639344263, + "global_facts": 0.24242424242424243, + "machine_learning": 0.3333333333333333, + "security_studies": 0.47950819672131145, + "public_relations": 0.47706422018348627, + "professional_psychology": 0.4713584288052373, + "prehistory": 0.5232198142414861, + "anatomy": 0.43283582089552236, + "human_sexuality": 0.5846153846153846, + "college_medicine": 0.4883720930232558, + "high_school_government_and_politics": 0.640625, + "college_chemistry": 0.30303030303030304, + "logical_fallacies": 0.5925925925925926, + "high_school_geography": 0.5888324873096447, + "elementary_mathematics": 0.33687002652519893, + "human_aging": 0.5180180180180181, + "college_mathematics": 0.3838383838383838, + "high_school_psychology": 0.6819852941176471, + "formal_logic": 0.32, + "high_school_statistics": 0.3674418604651163, + "international_law": 0.6166666666666667, + "high_school_mathematics": 0.26022304832713755, + "high_school_computer_science": 0.48484848484848486, + "conceptual_physics": 0.41025641025641024, + "miscellaneous": 0.7186700767263428, + "high_school_chemistry": 0.25742574257425743, + "marketing": 0.7296137339055794, + "professional_law": 0.3411611219830398, + "management": 0.696078431372549, + "college_physics": 0.27722772277227725, + "jurisprudence": 0.45794392523364486, + "world_religions": 0.6647058823529411, + "sociology": 0.69, + "us_foreign_policy": 0.6565656565656566, + "high_school_macroeconomics": 0.4087403598971722, + "computer_security": 0.6565656565656566, + "moral_scenarios": 0.26174496644295303, + "moral_disputes": 0.4579710144927536, + "electrical_engineering": 0.4722222222222222, + "astronomy": 0.47019867549668876, + "college_biology": 0.5454545454545454 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.31723625557206536 + }, + "prompt_2": { + "accuracy": 0.31575037147102525 + }, + "prompt_3": { + "accuracy": 0.33060921248142644 + }, + "prompt_4": { + "accuracy": 0.32838038632986627 + }, + "prompt_5": { + "accuracy": 0.31946508172362553 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.32627646326276466, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.4523809523809524, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.3103448275862069, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.25, + "high_school_chemistry": 0.25, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.4230769230769231, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.31666666666666665, + "business_administration": 0.2894736842105263, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.27586206896551724, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.42857142857142855, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.375, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.3333333333333333, + "logic": 0.4074074074074074, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.25, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.56, + "middle_school_history": 0.5185185185185185, + "civil_servant": 0.23076923076923078, + "sports_science": 0.20833333333333334, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.47058823529411764, + "accountant": 0.2777777777777778, + "fire_engineer": 0.3888888888888889, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.2777777777777778, + "physician": 0.2222222222222222 + } + }, + "prompt_2": { + "accuracy": 0.33499377334993774, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.375, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.40476190476190477, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.1724137931034483, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.42857142857142855, + "metrology_engineer": 0.4827586206896552, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.125, + "high_school_chemistry": 0.25, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.4230769230769231, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.26666666666666666, + "business_administration": 0.34210526315789475, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.38235294117647056, + "teacher_qualification": 0.5306122448979592, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.25, + "middle_school_politics": 0.46153846153846156, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.3333333333333333, + "logic": 0.25925925925925924, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.2631578947368421, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.56, + "middle_school_history": 0.4074074074074074, + "civil_servant": 0.36538461538461536, + "sports_science": 0.3333333333333333, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.375, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.43137254901960786, + "accountant": 0.2962962962962963, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.2777777777777778, + "physician": 0.3148148148148148 + } + }, + "prompt_3": { + "accuracy": 0.3206724782067248, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.19230769230769232, + "college_programming": 0.4523809523809524, + "college_physics": 0.125, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.125, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.42857142857142855, + "electrical_engineer": 0.35714285714285715, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.25, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.46153846153846156, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.38333333333333336, + "business_administration": 0.3157894736842105, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.47058823529411764, + "teacher_qualification": 0.3877551020408163, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.5, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.3333333333333333, + "logic": 0.18518518518518517, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.17857142857142858, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.14285714285714285, + "high_school_chinese": 0.4166666666666667, + "high_school_history": 0.52, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.46153846153846156, + "sports_science": 0.2916666666666667, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.25, + "clinical_medicine": 0.18518518518518517, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.2962962962962963, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.3888888888888889, + "tax_accountant": 0.2037037037037037, + "physician": 0.3888888888888889 + } + }, + "prompt_4": { + "accuracy": 0.298879202988792, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.4166666666666667, + "computer_architecture": 0.5, + "college_programming": 0.42857142857142855, + "college_physics": 0.125, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.19047619047619047, + "metrology_engineer": 0.3103448275862069, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.25, + "high_school_chemistry": 0.375, + "high_school_biology": 0.125, + "middle_school_mathematics": 0.125, + "middle_school_biology": 0.5384615384615384, + "middle_school_physics": 0.25, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.23333333333333334, + "business_administration": 0.39473684210526316, + "marxism": 0.375, + "mao_zedong_thought": 0.27586206896551724, + "education_science": 0.5, + "teacher_qualification": 0.40816326530612246, + "high_school_politics": 0.25, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.25925925925925924, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.25, + "art_studies": 0.2631578947368421, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.125, + "high_school_history": 0.44, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.3269230769230769, + "sports_science": 0.16666666666666666, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.25, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.43137254901960786, + "accountant": 0.25925925925925924, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.3888888888888889, + "tax_accountant": 0.12962962962962962, + "physician": 0.3888888888888889 + } + }, + "prompt_5": { + "accuracy": 0.31569115815691157, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.3333333333333333, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.10344827586206896, + "advanced_mathematics": 0.125, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.21428571428571427, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.125, + "high_school_chemistry": 0.25, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.38461538461538464, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.25, + "college_economics": 0.43333333333333335, + "business_administration": 0.34210526315789475, + "marxism": 0.20833333333333334, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.38235294117647056, + "teacher_qualification": 0.3673469387755102, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.5769230769230769, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.39285714285714285, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.2962962962962963, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.4411764705882353, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.48, + "middle_school_history": 0.2222222222222222, + "civil_servant": 0.3076923076923077, + "sports_science": 0.375, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.39215686274509803, + "accountant": 0.3333333333333333, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.16666666666666666, + "physician": 0.24074074074074073 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.34050179211469533 + }, + "prompt_2": { + "accuracy": 0.32616487455197135 + }, + "prompt_3": { + "accuracy": 0.2831541218637993 + }, + "prompt_4": { + "accuracy": 0.35125448028673834 + }, + "prompt_5": { + "accuracy": 0.34408602150537637 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3256777758590917, + "category_acc": { + "agronomy": 0.2958579881656805, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.2804878048780488, + "arts": 0.35625, + "astronomy": 0.3212121212121212, + "business_ethics": 0.35406698564593303, + "chinese_civil_service_exam": 0.2625, + "chinese_driving_rule": 0.42748091603053434, + "chinese_food_culture": 0.29411764705882354, + "chinese_foreign_policy": 0.37383177570093457, + "chinese_history": 0.3560371517027864, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.39664804469273746, + "clinical_knowledge": 0.2742616033755274, + "college_actuarial_science": 0.1792452830188679, + "college_education": 0.34579439252336447, + "college_engineering_hydrology": 0.2641509433962264, + "college_law": 0.26851851851851855, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.33962264150943394, + "college_medicine": 0.3076923076923077, + "computer_science": 0.3137254901960784, + "computer_security": 0.4093567251461988, + "conceptual_physics": 0.3401360544217687, + "construction_project_management": 0.30935251798561153, + "economics": 0.36477987421383645, + "education": 0.4171779141104294, + "electrical_engineering": 0.32558139534883723, + "elementary_chinese": 0.28174603174603174, + "elementary_commonsense": 0.3282828282828283, + "elementary_information_and_technology": 0.48739495798319327, + "elementary_mathematics": 0.26956521739130435, + "ethnology": 0.34074074074074073, + "food_science": 0.36363636363636365, + "genetics": 0.26704545454545453, + "global_facts": 0.2751677852348993, + "high_school_biology": 0.26627218934911245, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.3559322033898305, + "high_school_mathematics": 0.2682926829268293, + "high_school_physics": 0.32727272727272727, + "high_school_politics": 0.32867132867132864, + "human_sexuality": 0.31746031746031744, + "international_law": 0.34054054054054056, + "journalism": 0.38953488372093026, + "jurisprudence": 0.30413625304136255, + "legal_and_moral_basis": 0.46261682242990654, + "logical": 0.3821138211382114, + "machine_learning": 0.3360655737704918, + "management": 0.38571428571428573, + "marketing": 0.4388888888888889, + "marxist_theory": 0.36507936507936506, + "modern_chinese": 0.28448275862068967, + "nutrition": 0.3310344827586207, + "philosophy": 0.3333333333333333, + "professional_accounting": 0.30857142857142855, + "professional_law": 0.27488151658767773, + "professional_medicine": 0.22074468085106383, + "professional_psychology": 0.35344827586206895, + "public_relations": 0.39655172413793105, + "security_study": 0.4074074074074074, + "sociology": 0.3938053097345133, + "sports_science": 0.23636363636363636, + "traditional_chinese_medicine": 0.2810810810810811, + "virology": 0.31952662721893493, + "world_history": 0.3167701863354037, + "world_religions": 0.275 + } + }, + "prompt_2": { + "accuracy": 0.3336211362458988, + "category_acc": { + "agronomy": 0.38461538461538464, + "anatomy": 0.22297297297297297, + "ancient_chinese": 0.2682926829268293, + "arts": 0.275, + "astronomy": 0.28484848484848485, + "business_ethics": 0.36363636363636365, + "chinese_civil_service_exam": 0.3625, + "chinese_driving_rule": 0.3511450381679389, + "chinese_food_culture": 0.3088235294117647, + "chinese_foreign_policy": 0.34579439252336447, + "chinese_history": 0.3993808049535604, + "chinese_literature": 0.28431372549019607, + "chinese_teacher_qualification": 0.4134078212290503, + "clinical_knowledge": 0.25316455696202533, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.411214953271028, + "college_engineering_hydrology": 0.32075471698113206, + "college_law": 0.2962962962962963, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.3113207547169811, + "college_medicine": 0.326007326007326, + "computer_science": 0.3872549019607843, + "computer_security": 0.5029239766081871, + "conceptual_physics": 0.32653061224489793, + "construction_project_management": 0.2949640287769784, + "economics": 0.4088050314465409, + "education": 0.3558282208588957, + "electrical_engineering": 0.3313953488372093, + "elementary_chinese": 0.2619047619047619, + "elementary_commonsense": 0.29292929292929293, + "elementary_information_and_technology": 0.5630252100840336, + "elementary_mathematics": 0.20869565217391303, + "ethnology": 0.34814814814814815, + "food_science": 0.34265734265734266, + "genetics": 0.2897727272727273, + "global_facts": 0.2483221476510067, + "high_school_biology": 0.2781065088757396, + "high_school_chemistry": 0.26515151515151514, + "high_school_geography": 0.3305084745762712, + "high_school_mathematics": 0.25, + "high_school_physics": 0.3090909090909091, + "high_school_politics": 0.34265734265734266, + "human_sexuality": 0.35714285714285715, + "international_law": 0.32432432432432434, + "journalism": 0.27325581395348836, + "jurisprudence": 0.3260340632603406, + "legal_and_moral_basis": 0.5233644859813084, + "logical": 0.2926829268292683, + "machine_learning": 0.27049180327868855, + "management": 0.4238095238095238, + "marketing": 0.4777777777777778, + "marxist_theory": 0.4021164021164021, + "modern_chinese": 0.3017241379310345, + "nutrition": 0.3103448275862069, + "philosophy": 0.3523809523809524, + "professional_accounting": 0.41714285714285715, + "professional_law": 0.24170616113744076, + "professional_medicine": 0.2579787234042553, + "professional_psychology": 0.3577586206896552, + "public_relations": 0.40804597701149425, + "security_study": 0.3333333333333333, + "sociology": 0.34513274336283184, + "sports_science": 0.32727272727272727, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.3609467455621302, + "world_history": 0.35403726708074534, + "world_religions": 0.29375 + } + }, + "prompt_3": { + "accuracy": 0.33612502158521845, + "category_acc": { + "agronomy": 0.33136094674556216, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.23780487804878048, + "arts": 0.325, + "astronomy": 0.26666666666666666, + "business_ethics": 0.41626794258373206, + "chinese_civil_service_exam": 0.26875, + "chinese_driving_rule": 0.4580152671755725, + "chinese_food_culture": 0.2867647058823529, + "chinese_foreign_policy": 0.38317757009345793, + "chinese_history": 0.3560371517027864, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.37988826815642457, + "clinical_knowledge": 0.28270042194092826, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.411214953271028, + "college_engineering_hydrology": 0.32075471698113206, + "college_law": 0.23148148148148148, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.3113207547169811, + "college_medicine": 0.32967032967032966, + "computer_science": 0.43137254901960786, + "computer_security": 0.4444444444444444, + "conceptual_physics": 0.29931972789115646, + "construction_project_management": 0.3381294964028777, + "economics": 0.37735849056603776, + "education": 0.38650306748466257, + "electrical_engineering": 0.3081395348837209, + "elementary_chinese": 0.26587301587301587, + "elementary_commonsense": 0.3333333333333333, + "elementary_information_and_technology": 0.5336134453781513, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.2962962962962963, + "food_science": 0.3356643356643357, + "genetics": 0.3465909090909091, + "global_facts": 0.2953020134228188, + "high_school_biology": 0.31952662721893493, + "high_school_chemistry": 0.23484848484848486, + "high_school_geography": 0.3135593220338983, + "high_school_mathematics": 0.2621951219512195, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.2937062937062937, + "human_sexuality": 0.36507936507936506, + "international_law": 0.33513513513513515, + "journalism": 0.31976744186046513, + "jurisprudence": 0.35766423357664234, + "legal_and_moral_basis": 0.5514018691588785, + "logical": 0.2845528455284553, + "machine_learning": 0.29508196721311475, + "management": 0.36666666666666664, + "marketing": 0.4388888888888889, + "marxist_theory": 0.37037037037037035, + "modern_chinese": 0.28448275862068967, + "nutrition": 0.31724137931034485, + "philosophy": 0.3047619047619048, + "professional_accounting": 0.30857142857142855, + "professional_law": 0.2985781990521327, + "professional_medicine": 0.2712765957446808, + "professional_psychology": 0.34913793103448276, + "public_relations": 0.42528735632183906, + "security_study": 0.362962962962963, + "sociology": 0.3672566371681416, + "sports_science": 0.3393939393939394, + "traditional_chinese_medicine": 0.32972972972972975, + "virology": 0.3905325443786982, + "world_history": 0.33540372670807456, + "world_religions": 0.29375 + } + }, + "prompt_4": { + "accuracy": 0.32092902780176136, + "category_acc": { + "agronomy": 0.31952662721893493, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.25, + "arts": 0.26875, + "astronomy": 0.2545454545454545, + "business_ethics": 0.3157894736842105, + "chinese_civil_service_exam": 0.2875, + "chinese_driving_rule": 0.48091603053435117, + "chinese_food_culture": 0.34558823529411764, + "chinese_foreign_policy": 0.29906542056074764, + "chinese_history": 0.3188854489164087, + "chinese_literature": 0.2107843137254902, + "chinese_teacher_qualification": 0.3854748603351955, + "clinical_knowledge": 0.2911392405063291, + "college_actuarial_science": 0.19811320754716982, + "college_education": 0.4205607476635514, + "college_engineering_hydrology": 0.2830188679245283, + "college_law": 0.28703703703703703, + "college_mathematics": 0.20952380952380953, + "college_medical_statistics": 0.3113207547169811, + "college_medicine": 0.2893772893772894, + "computer_science": 0.38235294117647056, + "computer_security": 0.4327485380116959, + "conceptual_physics": 0.30612244897959184, + "construction_project_management": 0.3381294964028777, + "economics": 0.33962264150943394, + "education": 0.3374233128834356, + "electrical_engineering": 0.3488372093023256, + "elementary_chinese": 0.28174603174603174, + "elementary_commonsense": 0.31313131313131315, + "elementary_information_and_technology": 0.5126050420168067, + "elementary_mathematics": 0.29130434782608694, + "ethnology": 0.3333333333333333, + "food_science": 0.34265734265734266, + "genetics": 0.3125, + "global_facts": 0.3087248322147651, + "high_school_biology": 0.2603550295857988, + "high_school_chemistry": 0.25757575757575757, + "high_school_geography": 0.2627118644067797, + "high_school_mathematics": 0.23170731707317074, + "high_school_physics": 0.32727272727272727, + "high_school_politics": 0.34965034965034963, + "human_sexuality": 0.3253968253968254, + "international_law": 0.2972972972972973, + "journalism": 0.29651162790697677, + "jurisprudence": 0.3236009732360097, + "legal_and_moral_basis": 0.48130841121495327, + "logical": 0.37398373983739835, + "machine_learning": 0.28688524590163933, + "management": 0.3380952380952381, + "marketing": 0.42777777777777776, + "marxist_theory": 0.4021164021164021, + "modern_chinese": 0.2672413793103448, + "nutrition": 0.3448275862068966, + "philosophy": 0.34285714285714286, + "professional_accounting": 0.32, + "professional_law": 0.23222748815165878, + "professional_medicine": 0.24468085106382978, + "professional_psychology": 0.34051724137931033, + "public_relations": 0.41954022988505746, + "security_study": 0.2814814814814815, + "sociology": 0.2920353982300885, + "sports_science": 0.36363636363636365, + "traditional_chinese_medicine": 0.2756756756756757, + "virology": 0.33136094674556216, + "world_history": 0.33540372670807456, + "world_religions": 0.34375 + } + }, + "prompt_5": { + "accuracy": 0.3193748920739078, + "category_acc": { + "agronomy": 0.33727810650887574, + "anatomy": 0.28378378378378377, + "ancient_chinese": 0.2682926829268293, + "arts": 0.3, + "astronomy": 0.3090909090909091, + "business_ethics": 0.32057416267942584, + "chinese_civil_service_exam": 0.30625, + "chinese_driving_rule": 0.33587786259541985, + "chinese_food_culture": 0.25, + "chinese_foreign_policy": 0.35514018691588783, + "chinese_history": 0.37770897832817335, + "chinese_literature": 0.2107843137254902, + "chinese_teacher_qualification": 0.36312849162011174, + "clinical_knowledge": 0.27848101265822783, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.411214953271028, + "college_engineering_hydrology": 0.33962264150943394, + "college_law": 0.2962962962962963, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.2641509433962264, + "college_medicine": 0.2893772893772894, + "computer_science": 0.3137254901960784, + "computer_security": 0.391812865497076, + "conceptual_physics": 0.3877551020408163, + "construction_project_management": 0.28776978417266186, + "economics": 0.37735849056603776, + "education": 0.3128834355828221, + "electrical_engineering": 0.3488372093023256, + "elementary_chinese": 0.21825396825396826, + "elementary_commonsense": 0.29797979797979796, + "elementary_information_and_technology": 0.5042016806722689, + "elementary_mathematics": 0.24347826086956523, + "ethnology": 0.2814814814814815, + "food_science": 0.40559440559440557, + "genetics": 0.3125, + "global_facts": 0.28859060402684567, + "high_school_biology": 0.27218934911242604, + "high_school_chemistry": 0.2878787878787879, + "high_school_geography": 0.3135593220338983, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.36363636363636365, + "human_sexuality": 0.2777777777777778, + "international_law": 0.31891891891891894, + "journalism": 0.3081395348837209, + "jurisprudence": 0.34306569343065696, + "legal_and_moral_basis": 0.48598130841121495, + "logical": 0.2764227642276423, + "machine_learning": 0.27049180327868855, + "management": 0.36666666666666664, + "marketing": 0.4388888888888889, + "marxist_theory": 0.3544973544973545, + "modern_chinese": 0.2413793103448276, + "nutrition": 0.35172413793103446, + "philosophy": 0.3142857142857143, + "professional_accounting": 0.30857142857142855, + "professional_law": 0.2559241706161137, + "professional_medicine": 0.22872340425531915, + "professional_psychology": 0.3620689655172414, + "public_relations": 0.39655172413793105, + "security_study": 0.2814814814814815, + "sociology": 0.37610619469026546, + "sports_science": 0.3090909090909091, + "traditional_chinese_medicine": 0.2864864864864865, + "virology": 0.3136094674556213, + "world_history": 0.3105590062111801, + "world_religions": 0.34375 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24242424242424243 + }, + "prompt_2": { + "accuracy": 0.30303030303030304 + }, + "prompt_3": { + "accuracy": 0.30303030303030304 + }, + "prompt_4": { + "accuracy": 0.2727272727272727 + }, + "prompt_5": { + "accuracy": 0.30303030303030304 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.375 + }, + "prompt_2": { + "accuracy": 0.4068181818181818 + }, + "prompt_3": { + "accuracy": 0.4113636363636364 + }, + "prompt_4": { + "accuracy": 0.43636363636363634 + }, + "prompt_5": { + "accuracy": 0.4068181818181818 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.37728813559322033 + }, + "prompt_2": { + "accuracy": 0.36983050847457627 + }, + "prompt_3": { + "accuracy": 0.3494915254237288 + }, + "prompt_4": { + "accuracy": 0.36915254237288136 + }, + "prompt_5": { + "accuracy": 0.36915254237288136 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5923709798055348 + }, + "prompt_2": { + "accuracy": 0.6050860134629769 + }, + "prompt_3": { + "accuracy": 0.5699326851159312 + }, + "prompt_4": { + "accuracy": 0.5916230366492147 + }, + "prompt_5": { + "accuracy": 0.6234106207928197 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7398334149926507 + }, + "prompt_2": { + "accuracy": 0.7339539441450269 + }, + "prompt_3": { + "accuracy": 0.7760901518863302 + }, + "prompt_4": { + "accuracy": 0.7535521803037727 + }, + "prompt_5": { + "accuracy": 0.7648211660950515 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.32331783382419704, + "rouge2": 0.12043093546447878, + "rougeL": 0.24425001387452192, + "avg_rouge": 0.22933292772106592 + }, + "prompt_2": { + "rouge1": 0.34949752748474194, + "rouge2": 0.12930907117751259, + "rougeL": 0.26549155409579445, + "avg_rouge": 0.248099384252683 + }, + "prompt_3": { + "rouge1": 0.32288947483296804, + "rouge2": 0.11604789603957202, + "rougeL": 0.2431820386648606, + "avg_rouge": 0.22737313651246685 + }, + "prompt_4": { + "rouge1": 0.32354088736250536, + "rouge2": 0.11650022475007384, + "rougeL": 0.24302668247285347, + "avg_rouge": 0.22768926486181087 + }, + "prompt_5": { + "rouge1": 0.34352586896866033, + "rouge2": 0.11886472522139745, + "rougeL": 0.25809708903246575, + "avg_rouge": 0.24016256107417452 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2251158677590241, + "rouge2": 0.06238407994651637, + "rougeL": 0.16873266598847494, + "avg_rouge": 0.15207753789800513 + }, + "prompt_2": { + "rouge1": 0.22789381279990703, + "rouge2": 0.06059283396869814, + "rougeL": 0.1689521938005374, + "avg_rouge": 0.15247961352304754 + }, + "prompt_3": { + "rouge1": 0.23028907325940265, + "rouge2": 0.06273806809864396, + "rougeL": 0.17059253615484968, + "avg_rouge": 0.15453989250429878 + }, + "prompt_4": { + "rouge1": 0.22186345082270365, + "rouge2": 0.058481771806688544, + "rougeL": 0.16475053629307831, + "avg_rouge": 0.14836525297415684 + }, + "prompt_5": { + "rouge1": 0.22535881378922112, + "rouge2": 0.05983360603885301, + "rougeL": 0.16838459595473698, + "avg_rouge": 0.15119233859427036 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8761467889908257 + }, + "prompt_2": { + "accuracy": 0.8784403669724771 + }, + "prompt_3": { + "accuracy": 0.8612385321100917 + }, + "prompt_4": { + "accuracy": 0.8784403669724771 + }, + "prompt_5": { + "accuracy": 0.8165137614678899 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7114093959731543 + }, + "prompt_2": { + "accuracy": 0.7593480345158198 + }, + "prompt_3": { + "accuracy": 0.7497603068072867 + }, + "prompt_4": { + "accuracy": 0.713326941514861 + }, + "prompt_5": { + "accuracy": 0.6596356663470757 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.58 + }, + "prompt_2": { + "accuracy": 0.624 + }, + "prompt_3": { + "accuracy": 0.6195 + }, + "prompt_4": { + "accuracy": 0.637 + }, + "prompt_5": { + "accuracy": 0.642 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5095 + }, + "prompt_2": { + "accuracy": 0.5195 + }, + "prompt_3": { + "accuracy": 0.529 + }, + "prompt_4": { + "accuracy": 0.5125 + }, + "prompt_5": { + "accuracy": 0.519 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6515 + }, + "prompt_2": { + "accuracy": 0.6365 + }, + "prompt_3": { + "accuracy": 0.647 + }, + "prompt_4": { + "accuracy": 0.61 + }, + "prompt_5": { + "accuracy": 0.6165 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4647887323943662 + }, + "prompt_2": { + "accuracy": 0.49295774647887325 + }, + "prompt_3": { + "accuracy": 0.6056338028169014 + }, + "prompt_4": { + "accuracy": 0.49295774647887325 + }, + "prompt_5": { + "accuracy": 0.4788732394366197 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6425992779783394 + }, + "prompt_2": { + "accuracy": 0.5234657039711191 + }, + "prompt_3": { + "accuracy": 0.6425992779783394 + }, + "prompt_4": { + "accuracy": 0.5776173285198556 + }, + "prompt_5": { + "accuracy": 0.6064981949458483 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5955882352941176 + }, + "prompt_2": { + "accuracy": 0.5833333333333334 + }, + "prompt_3": { + "accuracy": 0.5857843137254902 + }, + "prompt_4": { + "accuracy": 0.5735294117647058 + }, + "prompt_5": { + "accuracy": 0.5784313725490197 + } } }, "five_shot": { @@ -11158,53 +95968,1733 @@ "model_link": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.48571428571428565, + "language_acc": { + "English": 0.64, + "Vietnamese": 0.47333333333333333, + "Malay": 0.3933333333333333, + "Indonesian": 0.46, + "Spanish": 0.52, + "Chinese": 0.4533333333333333, + "Filipino": 0.46 + }, + "consistency_score_2": 0.47396825396825387, + "consistency_score_3": 0.29257142857142854, + "consistency_score_4": 0.20647619047619048, + "consistency_score_5": 0.15746031746031747, + "consistency_score_6": 0.12666666666666668, + "consistency_score_7": 0.10666666666666667, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.46, + "English,Malay": 0.47333333333333333, + "English,Indonesian": 0.5266666666666666, + "English,Spanish": 0.6, + "English,Chinese": 0.5066666666666667, + "English,Filipino": 0.5266666666666666, + "Vietnamese,Malay": 0.43333333333333335, + "Vietnamese,Indonesian": 0.4666666666666667, + "Vietnamese,Spanish": 0.5, + "Vietnamese,Chinese": 0.4266666666666667, + "Vietnamese,Filipino": 0.48, + "Malay,Indonesian": 0.49333333333333335, + "Malay,Spanish": 0.42, + "Malay,Chinese": 0.38666666666666666, + "Malay,Filipino": 0.46, + "Indonesian,Spanish": 0.4533333333333333, + "Indonesian,Chinese": 0.4266666666666667, + "Indonesian,Filipino": 0.43333333333333335, + "Spanish,Chinese": 0.5, + "Spanish,Filipino": 0.54, + "Chinese,Filipino": 0.44 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.26, + "English,Vietnamese,Indonesian": 0.32, + "English,Vietnamese,Spanish": 0.32666666666666666, + "English,Vietnamese,Chinese": 0.26666666666666666, + "English,Vietnamese,Filipino": 0.31333333333333335, + "English,Malay,Indonesian": 0.32, + "English,Malay,Spanish": 0.32, + "English,Malay,Chinese": 0.2733333333333333, + "English,Malay,Filipino": 0.32666666666666666, + "English,Indonesian,Spanish": 0.3466666666666667, + "English,Indonesian,Chinese": 0.31333333333333335, + "English,Indonesian,Filipino": 0.30666666666666664, + "English,Spanish,Chinese": 0.36, + "English,Spanish,Filipino": 0.3933333333333333, + "English,Chinese,Filipino": 0.31333333333333335, + "Vietnamese,Malay,Indonesian": 0.2733333333333333, + "Vietnamese,Malay,Spanish": 0.25333333333333335, + "Vietnamese,Malay,Chinese": 0.23333333333333334, + "Vietnamese,Malay,Filipino": 0.26666666666666666, + "Vietnamese,Indonesian,Spanish": 0.3, + "Vietnamese,Indonesian,Chinese": 0.26, + "Vietnamese,Indonesian,Filipino": 0.3, + "Vietnamese,Spanish,Chinese": 0.28, + "Vietnamese,Spanish,Filipino": 0.3466666666666667, + "Vietnamese,Chinese,Filipino": 0.25333333333333335, + "Malay,Indonesian,Spanish": 0.26666666666666666, + "Malay,Indonesian,Chinese": 0.25333333333333335, + "Malay,Indonesian,Filipino": 0.28, + "Malay,Spanish,Chinese": 0.22666666666666666, + "Malay,Spanish,Filipino": 0.31333333333333335, + "Malay,Chinese,Filipino": 0.25333333333333335, + "Indonesian,Spanish,Chinese": 0.26, + "Indonesian,Spanish,Filipino": 0.30666666666666664, + "Indonesian,Chinese,Filipino": 0.24666666666666667, + "Spanish,Chinese,Filipino": 0.30666666666666664 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.20666666666666667, + "English,Vietnamese,Malay,Spanish": 0.19333333333333333, + "English,Vietnamese,Malay,Chinese": 0.18, + "English,Vietnamese,Malay,Filipino": 0.2, + "English,Vietnamese,Indonesian,Spanish": 0.24, + "English,Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.22666666666666666, + "English,Vietnamese,Spanish,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.25333333333333335, + "English,Vietnamese,Chinese,Filipino": 0.2, + "English,Malay,Indonesian,Spanish": 0.21333333333333335, + "English,Malay,Indonesian,Chinese": 0.20666666666666667, + "English,Malay,Indonesian,Filipino": 0.22666666666666666, + "English,Malay,Spanish,Chinese": 0.21333333333333335, + "English,Malay,Spanish,Filipino": 0.26666666666666666, + "English,Malay,Chinese,Filipino": 0.21333333333333335, + "English,Indonesian,Spanish,Chinese": 0.22666666666666666, + "English,Indonesian,Spanish,Filipino": 0.25333333333333335, + "English,Indonesian,Chinese,Filipino": 0.2, + "English,Spanish,Chinese,Filipino": 0.26666666666666666, + "Vietnamese,Malay,Indonesian,Spanish": 0.18, + "Vietnamese,Malay,Indonesian,Chinese": 0.18666666666666668, + "Vietnamese,Malay,Indonesian,Filipino": 0.19333333333333333, + "Vietnamese,Malay,Spanish,Chinese": 0.15333333333333332, + "Vietnamese,Malay,Spanish,Filipino": 0.20666666666666667, + "Vietnamese,Malay,Chinese,Filipino": 0.16, + "Vietnamese,Indonesian,Spanish,Chinese": 0.18, + "Vietnamese,Indonesian,Spanish,Filipino": 0.24, + "Vietnamese,Indonesian,Chinese,Filipino": 0.18, + "Vietnamese,Spanish,Chinese,Filipino": 0.2, + "Malay,Indonesian,Spanish,Chinese": 0.17333333333333334, + "Malay,Indonesian,Spanish,Filipino": 0.21333333333333335, + "Malay,Indonesian,Chinese,Filipino": 0.17333333333333334, + "Malay,Spanish,Chinese,Filipino": 0.2, + "Indonesian,Spanish,Chinese,Filipino": 0.18666666666666668 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.15333333333333332, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.15333333333333332, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.16, + "English,Vietnamese,Malay,Spanish,Chinese": 0.14, + "English,Vietnamese,Malay,Spanish,Filipino": 0.17333333333333334, + "English,Vietnamese,Malay,Chinese,Filipino": 0.14, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.16, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.19333333333333333, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.15333333333333332, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.16666666666666666, + "English,Malay,Indonesian,Spanish,Chinese": 0.16, + "English,Malay,Indonesian,Spanish,Filipino": 0.18666666666666668, + "English,Malay,Indonesian,Chinese,Filipino": 0.15333333333333332, + "English,Malay,Spanish,Chinese,Filipino": 0.19333333333333333, + "English,Indonesian,Spanish,Chinese,Filipino": 0.17333333333333334, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.13333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.16, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.13333333333333333, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.13333333333333333, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.14, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.14666666666666667 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.12, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.14, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.12, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.12666666666666668, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.12666666666666668, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.14, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.11333333333333333 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.10666666666666667 + } + }, + "AC3_2": 0.47976938703001315, + "AC3_3": 0.3651772602845245, + "AC3_4": 0.28977120838647263, + "AC3_5": 0.2378225919810747, + "AC3_6": 0.20093312593919443, + "AC3_7": 0.17491961411838172 + }, + "prompt_2": { + "overall_acc": 0.4952380952380953, + "language_acc": { + "English": 0.6266666666666667, + "Vietnamese": 0.4266666666666667, + "Malay": 0.4066666666666667, + "Indonesian": 0.5333333333333333, + "Spanish": 0.5666666666666667, + "Chinese": 0.43333333333333335, + "Filipino": 0.47333333333333333 + }, + "consistency_score_2": 0.473015873015873, + "consistency_score_3": 0.296, + "consistency_score_4": 0.21085714285714288, + "consistency_score_5": 0.16158730158730158, + "consistency_score_6": 0.12952380952380954, + "consistency_score_7": 0.10666666666666667, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.44666666666666666, + "English,Malay": 0.4, + "English,Indonesian": 0.47333333333333333, + "English,Spanish": 0.58, + "English,Chinese": 0.48, + "English,Filipino": 0.48, + "Vietnamese,Malay": 0.46, + "Vietnamese,Indonesian": 0.5066666666666667, + "Vietnamese,Spanish": 0.5133333333333333, + "Vietnamese,Chinese": 0.43333333333333335, + "Vietnamese,Filipino": 0.44666666666666666, + "Malay,Indonesian": 0.52, + "Malay,Spanish": 0.4533333333333333, + "Malay,Chinese": 0.44666666666666666, + "Malay,Filipino": 0.4666666666666667, + "Indonesian,Spanish": 0.5, + "Indonesian,Chinese": 0.47333333333333333, + "Indonesian,Filipino": 0.46, + "Spanish,Chinese": 0.48, + "Spanish,Filipino": 0.4866666666666667, + "Chinese,Filipino": 0.4266666666666667 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.24666666666666667, + "English,Vietnamese,Indonesian": 0.3, + "English,Vietnamese,Spanish": 0.3333333333333333, + "English,Vietnamese,Chinese": 0.26666666666666666, + "English,Vietnamese,Filipino": 0.28, + "English,Malay,Indonesian": 0.29333333333333333, + "English,Malay,Spanish": 0.31333333333333335, + "English,Malay,Chinese": 0.26666666666666666, + "English,Malay,Filipino": 0.28, + "English,Indonesian,Spanish": 0.35333333333333333, + "English,Indonesian,Chinese": 0.3, + "English,Indonesian,Filipino": 0.3, + "English,Spanish,Chinese": 0.32, + "English,Spanish,Filipino": 0.3333333333333333, + "English,Chinese,Filipino": 0.26666666666666666, + "Vietnamese,Malay,Indonesian": 0.34, + "Vietnamese,Malay,Spanish": 0.28, + "Vietnamese,Malay,Chinese": 0.25333333333333335, + "Vietnamese,Malay,Filipino": 0.2866666666666667, + "Vietnamese,Indonesian,Spanish": 0.31333333333333335, + "Vietnamese,Indonesian,Chinese": 0.2733333333333333, + "Vietnamese,Indonesian,Filipino": 0.30666666666666664, + "Vietnamese,Spanish,Chinese": 0.2866666666666667, + "Vietnamese,Spanish,Filipino": 0.32, + "Vietnamese,Chinese,Filipino": 0.24, + "Malay,Indonesian,Spanish": 0.31333333333333335, + "Malay,Indonesian,Chinese": 0.31333333333333335, + "Malay,Indonesian,Filipino": 0.31333333333333335, + "Malay,Spanish,Chinese": 0.29333333333333333, + "Malay,Spanish,Filipino": 0.32666666666666666, + "Malay,Chinese,Filipino": 0.2733333333333333, + "Indonesian,Spanish,Chinese": 0.30666666666666664, + "Indonesian,Spanish,Filipino": 0.30666666666666664, + "Indonesian,Chinese,Filipino": 0.28, + "Spanish,Chinese,Filipino": 0.28 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.2, + "English,Vietnamese,Malay,Spanish": 0.20666666666666667, + "English,Vietnamese,Malay,Chinese": 0.17333333333333334, + "English,Vietnamese,Malay,Filipino": 0.2, + "English,Vietnamese,Indonesian,Spanish": 0.24666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.2, + "English,Vietnamese,Indonesian,Filipino": 0.22666666666666666, + "English,Vietnamese,Spanish,Chinese": 0.2, + "English,Vietnamese,Spanish,Filipino": 0.23333333333333334, + "English,Vietnamese,Chinese,Filipino": 0.18666666666666668, + "English,Malay,Indonesian,Spanish": 0.23333333333333334, + "English,Malay,Indonesian,Chinese": 0.22, + "English,Malay,Indonesian,Filipino": 0.21333333333333335, + "English,Malay,Spanish,Chinese": 0.22, + "English,Malay,Spanish,Filipino": 0.22666666666666666, + "English,Malay,Chinese,Filipino": 0.2, + "English,Indonesian,Spanish,Chinese": 0.22666666666666666, + "English,Indonesian,Spanish,Filipino": 0.22, + "English,Indonesian,Chinese,Filipino": 0.20666666666666667, + "English,Spanish,Chinese,Filipino": 0.21333333333333335, + "Vietnamese,Malay,Indonesian,Spanish": 0.2, + "Vietnamese,Malay,Indonesian,Chinese": 0.20666666666666667, + "Vietnamese,Malay,Indonesian,Filipino": 0.22666666666666666, + "Vietnamese,Malay,Spanish,Chinese": 0.18666666666666668, + "Vietnamese,Malay,Spanish,Filipino": 0.20666666666666667, + "Vietnamese,Malay,Chinese,Filipino": 0.18666666666666668, + "Vietnamese,Indonesian,Spanish,Chinese": 0.21333333333333335, + "Vietnamese,Indonesian,Spanish,Filipino": 0.22666666666666666, + "Vietnamese,Indonesian,Chinese,Filipino": 0.19333333333333333, + "Vietnamese,Spanish,Chinese,Filipino": 0.2, + "Malay,Indonesian,Spanish,Chinese": 0.22666666666666666, + "Malay,Indonesian,Spanish,Filipino": 0.22666666666666666, + "Malay,Indonesian,Chinese,Filipino": 0.2, + "Malay,Spanish,Chinese,Filipino": 0.22, + "Indonesian,Spanish,Chinese,Filipino": 0.20666666666666667 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.16, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.15333333333333332, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.16666666666666666, + "English,Vietnamese,Malay,Spanish,Chinese": 0.14666666666666667, + "English,Vietnamese,Malay,Spanish,Filipino": 0.16, + "English,Vietnamese,Malay,Chinese,Filipino": 0.15333333333333332, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.16666666666666666, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.18, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.16, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.16, + "English,Malay,Indonesian,Spanish,Chinese": 0.18, + "English,Malay,Indonesian,Spanish,Filipino": 0.16666666666666666, + "English,Malay,Indonesian,Chinese,Filipino": 0.16, + "English,Malay,Spanish,Chinese,Filipino": 0.17333333333333334, + "English,Indonesian,Spanish,Chinese,Filipino": 0.16, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.15333333333333332, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.16, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.15333333333333332, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.15333333333333332, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.16666666666666666, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.16 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.12666666666666668, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.12666666666666668, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.13333333333333333, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.12666666666666668, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.13333333333333333, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.13333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.12666666666666668 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.10666666666666667 + } + }, + "AC3_2": 0.4838719749695423, + "AC3_3": 0.3705344246041092, + "AC3_4": 0.2957801841655944, + "AC3_5": 0.24366958959672552, + "AC3_6": 0.20534262482195265, + "AC3_7": 0.17552742613117558 + }, + "prompt_3": { + "overall_acc": 0.49047619047619045, + "language_acc": { + "English": 0.6466666666666666, + "Vietnamese": 0.4533333333333333, + "Malay": 0.37333333333333335, + "Indonesian": 0.43333333333333335, + "Spanish": 0.5933333333333334, + "Chinese": 0.47333333333333333, + "Filipino": 0.46 + }, + "consistency_score_2": 0.4606349206349207, + "consistency_score_3": 0.27180952380952383, + "consistency_score_4": 0.18095238095238098, + "consistency_score_5": 0.12984126984126987, + "consistency_score_6": 0.09904761904761906, + "consistency_score_7": 0.08, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.47333333333333333, + "English,Malay": 0.4, + "English,Indonesian": 0.5, + "English,Spanish": 0.6066666666666667, + "English,Chinese": 0.52, + "English,Filipino": 0.49333333333333335, + "Vietnamese,Malay": 0.44, + "Vietnamese,Indonesian": 0.47333333333333333, + "Vietnamese,Spanish": 0.44, + "Vietnamese,Chinese": 0.43333333333333335, + "Vietnamese,Filipino": 0.44666666666666666, + "Malay,Indonesian": 0.54, + "Malay,Spanish": 0.46, + "Malay,Chinese": 0.3466666666666667, + "Malay,Filipino": 0.44666666666666666, + "Indonesian,Spanish": 0.4866666666666667, + "Indonesian,Chinese": 0.36666666666666664, + "Indonesian,Filipino": 0.41333333333333333, + "Spanish,Chinese": 0.46, + "Spanish,Filipino": 0.5133333333333333, + "Chinese,Filipino": 0.41333333333333333 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.24666666666666667, + "English,Vietnamese,Indonesian": 0.29333333333333333, + "English,Vietnamese,Spanish": 0.3333333333333333, + "English,Vietnamese,Chinese": 0.2733333333333333, + "English,Vietnamese,Filipino": 0.2733333333333333, + "English,Malay,Indonesian": 0.28, + "English,Malay,Spanish": 0.31333333333333335, + "English,Malay,Chinese": 0.22666666666666666, + "English,Malay,Filipino": 0.24666666666666667, + "English,Indonesian,Spanish": 0.36, + "English,Indonesian,Chinese": 0.2733333333333333, + "English,Indonesian,Filipino": 0.29333333333333333, + "English,Spanish,Chinese": 0.3333333333333333, + "English,Spanish,Filipino": 0.36, + "English,Chinese,Filipino": 0.2866666666666667, + "Vietnamese,Malay,Indonesian": 0.31333333333333335, + "Vietnamese,Malay,Spanish": 0.24, + "Vietnamese,Malay,Chinese": 0.20666666666666667, + "Vietnamese,Malay,Filipino": 0.24666666666666667, + "Vietnamese,Indonesian,Spanish": 0.26, + "Vietnamese,Indonesian,Chinese": 0.23333333333333334, + "Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "Vietnamese,Spanish,Chinese": 0.25333333333333335, + "Vietnamese,Spanish,Filipino": 0.26, + "Vietnamese,Chinese,Filipino": 0.26, + "Malay,Indonesian,Spanish": 0.32, + "Malay,Indonesian,Chinese": 0.22, + "Malay,Indonesian,Filipino": 0.29333333333333333, + "Malay,Spanish,Chinese": 0.22666666666666666, + "Malay,Spanish,Filipino": 0.3, + "Malay,Chinese,Filipino": 0.21333333333333335, + "Indonesian,Spanish,Chinese": 0.25333333333333335, + "Indonesian,Spanish,Filipino": 0.29333333333333333, + "Indonesian,Chinese,Filipino": 0.22666666666666666, + "Spanish,Chinese,Filipino": 0.26666666666666666 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.19333333333333333, + "English,Vietnamese,Malay,Spanish": 0.19333333333333333, + "English,Vietnamese,Malay,Chinese": 0.12666666666666668, + "English,Vietnamese,Malay,Filipino": 0.15333333333333332, + "English,Vietnamese,Indonesian,Spanish": 0.22, + "English,Vietnamese,Indonesian,Chinese": 0.17333333333333334, + "English,Vietnamese,Indonesian,Filipino": 0.17333333333333334, + "English,Vietnamese,Spanish,Chinese": 0.18, + "English,Vietnamese,Spanish,Filipino": 0.20666666666666667, + "English,Vietnamese,Chinese,Filipino": 0.19333333333333333, + "English,Malay,Indonesian,Spanish": 0.24, + "English,Malay,Indonesian,Chinese": 0.16666666666666666, + "English,Malay,Indonesian,Filipino": 0.19333333333333333, + "English,Malay,Spanish,Chinese": 0.17333333333333334, + "English,Malay,Spanish,Filipino": 0.22, + "English,Malay,Chinese,Filipino": 0.15333333333333332, + "English,Indonesian,Spanish,Chinese": 0.2, + "English,Indonesian,Spanish,Filipino": 0.24, + "English,Indonesian,Chinese,Filipino": 0.18666666666666668, + "English,Spanish,Chinese,Filipino": 0.22, + "Vietnamese,Malay,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,Malay,Indonesian,Chinese": 0.16, + "Vietnamese,Malay,Indonesian,Filipino": 0.18, + "Vietnamese,Malay,Spanish,Chinese": 0.13333333333333333, + "Vietnamese,Malay,Spanish,Filipino": 0.16, + "Vietnamese,Malay,Chinese,Filipino": 0.13333333333333333, + "Vietnamese,Indonesian,Spanish,Chinese": 0.16666666666666666, + "Vietnamese,Indonesian,Spanish,Filipino": 0.15333333333333332, + "Vietnamese,Indonesian,Chinese,Filipino": 0.16666666666666666, + "Vietnamese,Spanish,Chinese,Filipino": 0.16, + "Malay,Indonesian,Spanish,Chinese": 0.18, + "Malay,Indonesian,Spanish,Filipino": 0.22666666666666666, + "Malay,Indonesian,Chinese,Filipino": 0.16, + "Malay,Spanish,Chinese,Filipino": 0.17333333333333334, + "Indonesian,Spanish,Chinese,Filipino": 0.18 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.16, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.11333333333333333, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.12666666666666668, + "English,Vietnamese,Malay,Spanish,Chinese": 0.09333333333333334, + "English,Vietnamese,Malay,Spanish,Filipino": 0.13333333333333333, + "English,Vietnamese,Malay,Chinese,Filipino": 0.09333333333333334, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.12666666666666668, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.14, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.13333333333333333, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.13333333333333333, + "English,Malay,Indonesian,Spanish,Chinese": 0.14, + "English,Malay,Indonesian,Spanish,Filipino": 0.18, + "English,Malay,Indonesian,Chinese,Filipino": 0.12666666666666668, + "English,Malay,Spanish,Chinese,Filipino": 0.14, + "English,Indonesian,Spanish,Chinese,Filipino": 0.16, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.12666666666666668, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.12666666666666668, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.11333333333333333, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.1, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.12, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.14 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.09333333333333334, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.11333333333333333, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.08666666666666667, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.08, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.10666666666666667, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.12, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.09333333333333334 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.08 + } + }, + "AC3_2": 0.47508741809487726, + "AC3_3": 0.3497798719228919, + "AC3_4": 0.26437014518183477, + "AC3_5": 0.20532728953165083, + "AC3_6": 0.16481267787034296, + "AC3_7": 0.13756260431645398 + }, + "prompt_4": { + "overall_acc": 0.478095238095238, + "language_acc": { + "English": 0.62, + "Vietnamese": 0.41333333333333333, + "Malay": 0.44, + "Indonesian": 0.44, + "Spanish": 0.5666666666666667, + "Chinese": 0.3933333333333333, + "Filipino": 0.47333333333333333 + }, + "consistency_score_2": 0.45015873015873015, + "consistency_score_3": 0.26171428571428573, + "consistency_score_4": 0.17390476190476195, + "consistency_score_5": 0.12444444444444444, + "consistency_score_6": 0.09333333333333334, + "consistency_score_7": 0.07333333333333333, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4266666666666667, + "English,Malay": 0.44666666666666666, + "English,Indonesian": 0.5466666666666666, + "English,Spanish": 0.6533333333333333, + "English,Chinese": 0.44, + "English,Filipino": 0.5066666666666667, + "Vietnamese,Malay": 0.38666666666666666, + "Vietnamese,Indonesian": 0.4533333333333333, + "Vietnamese,Spanish": 0.4533333333333333, + "Vietnamese,Chinese": 0.36, + "Vietnamese,Filipino": 0.3466666666666667, + "Malay,Indonesian": 0.49333333333333335, + "Malay,Spanish": 0.44666666666666666, + "Malay,Chinese": 0.38666666666666666, + "Malay,Filipino": 0.4533333333333333, + "Indonesian,Spanish": 0.47333333333333333, + "Indonesian,Chinese": 0.3933333333333333, + "Indonesian,Filipino": 0.4266666666666667, + "Spanish,Chinese": 0.4666666666666667, + "Spanish,Filipino": 0.5066666666666667, + "Chinese,Filipino": 0.38666666666666666 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.21333333333333335, + "English,Vietnamese,Indonesian": 0.28, + "English,Vietnamese,Spanish": 0.30666666666666664, + "English,Vietnamese,Chinese": 0.19333333333333333, + "English,Vietnamese,Filipino": 0.22, + "English,Malay,Indonesian": 0.32, + "English,Malay,Spanish": 0.3333333333333333, + "English,Malay,Chinese": 0.25333333333333335, + "English,Malay,Filipino": 0.2866666666666667, + "English,Indonesian,Spanish": 0.38, + "English,Indonesian,Chinese": 0.26666666666666666, + "English,Indonesian,Filipino": 0.32, + "English,Spanish,Chinese": 0.3333333333333333, + "English,Spanish,Filipino": 0.38666666666666666, + "English,Chinese,Filipino": 0.26666666666666666, + "Vietnamese,Malay,Indonesian": 0.24666666666666667, + "Vietnamese,Malay,Spanish": 0.23333333333333334, + "Vietnamese,Malay,Chinese": 0.18666666666666668, + "Vietnamese,Malay,Filipino": 0.20666666666666667, + "Vietnamese,Indonesian,Spanish": 0.26, + "Vietnamese,Indonesian,Chinese": 0.19333333333333333, + "Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "Vietnamese,Spanish,Chinese": 0.22666666666666666, + "Vietnamese,Spanish,Filipino": 0.24, + "Vietnamese,Chinese,Filipino": 0.17333333333333334, + "Malay,Indonesian,Spanish": 0.28, + "Malay,Indonesian,Chinese": 0.24, + "Malay,Indonesian,Filipino": 0.26666666666666666, + "Malay,Spanish,Chinese": 0.24666666666666667, + "Malay,Spanish,Filipino": 0.30666666666666664, + "Malay,Chinese,Filipino": 0.24, + "Indonesian,Spanish,Chinese": 0.23333333333333334, + "Indonesian,Spanish,Filipino": 0.2866666666666667, + "Indonesian,Chinese,Filipino": 0.23333333333333334, + "Spanish,Chinese,Filipino": 0.26666666666666666 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.16666666666666666, + "English,Vietnamese,Malay,Spanish": 0.17333333333333334, + "English,Vietnamese,Malay,Chinese": 0.12, + "English,Vietnamese,Malay,Filipino": 0.14666666666666667, + "English,Vietnamese,Indonesian,Spanish": 0.21333333333333335, + "English,Vietnamese,Indonesian,Chinese": 0.13333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.18666666666666668, + "English,Vietnamese,Spanish,Chinese": 0.16666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.18, + "English,Vietnamese,Chinese,Filipino": 0.12666666666666668, + "English,Malay,Indonesian,Spanish": 0.23333333333333334, + "English,Malay,Indonesian,Chinese": 0.18666666666666668, + "English,Malay,Indonesian,Filipino": 0.22666666666666666, + "English,Malay,Spanish,Chinese": 0.2, + "English,Malay,Spanish,Filipino": 0.24666666666666667, + "English,Malay,Chinese,Filipino": 0.18, + "English,Indonesian,Spanish,Chinese": 0.19333333333333333, + "English,Indonesian,Spanish,Filipino": 0.25333333333333335, + "English,Indonesian,Chinese,Filipino": 0.18666666666666668, + "English,Spanish,Chinese,Filipino": 0.22, + "Vietnamese,Malay,Indonesian,Spanish": 0.16666666666666666, + "Vietnamese,Malay,Indonesian,Chinese": 0.13333333333333333, + "Vietnamese,Malay,Indonesian,Filipino": 0.14666666666666667, + "Vietnamese,Malay,Spanish,Chinese": 0.13333333333333333, + "Vietnamese,Malay,Spanish,Filipino": 0.16, + "Vietnamese,Malay,Chinese,Filipino": 0.12666666666666668, + "Vietnamese,Indonesian,Spanish,Chinese": 0.13333333333333333, + "Vietnamese,Indonesian,Spanish,Filipino": 0.16666666666666666, + "Vietnamese,Indonesian,Chinese,Filipino": 0.13333333333333333, + "Vietnamese,Spanish,Chinese,Filipino": 0.14, + "Malay,Indonesian,Spanish,Chinese": 0.16, + "Malay,Indonesian,Spanish,Filipino": 0.20666666666666667, + "Malay,Indonesian,Chinese,Filipino": 0.17333333333333334, + "Malay,Spanish,Chinese,Filipino": 0.18666666666666668, + "Indonesian,Spanish,Chinese,Filipino": 0.18 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.13333333333333333, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.09333333333333334, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.12666666666666668, + "English,Vietnamese,Malay,Spanish,Chinese": 0.1, + "English,Vietnamese,Malay,Spanish,Filipino": 0.12666666666666668, + "English,Vietnamese,Malay,Chinese,Filipino": 0.09333333333333334, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.10666666666666667, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.15333333333333332, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.10666666666666667, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.11333333333333333, + "English,Malay,Indonesian,Spanish,Chinese": 0.13333333333333333, + "English,Malay,Indonesian,Spanish,Filipino": 0.18666666666666668, + "English,Malay,Indonesian,Chinese,Filipino": 0.14666666666666667, + "English,Malay,Spanish,Chinese,Filipino": 0.16, + "English,Indonesian,Spanish,Chinese,Filipino": 0.16, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.1, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.12, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.1, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.10666666666666667, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.10666666666666667, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.14 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.07333333333333333, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.10666666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.08, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.08666666666666667, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.09333333333333334, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.12666666666666668, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.08666666666666667 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.07333333333333333 + } + }, + "AC3_2": 0.4637065988686825, + "AC3_3": 0.33826099745317384, + "AC3_4": 0.25503999550920503, + "AC3_5": 0.19748507197284448, + "AC3_6": 0.15617777775044664, + "AC3_7": 0.12716177314907542 + }, + "prompt_5": { + "overall_acc": 0.5028571428571429, + "language_acc": { + "English": 0.66, + "Vietnamese": 0.48, + "Malay": 0.42, + "Indonesian": 0.46, + "Spanish": 0.54, + "Chinese": 0.46, + "Filipino": 0.5 + }, + "consistency_score_2": 0.4533333333333334, + "consistency_score_3": 0.2676190476190476, + "consistency_score_4": 0.1820952380952381, + "consistency_score_5": 0.13587301587301584, + "consistency_score_6": 0.10666666666666667, + "consistency_score_7": 0.08666666666666667, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.42, + "English,Malay": 0.4, + "English,Indonesian": 0.4866666666666667, + "English,Spanish": 0.66, + "English,Chinese": 0.48, + "English,Filipino": 0.5266666666666666, + "Vietnamese,Malay": 0.5, + "Vietnamese,Indonesian": 0.4266666666666667, + "Vietnamese,Spanish": 0.42, + "Vietnamese,Chinese": 0.3933333333333333, + "Vietnamese,Filipino": 0.3933333333333333, + "Malay,Indonesian": 0.49333333333333335, + "Malay,Spanish": 0.44, + "Malay,Chinese": 0.43333333333333335, + "Malay,Filipino": 0.43333333333333335, + "Indonesian,Spanish": 0.4, + "Indonesian,Chinese": 0.36666666666666664, + "Indonesian,Filipino": 0.4266666666666667, + "Spanish,Chinese": 0.52, + "Spanish,Filipino": 0.43333333333333335, + "Chinese,Filipino": 0.4666666666666667 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.24, + "English,Vietnamese,Indonesian": 0.26, + "English,Vietnamese,Spanish": 0.3, + "English,Vietnamese,Chinese": 0.23333333333333334, + "English,Vietnamese,Filipino": 0.25333333333333335, + "English,Malay,Indonesian": 0.2733333333333333, + "English,Malay,Spanish": 0.30666666666666664, + "English,Malay,Chinese": 0.26, + "English,Malay,Filipino": 0.2733333333333333, + "English,Indonesian,Spanish": 0.34, + "English,Indonesian,Chinese": 0.25333333333333335, + "English,Indonesian,Filipino": 0.3, + "English,Spanish,Chinese": 0.38666666666666666, + "English,Spanish,Filipino": 0.36, + "English,Chinese,Filipino": 0.31333333333333335, + "Vietnamese,Malay,Indonesian": 0.28, + "Vietnamese,Malay,Spanish": 0.26, + "Vietnamese,Malay,Chinese": 0.25333333333333335, + "Vietnamese,Malay,Filipino": 0.26, + "Vietnamese,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Indonesian,Chinese": 0.2, + "Vietnamese,Indonesian,Filipino": 0.23333333333333334, + "Vietnamese,Spanish,Chinese": 0.26666666666666666, + "Vietnamese,Spanish,Filipino": 0.21333333333333335, + "Vietnamese,Chinese,Filipino": 0.22, + "Malay,Indonesian,Spanish": 0.2733333333333333, + "Malay,Indonesian,Chinese": 0.25333333333333335, + "Malay,Indonesian,Filipino": 0.29333333333333333, + "Malay,Spanish,Chinese": 0.28, + "Malay,Spanish,Filipino": 0.25333333333333335, + "Malay,Chinese,Filipino": 0.26, + "Indonesian,Spanish,Chinese": 0.24, + "Indonesian,Spanish,Filipino": 0.24666666666666667, + "Indonesian,Chinese,Filipino": 0.22666666666666666, + "Spanish,Chinese,Filipino": 0.2866666666666667 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.18, + "English,Vietnamese,Malay,Spanish": 0.19333333333333333, + "English,Vietnamese,Malay,Chinese": 0.16666666666666666, + "English,Vietnamese,Malay,Filipino": 0.18, + "English,Vietnamese,Indonesian,Spanish": 0.18666666666666668, + "English,Vietnamese,Indonesian,Chinese": 0.15333333333333332, + "English,Vietnamese,Indonesian,Filipino": 0.18666666666666668, + "English,Vietnamese,Spanish,Chinese": 0.18666666666666668, + "English,Vietnamese,Spanish,Filipino": 0.18, + "English,Vietnamese,Chinese,Filipino": 0.16666666666666666, + "English,Malay,Indonesian,Spanish": 0.23333333333333334, + "English,Malay,Indonesian,Chinese": 0.18666666666666668, + "English,Malay,Indonesian,Filipino": 0.20666666666666667, + "English,Malay,Spanish,Chinese": 0.22666666666666666, + "English,Malay,Spanish,Filipino": 0.22, + "English,Malay,Chinese,Filipino": 0.19333333333333333, + "English,Indonesian,Spanish,Chinese": 0.21333333333333335, + "English,Indonesian,Spanish,Filipino": 0.21333333333333335, + "English,Indonesian,Chinese,Filipino": 0.18666666666666668, + "English,Spanish,Chinese,Filipino": 0.24, + "Vietnamese,Malay,Indonesian,Spanish": 0.16, + "Vietnamese,Malay,Indonesian,Chinese": 0.14666666666666667, + "Vietnamese,Malay,Indonesian,Filipino": 0.18, + "Vietnamese,Malay,Spanish,Chinese": 0.17333333333333334, + "Vietnamese,Malay,Spanish,Filipino": 0.15333333333333332, + "Vietnamese,Malay,Chinese,Filipino": 0.15333333333333332, + "Vietnamese,Indonesian,Spanish,Chinese": 0.14, + "Vietnamese,Indonesian,Spanish,Filipino": 0.15333333333333332, + "Vietnamese,Indonesian,Chinese,Filipino": 0.14, + "Vietnamese,Spanish,Chinese,Filipino": 0.15333333333333332, + "Malay,Indonesian,Spanish,Chinese": 0.18666666666666668, + "Malay,Indonesian,Spanish,Filipino": 0.2, + "Malay,Indonesian,Chinese,Filipino": 0.18, + "Malay,Spanish,Chinese,Filipino": 0.18666666666666668, + "Indonesian,Spanish,Chinese,Filipino": 0.16666666666666666 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.14666666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.12, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.14666666666666667, + "English,Vietnamese,Malay,Spanish,Chinese": 0.14, + "English,Vietnamese,Malay,Spanish,Filipino": 0.14, + "English,Vietnamese,Malay,Chinese,Filipino": 0.12666666666666668, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.12, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.14, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.12666666666666668, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.12, + "English,Malay,Indonesian,Spanish,Chinese": 0.17333333333333334, + "English,Malay,Indonesian,Spanish,Filipino": 0.18, + "English,Malay,Indonesian,Chinese,Filipino": 0.14666666666666667, + "English,Malay,Spanish,Chinese,Filipino": 0.16666666666666666, + "English,Indonesian,Spanish,Chinese,Filipino": 0.14666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.11333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.12666666666666668, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.10666666666666667, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.11333333333333333, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.10666666666666667, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.14666666666666667 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.10666666666666667, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.12, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.1, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.1, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.09333333333333334, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.13333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.09333333333333334 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.08666666666666667 + } + }, + "AC3_2": 0.47681274895411824, + "AC3_3": 0.34932721167989483, + "AC3_4": 0.267370093715931, + "AC3_5": 0.21393922177849073, + "AC3_6": 0.175999999971125, + "AC3_7": 0.1478513731574727 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.4391233766233766, + "language_acc": { + "Vietnamese": 0.42613636363636365, + "Indonesian": 0.4375, + "Malay": 0.3806818181818182, + "English": 0.5568181818181818, + "Spanish": 0.4318181818181818, + "Filipino": 0.3409090909090909, + "Chinese": 0.5 + }, + "consistency_score_2": 0.47294372294372294, + "consistency_score_3": 0.28733766233766234, + "consistency_score_4": 0.19464285714285715, + "consistency_score_5": 0.13906926406926406, + "consistency_score_6": 0.1030844155844156, + "consistency_score_7": 0.07954545454545454, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.4943181818181818, + "Vietnamese,Malay": 0.4715909090909091, + "Vietnamese,English": 0.4431818181818182, + "Vietnamese,Spanish": 0.5227272727272727, + "Vietnamese,Filipino": 0.42045454545454547, + "Vietnamese,Chinese": 0.45454545454545453, + "Indonesian,Malay": 0.48863636363636365, + "Indonesian,English": 0.4318181818181818, + "Indonesian,Spanish": 0.5227272727272727, + "Indonesian,Filipino": 0.4602272727272727, + "Indonesian,Chinese": 0.4943181818181818, + "Malay,English": 0.4318181818181818, + "Malay,Spanish": 0.4772727272727273, + "Malay,Filipino": 0.4602272727272727, + "Malay,Chinese": 0.44886363636363635, + "English,Spanish": 0.6136363636363636, + "English,Filipino": 0.35795454545454547, + "English,Chinese": 0.5170454545454546, + "Spanish,Filipino": 0.4659090909090909, + "Spanish,Chinese": 0.5397727272727273, + "Filipino,Chinese": 0.4147727272727273 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.2897727272727273, + "Vietnamese,Indonesian,English": 0.2784090909090909, + "Vietnamese,Indonesian,Spanish": 0.3352272727272727, + "Vietnamese,Indonesian,Filipino": 0.2556818181818182, + "Vietnamese,Indonesian,Chinese": 0.2897727272727273, + "Vietnamese,Malay,English": 0.26704545454545453, + "Vietnamese,Malay,Spanish": 0.3068181818181818, + "Vietnamese,Malay,Filipino": 0.26704545454545453, + "Vietnamese,Malay,Chinese": 0.2840909090909091, + "Vietnamese,English,Spanish": 0.3465909090909091, + "Vietnamese,English,Filipino": 0.24431818181818182, + "Vietnamese,English,Chinese": 0.29545454545454547, + "Vietnamese,Spanish,Filipino": 0.3068181818181818, + "Vietnamese,Spanish,Chinese": 0.32386363636363635, + "Vietnamese,Filipino,Chinese": 0.23295454545454544, + "Indonesian,Malay,English": 0.2784090909090909, + "Indonesian,Malay,Spanish": 0.32386363636363635, + "Indonesian,Malay,Filipino": 0.2840909090909091, + "Indonesian,Malay,Chinese": 0.3068181818181818, + "Indonesian,English,Spanish": 0.32386363636363635, + "Indonesian,English,Filipino": 0.20454545454545456, + "Indonesian,English,Chinese": 0.2784090909090909, + "Indonesian,Spanish,Filipino": 0.3068181818181818, + "Indonesian,Spanish,Chinese": 0.32954545454545453, + "Indonesian,Filipino,Chinese": 0.2727272727272727, + "Malay,English,Spanish": 0.3181818181818182, + "Malay,English,Filipino": 0.2215909090909091, + "Malay,English,Chinese": 0.2556818181818182, + "Malay,Spanish,Filipino": 0.29545454545454547, + "Malay,Spanish,Chinese": 0.3068181818181818, + "Malay,Filipino,Chinese": 0.2727272727272727, + "English,Spanish,Filipino": 0.2727272727272727, + "English,Spanish,Chinese": 0.3806818181818182, + "English,Filipino,Chinese": 0.2159090909090909, + "Spanish,Filipino,Chinese": 0.2840909090909091 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.19886363636363635, + "Vietnamese,Indonesian,Malay,Spanish": 0.2215909090909091, + "Vietnamese,Indonesian,Malay,Filipino": 0.17613636363636365, + "Vietnamese,Indonesian,Malay,Chinese": 0.20454545454545456, + "Vietnamese,Indonesian,English,Spanish": 0.22727272727272727, + "Vietnamese,Indonesian,English,Filipino": 0.1590909090909091, + "Vietnamese,Indonesian,English,Chinese": 0.19318181818181818, + "Vietnamese,Indonesian,Spanish,Filipino": 0.19886363636363635, + "Vietnamese,Indonesian,Spanish,Chinese": 0.22727272727272727, + "Vietnamese,Indonesian,Filipino,Chinese": 0.17613636363636365, + "Vietnamese,Malay,English,Spanish": 0.22727272727272727, + "Vietnamese,Malay,English,Filipino": 0.1590909090909091, + "Vietnamese,Malay,English,Chinese": 0.1875, + "Vietnamese,Malay,Spanish,Filipino": 0.19886363636363635, + "Vietnamese,Malay,Spanish,Chinese": 0.2159090909090909, + "Vietnamese,Malay,Filipino,Chinese": 0.17613636363636365, + "Vietnamese,English,Spanish,Filipino": 0.20454545454545456, + "Vietnamese,English,Spanish,Chinese": 0.23863636363636365, + "Vietnamese,English,Filipino,Chinese": 0.1534090909090909, + "Vietnamese,Spanish,Filipino,Chinese": 0.19318181818181818, + "Indonesian,Malay,English,Spanish": 0.2215909090909091, + "Indonesian,Malay,English,Filipino": 0.14772727272727273, + "Indonesian,Malay,English,Chinese": 0.18181818181818182, + "Indonesian,Malay,Spanish,Filipino": 0.2215909090909091, + "Indonesian,Malay,Spanish,Chinese": 0.23295454545454544, + "Indonesian,Malay,Filipino,Chinese": 0.20454545454545456, + "Indonesian,English,Spanish,Filipino": 0.17045454545454544, + "Indonesian,English,Spanish,Chinese": 0.22727272727272727, + "Indonesian,English,Filipino,Chinese": 0.14772727272727273, + "Indonesian,Spanish,Filipino,Chinese": 0.2159090909090909, + "Malay,English,Spanish,Filipino": 0.17045454545454544, + "Malay,English,Spanish,Chinese": 0.2159090909090909, + "Malay,English,Filipino,Chinese": 0.14204545454545456, + "Malay,Spanish,Filipino,Chinese": 0.19886363636363635, + "English,Spanish,Filipino,Chinese": 0.17613636363636365 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.17613636363636365, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.11363636363636363, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.13636363636363635, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.13636363636363635, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.16477272727272727, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.11363636363636363, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.1534090909090909, + "Vietnamese,Malay,English,Spanish,Filipino": 0.13068181818181818, + "Vietnamese,Malay,English,Spanish,Chinese": 0.16477272727272727, + "Vietnamese,Malay,English,Filipino,Chinese": 0.10795454545454546, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.14204545454545456, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.13068181818181818, + "Indonesian,Malay,English,Spanish,Filipino": 0.125, + "Indonesian,Malay,English,Spanish,Chinese": 0.1590909090909091, + "Indonesian,Malay,English,Filipino,Chinese": 0.10795454545454546, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.16477272727272727, + "Indonesian,English,Spanish,Filipino,Chinese": 0.125, + "Malay,English,Spanish,Filipino,Chinese": 0.11363636363636363 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.10227272727272728, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.08522727272727272, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.11931818181818182, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.10227272727272728, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.09090909090909091, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.09090909090909091 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.07954545454545454 + } + }, + "AC3_2": 0.45540650385850084, + "AC3_3": 0.34737357609670866, + "AC3_4": 0.26972793467641004, + "AC3_5": 0.21123950916741085, + "AC3_6": 0.16697206233797415, + "AC3_7": 0.13469199029982434 + }, + "prompt_2": { + "overall_acc": 0.43506493506493504, + "language_acc": { + "Vietnamese": 0.3977272727272727, + "Indonesian": 0.42045454545454547, + "Malay": 0.42613636363636365, + "English": 0.5454545454545454, + "Spanish": 0.42045454545454547, + "Filipino": 0.32954545454545453, + "Chinese": 0.5056818181818182 + }, + "consistency_score_2": 0.5146103896103895, + "consistency_score_3": 0.33587662337662344, + "consistency_score_4": 0.24025974025974028, + "consistency_score_5": 0.1801948051948052, + "consistency_score_6": 0.1387987012987013, + "consistency_score_7": 0.10795454545454546, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.5284090909090909, + "Vietnamese,Malay": 0.5340909090909091, + "Vietnamese,English": 0.4943181818181818, + "Vietnamese,Spanish": 0.5284090909090909, + "Vietnamese,Filipino": 0.4318181818181818, + "Vietnamese,Chinese": 0.4659090909090909, + "Indonesian,Malay": 0.5965909090909091, + "Indonesian,English": 0.5738636363636364, + "Indonesian,Spanish": 0.6306818181818182, + "Indonesian,Filipino": 0.5397727272727273, + "Indonesian,Chinese": 0.4715909090909091, + "Malay,English": 0.5113636363636364, + "Malay,Spanish": 0.5795454545454546, + "Malay,Filipino": 0.48863636363636365, + "Malay,Chinese": 0.42613636363636365, + "English,Spanish": 0.6306818181818182, + "English,Filipino": 0.3977272727272727, + "English,Chinese": 0.5397727272727273, + "Spanish,Filipino": 0.48295454545454547, + "Spanish,Chinese": 0.5454545454545454, + "Filipino,Chinese": 0.4090909090909091 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.39204545454545453, + "Vietnamese,Indonesian,English": 0.375, + "Vietnamese,Indonesian,Spanish": 0.4034090909090909, + "Vietnamese,Indonesian,Filipino": 0.32954545454545453, + "Vietnamese,Indonesian,Chinese": 0.3125, + "Vietnamese,Malay,English": 0.3409090909090909, + "Vietnamese,Malay,Spanish": 0.375, + "Vietnamese,Malay,Filipino": 0.3068181818181818, + "Vietnamese,Malay,Chinese": 0.2840909090909091, + "Vietnamese,English,Spanish": 0.3693181818181818, + "Vietnamese,English,Filipino": 0.25, + "Vietnamese,English,Chinese": 0.3125, + "Vietnamese,Spanish,Filipino": 0.30113636363636365, + "Vietnamese,Spanish,Chinese": 0.3465909090909091, + "Vietnamese,Filipino,Chinese": 0.25, + "Indonesian,Malay,English": 0.39204545454545453, + "Indonesian,Malay,Spanish": 0.44886363636363635, + "Indonesian,Malay,Filipino": 0.36363636363636365, + "Indonesian,Malay,Chinese": 0.29545454545454547, + "Indonesian,English,Spanish": 0.4602272727272727, + "Indonesian,English,Filipino": 0.3181818181818182, + "Indonesian,English,Chinese": 0.3465909090909091, + "Indonesian,Spanish,Filipino": 0.375, + "Indonesian,Spanish,Chinese": 0.3693181818181818, + "Indonesian,Filipino,Chinese": 0.29545454545454547, + "Malay,English,Spanish": 0.3977272727272727, + "Malay,English,Filipino": 0.2840909090909091, + "Malay,English,Chinese": 0.2727272727272727, + "Malay,Spanish,Filipino": 0.3352272727272727, + "Malay,Spanish,Chinese": 0.32386363636363635, + "Malay,Filipino,Chinese": 0.2556818181818182, + "English,Spanish,Filipino": 0.3181818181818182, + "English,Spanish,Chinese": 0.4034090909090909, + "English,Filipino,Chinese": 0.2556818181818182, + "Spanish,Filipino,Chinese": 0.29545454545454547 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.2784090909090909, + "Vietnamese,Indonesian,Malay,Spanish": 0.3068181818181818, + "Vietnamese,Indonesian,Malay,Filipino": 0.25, + "Vietnamese,Indonesian,Malay,Chinese": 0.2159090909090909, + "Vietnamese,Indonesian,English,Spanish": 0.3125, + "Vietnamese,Indonesian,English,Filipino": 0.22727272727272727, + "Vietnamese,Indonesian,English,Chinese": 0.25, + "Vietnamese,Indonesian,Spanish,Filipino": 0.26136363636363635, + "Vietnamese,Indonesian,Spanish,Chinese": 0.26704545454545453, + "Vietnamese,Indonesian,Filipino,Chinese": 0.20454545454545456, + "Vietnamese,Malay,English,Spanish": 0.2840909090909091, + "Vietnamese,Malay,English,Filipino": 0.1875, + "Vietnamese,Malay,English,Chinese": 0.19886363636363635, + "Vietnamese,Malay,Spanish,Filipino": 0.23295454545454544, + "Vietnamese,Malay,Spanish,Chinese": 0.24431818181818182, + "Vietnamese,Malay,Filipino,Chinese": 0.17045454545454544, + "Vietnamese,English,Spanish,Filipino": 0.2159090909090909, + "Vietnamese,English,Spanish,Chinese": 0.2556818181818182, + "Vietnamese,English,Filipino,Chinese": 0.17045454545454544, + "Vietnamese,Spanish,Filipino,Chinese": 0.21022727272727273, + "Indonesian,Malay,English,Spanish": 0.32954545454545453, + "Indonesian,Malay,English,Filipino": 0.23295454545454544, + "Indonesian,Malay,English,Chinese": 0.23295454545454544, + "Indonesian,Malay,Spanish,Filipino": 0.2897727272727273, + "Indonesian,Malay,Spanish,Chinese": 0.26136363636363635, + "Indonesian,Malay,Filipino,Chinese": 0.20454545454545456, + "Indonesian,English,Spanish,Filipino": 0.2727272727272727, + "Indonesian,English,Spanish,Chinese": 0.29545454545454547, + "Indonesian,English,Filipino,Chinese": 0.22727272727272727, + "Indonesian,Spanish,Filipino,Chinese": 0.24431818181818182, + "Malay,English,Spanish,Filipino": 0.23863636363636365, + "Malay,English,Spanish,Chinese": 0.24431818181818182, + "Malay,English,Filipino,Chinese": 0.17045454545454544, + "Malay,Spanish,Filipino,Chinese": 0.20454545454545456, + "English,Spanish,Filipino,Chinese": 0.2159090909090909 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.23863636363636365, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.16477272727272727, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.20454545454545456, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.19886363636363635, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.19886363636363635, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.2159090909090909, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.16477272727272727, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.18181818181818182, + "Vietnamese,Malay,English,Spanish,Filipino": 0.17045454545454544, + "Vietnamese,Malay,English,Spanish,Chinese": 0.1875, + "Vietnamese,Malay,English,Filipino,Chinese": 0.11363636363636363, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.1534090909090909, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.1534090909090909, + "Indonesian,Malay,English,Spanish,Filipino": 0.21022727272727273, + "Indonesian,Malay,English,Spanish,Chinese": 0.21022727272727273, + "Indonesian,Malay,English,Filipino,Chinese": 0.1590909090909091, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.1875, + "Indonesian,English,Spanish,Filipino,Chinese": 0.19886363636363635, + "Malay,English,Spanish,Filipino,Chinese": 0.1590909090909091 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.1534090909090909, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.1590909090909091, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.10795454545454546, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.13636363636363635, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.11363636363636363, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.1534090909090909 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.10795454545454546 + } + }, + "AC3_2": 0.4715062714566223, + "AC3_3": 0.3790900612897835, + "AC3_4": 0.30956543451959506, + "AC3_5": 0.2548401466193072, + "AC3_6": 0.2104557394148227, + "AC3_7": 0.17298545994911949 + }, + "prompt_3": { + "overall_acc": 0.44480519480519476, + "language_acc": { + "Vietnamese": 0.45454545454545453, + "Indonesian": 0.4431818181818182, + "Malay": 0.42613636363636365, + "English": 0.5511363636363636, + "Spanish": 0.4318181818181818, + "Filipino": 0.32386363636363635, + "Chinese": 0.48295454545454547 + }, + "consistency_score_2": 0.5091991341991344, + "consistency_score_3": 0.326461038961039, + "consistency_score_4": 0.22759740259740255, + "consistency_score_5": 0.1647727272727273, + "consistency_score_6": 0.12175324675324674, + "consistency_score_7": 0.09090909090909091, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.5, + "Vietnamese,Malay": 0.48295454545454547, + "Vietnamese,English": 0.5, + "Vietnamese,Spanish": 0.5056818181818182, + "Vietnamese,Filipino": 0.4431818181818182, + "Vietnamese,Chinese": 0.4431818181818182, + "Indonesian,Malay": 0.6022727272727273, + "Indonesian,English": 0.5397727272727273, + "Indonesian,Spanish": 0.625, + "Indonesian,Filipino": 0.5227272727272727, + "Indonesian,Chinese": 0.4772727272727273, + "Malay,English": 0.5681818181818182, + "Malay,Spanish": 0.6193181818181818, + "Malay,Filipino": 0.4772727272727273, + "Malay,Chinese": 0.4602272727272727, + "English,Spanish": 0.6420454545454546, + "English,Filipino": 0.3977272727272727, + "English,Chinese": 0.5227272727272727, + "Spanish,Filipino": 0.4602272727272727, + "Spanish,Chinese": 0.5170454545454546, + "Filipino,Chinese": 0.38636363636363635 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.3465909090909091, + "Vietnamese,Indonesian,English": 0.3352272727272727, + "Vietnamese,Indonesian,Spanish": 0.36363636363636365, + "Vietnamese,Indonesian,Filipino": 0.30113636363636365, + "Vietnamese,Indonesian,Chinese": 0.2784090909090909, + "Vietnamese,Malay,English": 0.32954545454545453, + "Vietnamese,Malay,Spanish": 0.35795454545454547, + "Vietnamese,Malay,Filipino": 0.2727272727272727, + "Vietnamese,Malay,Chinese": 0.25, + "Vietnamese,English,Spanish": 0.3693181818181818, + "Vietnamese,English,Filipino": 0.2556818181818182, + "Vietnamese,English,Chinese": 0.29545454545454547, + "Vietnamese,Spanish,Filipino": 0.29545454545454547, + "Vietnamese,Spanish,Chinese": 0.30113636363636365, + "Vietnamese,Filipino,Chinese": 0.23295454545454544, + "Indonesian,Malay,English": 0.3977272727272727, + "Indonesian,Malay,Spanish": 0.44886363636363635, + "Indonesian,Malay,Filipino": 0.3522727272727273, + "Indonesian,Malay,Chinese": 0.32954545454545453, + "Indonesian,English,Spanish": 0.4431818181818182, + "Indonesian,English,Filipino": 0.29545454545454547, + "Indonesian,English,Chinese": 0.32954545454545453, + "Indonesian,Spanish,Filipino": 0.3693181818181818, + "Indonesian,Spanish,Chinese": 0.3693181818181818, + "Indonesian,Filipino,Chinese": 0.29545454545454547, + "Malay,English,Spanish": 0.44886363636363635, + "Malay,English,Filipino": 0.2784090909090909, + "Malay,English,Chinese": 0.3352272727272727, + "Malay,Spanish,Filipino": 0.3352272727272727, + "Malay,Spanish,Chinese": 0.36363636363636365, + "Malay,Filipino,Chinese": 0.23295454545454544, + "English,Spanish,Filipino": 0.3068181818181818, + "English,Spanish,Chinese": 0.39204545454545453, + "English,Filipino,Chinese": 0.25, + "Spanish,Filipino,Chinese": 0.26704545454545453 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.26704545454545453, + "Vietnamese,Indonesian,Malay,Spanish": 0.2840909090909091, + "Vietnamese,Indonesian,Malay,Filipino": 0.2215909090909091, + "Vietnamese,Indonesian,Malay,Chinese": 0.1875, + "Vietnamese,Indonesian,English,Spanish": 0.2897727272727273, + "Vietnamese,Indonesian,English,Filipino": 0.19318181818181818, + "Vietnamese,Indonesian,English,Chinese": 0.21022727272727273, + "Vietnamese,Indonesian,Spanish,Filipino": 0.23295454545454544, + "Vietnamese,Indonesian,Spanish,Chinese": 0.22727272727272727, + "Vietnamese,Indonesian,Filipino,Chinese": 0.1875, + "Vietnamese,Malay,English,Spanish": 0.2784090909090909, + "Vietnamese,Malay,English,Filipino": 0.1875, + "Vietnamese,Malay,English,Chinese": 0.19318181818181818, + "Vietnamese,Malay,Spanish,Filipino": 0.2215909090909091, + "Vietnamese,Malay,Spanish,Chinese": 0.19886363636363635, + "Vietnamese,Malay,Filipino,Chinese": 0.14204545454545456, + "Vietnamese,English,Spanish,Filipino": 0.2159090909090909, + "Vietnamese,English,Spanish,Chinese": 0.22727272727272727, + "Vietnamese,English,Filipino,Chinese": 0.1590909090909091, + "Vietnamese,Spanish,Filipino,Chinese": 0.17045454545454544, + "Indonesian,Malay,English,Spanish": 0.3352272727272727, + "Indonesian,Malay,English,Filipino": 0.23295454545454544, + "Indonesian,Malay,English,Chinese": 0.24431818181818182, + "Indonesian,Malay,Spanish,Filipino": 0.29545454545454547, + "Indonesian,Malay,Spanish,Chinese": 0.26704545454545453, + "Indonesian,Malay,Filipino,Chinese": 0.21022727272727273, + "Indonesian,English,Spanish,Filipino": 0.25, + "Indonesian,English,Spanish,Chinese": 0.2840909090909091, + "Indonesian,English,Filipino,Chinese": 0.21022727272727273, + "Indonesian,Spanish,Filipino,Chinese": 0.24431818181818182, + "Malay,English,Spanish,Filipino": 0.23863636363636365, + "Malay,English,Spanish,Chinese": 0.2840909090909091, + "Malay,English,Filipino,Chinese": 0.18181818181818182, + "Malay,Spanish,Filipino,Chinese": 0.1875, + "English,Spanish,Filipino,Chinese": 0.20454545454545456 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.23295454545454544, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.16477272727272727, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.1534090909090909, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.19318181818181818, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.1590909090909091, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.125, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.17613636363636365, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.18181818181818182, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Malay,English,Spanish,Filipino": 0.17045454545454544, + "Vietnamese,Malay,English,Spanish,Chinese": 0.1590909090909091, + "Vietnamese,Malay,English,Filipino,Chinese": 0.11363636363636363, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.11363636363636363, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.13068181818181818, + "Indonesian,Malay,English,Spanish,Filipino": 0.21022727272727273, + "Indonesian,Malay,English,Spanish,Chinese": 0.21022727272727273, + "Indonesian,Malay,English,Filipino,Chinese": 0.16477272727272727, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.18181818181818182, + "Indonesian,English,Spanish,Filipino,Chinese": 0.1875, + "Malay,English,Spanish,Filipino,Chinese": 0.1534090909090909 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.1534090909090909, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.10227272727272728, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.10795454545454546, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.11363636363636363, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.09659090909090909, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.14772727272727273 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.09090909090909091 + } + }, + "AC3_2": 0.4748289146543553, + "AC3_3": 0.3765536714953771, + "AC3_4": 0.30111872671373174, + "AC3_5": 0.24046725574072375, + "AC3_6": 0.1911770177908387, + "AC3_7": 0.1509641872996437 + }, + "prompt_4": { + "overall_acc": 0.4456168831168831, + "language_acc": { + "Vietnamese": 0.44886363636363635, + "Indonesian": 0.4431818181818182, + "Malay": 0.4090909090909091, + "English": 0.5284090909090909, + "Spanish": 0.45454545454545453, + "Filipino": 0.3352272727272727, + "Chinese": 0.5 + }, + "consistency_score_2": 0.5018939393939394, + "consistency_score_3": 0.32386363636363635, + "consistency_score_4": 0.23262987012987013, + "consistency_score_5": 0.17803030303030304, + "consistency_score_6": 0.14285714285714285, + "consistency_score_7": 0.11931818181818182, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.5568181818181818, + "Vietnamese,Malay": 0.48295454545454547, + "Vietnamese,English": 0.5056818181818182, + "Vietnamese,Spanish": 0.5170454545454546, + "Vietnamese,Filipino": 0.36363636363636365, + "Vietnamese,Chinese": 0.4602272727272727, + "Indonesian,Malay": 0.5681818181818182, + "Indonesian,English": 0.5511363636363636, + "Indonesian,Spanish": 0.5852272727272727, + "Indonesian,Filipino": 0.5454545454545454, + "Indonesian,Chinese": 0.48863636363636365, + "Malay,English": 0.5170454545454546, + "Malay,Spanish": 0.5511363636363636, + "Malay,Filipino": 0.42613636363636365, + "Malay,Chinese": 0.4431818181818182, + "English,Spanish": 0.6193181818181818, + "English,Filipino": 0.3806818181818182, + "English,Chinese": 0.5568181818181818, + "Spanish,Filipino": 0.44886363636363635, + "Spanish,Chinese": 0.5909090909090909, + "Filipino,Chinese": 0.3806818181818182 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.35795454545454547, + "Vietnamese,Indonesian,English": 0.375, + "Vietnamese,Indonesian,Spanish": 0.375, + "Vietnamese,Indonesian,Filipino": 0.29545454545454547, + "Vietnamese,Indonesian,Chinese": 0.32386363636363635, + "Vietnamese,Malay,English": 0.32954545454545453, + "Vietnamese,Malay,Spanish": 0.3352272727272727, + "Vietnamese,Malay,Filipino": 0.23863636363636365, + "Vietnamese,Malay,Chinese": 0.26704545454545453, + "Vietnamese,English,Spanish": 0.3977272727272727, + "Vietnamese,English,Filipino": 0.22727272727272727, + "Vietnamese,English,Chinese": 0.3465909090909091, + "Vietnamese,Spanish,Filipino": 0.26136363636363635, + "Vietnamese,Spanish,Chinese": 0.35795454545454547, + "Vietnamese,Filipino,Chinese": 0.2159090909090909, + "Indonesian,Malay,English": 0.375, + "Indonesian,Malay,Spanish": 0.39204545454545453, + "Indonesian,Malay,Filipino": 0.32954545454545453, + "Indonesian,Malay,Chinese": 0.32386363636363635, + "Indonesian,English,Spanish": 0.42045454545454547, + "Indonesian,English,Filipino": 0.30113636363636365, + "Indonesian,English,Chinese": 0.3465909090909091, + "Indonesian,Spanish,Filipino": 0.3522727272727273, + "Indonesian,Spanish,Chinese": 0.3806818181818182, + "Indonesian,Filipino,Chinese": 0.29545454545454547, + "Malay,English,Spanish": 0.38636363636363635, + "Malay,English,Filipino": 0.2556818181818182, + "Malay,English,Chinese": 0.32386363636363635, + "Malay,Spanish,Filipino": 0.2897727272727273, + "Malay,Spanish,Chinese": 0.3409090909090909, + "Malay,Filipino,Chinese": 0.24431818181818182, + "English,Spanish,Filipino": 0.29545454545454547, + "English,Spanish,Chinese": 0.44886363636363635, + "English,Filipino,Chinese": 0.23863636363636365, + "Spanish,Filipino,Chinese": 0.2897727272727273 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.2727272727272727, + "Vietnamese,Indonesian,Malay,Spanish": 0.26136363636363635, + "Vietnamese,Indonesian,Malay,Filipino": 0.21022727272727273, + "Vietnamese,Indonesian,Malay,Chinese": 0.22727272727272727, + "Vietnamese,Indonesian,English,Spanish": 0.3068181818181818, + "Vietnamese,Indonesian,English,Filipino": 0.19318181818181818, + "Vietnamese,Indonesian,English,Chinese": 0.26704545454545453, + "Vietnamese,Indonesian,Spanish,Filipino": 0.2215909090909091, + "Vietnamese,Indonesian,Spanish,Chinese": 0.2727272727272727, + "Vietnamese,Indonesian,Filipino,Chinese": 0.18181818181818182, + "Vietnamese,Malay,English,Spanish": 0.26704545454545453, + "Vietnamese,Malay,English,Filipino": 0.17045454545454544, + "Vietnamese,Malay,English,Chinese": 0.22727272727272727, + "Vietnamese,Malay,Spanish,Filipino": 0.19318181818181818, + "Vietnamese,Malay,Spanish,Chinese": 0.2215909090909091, + "Vietnamese,Malay,Filipino,Chinese": 0.1590909090909091, + "Vietnamese,English,Spanish,Filipino": 0.21022727272727273, + "Vietnamese,English,Spanish,Chinese": 0.30113636363636365, + "Vietnamese,English,Filipino,Chinese": 0.17613636363636365, + "Vietnamese,Spanish,Filipino,Chinese": 0.19318181818181818, + "Indonesian,Malay,English,Spanish": 0.29545454545454547, + "Indonesian,Malay,English,Filipino": 0.2215909090909091, + "Indonesian,Malay,English,Chinese": 0.26704545454545453, + "Indonesian,Malay,Spanish,Filipino": 0.2556818181818182, + "Indonesian,Malay,Spanish,Chinese": 0.2727272727272727, + "Indonesian,Malay,Filipino,Chinese": 0.2159090909090909, + "Indonesian,English,Spanish,Filipino": 0.24431818181818182, + "Indonesian,English,Spanish,Chinese": 0.3068181818181818, + "Indonesian,English,Filipino,Chinese": 0.19318181818181818, + "Indonesian,Spanish,Filipino,Chinese": 0.23295454545454544, + "Malay,English,Spanish,Filipino": 0.21022727272727273, + "Malay,English,Spanish,Chinese": 0.2840909090909091, + "Malay,English,Filipino,Chinese": 0.1875, + "Malay,Spanish,Filipino,Chinese": 0.20454545454545456, + "English,Spanish,Filipino,Chinese": 0.2159090909090909 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.2215909090909091, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.1590909090909091, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.19886363636363635, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.17613636363636365, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.19318181818181818, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.17613636363636365, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.23863636363636365, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.1590909090909091, + "Vietnamese,Malay,English,Spanish,Filipino": 0.1590909090909091, + "Vietnamese,Malay,English,Spanish,Chinese": 0.19886363636363635, + "Vietnamese,Malay,English,Filipino,Chinese": 0.13636363636363635, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.14204545454545456, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.17045454545454544, + "Indonesian,Malay,English,Spanish,Filipino": 0.1875, + "Indonesian,Malay,English,Spanish,Chinese": 0.23295454545454544, + "Indonesian,Malay,English,Filipino,Chinese": 0.17045454545454544, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.18181818181818182, + "Indonesian,English,Spanish,Filipino,Chinese": 0.17613636363636365, + "Malay,English,Spanish,Filipino,Chinese": 0.17045454545454544 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.14772727272727273, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.17613636363636365, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.125, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.13636363636363635, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.13068181818181818, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.1534090909090909 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.11931818181818182 + } + }, + "AC3_2": 0.4720841336919343, + "AC3_3": 0.3751078825749572, + "AC3_4": 0.30568166271533914, + "AC3_5": 0.2544172746584753, + "AC3_6": 0.21635467976619027, + "AC3_7": 0.18823471783501886 + }, + "prompt_5": { + "overall_acc": 0.4301948051948052, + "language_acc": { + "Vietnamese": 0.44886363636363635, + "Indonesian": 0.4034090909090909, + "Malay": 0.4034090909090909, + "English": 0.5454545454545454, + "Spanish": 0.42613636363636365, + "Filipino": 0.3465909090909091, + "Chinese": 0.4375 + }, + "consistency_score_2": 0.4918831168831169, + "consistency_score_3": 0.31136363636363645, + "consistency_score_4": 0.2163961038961039, + "consistency_score_5": 0.15963203463203463, + "consistency_score_6": 0.12256493506493506, + "consistency_score_7": 0.09659090909090909, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.5340909090909091, + "Vietnamese,Malay": 0.4943181818181818, + "Vietnamese,English": 0.5227272727272727, + "Vietnamese,Spanish": 0.5284090909090909, + "Vietnamese,Filipino": 0.45454545454545453, + "Vietnamese,Chinese": 0.4715909090909091, + "Indonesian,Malay": 0.5397727272727273, + "Indonesian,English": 0.5056818181818182, + "Indonesian,Spanish": 0.5284090909090909, + "Indonesian,Filipino": 0.4943181818181818, + "Indonesian,Chinese": 0.4602272727272727, + "Malay,English": 0.48295454545454547, + "Malay,Spanish": 0.5056818181818182, + "Malay,Filipino": 0.4943181818181818, + "Malay,Chinese": 0.4431818181818182, + "English,Spanish": 0.5852272727272727, + "English,Filipino": 0.3977272727272727, + "English,Chinese": 0.48295454545454547, + "Spanish,Filipino": 0.48295454545454547, + "Spanish,Chinese": 0.48863636363636365, + "Filipino,Chinese": 0.4318181818181818 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.3409090909090909, + "Vietnamese,Indonesian,English": 0.36363636363636365, + "Vietnamese,Indonesian,Spanish": 0.3522727272727273, + "Vietnamese,Indonesian,Filipino": 0.32954545454545453, + "Vietnamese,Indonesian,Chinese": 0.3068181818181818, + "Vietnamese,Malay,English": 0.3352272727272727, + "Vietnamese,Malay,Spanish": 0.3352272727272727, + "Vietnamese,Malay,Filipino": 0.3068181818181818, + "Vietnamese,Malay,Chinese": 0.2840909090909091, + "Vietnamese,English,Spanish": 0.3806818181818182, + "Vietnamese,English,Filipino": 0.30113636363636365, + "Vietnamese,English,Chinese": 0.3352272727272727, + "Vietnamese,Spanish,Filipino": 0.3125, + "Vietnamese,Spanish,Chinese": 0.3068181818181818, + "Vietnamese,Filipino,Chinese": 0.2727272727272727, + "Indonesian,Malay,English": 0.3409090909090909, + "Indonesian,Malay,Spanish": 0.3522727272727273, + "Indonesian,Malay,Filipino": 0.3465909090909091, + "Indonesian,Malay,Chinese": 0.2840909090909091, + "Indonesian,English,Spanish": 0.35795454545454547, + "Indonesian,English,Filipino": 0.2840909090909091, + "Indonesian,English,Chinese": 0.2784090909090909, + "Indonesian,Spanish,Filipino": 0.3352272727272727, + "Indonesian,Spanish,Chinese": 0.2897727272727273, + "Indonesian,Filipino,Chinese": 0.2727272727272727, + "Malay,English,Spanish": 0.3522727272727273, + "Malay,English,Filipino": 0.2784090909090909, + "Malay,English,Chinese": 0.2727272727272727, + "Malay,Spanish,Filipino": 0.3068181818181818, + "Malay,Spanish,Chinese": 0.2784090909090909, + "Malay,Filipino,Chinese": 0.26136363636363635, + "English,Spanish,Filipino": 0.2897727272727273, + "English,Spanish,Chinese": 0.3409090909090909, + "English,Filipino,Chinese": 0.23295454545454544, + "Spanish,Filipino,Chinese": 0.2784090909090909 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.2556818181818182, + "Vietnamese,Indonesian,Malay,Spanish": 0.24431818181818182, + "Vietnamese,Indonesian,Malay,Filipino": 0.25, + "Vietnamese,Indonesian,Malay,Chinese": 0.19886363636363635, + "Vietnamese,Indonesian,English,Spanish": 0.26704545454545453, + "Vietnamese,Indonesian,English,Filipino": 0.23863636363636365, + "Vietnamese,Indonesian,English,Chinese": 0.22727272727272727, + "Vietnamese,Indonesian,Spanish,Filipino": 0.23863636363636365, + "Vietnamese,Indonesian,Spanish,Chinese": 0.2159090909090909, + "Vietnamese,Indonesian,Filipino,Chinese": 0.19886363636363635, + "Vietnamese,Malay,English,Spanish": 0.2556818181818182, + "Vietnamese,Malay,English,Filipino": 0.2215909090909091, + "Vietnamese,Malay,English,Chinese": 0.2159090909090909, + "Vietnamese,Malay,Spanish,Filipino": 0.22727272727272727, + "Vietnamese,Malay,Spanish,Chinese": 0.20454545454545456, + "Vietnamese,Malay,Filipino,Chinese": 0.18181818181818182, + "Vietnamese,English,Spanish,Filipino": 0.23863636363636365, + "Vietnamese,English,Spanish,Chinese": 0.23863636363636365, + "Vietnamese,English,Filipino,Chinese": 0.19886363636363635, + "Vietnamese,Spanish,Filipino,Chinese": 0.19886363636363635, + "Indonesian,Malay,English,Spanish": 0.2556818181818182, + "Indonesian,Malay,English,Filipino": 0.22727272727272727, + "Indonesian,Malay,English,Chinese": 0.1875, + "Indonesian,Malay,Spanish,Filipino": 0.24431818181818182, + "Indonesian,Malay,Spanish,Chinese": 0.19886363636363635, + "Indonesian,Malay,Filipino,Chinese": 0.19886363636363635, + "Indonesian,English,Spanish,Filipino": 0.22727272727272727, + "Indonesian,English,Spanish,Chinese": 0.21022727272727273, + "Indonesian,English,Filipino,Chinese": 0.17613636363636365, + "Indonesian,Spanish,Filipino,Chinese": 0.19318181818181818, + "Malay,English,Spanish,Filipino": 0.2159090909090909, + "Malay,English,Spanish,Chinese": 0.21022727272727273, + "Malay,English,Filipino,Chinese": 0.1590909090909091, + "Malay,Spanish,Filipino,Chinese": 0.17613636363636365, + "English,Spanish,Filipino,Chinese": 0.17613636363636365 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.19886363636363635, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.19318181818181818, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.1534090909090909, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.1875, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.1534090909090909, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.19318181818181818, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.1534090909090909, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Malay,English,Spanish,Filipino": 0.18181818181818182, + "Vietnamese,Malay,English,Spanish,Chinese": 0.16477272727272727, + "Vietnamese,Malay,English,Filipino,Chinese": 0.13636363636363635, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.13636363636363635, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.1590909090909091, + "Indonesian,Malay,English,Spanish,Filipino": 0.18181818181818182, + "Indonesian,Malay,English,Spanish,Chinese": 0.14772727272727273, + "Indonesian,Malay,English,Filipino,Chinese": 0.13636363636363635, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.14772727272727273, + "Indonesian,English,Spanish,Filipino,Chinese": 0.13636363636363635, + "Malay,English,Spanish,Filipino,Chinese": 0.125 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.1590909090909091, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.125, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.11363636363636363, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.11363636363636363, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.125, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.11363636363636363, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.10795454545454546 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.09659090909090909 + } + }, + "AC3_2": 0.4589754435207378, + "AC3_3": 0.36125815948002143, + "AC3_4": 0.28794861924980525, + "AC3_5": 0.23285773855220454, + "AC3_6": 0.190776550874196, + "AC3_7": 0.15776019047292383 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6310679611650486 + }, + "prompt_2": { + "accuracy": 0.6601941747572816 + }, + "prompt_3": { + "accuracy": 0.6310679611650486 + }, + "prompt_4": { + "accuracy": 0.6601941747572816 + }, + "prompt_5": { + "accuracy": 0.6213592233009708 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3238095238095238 + }, + "prompt_2": { + "accuracy": 0.3333333333333333 + }, + "prompt_3": { + "accuracy": 0.3047619047619048 + }, + "prompt_4": { + "accuracy": 0.37142857142857144 + }, + "prompt_5": { + "accuracy": 0.3238095238095238 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5794392523364486 + }, + "prompt_2": { + "accuracy": 0.5981308411214953 + }, + "prompt_3": { + "accuracy": 0.5887850467289719 + }, + "prompt_4": { + "accuracy": 0.6074766355140186 + }, + "prompt_5": { + "accuracy": 0.6074766355140186 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.46, + "category_acc": { + "brand": 0.4, + "demographics": 0.2, + "biology": 0.3, + "history": 0.3333333333333333, + "literature": 0.4, + "politics": 0.8, + "culture": 0.8, + "film": 0.4, + "law": 0.3, + "geography": 0.6 + } + }, + "prompt_2": { + "accuracy": 0.49, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.5, + "history": 0.26666666666666666, + "literature": 0.3, + "politics": 0.9, + "culture": 0.8, + "film": 0.5, + "law": 0.5, + "geography": 0.6 + } + }, + "prompt_3": { + "accuracy": 0.48, + "category_acc": { + "brand": 0.5, + "demographics": 0.2, + "biology": 0.5, + "history": 0.26666666666666666, + "literature": 0.4, + "politics": 0.6, + "culture": 0.7, + "film": 0.6, + "law": 0.3, + "geography": 0.7 + } + }, + "prompt_4": { + "accuracy": 0.42, + "category_acc": { + "brand": 0.4, + "demographics": 0.2, + "biology": 0.4, + "history": 0.26666666666666666, + "literature": 0.4, + "politics": 0.6, + "culture": 0.5, + "film": 0.3, + "law": 0.5, + "geography": 0.6 + } + }, + "prompt_5": { + "accuracy": 0.49, + "category_acc": { + "brand": 0.5, + "demographics": 0.0, + "biology": 0.6, + "history": 0.3333333333333333, + "literature": 0.3, + "politics": 0.8, + "culture": 0.7, + "film": 0.4, + "law": 0.4, + "geography": 0.7 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.17443517060057026 + }, + "prompt_2": { + "bleu_score": 0.18035046517785064 + }, + "prompt_3": { + "bleu_score": 0.17745925730253784 + }, + "prompt_4": { + "bleu_score": 0.17535916787424066 + }, + "prompt_5": { + "bleu_score": 0.1225660440724778 + } }, "indommlu": { "prompt_1": -1, @@ -11214,179 +97704,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.27764879828031913 + }, + "prompt_2": { + "bleu_score": 0.28981040549145054 + }, + "prompt_3": { + "bleu_score": 0.2839778557581816 + }, + "prompt_4": { + "bleu_score": 0.2851805352922804 + }, + "prompt_5": { + "bleu_score": 0.2752705045996572 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.19360951591510397 + }, + "prompt_2": { + "bleu_score": 0.20212885443645062 + }, + "prompt_3": { + "bleu_score": 0.20137473340432344 + }, + "prompt_4": { + "bleu_score": 0.20361198304089412 + }, + "prompt_5": { + "bleu_score": 0.2017588620944703 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.16516694139164445 + }, + "prompt_2": { + "bleu_score": 0.1694845052480065 + }, + "prompt_3": { + "bleu_score": 0.17178576741973187 + }, + "prompt_4": { + "bleu_score": 0.17367294170827527 + }, + "prompt_5": { + "bleu_score": 0.16987788675708243 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.25847362509735256 + }, + "prompt_2": { + "bleu_score": 0.2723796519057974 + }, + "prompt_3": { + "bleu_score": 0.2751159422630147 + }, + "prompt_4": { + "bleu_score": 0.27206004591455646 + }, + "prompt_5": { + "bleu_score": 0.26269614532258306 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5787631271878646 + }, + "prompt_2": { + "accuracy": 0.5950991831971996 + }, + "prompt_3": { + "accuracy": 0.5787631271878646 + }, + "prompt_4": { + "accuracy": 0.5787631271878646 + }, + "prompt_5": { + "accuracy": 0.574095682613769 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5532356095816947, + "category_acc": { + "high_school_european_history": 0.7134146341463414, + "business_ethics": 0.6262626262626263, + "clinical_knowledge": 0.6363636363636364, + "medical_genetics": 0.6060606060606061, + "high_school_us_history": 0.7142857142857143, + "high_school_physics": 0.28, + "high_school_world_history": 0.6991525423728814, + "virology": 0.46060606060606063, + "high_school_microeconomics": 0.5907172995780591, + "econometrics": 0.4247787610619469, + "college_computer_science": 0.3939393939393939, + "high_school_biology": 0.6472491909385113, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.4483985765124555, + "philosophy": 0.6193548387096774, + "professional_medicine": 0.6088560885608856, + "nutrition": 0.6098360655737705, + "global_facts": 0.36363636363636365, + "machine_learning": 0.3783783783783784, + "security_studies": 0.6147540983606558, + "public_relations": 0.5871559633027523, + "professional_psychology": 0.5859247135842881, + "prehistory": 0.6130030959752322, + "anatomy": 0.5597014925373134, + "human_sexuality": 0.5615384615384615, + "college_medicine": 0.563953488372093, + "high_school_government_and_politics": 0.78125, + "college_chemistry": 0.41414141414141414, + "logical_fallacies": 0.691358024691358, + "high_school_geography": 0.6852791878172588, + "elementary_mathematics": 0.46419098143236076, + "human_aging": 0.5900900900900901, + "college_mathematics": 0.24242424242424243, + "high_school_psychology": 0.7775735294117647, + "formal_logic": 0.416, + "high_school_statistics": 0.4883720930232558, + "international_law": 0.675, + "high_school_mathematics": 0.3048327137546468, + "high_school_computer_science": 0.6060606060606061, + "conceptual_physics": 0.5256410256410257, + "miscellaneous": 0.7621483375959079, + "high_school_chemistry": 0.400990099009901, + "marketing": 0.8111587982832618, + "professional_law": 0.3926940639269406, + "management": 0.7352941176470589, + "college_physics": 0.3465346534653465, + "jurisprudence": 0.6448598130841121, + "world_religions": 0.7941176470588235, + "sociology": 0.775, + "us_foreign_policy": 0.7575757575757576, + "high_school_macroeconomics": 0.5218508997429306, + "computer_security": 0.696969696969697, + "moral_scenarios": 0.2695749440715884, + "moral_disputes": 0.6202898550724638, + "electrical_engineering": 0.4861111111111111, + "astronomy": 0.6225165562913907, + "college_biology": 0.6153846153846154 + } + }, + "prompt_2": { + "accuracy": 0.5608151590990347, + "category_acc": { + "high_school_european_history": 0.75, + "business_ethics": 0.6363636363636364, + "clinical_knowledge": 0.6363636363636364, + "medical_genetics": 0.6565656565656566, + "high_school_us_history": 0.7093596059113301, + "high_school_physics": 0.32, + "high_school_world_history": 0.7245762711864406, + "virology": 0.49696969696969695, + "high_school_microeconomics": 0.6329113924050633, + "econometrics": 0.37168141592920356, + "college_computer_science": 0.46464646464646464, + "high_school_biology": 0.6601941747572816, + "abstract_algebra": 0.30303030303030304, + "professional_accounting": 0.4306049822064057, + "philosophy": 0.6193548387096774, + "professional_medicine": 0.6199261992619927, + "nutrition": 0.6229508196721312, + "global_facts": 0.35353535353535354, + "machine_learning": 0.38738738738738737, + "security_studies": 0.6147540983606558, + "public_relations": 0.5779816513761468, + "professional_psychology": 0.5711947626841244, + "prehistory": 0.6253869969040248, + "anatomy": 0.5597014925373134, + "human_sexuality": 0.6153846153846154, + "college_medicine": 0.5813953488372093, + "high_school_government_and_politics": 0.8333333333333334, + "college_chemistry": 0.3838383838383838, + "logical_fallacies": 0.6975308641975309, + "high_school_geography": 0.7157360406091371, + "elementary_mathematics": 0.41909814323607425, + "human_aging": 0.6216216216216216, + "college_mathematics": 0.3838383838383838, + "high_school_psychology": 0.7647058823529411, + "formal_logic": 0.44, + "high_school_statistics": 0.4604651162790698, + "international_law": 0.7333333333333333, + "high_school_mathematics": 0.31226765799256506, + "high_school_computer_science": 0.6565656565656566, + "conceptual_physics": 0.49572649572649574, + "miscellaneous": 0.7736572890025576, + "high_school_chemistry": 0.42574257425742573, + "marketing": 0.8283261802575107, + "professional_law": 0.4024787997390737, + "management": 0.7254901960784313, + "college_physics": 0.32673267326732675, + "jurisprudence": 0.6542056074766355, + "world_religions": 0.8176470588235294, + "sociology": 0.785, + "us_foreign_policy": 0.7777777777777778, + "high_school_macroeconomics": 0.519280205655527, + "computer_security": 0.6666666666666666, + "moral_scenarios": 0.26733780760626397, + "moral_disputes": 0.6028985507246377, + "electrical_engineering": 0.5486111111111112, + "astronomy": 0.6225165562913907, + "college_biology": 0.6363636363636364 + } + }, + "prompt_3": { + "accuracy": 0.5555952806578477, + "category_acc": { + "high_school_european_history": 0.7317073170731707, + "business_ethics": 0.6363636363636364, + "clinical_knowledge": 0.6325757575757576, + "medical_genetics": 0.6868686868686869, + "high_school_us_history": 0.7339901477832512, + "high_school_physics": 0.32, + "high_school_world_history": 0.7457627118644068, + "virology": 0.4727272727272727, + "high_school_microeconomics": 0.5738396624472574, + "econometrics": 0.3893805309734513, + "college_computer_science": 0.4444444444444444, + "high_school_biology": 0.6440129449838188, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.4377224199288256, + "philosophy": 0.6193548387096774, + "professional_medicine": 0.6309963099630996, + "nutrition": 0.6, + "global_facts": 0.3333333333333333, + "machine_learning": 0.3783783783783784, + "security_studies": 0.5655737704918032, + "public_relations": 0.5963302752293578, + "professional_psychology": 0.5728314238952537, + "prehistory": 0.6439628482972136, + "anatomy": 0.5522388059701493, + "human_sexuality": 0.6230769230769231, + "college_medicine": 0.5523255813953488, + "high_school_government_and_politics": 0.8229166666666666, + "college_chemistry": 0.36363636363636365, + "logical_fallacies": 0.6975308641975309, + "high_school_geography": 0.7157360406091371, + "elementary_mathematics": 0.4350132625994695, + "human_aging": 0.6036036036036037, + "college_mathematics": 0.3434343434343434, + "high_school_psychology": 0.7702205882352942, + "formal_logic": 0.424, + "high_school_statistics": 0.5023255813953489, + "international_law": 0.6916666666666667, + "high_school_mathematics": 0.30111524163568776, + "high_school_computer_science": 0.5959595959595959, + "conceptual_physics": 0.4829059829059829, + "miscellaneous": 0.7621483375959079, + "high_school_chemistry": 0.4306930693069307, + "marketing": 0.8326180257510729, + "professional_law": 0.40313111545988256, + "management": 0.7549019607843137, + "college_physics": 0.3465346534653465, + "jurisprudence": 0.616822429906542, + "world_religions": 0.8, + "sociology": 0.76, + "us_foreign_policy": 0.7474747474747475, + "high_school_macroeconomics": 0.5115681233933161, + "computer_security": 0.6363636363636364, + "moral_scenarios": 0.24496644295302014, + "moral_disputes": 0.6347826086956522, + "electrical_engineering": 0.5277777777777778, + "astronomy": 0.609271523178808, + "college_biology": 0.6783216783216783 + } + }, + "prompt_4": { + "accuracy": 0.5558097962102252, + "category_acc": { + "high_school_european_history": 0.7195121951219512, + "business_ethics": 0.5959595959595959, + "clinical_knowledge": 0.6628787878787878, + "medical_genetics": 0.6666666666666666, + "high_school_us_history": 0.7241379310344828, + "high_school_physics": 0.34, + "high_school_world_history": 0.7415254237288136, + "virology": 0.4666666666666667, + "high_school_microeconomics": 0.6118143459915611, + "econometrics": 0.3893805309734513, + "college_computer_science": 0.42424242424242425, + "high_school_biology": 0.6796116504854369, + "abstract_algebra": 0.30303030303030304, + "professional_accounting": 0.4412811387900356, + "philosophy": 0.6129032258064516, + "professional_medicine": 0.6531365313653137, + "nutrition": 0.6, + "global_facts": 0.31313131313131315, + "machine_learning": 0.4144144144144144, + "security_studies": 0.5983606557377049, + "public_relations": 0.5871559633027523, + "professional_psychology": 0.5613747954173486, + "prehistory": 0.653250773993808, + "anatomy": 0.5373134328358209, + "human_sexuality": 0.6384615384615384, + "college_medicine": 0.563953488372093, + "high_school_government_and_politics": 0.8333333333333334, + "college_chemistry": 0.3939393939393939, + "logical_fallacies": 0.6975308641975309, + "high_school_geography": 0.6954314720812182, + "elementary_mathematics": 0.4535809018567639, + "human_aging": 0.6261261261261262, + "college_mathematics": 0.3434343434343434, + "high_school_psychology": 0.7628676470588235, + "formal_logic": 0.384, + "high_school_statistics": 0.4558139534883721, + "international_law": 0.7083333333333334, + "high_school_mathematics": 0.24535315985130113, + "high_school_computer_science": 0.6262626262626263, + "conceptual_physics": 0.47863247863247865, + "miscellaneous": 0.7634271099744245, + "high_school_chemistry": 0.4405940594059406, + "marketing": 0.8197424892703863, + "professional_law": 0.40117416829745595, + "management": 0.7352941176470589, + "college_physics": 0.37623762376237624, + "jurisprudence": 0.6355140186915887, + "world_religions": 0.7823529411764706, + "sociology": 0.755, + "us_foreign_policy": 0.7474747474747475, + "high_school_macroeconomics": 0.48586118251928023, + "computer_security": 0.7272727272727273, + "moral_scenarios": 0.24161073825503357, + "moral_disputes": 0.6405797101449275, + "electrical_engineering": 0.5277777777777778, + "astronomy": 0.5960264900662252, + "college_biology": 0.6503496503496503 + } + }, + "prompt_5": { + "accuracy": 0.5550947443689668, + "category_acc": { + "high_school_european_history": 0.6707317073170732, + "business_ethics": 0.6262626262626263, + "clinical_knowledge": 0.6590909090909091, + "medical_genetics": 0.6666666666666666, + "high_school_us_history": 0.7438423645320197, + "high_school_physics": 0.32666666666666666, + "high_school_world_history": 0.7542372881355932, + "virology": 0.4484848484848485, + "high_school_microeconomics": 0.6413502109704642, + "econometrics": 0.4336283185840708, + "college_computer_science": 0.4444444444444444, + "high_school_biology": 0.6796116504854369, + "abstract_algebra": 0.36363636363636365, + "professional_accounting": 0.43416370106761565, + "philosophy": 0.6129032258064516, + "professional_medicine": 0.6457564575645757, + "nutrition": 0.6229508196721312, + "global_facts": 0.37373737373737376, + "machine_learning": 0.38738738738738737, + "security_studies": 0.5778688524590164, + "public_relations": 0.6055045871559633, + "professional_psychology": 0.5531914893617021, + "prehistory": 0.653250773993808, + "anatomy": 0.5895522388059702, + "human_sexuality": 0.6076923076923076, + "college_medicine": 0.5813953488372093, + "high_school_government_and_politics": 0.8020833333333334, + "college_chemistry": 0.3939393939393939, + "logical_fallacies": 0.7222222222222222, + "high_school_geography": 0.6954314720812182, + "elementary_mathematics": 0.41909814323607425, + "human_aging": 0.5990990990990991, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.7738970588235294, + "formal_logic": 0.376, + "high_school_statistics": 0.44651162790697674, + "international_law": 0.7166666666666667, + "high_school_mathematics": 0.24907063197026022, + "high_school_computer_science": 0.6060606060606061, + "conceptual_physics": 0.47863247863247865, + "miscellaneous": 0.7493606138107417, + "high_school_chemistry": 0.4306930693069307, + "marketing": 0.8197424892703863, + "professional_law": 0.395955642530985, + "management": 0.7058823529411765, + "college_physics": 0.3465346534653465, + "jurisprudence": 0.6448598130841121, + "world_religions": 0.7941176470588235, + "sociology": 0.78, + "us_foreign_policy": 0.7575757575757576, + "high_school_macroeconomics": 0.5244215938303342, + "computer_security": 0.7070707070707071, + "moral_scenarios": 0.2662192393736018, + "moral_disputes": 0.6, + "electrical_engineering": 0.5347222222222222, + "astronomy": 0.5695364238410596, + "college_biology": 0.6083916083916084 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4011887072808321 + }, + "prompt_2": { + "accuracy": 0.40936106983655274 + }, + "prompt_3": { + "accuracy": 0.4034175334323923 + }, + "prompt_4": { + "accuracy": 0.4034175334323923 + }, + "prompt_5": { + "accuracy": 0.37964338781575035 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.41033623910336237, + "category_acc": { + "computer_network": 0.5, + "operating_system": 0.5833333333333334, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.5476190476190477, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.1724137931034483, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.08695652173913043, + "discrete_mathematics": 0.5238095238095238, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.3448275862068966, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.375, + "high_school_chemistry": 0.125, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.6538461538461539, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.38333333333333336, + "business_administration": 0.23684210526315788, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.5517241379310345, + "education_science": 0.5588235294117647, + "teacher_qualification": 0.5102040816326531, + "high_school_politics": 0.5416666666666666, + "high_school_geography": 0.375, + "middle_school_politics": 0.5384615384615384, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.6666666666666666, + "logic": 0.4444444444444444, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.23684210526315788, + "professional_tour_guide": 0.47058823529411764, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.375, + "high_school_history": 0.52, + "middle_school_history": 0.5555555555555556, + "civil_servant": 0.40384615384615385, + "sports_science": 0.375, + "plant_protection": 0.4444444444444444, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.4444444444444444, + "urban_and_rural_planner": 0.5098039215686274, + "accountant": 0.4074074074074074, + "fire_engineer": 0.4166666666666667, + "environmental_impact_assessment_engineer": 0.3888888888888889, + "tax_accountant": 0.3888888888888889, + "physician": 0.3888888888888889 + } + }, + "prompt_2": { + "accuracy": 0.41033623910336237, + "category_acc": { + "computer_network": 0.5, + "operating_system": 0.6666666666666666, + "computer_architecture": 0.5384615384615384, + "college_programming": 0.5476190476190477, + "college_physics": 0.375, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.25, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.5, + "middle_school_physics": 0.5833333333333334, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.36666666666666664, + "business_administration": 0.2894736842105263, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.5, + "teacher_qualification": 0.4897959183673469, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.5769230769230769, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.4642857142857143, + "ideological_and_moral_cultivation": 0.6666666666666666, + "logic": 0.48148148148148145, + "law": 0.3793103448275862, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.2631578947368421, + "professional_tour_guide": 0.5588235294117647, + "legal_professional": 0.5, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.56, + "middle_school_history": 0.5925925925925926, + "civil_servant": 0.40384615384615385, + "sports_science": 0.375, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.47058823529411764, + "accountant": 0.4444444444444444, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.4444444444444444, + "tax_accountant": 0.35185185185185186, + "physician": 0.35185185185185186 + } + }, + "prompt_3": { + "accuracy": 0.4090909090909091, + "category_acc": { + "computer_network": 0.5, + "operating_system": 0.5, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.5, + "college_physics": 0.25, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.16666666666666666, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.6538461538461539, + "middle_school_physics": 0.5416666666666666, + "middle_school_chemistry": 0.56, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.36666666666666664, + "business_administration": 0.3684210526315789, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.5172413793103449, + "education_science": 0.5294117647058824, + "teacher_qualification": 0.5102040816326531, + "high_school_politics": 0.5833333333333334, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.6153846153846154, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.625, + "logic": 0.4074074074074074, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.23684210526315788, + "professional_tour_guide": 0.47058823529411764, + "legal_professional": 0.5, + "high_school_chinese": 0.25, + "high_school_history": 0.6, + "middle_school_history": 0.5555555555555556, + "civil_servant": 0.4807692307692308, + "sports_science": 0.375, + "plant_protection": 0.4444444444444444, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.5490196078431373, + "accountant": 0.3888888888888889, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.3888888888888889, + "tax_accountant": 0.3148148148148148, + "physician": 0.37037037037037035 + } + }, + "prompt_4": { + "accuracy": 0.3929016189290162, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.5833333333333334, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.5952380952380952, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.13793103448275862, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.13043478260869565, + "discrete_mathematics": 0.47619047619047616, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.3103448275862069, + "high_school_mathematics": 0.08695652173913043, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.125, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.6153846153846154, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.52, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.38333333333333336, + "business_administration": 0.23684210526315788, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.5, + "teacher_qualification": 0.5510204081632653, + "high_school_politics": 0.625, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.46153846153846156, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.7083333333333334, + "logic": 0.4444444444444444, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.25, + "art_studies": 0.2894736842105263, + "professional_tour_guide": 0.5, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.375, + "high_school_history": 0.6, + "middle_school_history": 0.5925925925925926, + "civil_servant": 0.4230769230769231, + "sports_science": 0.375, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.375, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.45098039215686275, + "accountant": 0.4444444444444444, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.4722222222222222, + "tax_accountant": 0.3148148148148148, + "physician": 0.2962962962962963 + } + }, + "prompt_5": { + "accuracy": 0.3991282689912827, + "category_acc": { + "computer_network": 0.5416666666666666, + "operating_system": 0.5833333333333334, + "computer_architecture": 0.5384615384615384, + "college_programming": 0.5476190476190477, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.10344827586206896, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.25, + "high_school_biology": 0.4583333333333333, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.4230769230769231, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.35714285714285715, + "college_economics": 0.36666666666666664, + "business_administration": 0.2894736842105263, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.4482758620689655, + "education_science": 0.5588235294117647, + "teacher_qualification": 0.6122448979591837, + "high_school_politics": 0.5833333333333334, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.46153846153846156, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.37037037037037035, + "law": 0.3793103448275862, + "chinese_language_and_literature": 0.42857142857142855, + "art_studies": 0.23684210526315788, + "professional_tour_guide": 0.4117647058823529, + "legal_professional": 0.4642857142857143, + "high_school_chinese": 0.375, + "high_school_history": 0.6, + "middle_school_history": 0.5925925925925926, + "civil_servant": 0.46153846153846156, + "sports_science": 0.3333333333333333, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.49019607843137253, + "accountant": 0.3888888888888889, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.3611111111111111, + "tax_accountant": 0.3148148148148148, + "physician": 0.35185185185185186 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4229390681003584 + }, + "prompt_2": { + "accuracy": 0.3870967741935484 + }, + "prompt_3": { + "accuracy": 0.3655913978494624 + }, + "prompt_4": { + "accuracy": 0.34767025089605735 + }, + "prompt_5": { + "accuracy": 0.3655913978494624 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.40580210671732, + "category_acc": { + "agronomy": 0.38461538461538464, + "anatomy": 0.25, + "ancient_chinese": 0.2865853658536585, + "arts": 0.375, + "astronomy": 0.3333333333333333, + "business_ethics": 0.4880382775119617, + "chinese_civil_service_exam": 0.39375, + "chinese_driving_rule": 0.549618320610687, + "chinese_food_culture": 0.3235294117647059, + "chinese_foreign_policy": 0.5046728971962616, + "chinese_history": 0.42724458204334365, + "chinese_literature": 0.29901960784313725, + "chinese_teacher_qualification": 0.4972067039106145, + "clinical_knowledge": 0.35443037974683544, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.514018691588785, + "college_engineering_hydrology": 0.36792452830188677, + "college_law": 0.2777777777777778, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.37735849056603776, + "college_medicine": 0.38461538461538464, + "computer_science": 0.49019607843137253, + "computer_security": 0.5789473684210527, + "conceptual_physics": 0.4489795918367347, + "construction_project_management": 0.4244604316546763, + "economics": 0.4591194968553459, + "education": 0.4601226993865031, + "electrical_engineering": 0.4011627906976744, + "elementary_chinese": 0.2777777777777778, + "elementary_commonsense": 0.3686868686868687, + "elementary_information_and_technology": 0.7016806722689075, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.3037037037037037, + "food_science": 0.3776223776223776, + "genetics": 0.3352272727272727, + "global_facts": 0.3959731543624161, + "high_school_biology": 0.33727810650887574, + "high_school_chemistry": 0.25, + "high_school_geography": 0.3728813559322034, + "high_school_mathematics": 0.21951219512195122, + "high_school_physics": 0.3, + "high_school_politics": 0.3916083916083916, + "human_sexuality": 0.4523809523809524, + "international_law": 0.3945945945945946, + "journalism": 0.4418604651162791, + "jurisprudence": 0.43795620437956206, + "legal_and_moral_basis": 0.6869158878504673, + "logical": 0.45528455284552843, + "machine_learning": 0.45081967213114754, + "management": 0.5095238095238095, + "marketing": 0.5555555555555556, + "marxist_theory": 0.42857142857142855, + "modern_chinese": 0.29310344827586204, + "nutrition": 0.43448275862068964, + "philosophy": 0.3904761904761905, + "professional_accounting": 0.45714285714285713, + "professional_law": 0.33649289099526064, + "professional_medicine": 0.2925531914893617, + "professional_psychology": 0.41810344827586204, + "public_relations": 0.45977011494252873, + "security_study": 0.362962962962963, + "sociology": 0.4778761061946903, + "sports_science": 0.4, + "traditional_chinese_medicine": 0.34054054054054056, + "virology": 0.48520710059171596, + "world_history": 0.4720496894409938, + "world_religions": 0.4125 + } + }, + "prompt_2": { + "accuracy": 0.4041616301156968, + "category_acc": { + "agronomy": 0.38461538461538464, + "anatomy": 0.2702702702702703, + "ancient_chinese": 0.2804878048780488, + "arts": 0.39375, + "astronomy": 0.3575757575757576, + "business_ethics": 0.4880382775119617, + "chinese_civil_service_exam": 0.4125, + "chinese_driving_rule": 0.5648854961832062, + "chinese_food_culture": 0.3014705882352941, + "chinese_foreign_policy": 0.4485981308411215, + "chinese_history": 0.4520123839009288, + "chinese_literature": 0.3480392156862745, + "chinese_teacher_qualification": 0.4972067039106145, + "clinical_knowledge": 0.3206751054852321, + "college_actuarial_science": 0.20754716981132076, + "college_education": 0.4485981308411215, + "college_engineering_hydrology": 0.37735849056603776, + "college_law": 0.2777777777777778, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.3867924528301887, + "college_medicine": 0.3553113553113553, + "computer_science": 0.47549019607843135, + "computer_security": 0.5789473684210527, + "conceptual_physics": 0.41496598639455784, + "construction_project_management": 0.37410071942446044, + "economics": 0.46540880503144655, + "education": 0.4110429447852761, + "electrical_engineering": 0.45348837209302323, + "elementary_chinese": 0.27380952380952384, + "elementary_commonsense": 0.35858585858585856, + "elementary_information_and_technology": 0.634453781512605, + "elementary_mathematics": 0.2956521739130435, + "ethnology": 0.3925925925925926, + "food_science": 0.36363636363636365, + "genetics": 0.3352272727272727, + "global_facts": 0.4228187919463087, + "high_school_biology": 0.34911242603550297, + "high_school_chemistry": 0.25, + "high_school_geography": 0.3050847457627119, + "high_school_mathematics": 0.2621951219512195, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.40559440559440557, + "human_sexuality": 0.46825396825396826, + "international_law": 0.40540540540540543, + "journalism": 0.4127906976744186, + "jurisprudence": 0.43309002433090027, + "legal_and_moral_basis": 0.719626168224299, + "logical": 0.45528455284552843, + "machine_learning": 0.3524590163934426, + "management": 0.5142857142857142, + "marketing": 0.55, + "marxist_theory": 0.42328042328042326, + "modern_chinese": 0.29310344827586204, + "nutrition": 0.3931034482758621, + "philosophy": 0.45714285714285713, + "professional_accounting": 0.4514285714285714, + "professional_law": 0.3412322274881517, + "professional_medicine": 0.2898936170212766, + "professional_psychology": 0.3922413793103448, + "public_relations": 0.4885057471264368, + "security_study": 0.4222222222222222, + "sociology": 0.4911504424778761, + "sports_science": 0.4484848484848485, + "traditional_chinese_medicine": 0.31351351351351353, + "virology": 0.4911242603550296, + "world_history": 0.4472049689440994, + "world_religions": 0.38125 + } + }, + "prompt_3": { + "accuracy": 0.40191676739768606, + "category_acc": { + "agronomy": 0.3727810650887574, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.2682926829268293, + "arts": 0.3625, + "astronomy": 0.3333333333333333, + "business_ethics": 0.4880382775119617, + "chinese_civil_service_exam": 0.39375, + "chinese_driving_rule": 0.5419847328244275, + "chinese_food_culture": 0.34558823529411764, + "chinese_foreign_policy": 0.514018691588785, + "chinese_history": 0.4458204334365325, + "chinese_literature": 0.3137254901960784, + "chinese_teacher_qualification": 0.4972067039106145, + "clinical_knowledge": 0.3206751054852321, + "college_actuarial_science": 0.3113207547169811, + "college_education": 0.4392523364485981, + "college_engineering_hydrology": 0.4339622641509434, + "college_law": 0.26851851851851855, + "college_mathematics": 0.2761904761904762, + "college_medical_statistics": 0.4339622641509434, + "college_medicine": 0.3553113553113553, + "computer_science": 0.49019607843137253, + "computer_security": 0.543859649122807, + "conceptual_physics": 0.41496598639455784, + "construction_project_management": 0.41007194244604317, + "economics": 0.44654088050314467, + "education": 0.4110429447852761, + "electrical_engineering": 0.4011627906976744, + "elementary_chinese": 0.21031746031746032, + "elementary_commonsense": 0.3484848484848485, + "elementary_information_and_technology": 0.6302521008403361, + "elementary_mathematics": 0.25217391304347825, + "ethnology": 0.32592592592592595, + "food_science": 0.38461538461538464, + "genetics": 0.3522727272727273, + "global_facts": 0.4161073825503356, + "high_school_biology": 0.38461538461538464, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.3813559322033898, + "high_school_mathematics": 0.3353658536585366, + "high_school_physics": 0.32727272727272727, + "high_school_politics": 0.4405594405594406, + "human_sexuality": 0.42857142857142855, + "international_law": 0.40540540540540543, + "journalism": 0.45930232558139533, + "jurisprudence": 0.4282238442822384, + "legal_and_moral_basis": 0.6682242990654206, + "logical": 0.43902439024390244, + "machine_learning": 0.4098360655737705, + "management": 0.5, + "marketing": 0.5277777777777778, + "marxist_theory": 0.43386243386243384, + "modern_chinese": 0.3103448275862069, + "nutrition": 0.4068965517241379, + "philosophy": 0.4380952380952381, + "professional_accounting": 0.4514285714285714, + "professional_law": 0.3412322274881517, + "professional_medicine": 0.26595744680851063, + "professional_psychology": 0.39655172413793105, + "public_relations": 0.46551724137931033, + "security_study": 0.4, + "sociology": 0.47345132743362833, + "sports_science": 0.4484848484848485, + "traditional_chinese_medicine": 0.31351351351351353, + "virology": 0.4378698224852071, + "world_history": 0.45962732919254656, + "world_religions": 0.43125 + } + }, + "prompt_4": { + "accuracy": 0.39777240545674325, + "category_acc": { + "agronomy": 0.31952662721893493, + "anatomy": 0.2905405405405405, + "ancient_chinese": 0.29878048780487804, + "arts": 0.425, + "astronomy": 0.3575757575757576, + "business_ethics": 0.4784688995215311, + "chinese_civil_service_exam": 0.4, + "chinese_driving_rule": 0.549618320610687, + "chinese_food_culture": 0.27941176470588236, + "chinese_foreign_policy": 0.4953271028037383, + "chinese_history": 0.43962848297213625, + "chinese_literature": 0.3137254901960784, + "chinese_teacher_qualification": 0.4972067039106145, + "clinical_knowledge": 0.29535864978902954, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.45794392523364486, + "college_engineering_hydrology": 0.330188679245283, + "college_law": 0.26851851851851855, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.41509433962264153, + "college_medicine": 0.3516483516483517, + "computer_science": 0.4950980392156863, + "computer_security": 0.5380116959064327, + "conceptual_physics": 0.3333333333333333, + "construction_project_management": 0.4460431654676259, + "economics": 0.4528301886792453, + "education": 0.4171779141104294, + "electrical_engineering": 0.4186046511627907, + "elementary_chinese": 0.2698412698412698, + "elementary_commonsense": 0.3888888888888889, + "elementary_information_and_technology": 0.6512605042016807, + "elementary_mathematics": 0.30434782608695654, + "ethnology": 0.37037037037037035, + "food_science": 0.3356643356643357, + "genetics": 0.3352272727272727, + "global_facts": 0.348993288590604, + "high_school_biology": 0.39644970414201186, + "high_school_chemistry": 0.30303030303030304, + "high_school_geography": 0.4067796610169492, + "high_school_mathematics": 0.31097560975609756, + "high_school_physics": 0.3090909090909091, + "high_school_politics": 0.35664335664335667, + "human_sexuality": 0.4126984126984127, + "international_law": 0.3891891891891892, + "journalism": 0.4127906976744186, + "jurisprudence": 0.41849148418491483, + "legal_and_moral_basis": 0.6728971962616822, + "logical": 0.4065040650406504, + "machine_learning": 0.39344262295081966, + "management": 0.5142857142857142, + "marketing": 0.5222222222222223, + "marxist_theory": 0.42328042328042326, + "modern_chinese": 0.29310344827586204, + "nutrition": 0.3724137931034483, + "philosophy": 0.47619047619047616, + "professional_accounting": 0.42857142857142855, + "professional_law": 0.32701421800947866, + "professional_medicine": 0.2925531914893617, + "professional_psychology": 0.3922413793103448, + "public_relations": 0.4482758620689655, + "security_study": 0.4, + "sociology": 0.4469026548672566, + "sports_science": 0.36363636363636365, + "traditional_chinese_medicine": 0.32972972972972975, + "virology": 0.4556213017751479, + "world_history": 0.4658385093167702, + "world_religions": 0.45625 + } + }, + "prompt_5": { + "accuracy": 0.4060611293386289, + "category_acc": { + "agronomy": 0.40236686390532544, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.2804878048780488, + "arts": 0.36875, + "astronomy": 0.32727272727272727, + "business_ethics": 0.45454545454545453, + "chinese_civil_service_exam": 0.40625, + "chinese_driving_rule": 0.5190839694656488, + "chinese_food_culture": 0.3014705882352941, + "chinese_foreign_policy": 0.4766355140186916, + "chinese_history": 0.4241486068111455, + "chinese_literature": 0.3480392156862745, + "chinese_teacher_qualification": 0.4972067039106145, + "clinical_knowledge": 0.35443037974683544, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.4766355140186916, + "college_engineering_hydrology": 0.39622641509433965, + "college_law": 0.3333333333333333, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.3867924528301887, + "college_medicine": 0.3553113553113553, + "computer_science": 0.5049019607843137, + "computer_security": 0.5672514619883041, + "conceptual_physics": 0.40816326530612246, + "construction_project_management": 0.43884892086330934, + "economics": 0.44654088050314467, + "education": 0.36809815950920244, + "electrical_engineering": 0.38372093023255816, + "elementary_chinese": 0.2976190476190476, + "elementary_commonsense": 0.3787878787878788, + "elementary_information_and_technology": 0.634453781512605, + "elementary_mathematics": 0.3130434782608696, + "ethnology": 0.3851851851851852, + "food_science": 0.3776223776223776, + "genetics": 0.36363636363636365, + "global_facts": 0.4228187919463087, + "high_school_biology": 0.38461538461538464, + "high_school_chemistry": 0.21212121212121213, + "high_school_geography": 0.3728813559322034, + "high_school_mathematics": 0.2865853658536585, + "high_school_physics": 0.3181818181818182, + "high_school_politics": 0.3986013986013986, + "human_sexuality": 0.4444444444444444, + "international_law": 0.3567567567567568, + "journalism": 0.436046511627907, + "jurisprudence": 0.44038929440389296, + "legal_and_moral_basis": 0.677570093457944, + "logical": 0.44715447154471544, + "machine_learning": 0.4098360655737705, + "management": 0.5142857142857142, + "marketing": 0.5666666666666667, + "marxist_theory": 0.455026455026455, + "modern_chinese": 0.3275862068965517, + "nutrition": 0.4206896551724138, + "philosophy": 0.45714285714285713, + "professional_accounting": 0.4228571428571429, + "professional_law": 0.35545023696682465, + "professional_medicine": 0.2579787234042553, + "professional_psychology": 0.41810344827586204, + "public_relations": 0.4367816091954023, + "security_study": 0.37777777777777777, + "sociology": 0.4557522123893805, + "sports_science": 0.48484848484848486, + "traditional_chinese_medicine": 0.35135135135135137, + "virology": 0.47928994082840237, + "world_history": 0.484472049689441, + "world_religions": 0.45625 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2727272727272727 + }, + "prompt_2": { + "accuracy": 0.36363636363636365 + }, + "prompt_3": { + "accuracy": 0.3939393939393939 + }, + "prompt_4": { + "accuracy": 0.3939393939393939 + }, + "prompt_5": { + "accuracy": 0.36363636363636365 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5954545454545455 + }, + "prompt_2": { + "accuracy": 0.575 + }, + "prompt_3": { + "accuracy": 0.5704545454545454 + }, + "prompt_4": { + "accuracy": 0.5704545454545454 + }, + "prompt_5": { + "accuracy": 0.5659090909090909 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5166101694915254 + }, + "prompt_2": { + "accuracy": 0.47627118644067795 + }, + "prompt_3": { + "accuracy": 0.5094915254237288 + }, + "prompt_4": { + "accuracy": 0.48847457627118646 + }, + "prompt_5": { + "accuracy": 0.4942372881355932 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7557965594614809 + }, + "prompt_2": { + "accuracy": 0.7449513836948392 + }, + "prompt_3": { + "accuracy": 0.7535527299925205 + }, + "prompt_4": { + "accuracy": 0.7393418100224383 + }, + "prompt_5": { + "accuracy": 0.7513089005235603 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8539931406173444 + }, + "prompt_2": { + "accuracy": 0.8574228319451249 + }, + "prompt_3": { + "accuracy": 0.8603625673689368 + }, + "prompt_4": { + "accuracy": 0.8471337579617835 + }, + "prompt_5": { + "accuracy": 0.8525232729054385 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.3040469125169744, + "rouge2": 0.09866424065188766, + "rougeL": 0.2278472840162937, + "avg_rouge": 0.21018614572838526 + }, + "prompt_2": { + "rouge1": 0.33472951189717925, + "rouge2": 0.11407883250370864, + "rougeL": 0.25326559317233693, + "avg_rouge": 0.2340246458577416 + }, + "prompt_3": { + "rouge1": 0.3162804006362745, + "rouge2": 0.1006042895533365, + "rougeL": 0.23537383337213125, + "avg_rouge": 0.21741950785391406 + }, + "prompt_4": { + "rouge1": 0.31838767829496717, + "rouge2": 0.10449642628258984, + "rougeL": 0.23812650557758017, + "avg_rouge": 0.2203368700517124 + }, + "prompt_5": { + "rouge1": 0.332873520633706, + "rouge2": 0.1067181043307632, + "rougeL": 0.24963186849782357, + "avg_rouge": 0.22974116448743095 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2236131183286011, + "rouge2": 0.051634442822359794, + "rougeL": 0.16275063290754171, + "avg_rouge": 0.14599939801950088 + }, + "prompt_2": { + "rouge1": 0.221654996890681, + "rouge2": 0.050915215740073745, + "rougeL": 0.16117814361439403, + "avg_rouge": 0.1445827854150496 + }, + "prompt_3": { + "rouge1": 0.22108360111145298, + "rouge2": 0.05115688486594595, + "rougeL": 0.16135797359794596, + "avg_rouge": 0.1445328198584483 + }, + "prompt_4": { + "rouge1": 0.22146689356745836, + "rouge2": 0.0501008975416257, + "rougeL": 0.1609780714567062, + "avg_rouge": 0.14418195418859678 + }, + "prompt_5": { + "rouge1": 0.22115858144997075, + "rouge2": 0.051841544464347, + "rougeL": 0.16149699745156773, + "avg_rouge": 0.14483237445529518 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8967889908256881 + }, + "prompt_2": { + "accuracy": 0.9059633027522935 + }, + "prompt_3": { + "accuracy": 0.8944954128440367 + }, + "prompt_4": { + "accuracy": 0.908256880733945 + }, + "prompt_5": { + "accuracy": 0.856651376146789 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8139980824544583 + }, + "prompt_2": { + "accuracy": 0.8120805369127517 + }, + "prompt_3": { + "accuracy": 0.822627037392138 + }, + "prompt_4": { + "accuracy": 0.8092042186001918 + }, + "prompt_5": { + "accuracy": 0.8149568552253116 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6355 + }, + "prompt_2": { + "accuracy": 0.646 + }, + "prompt_3": { + "accuracy": 0.6675 + }, + "prompt_4": { + "accuracy": 0.6165 + }, + "prompt_5": { + "accuracy": 0.6315 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6645 + }, + "prompt_2": { + "accuracy": 0.6695 + }, + "prompt_3": { + "accuracy": 0.6565 + }, + "prompt_4": { + "accuracy": 0.6335 + }, + "prompt_5": { + "accuracy": 0.6465 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8525 + }, + "prompt_2": { + "accuracy": 0.8445 + }, + "prompt_3": { + "accuracy": 0.842 + }, + "prompt_4": { + "accuracy": 0.841 + }, + "prompt_5": { + "accuracy": 0.852 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6338028169014085 + }, + "prompt_2": { + "accuracy": 0.5915492957746479 + }, + "prompt_3": { + "accuracy": 0.6197183098591549 + }, + "prompt_4": { + "accuracy": 0.6056338028169014 + }, + "prompt_5": { + "accuracy": 0.6056338028169014 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6606498194945848 + }, + "prompt_2": { + "accuracy": 0.6570397111913358 + }, + "prompt_3": { + "accuracy": 0.6895306859205776 + }, + "prompt_4": { + "accuracy": 0.703971119133574 + }, + "prompt_5": { + "accuracy": 0.6137184115523465 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7083333333333334 + }, + "prompt_2": { + "accuracy": 0.7892156862745098 + }, + "prompt_3": { + "accuracy": 0.7401960784313726 + }, + "prompt_4": { + "accuracy": 0.7205882352941176 + }, + "prompt_5": { + "accuracy": 0.7426470588235294 + } } }, "five_shot": { @@ -11496,53 +99176,1733 @@ "model_link": "https://huggingface.co/aisingapore/sealion3b", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.23904761904761904, + "language_acc": { + "English": 0.25333333333333335, + "Vietnamese": 0.20666666666666667, + "Malay": 0.26666666666666666, + "Indonesian": 0.22666666666666666, + "Spanish": 0.22666666666666666, + "Chinese": 0.23333333333333334, + "Filipino": 0.26 + }, + "consistency_score_2": 0.44126984126984137, + "consistency_score_3": 0.2537142857142857, + "consistency_score_4": 0.171047619047619, + "consistency_score_5": 0.1273015873015873, + "consistency_score_6": 0.10190476190476191, + "consistency_score_7": 0.08666666666666667, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4666666666666667, + "English,Malay": 0.4533333333333333, + "English,Indonesian": 0.44, + "English,Spanish": 0.41333333333333333, + "English,Chinese": 0.42, + "English,Filipino": 0.5066666666666667, + "Vietnamese,Malay": 0.4066666666666667, + "Vietnamese,Indonesian": 0.46, + "Vietnamese,Spanish": 0.38666666666666666, + "Vietnamese,Chinese": 0.47333333333333333, + "Vietnamese,Filipino": 0.46, + "Malay,Indonesian": 0.58, + "Malay,Spanish": 0.3933333333333333, + "Malay,Chinese": 0.3933333333333333, + "Malay,Filipino": 0.5066666666666667, + "Indonesian,Spanish": 0.41333333333333333, + "Indonesian,Chinese": 0.38666666666666666, + "Indonesian,Filipino": 0.5733333333333334, + "Spanish,Chinese": 0.30666666666666664, + "Spanish,Filipino": 0.36666666666666664, + "Chinese,Filipino": 0.46 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.25333333333333335, + "English,Vietnamese,Indonesian": 0.26666666666666666, + "English,Vietnamese,Spanish": 0.23333333333333334, + "English,Vietnamese,Chinese": 0.26, + "English,Vietnamese,Filipino": 0.2866666666666667, + "English,Malay,Indonesian": 0.3, + "English,Malay,Spanish": 0.22666666666666666, + "English,Malay,Chinese": 0.21333333333333335, + "English,Malay,Filipino": 0.32, + "English,Indonesian,Spanish": 0.25333333333333335, + "English,Indonesian,Chinese": 0.22666666666666666, + "English,Indonesian,Filipino": 0.32666666666666666, + "English,Spanish,Chinese": 0.19333333333333333, + "English,Spanish,Filipino": 0.24666666666666667, + "English,Chinese,Filipino": 0.2733333333333333, + "Vietnamese,Malay,Indonesian": 0.30666666666666664, + "Vietnamese,Malay,Spanish": 0.19333333333333333, + "Vietnamese,Malay,Chinese": 0.21333333333333335, + "Vietnamese,Malay,Filipino": 0.2733333333333333, + "Vietnamese,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Indonesian,Chinese": 0.24, + "Vietnamese,Indonesian,Filipino": 0.3, + "Vietnamese,Spanish,Chinese": 0.19333333333333333, + "Vietnamese,Spanish,Filipino": 0.2, + "Vietnamese,Chinese,Filipino": 0.28, + "Malay,Indonesian,Spanish": 0.26666666666666666, + "Malay,Indonesian,Chinese": 0.25333333333333335, + "Malay,Indonesian,Filipino": 0.4, + "Malay,Spanish,Chinese": 0.17333333333333334, + "Malay,Spanish,Filipino": 0.25333333333333335, + "Malay,Chinese,Filipino": 0.26666666666666666, + "Indonesian,Spanish,Chinese": 0.19333333333333333, + "Indonesian,Spanish,Filipino": 0.26666666666666666, + "Indonesian,Chinese,Filipino": 0.2866666666666667, + "Spanish,Chinese,Filipino": 0.20666666666666667 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.18666666666666668, + "English,Vietnamese,Malay,Spanish": 0.15333333333333332, + "English,Vietnamese,Malay,Chinese": 0.14666666666666667, + "English,Vietnamese,Malay,Filipino": 0.19333333333333333, + "English,Vietnamese,Indonesian,Spanish": 0.18, + "English,Vietnamese,Indonesian,Chinese": 0.18, + "English,Vietnamese,Indonesian,Filipino": 0.2, + "English,Vietnamese,Spanish,Chinese": 0.16, + "English,Vietnamese,Spanish,Filipino": 0.17333333333333334, + "English,Vietnamese,Chinese,Filipino": 0.2, + "English,Malay,Indonesian,Spanish": 0.16, + "English,Malay,Indonesian,Chinese": 0.16666666666666666, + "English,Malay,Indonesian,Filipino": 0.26666666666666666, + "English,Malay,Spanish,Chinese": 0.12666666666666668, + "English,Malay,Spanish,Filipino": 0.18666666666666668, + "English,Malay,Chinese,Filipino": 0.16, + "English,Indonesian,Spanish,Chinese": 0.14666666666666667, + "English,Indonesian,Spanish,Filipino": 0.19333333333333333, + "English,Indonesian,Chinese,Filipino": 0.18666666666666668, + "English,Spanish,Chinese,Filipino": 0.14, + "Vietnamese,Malay,Indonesian,Spanish": 0.16, + "Vietnamese,Malay,Indonesian,Chinese": 0.16666666666666666, + "Vietnamese,Malay,Indonesian,Filipino": 0.23333333333333334, + "Vietnamese,Malay,Spanish,Chinese": 0.11333333333333333, + "Vietnamese,Malay,Spanish,Filipino": 0.14666666666666667, + "Vietnamese,Malay,Chinese,Filipino": 0.16666666666666666, + "Vietnamese,Indonesian,Spanish,Chinese": 0.14, + "Vietnamese,Indonesian,Spanish,Filipino": 0.15333333333333332, + "Vietnamese,Indonesian,Chinese,Filipino": 0.18666666666666668, + "Vietnamese,Spanish,Chinese,Filipino": 0.14666666666666667, + "Malay,Indonesian,Spanish,Chinese": 0.13333333333333333, + "Malay,Indonesian,Spanish,Filipino": 0.20666666666666667, + "Malay,Indonesian,Chinese,Filipino": 0.22, + "Malay,Spanish,Chinese,Filipino": 0.14, + "Indonesian,Spanish,Chinese,Filipino": 0.16666666666666666 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.12, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.12666666666666668, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.16666666666666666, + "English,Vietnamese,Malay,Spanish,Chinese": 0.10666666666666667, + "English,Vietnamese,Malay,Spanish,Filipino": 0.13333333333333333, + "English,Vietnamese,Malay,Chinese,Filipino": 0.12, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.13333333333333333, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.14, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.14666666666666667, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.13333333333333333, + "English,Malay,Indonesian,Spanish,Chinese": 0.1, + "English,Malay,Indonesian,Spanish,Filipino": 0.15333333333333332, + "English,Malay,Indonesian,Chinese,Filipino": 0.15333333333333332, + "English,Malay,Spanish,Chinese,Filipino": 0.1, + "English,Indonesian,Spanish,Chinese,Filipino": 0.12, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.1, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.12666666666666668, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.14666666666666667, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.1, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.12, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.12666666666666668 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.09333333333333334, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.11333333333333333, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.11333333333333333, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.09333333333333334, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.11333333333333333, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.09333333333333334, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.09333333333333334 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.08666666666666667 + } + }, + "AC3_2": 0.31010377081413826, + "AC3_3": 0.24616268148961026, + "AC3_4": 0.19940990418245869, + "AC3_5": 0.16613188078323998, + "AC3_6": 0.14289438676309096, + "AC3_7": 0.1272124755944718 + }, + "prompt_2": { + "overall_acc": 0.2495238095238095, + "language_acc": { + "English": 0.25333333333333335, + "Vietnamese": 0.22666666666666666, + "Malay": 0.22666666666666666, + "Indonesian": 0.26, + "Spanish": 0.24666666666666667, + "Chinese": 0.26, + "Filipino": 0.2733333333333333 + }, + "consistency_score_2": 0.5552380952380952, + "consistency_score_3": 0.38019047619047625, + "consistency_score_4": 0.2828571428571429, + "consistency_score_5": 0.2203174603174603, + "consistency_score_6": 0.17714285714285713, + "consistency_score_7": 0.14666666666666667, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.56, + "English,Malay": 0.5933333333333334, + "English,Indonesian": 0.5333333333333333, + "English,Spanish": 0.5666666666666667, + "English,Chinese": 0.5333333333333333, + "English,Filipino": 0.6466666666666666, + "Vietnamese,Malay": 0.5933333333333334, + "Vietnamese,Indonesian": 0.5533333333333333, + "Vietnamese,Spanish": 0.5266666666666666, + "Vietnamese,Chinese": 0.5, + "Vietnamese,Filipino": 0.58, + "Malay,Indonesian": 0.5733333333333334, + "Malay,Spanish": 0.58, + "Malay,Chinese": 0.54, + "Malay,Filipino": 0.6266666666666667, + "Indonesian,Spanish": 0.5, + "Indonesian,Chinese": 0.4666666666666667, + "Indonesian,Filipino": 0.58, + "Spanish,Chinese": 0.4666666666666667, + "Spanish,Filipino": 0.5866666666666667, + "Chinese,Filipino": 0.5533333333333333 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.41333333333333333, + "English,Vietnamese,Indonesian": 0.38666666666666666, + "English,Vietnamese,Spanish": 0.38666666666666666, + "English,Vietnamese,Chinese": 0.35333333333333333, + "English,Vietnamese,Filipino": 0.43333333333333335, + "English,Malay,Indonesian": 0.3933333333333333, + "English,Malay,Spanish": 0.44666666666666666, + "English,Malay,Chinese": 0.36666666666666664, + "English,Malay,Filipino": 0.47333333333333333, + "English,Indonesian,Spanish": 0.36, + "English,Indonesian,Chinese": 0.32666666666666666, + "English,Indonesian,Filipino": 0.41333333333333333, + "English,Spanish,Chinese": 0.34, + "English,Spanish,Filipino": 0.44666666666666666, + "English,Chinese,Filipino": 0.41333333333333333, + "Vietnamese,Malay,Indonesian": 0.42, + "Vietnamese,Malay,Spanish": 0.4066666666666667, + "Vietnamese,Malay,Chinese": 0.36, + "Vietnamese,Malay,Filipino": 0.43333333333333335, + "Vietnamese,Indonesian,Spanish": 0.3333333333333333, + "Vietnamese,Indonesian,Chinese": 0.32666666666666666, + "Vietnamese,Indonesian,Filipino": 0.38666666666666666, + "Vietnamese,Spanish,Chinese": 0.3, + "Vietnamese,Spanish,Filipino": 0.36666666666666664, + "Vietnamese,Chinese,Filipino": 0.35333333333333333, + "Malay,Indonesian,Spanish": 0.36666666666666664, + "Malay,Indonesian,Chinese": 0.3333333333333333, + "Malay,Indonesian,Filipino": 0.4266666666666667, + "Malay,Spanish,Chinese": 0.34, + "Malay,Spanish,Filipino": 0.4533333333333333, + "Malay,Chinese,Filipino": 0.38666666666666666, + "Indonesian,Spanish,Chinese": 0.29333333333333333, + "Indonesian,Spanish,Filipino": 0.36666666666666664, + "Indonesian,Chinese,Filipino": 0.3333333333333333, + "Spanish,Chinese,Filipino": 0.36666666666666664 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.31333333333333335, + "English,Vietnamese,Malay,Spanish": 0.34, + "English,Vietnamese,Malay,Chinese": 0.26666666666666666, + "English,Vietnamese,Malay,Filipino": 0.34, + "English,Vietnamese,Indonesian,Spanish": 0.2733333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.25333333333333335, + "English,Vietnamese,Indonesian,Filipino": 0.32, + "English,Vietnamese,Spanish,Chinese": 0.24666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.30666666666666664, + "English,Vietnamese,Chinese,Filipino": 0.29333333333333333, + "English,Malay,Indonesian,Spanish": 0.3, + "English,Malay,Indonesian,Chinese": 0.26, + "English,Malay,Indonesian,Filipino": 0.34, + "English,Malay,Spanish,Chinese": 0.2733333333333333, + "English,Malay,Spanish,Filipino": 0.37333333333333335, + "English,Malay,Chinese,Filipino": 0.32, + "English,Indonesian,Spanish,Chinese": 0.22666666666666666, + "English,Indonesian,Spanish,Filipino": 0.3, + "English,Indonesian,Chinese,Filipino": 0.2733333333333333, + "English,Spanish,Chinese,Filipino": 0.3, + "Vietnamese,Malay,Indonesian,Spanish": 0.2733333333333333, + "Vietnamese,Malay,Indonesian,Chinese": 0.26, + "Vietnamese,Malay,Indonesian,Filipino": 0.32666666666666666, + "Vietnamese,Malay,Spanish,Chinese": 0.25333333333333335, + "Vietnamese,Malay,Spanish,Filipino": 0.32, + "Vietnamese,Malay,Chinese,Filipino": 0.2866666666666667, + "Vietnamese,Indonesian,Spanish,Chinese": 0.21333333333333335, + "Vietnamese,Indonesian,Spanish,Filipino": 0.26, + "Vietnamese,Indonesian,Chinese,Filipino": 0.25333333333333335, + "Vietnamese,Spanish,Chinese,Filipino": 0.25333333333333335, + "Malay,Indonesian,Spanish,Chinese": 0.22, + "Malay,Indonesian,Spanish,Filipino": 0.3, + "Malay,Indonesian,Chinese,Filipino": 0.26, + "Malay,Spanish,Chinese,Filipino": 0.28, + "Indonesian,Spanish,Chinese,Filipino": 0.22 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.24, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.20666666666666667, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.2733333333333333, + "English,Vietnamese,Malay,Spanish,Chinese": 0.21333333333333335, + "English,Vietnamese,Malay,Spanish,Filipino": 0.2733333333333333, + "English,Vietnamese,Malay,Chinese,Filipino": 0.24, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.18, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.23333333333333334, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.22, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.22, + "English,Malay,Indonesian,Spanish,Chinese": 0.18666666666666668, + "English,Malay,Indonesian,Spanish,Filipino": 0.26, + "English,Malay,Indonesian,Chinese,Filipino": 0.23333333333333334, + "English,Malay,Spanish,Chinese,Filipino": 0.24666666666666667, + "English,Indonesian,Spanish,Chinese,Filipino": 0.2, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.17333333333333334, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.22666666666666666, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.22, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.22, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.18, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.18 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.15333333333333332, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.20666666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.19333333333333333, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.19333333333333333, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.16666666666666666, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.17333333333333334, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.15333333333333332 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.14666666666666667 + } + }, + "AC3_2": 0.3443133276551586, + "AC3_3": 0.301300377332385, + "AC3_4": 0.2651469460273747, + "AC3_5": 0.23401286996306317, + "AC3_6": 0.20719387750245932, + "AC3_7": 0.18474358969695973 + }, + "prompt_3": { + "overall_acc": 0.2638095238095238, + "language_acc": { + "English": 0.26, + "Vietnamese": 0.22666666666666666, + "Malay": 0.26666666666666666, + "Indonesian": 0.26, + "Spanish": 0.25333333333333335, + "Chinese": 0.2866666666666667, + "Filipino": 0.29333333333333333 + }, + "consistency_score_2": 0.5980952380952381, + "consistency_score_3": 0.44228571428571434, + "consistency_score_4": 0.34971428571428564, + "consistency_score_5": 0.2866666666666666, + "consistency_score_6": 0.24095238095238097, + "consistency_score_7": 0.20666666666666667, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5866666666666667, + "English,Malay": 0.5933333333333334, + "English,Indonesian": 0.64, + "English,Spanish": 0.5466666666666666, + "English,Chinese": 0.5133333333333333, + "English,Filipino": 0.64, + "Vietnamese,Malay": 0.58, + "Vietnamese,Indonesian": 0.6, + "Vietnamese,Spanish": 0.5666666666666667, + "Vietnamese,Chinese": 0.5733333333333334, + "Vietnamese,Filipino": 0.6066666666666667, + "Malay,Indonesian": 0.7333333333333333, + "Malay,Spanish": 0.56, + "Malay,Chinese": 0.5533333333333333, + "Malay,Filipino": 0.7266666666666667, + "Indonesian,Spanish": 0.5666666666666667, + "Indonesian,Chinese": 0.5733333333333334, + "Indonesian,Filipino": 0.7266666666666667, + "Spanish,Chinese": 0.49333333333333335, + "Spanish,Filipino": 0.5866666666666667, + "Chinese,Filipino": 0.5933333333333334 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.43333333333333335, + "English,Vietnamese,Indonesian": 0.4533333333333333, + "English,Vietnamese,Spanish": 0.41333333333333333, + "English,Vietnamese,Chinese": 0.3933333333333333, + "English,Vietnamese,Filipino": 0.4666666666666667, + "English,Malay,Indonesian": 0.5066666666666667, + "English,Malay,Spanish": 0.4066666666666667, + "English,Malay,Chinese": 0.37333333333333335, + "English,Malay,Filipino": 0.5133333333333333, + "English,Indonesian,Spanish": 0.43333333333333335, + "English,Indonesian,Chinese": 0.3933333333333333, + "English,Indonesian,Filipino": 0.52, + "English,Spanish,Chinese": 0.3466666666666667, + "English,Spanish,Filipino": 0.44, + "English,Chinese,Filipino": 0.3933333333333333, + "Vietnamese,Malay,Indonesian": 0.49333333333333335, + "Vietnamese,Malay,Spanish": 0.41333333333333333, + "Vietnamese,Malay,Chinese": 0.4066666666666667, + "Vietnamese,Malay,Filipino": 0.49333333333333335, + "Vietnamese,Indonesian,Spanish": 0.41333333333333333, + "Vietnamese,Indonesian,Chinese": 0.42, + "Vietnamese,Indonesian,Filipino": 0.48, + "Vietnamese,Spanish,Chinese": 0.38666666666666666, + "Vietnamese,Spanish,Filipino": 0.44666666666666666, + "Vietnamese,Chinese,Filipino": 0.44, + "Malay,Indonesian,Spanish": 0.48, + "Malay,Indonesian,Chinese": 0.46, + "Malay,Indonesian,Filipino": 0.6133333333333333, + "Malay,Spanish,Chinese": 0.36666666666666664, + "Malay,Spanish,Filipino": 0.48, + "Malay,Chinese,Filipino": 0.48, + "Indonesian,Spanish,Chinese": 0.37333333333333335, + "Indonesian,Spanish,Filipino": 0.47333333333333333, + "Indonesian,Chinese,Filipino": 0.4666666666666667, + "Spanish,Chinese,Filipino": 0.4066666666666667 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.38, + "English,Vietnamese,Malay,Spanish": 0.32666666666666666, + "English,Vietnamese,Malay,Chinese": 0.3, + "English,Vietnamese,Malay,Filipino": 0.38666666666666666, + "English,Vietnamese,Indonesian,Spanish": 0.3333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.32, + "English,Vietnamese,Indonesian,Filipino": 0.38666666666666666, + "English,Vietnamese,Spanish,Chinese": 0.29333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.3466666666666667, + "English,Vietnamese,Chinese,Filipino": 0.32666666666666666, + "English,Malay,Indonesian,Spanish": 0.37333333333333335, + "English,Malay,Indonesian,Chinese": 0.3333333333333333, + "English,Malay,Indonesian,Filipino": 0.44666666666666666, + "English,Malay,Spanish,Chinese": 0.2733333333333333, + "English,Malay,Spanish,Filipino": 0.36666666666666664, + "English,Malay,Chinese,Filipino": 0.3333333333333333, + "English,Indonesian,Spanish,Chinese": 0.2866666666666667, + "English,Indonesian,Spanish,Filipino": 0.36666666666666664, + "English,Indonesian,Chinese,Filipino": 0.34, + "English,Spanish,Chinese,Filipino": 0.2866666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.36666666666666664, + "Vietnamese,Malay,Indonesian,Chinese": 0.35333333333333333, + "Vietnamese,Malay,Indonesian,Filipino": 0.4266666666666667, + "Vietnamese,Malay,Spanish,Chinese": 0.31333333333333335, + "Vietnamese,Malay,Spanish,Filipino": 0.38, + "Vietnamese,Malay,Chinese,Filipino": 0.36666666666666664, + "Vietnamese,Indonesian,Spanish,Chinese": 0.31333333333333335, + "Vietnamese,Indonesian,Spanish,Filipino": 0.36, + "Vietnamese,Indonesian,Chinese,Filipino": 0.36, + "Vietnamese,Spanish,Chinese,Filipino": 0.35333333333333333, + "Malay,Indonesian,Spanish,Chinese": 0.32666666666666666, + "Malay,Indonesian,Spanish,Filipino": 0.4266666666666667, + "Malay,Indonesian,Chinese,Filipino": 0.4066666666666667, + "Malay,Spanish,Chinese,Filipino": 0.3466666666666667, + "Indonesian,Spanish,Chinese,Filipino": 0.3333333333333333 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.29333333333333333, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.2733333333333333, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.34, + "English,Vietnamese,Malay,Spanish,Chinese": 0.24, + "English,Vietnamese,Malay,Spanish,Filipino": 0.3, + "English,Vietnamese,Malay,Chinese,Filipino": 0.2733333333333333, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.24666666666666667, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.2866666666666667, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.28, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.26, + "English,Malay,Indonesian,Spanish,Chinese": 0.25333333333333335, + "English,Malay,Indonesian,Spanish,Filipino": 0.3333333333333333, + "English,Malay,Indonesian,Chinese,Filipino": 0.3, + "English,Malay,Spanish,Chinese,Filipino": 0.25333333333333335, + "English,Indonesian,Spanish,Chinese,Filipino": 0.25333333333333335, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.28, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.3333333333333333, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.32, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.3, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.29333333333333333, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.30666666666666664 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.22, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.26666666666666666, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.24666666666666667, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.22666666666666666, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.22666666666666666, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.23333333333333334, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.26666666666666666 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.20666666666666667 + } + }, + "AC3_2": 0.36612680869206476, + "AC3_3": 0.33049134836739164, + "AC3_4": 0.3007477712731073, + "AC3_5": 0.27476355242990164, + "AC3_6": 0.2518634321154215, + "AC3_7": 0.23176788119230304 + }, + "prompt_4": { + "overall_acc": 0.2380952380952381, + "language_acc": { + "English": 0.23333333333333334, + "Vietnamese": 0.23333333333333334, + "Malay": 0.24, + "Indonesian": 0.21333333333333335, + "Spanish": 0.24666666666666667, + "Chinese": 0.25333333333333335, + "Filipino": 0.24666666666666667 + }, + "consistency_score_2": 0.5114285714285715, + "consistency_score_3": 0.3257142857142858, + "consistency_score_4": 0.22685714285714284, + "consistency_score_5": 0.16507936507936508, + "consistency_score_6": 0.12095238095238095, + "consistency_score_7": 0.08666666666666667, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5333333333333333, + "English,Malay": 0.52, + "English,Indonesian": 0.5666666666666667, + "English,Spanish": 0.44, + "English,Chinese": 0.48, + "English,Filipino": 0.58, + "Vietnamese,Malay": 0.44, + "Vietnamese,Indonesian": 0.5133333333333333, + "Vietnamese,Spanish": 0.4066666666666667, + "Vietnamese,Chinese": 0.5733333333333334, + "Vietnamese,Filipino": 0.44, + "Malay,Indonesian": 0.7666666666666667, + "Malay,Spanish": 0.42, + "Malay,Chinese": 0.41333333333333333, + "Malay,Filipino": 0.7066666666666667, + "Indonesian,Spanish": 0.42, + "Indonesian,Chinese": 0.4866666666666667, + "Indonesian,Filipino": 0.6933333333333334, + "Spanish,Chinese": 0.44, + "Spanish,Filipino": 0.44666666666666666, + "Chinese,Filipino": 0.4533333333333333 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.3333333333333333, + "English,Vietnamese,Indonesian": 0.37333333333333335, + "English,Vietnamese,Spanish": 0.24666666666666667, + "English,Vietnamese,Chinese": 0.36, + "English,Vietnamese,Filipino": 0.34, + "English,Malay,Indonesian": 0.4533333333333333, + "English,Malay,Spanish": 0.26666666666666666, + "English,Malay,Chinese": 0.2733333333333333, + "English,Malay,Filipino": 0.4533333333333333, + "English,Indonesian,Spanish": 0.2733333333333333, + "English,Indonesian,Chinese": 0.32666666666666666, + "English,Indonesian,Filipino": 0.44, + "English,Spanish,Chinese": 0.25333333333333335, + "English,Spanish,Filipino": 0.3, + "English,Chinese,Filipino": 0.32, + "Vietnamese,Malay,Indonesian": 0.3933333333333333, + "Vietnamese,Malay,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Chinese": 0.26666666666666666, + "Vietnamese,Malay,Filipino": 0.34, + "Vietnamese,Indonesian,Spanish": 0.24666666666666667, + "Vietnamese,Indonesian,Chinese": 0.3333333333333333, + "Vietnamese,Indonesian,Filipino": 0.35333333333333333, + "Vietnamese,Spanish,Chinese": 0.2866666666666667, + "Vietnamese,Spanish,Filipino": 0.23333333333333334, + "Vietnamese,Chinese,Filipino": 0.29333333333333333, + "Malay,Indonesian,Spanish": 0.35333333333333333, + "Malay,Indonesian,Chinese": 0.37333333333333335, + "Malay,Indonesian,Filipino": 0.6066666666666667, + "Malay,Spanish,Chinese": 0.23333333333333334, + "Malay,Spanish,Filipino": 0.3333333333333333, + "Malay,Chinese,Filipino": 0.3333333333333333, + "Indonesian,Spanish,Chinese": 0.25333333333333335, + "Indonesian,Spanish,Filipino": 0.32, + "Indonesian,Chinese,Filipino": 0.35333333333333333, + "Spanish,Chinese,Filipino": 0.25333333333333335 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.3, + "English,Vietnamese,Malay,Spanish": 0.16666666666666666, + "English,Vietnamese,Malay,Chinese": 0.20666666666666667, + "English,Vietnamese,Malay,Filipino": 0.28, + "English,Vietnamese,Indonesian,Spanish": 0.18666666666666668, + "English,Vietnamese,Indonesian,Chinese": 0.26, + "English,Vietnamese,Indonesian,Filipino": 0.2733333333333333, + "English,Vietnamese,Spanish,Chinese": 0.16666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.17333333333333334, + "English,Vietnamese,Chinese,Filipino": 0.22666666666666666, + "English,Malay,Indonesian,Spanish": 0.23333333333333334, + "English,Malay,Indonesian,Chinese": 0.25333333333333335, + "English,Malay,Indonesian,Filipino": 0.4, + "English,Malay,Spanish,Chinese": 0.14666666666666667, + "English,Malay,Spanish,Filipino": 0.23333333333333334, + "English,Malay,Chinese,Filipino": 0.24666666666666667, + "English,Indonesian,Spanish,Chinese": 0.17333333333333334, + "English,Indonesian,Spanish,Filipino": 0.22666666666666666, + "English,Indonesian,Chinese,Filipino": 0.25333333333333335, + "English,Spanish,Chinese,Filipino": 0.18666666666666668, + "Vietnamese,Malay,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Malay,Indonesian,Chinese": 0.25333333333333335, + "Vietnamese,Malay,Indonesian,Filipino": 0.31333333333333335, + "Vietnamese,Malay,Spanish,Chinese": 0.16, + "Vietnamese,Malay,Spanish,Filipino": 0.18666666666666668, + "Vietnamese,Malay,Chinese,Filipino": 0.22666666666666666, + "Vietnamese,Indonesian,Spanish,Chinese": 0.18, + "Vietnamese,Indonesian,Spanish,Filipino": 0.18666666666666668, + "Vietnamese,Indonesian,Chinese,Filipino": 0.24666666666666667, + "Vietnamese,Spanish,Chinese,Filipino": 0.18, + "Malay,Indonesian,Spanish,Chinese": 0.20666666666666667, + "Malay,Indonesian,Spanish,Filipino": 0.29333333333333333, + "Malay,Indonesian,Chinese,Filipino": 0.31333333333333335, + "Malay,Spanish,Chinese,Filipino": 0.19333333333333333, + "Indonesian,Spanish,Chinese,Filipino": 0.2 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.15333333333333332, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.2, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.25333333333333335, + "English,Vietnamese,Malay,Spanish,Chinese": 0.1, + "English,Vietnamese,Malay,Spanish,Filipino": 0.14, + "English,Vietnamese,Malay,Chinese,Filipino": 0.18, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.12666666666666668, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.14, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.19333333333333333, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.12, + "English,Malay,Indonesian,Spanish,Chinese": 0.13333333333333333, + "English,Malay,Indonesian,Spanish,Filipino": 0.20666666666666667, + "English,Malay,Indonesian,Chinese,Filipino": 0.22666666666666666, + "English,Malay,Spanish,Chinese,Filipino": 0.14, + "English,Indonesian,Spanish,Chinese,Filipino": 0.14666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.14666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.17333333333333334, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.22, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.14, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.14666666666666667, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.18 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.09333333333333334, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.12666666666666668, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.17333333333333334, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.09333333333333334, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.1, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.12666666666666668, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.13333333333333333 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.08666666666666667 + } + }, + "AC3_2": 0.3249228534690836, + "AC3_3": 0.2750965250477327, + "AC3_4": 0.232340375674264, + "AC3_5": 0.1949756279981341, + "AC3_6": 0.1604142983006551, + "AC3_7": 0.12707722381228806 + }, + "prompt_5": { + "overall_acc": 0.26285714285714284, + "language_acc": { + "English": 0.22666666666666666, + "Vietnamese": 0.26, + "Malay": 0.2733333333333333, + "Indonesian": 0.22666666666666666, + "Spanish": 0.2866666666666667, + "Chinese": 0.28, + "Filipino": 0.2866666666666667 + }, + "consistency_score_2": 0.4933333333333334, + "consistency_score_3": 0.30380952380952386, + "consistency_score_4": 0.2015238095238095, + "consistency_score_5": 0.1365079365079365, + "consistency_score_6": 0.09142857142857144, + "consistency_score_7": 0.06, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4266666666666667, + "English,Malay": 0.5066666666666667, + "English,Indonesian": 0.5266666666666666, + "English,Spanish": 0.5266666666666666, + "English,Chinese": 0.47333333333333333, + "English,Filipino": 0.54, + "Vietnamese,Malay": 0.36666666666666664, + "Vietnamese,Indonesian": 0.41333333333333333, + "Vietnamese,Spanish": 0.3333333333333333, + "Vietnamese,Chinese": 0.48, + "Vietnamese,Filipino": 0.36666666666666664, + "Malay,Indonesian": 0.6133333333333333, + "Malay,Spanish": 0.54, + "Malay,Chinese": 0.4866666666666667, + "Malay,Filipino": 0.62, + "Indonesian,Spanish": 0.5533333333333333, + "Indonesian,Chinese": 0.5266666666666666, + "Indonesian,Filipino": 0.58, + "Spanish,Chinese": 0.46, + "Spanish,Filipino": 0.5533333333333333, + "Chinese,Filipino": 0.4666666666666667 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.23333333333333334, + "English,Vietnamese,Indonesian": 0.26, + "English,Vietnamese,Spanish": 0.21333333333333335, + "English,Vietnamese,Chinese": 0.26, + "English,Vietnamese,Filipino": 0.25333333333333335, + "English,Malay,Indonesian": 0.37333333333333335, + "English,Malay,Spanish": 0.3333333333333333, + "English,Malay,Chinese": 0.3, + "English,Malay,Filipino": 0.4, + "English,Indonesian,Spanish": 0.35333333333333333, + "English,Indonesian,Chinese": 0.3333333333333333, + "English,Indonesian,Filipino": 0.38666666666666666, + "English,Spanish,Chinese": 0.2866666666666667, + "English,Spanish,Filipino": 0.36666666666666664, + "English,Chinese,Filipino": 0.31333333333333335, + "Vietnamese,Malay,Indonesian": 0.25333333333333335, + "Vietnamese,Malay,Spanish": 0.18666666666666668, + "Vietnamese,Malay,Chinese": 0.24, + "Vietnamese,Malay,Filipino": 0.23333333333333334, + "Vietnamese,Indonesian,Spanish": 0.22, + "Vietnamese,Indonesian,Chinese": 0.2733333333333333, + "Vietnamese,Indonesian,Filipino": 0.24666666666666667, + "Vietnamese,Spanish,Chinese": 0.22, + "Vietnamese,Spanish,Filipino": 0.21333333333333335, + "Vietnamese,Chinese,Filipino": 0.22, + "Malay,Indonesian,Spanish": 0.41333333333333333, + "Malay,Indonesian,Chinese": 0.38, + "Malay,Indonesian,Filipino": 0.44666666666666666, + "Malay,Spanish,Chinese": 0.29333333333333333, + "Malay,Spanish,Filipino": 0.4066666666666667, + "Malay,Chinese,Filipino": 0.3333333333333333, + "Indonesian,Spanish,Chinese": 0.32, + "Indonesian,Spanish,Filipino": 0.41333333333333333, + "Indonesian,Chinese,Filipino": 0.3466666666666667, + "Spanish,Chinese,Filipino": 0.30666666666666664 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.18666666666666668, + "English,Vietnamese,Malay,Spanish": 0.13333333333333333, + "English,Vietnamese,Malay,Chinese": 0.16, + "English,Vietnamese,Malay,Filipino": 0.17333333333333334, + "English,Vietnamese,Indonesian,Spanish": 0.16666666666666666, + "English,Vietnamese,Indonesian,Chinese": 0.18666666666666668, + "English,Vietnamese,Indonesian,Filipino": 0.18, + "English,Vietnamese,Spanish,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.16, + "English,Vietnamese,Chinese,Filipino": 0.16, + "English,Malay,Indonesian,Spanish": 0.2733333333333333, + "English,Malay,Indonesian,Chinese": 0.24666666666666667, + "English,Malay,Indonesian,Filipino": 0.32, + "English,Malay,Spanish,Chinese": 0.19333333333333333, + "English,Malay,Spanish,Filipino": 0.2866666666666667, + "English,Malay,Chinese,Filipino": 0.23333333333333334, + "English,Indonesian,Spanish,Chinese": 0.22, + "English,Indonesian,Spanish,Filipino": 0.29333333333333333, + "English,Indonesian,Chinese,Filipino": 0.26, + "English,Spanish,Chinese,Filipino": 0.21333333333333335, + "Vietnamese,Malay,Indonesian,Spanish": 0.15333333333333332, + "Vietnamese,Malay,Indonesian,Chinese": 0.19333333333333333, + "Vietnamese,Malay,Indonesian,Filipino": 0.18, + "Vietnamese,Malay,Spanish,Chinese": 0.13333333333333333, + "Vietnamese,Malay,Spanish,Filipino": 0.15333333333333332, + "Vietnamese,Malay,Chinese,Filipino": 0.14666666666666667, + "Vietnamese,Indonesian,Spanish,Chinese": 0.14, + "Vietnamese,Indonesian,Spanish,Filipino": 0.15333333333333332, + "Vietnamese,Indonesian,Chinese,Filipino": 0.15333333333333332, + "Vietnamese,Spanish,Chinese,Filipino": 0.14666666666666667, + "Malay,Indonesian,Spanish,Chinese": 0.23333333333333334, + "Malay,Indonesian,Spanish,Filipino": 0.34, + "Malay,Indonesian,Chinese,Filipino": 0.28, + "Malay,Spanish,Chinese,Filipino": 0.22, + "Indonesian,Spanish,Chinese,Filipino": 0.23333333333333334 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.12, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.14, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.14666666666666667, + "English,Vietnamese,Malay,Spanish,Chinese": 0.09333333333333334, + "English,Vietnamese,Malay,Spanish,Filipino": 0.11333333333333333, + "English,Vietnamese,Malay,Chinese,Filipino": 0.11333333333333333, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.11333333333333333, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.12, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.12666666666666668, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.10666666666666667, + "English,Malay,Indonesian,Spanish,Chinese": 0.16, + "English,Malay,Indonesian,Spanish,Filipino": 0.24666666666666667, + "English,Malay,Indonesian,Chinese,Filipino": 0.20666666666666667, + "English,Malay,Spanish,Chinese,Filipino": 0.16, + "English,Indonesian,Spanish,Chinese,Filipino": 0.17333333333333334, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.1, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.12666666666666668, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.12, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.1, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.09333333333333334, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.18666666666666668 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.08, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.1, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.1, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.07333333333333333, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.07333333333333333, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.14, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.07333333333333333 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.06 + } + }, + "AC3_2": 0.3429722921460805, + "AC3_3": 0.2818535413668278, + "AC3_4": 0.22814016167593953, + "AC3_5": 0.17969566200360784, + "AC3_6": 0.1356682027266835, + "AC3_7": 0.09769911501398701 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.24675324675324672, + "language_acc": { + "Vietnamese": 0.26136363636363635, + "Indonesian": 0.24431818181818182, + "Malay": 0.22727272727272727, + "English": 0.2556818181818182, + "Spanish": 0.25, + "Filipino": 0.23863636363636365, + "Chinese": 0.25 + }, + "consistency_score_2": 0.4667207792207792, + "consistency_score_3": 0.27840909090909083, + "consistency_score_4": 0.1831168831168831, + "consistency_score_5": 0.12662337662337664, + "consistency_score_6": 0.08928571428571429, + "consistency_score_7": 0.0625, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.5, + "Vietnamese,Malay": 0.48295454545454547, + "Vietnamese,English": 0.5227272727272727, + "Vietnamese,Spanish": 0.44886363636363635, + "Vietnamese,Filipino": 0.5113636363636364, + "Vietnamese,Chinese": 0.4318181818181818, + "Indonesian,Malay": 0.8125, + "Indonesian,English": 0.4318181818181818, + "Indonesian,Spanish": 0.18181818181818182, + "Indonesian,Filipino": 0.8522727272727273, + "Indonesian,Chinese": 0.5340909090909091, + "Malay,English": 0.4090909090909091, + "Malay,Spanish": 0.16477272727272727, + "Malay,Filipino": 0.8181818181818182, + "Malay,Chinese": 0.5170454545454546, + "English,Spanish": 0.4375, + "English,Filipino": 0.45454545454545453, + "English,Chinese": 0.4034090909090909, + "Spanish,Filipino": 0.16477272727272727, + "Spanish,Chinese": 0.22727272727272727, + "Filipino,Chinese": 0.4943181818181818 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.42613636363636365, + "Vietnamese,Indonesian,English": 0.3125, + "Vietnamese,Indonesian,Spanish": 0.1534090909090909, + "Vietnamese,Indonesian,Filipino": 0.4602272727272727, + "Vietnamese,Indonesian,Chinese": 0.29545454545454547, + "Vietnamese,Malay,English": 0.3068181818181818, + "Vietnamese,Malay,Spanish": 0.13068181818181818, + "Vietnamese,Malay,Filipino": 0.4375, + "Vietnamese,Malay,Chinese": 0.2840909090909091, + "Vietnamese,English,Spanish": 0.2840909090909091, + "Vietnamese,English,Filipino": 0.3352272727272727, + "Vietnamese,English,Chinese": 0.2784090909090909, + "Vietnamese,Spanish,Filipino": 0.14204545454545456, + "Vietnamese,Spanish,Chinese": 0.16477272727272727, + "Vietnamese,Filipino,Chinese": 0.30113636363636365, + "Indonesian,Malay,English": 0.375, + "Indonesian,Malay,Spanish": 0.14772727272727273, + "Indonesian,Malay,Filipino": 0.75, + "Indonesian,Malay,Chinese": 0.45454545454545453, + "Indonesian,English,Spanish": 0.14204545454545456, + "Indonesian,English,Filipino": 0.4090909090909091, + "Indonesian,English,Chinese": 0.2784090909090909, + "Indonesian,Spanish,Filipino": 0.1590909090909091, + "Indonesian,Spanish,Chinese": 0.11931818181818182, + "Indonesian,Filipino,Chinese": 0.4602272727272727, + "Malay,English,Spanish": 0.125, + "Malay,English,Filipino": 0.38636363636363635, + "Malay,English,Chinese": 0.2784090909090909, + "Malay,Spanish,Filipino": 0.14772727272727273, + "Malay,Spanish,Chinese": 0.09659090909090909, + "Malay,Filipino,Chinese": 0.4431818181818182, + "English,Spanish,Filipino": 0.13068181818181818, + "English,Spanish,Chinese": 0.14204545454545456, + "English,Filipino,Chinese": 0.2897727272727273, + "Spanish,Filipino,Chinese": 0.09659090909090909 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.2784090909090909, + "Vietnamese,Indonesian,Malay,Spanish": 0.125, + "Vietnamese,Indonesian,Malay,Filipino": 0.4034090909090909, + "Vietnamese,Indonesian,Malay,Chinese": 0.26136363636363635, + "Vietnamese,Indonesian,English,Spanish": 0.125, + "Vietnamese,Indonesian,English,Filipino": 0.30113636363636365, + "Vietnamese,Indonesian,English,Chinese": 0.20454545454545456, + "Vietnamese,Indonesian,Spanish,Filipino": 0.13636363636363635, + "Vietnamese,Indonesian,Spanish,Chinese": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino,Chinese": 0.2784090909090909, + "Vietnamese,Malay,English,Spanish": 0.11363636363636363, + "Vietnamese,Malay,English,Filipino": 0.2897727272727273, + "Vietnamese,Malay,English,Chinese": 0.21022727272727273, + "Vietnamese,Malay,Spanish,Filipino": 0.125, + "Vietnamese,Malay,Spanish,Chinese": 0.07954545454545454, + "Vietnamese,Malay,Filipino,Chinese": 0.26704545454545453, + "Vietnamese,English,Spanish,Filipino": 0.11931818181818182, + "Vietnamese,English,Spanish,Chinese": 0.125, + "Vietnamese,English,Filipino,Chinese": 0.2159090909090909, + "Vietnamese,Spanish,Filipino,Chinese": 0.08522727272727272, + "Indonesian,Malay,English,Spanish": 0.11931818181818182, + "Indonesian,Malay,English,Filipino": 0.36363636363636365, + "Indonesian,Malay,English,Chinese": 0.2556818181818182, + "Indonesian,Malay,Spanish,Filipino": 0.14204545454545456, + "Indonesian,Malay,Spanish,Chinese": 0.09090909090909091, + "Indonesian,Malay,Filipino,Chinese": 0.42613636363636365, + "Indonesian,English,Spanish,Filipino": 0.125, + "Indonesian,English,Spanish,Chinese": 0.08522727272727272, + "Indonesian,English,Filipino,Chinese": 0.2556818181818182, + "Indonesian,Spanish,Filipino,Chinese": 0.09659090909090909, + "Malay,English,Spanish,Filipino": 0.11931818181818182, + "Malay,English,Spanish,Chinese": 0.06818181818181818, + "Malay,English,Filipino,Chinese": 0.26136363636363635, + "Malay,Spanish,Filipino,Chinese": 0.08522727272727272, + "English,Spanish,Filipino,Chinese": 0.06818181818181818 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.10795454545454546, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.2727272727272727, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.19318181818181818, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.11931818181818182, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.07954545454545454, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.2556818181818182, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.11363636363636363, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.07954545454545454, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.19318181818181818, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.08522727272727272, + "Vietnamese,Malay,English,Spanish,Filipino": 0.10795454545454546, + "Vietnamese,Malay,English,Spanish,Chinese": 0.06818181818181818, + "Vietnamese,Malay,English,Filipino,Chinese": 0.19886363636363635, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.07386363636363637, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.06818181818181818, + "Indonesian,Malay,English,Spanish,Filipino": 0.11363636363636363, + "Indonesian,Malay,English,Spanish,Chinese": 0.06818181818181818, + "Indonesian,Malay,English,Filipino,Chinese": 0.24431818181818182, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.08522727272727272, + "Indonesian,English,Spanish,Filipino,Chinese": 0.06818181818181818, + "Malay,English,Spanish,Filipino,Chinese": 0.0625 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.10227272727272728, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.06818181818181818, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.1875, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.07386363636363637, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.06818181818181818, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.0625, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.0625 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.0625 + } + }, + "AC3_2": 0.3228284797798194, + "AC3_3": 0.26162709001622064, + "AC3_4": 0.2102248204478195, + "AC3_5": 0.16736307166607362, + "AC3_6": 0.13112491369458865, + "AC3_7": 0.09973753277614786 + }, + "prompt_2": { + "overall_acc": 0.2540584415584416, + "language_acc": { + "Vietnamese": 0.21022727272727273, + "Indonesian": 0.22727272727272727, + "Malay": 0.26704545454545453, + "English": 0.26136363636363635, + "Spanish": 0.2727272727272727, + "Filipino": 0.25, + "Chinese": 0.2897727272727273 + }, + "consistency_score_2": 0.5995670995670996, + "consistency_score_3": 0.43376623376623374, + "consistency_score_4": 0.3336038961038962, + "consistency_score_5": 0.2632575757575758, + "consistency_score_6": 0.21103896103896105, + "consistency_score_7": 0.17045454545454544, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.7329545454545454, + "Vietnamese,Malay": 0.7102272727272727, + "Vietnamese,English": 0.4431818181818182, + "Vietnamese,Spanish": 0.5511363636363636, + "Vietnamese,Filipino": 0.7329545454545454, + "Vietnamese,Chinese": 0.5340909090909091, + "Indonesian,Malay": 0.8977272727272727, + "Indonesian,English": 0.44886363636363635, + "Indonesian,Spanish": 0.5454545454545454, + "Indonesian,Filipino": 0.9034090909090909, + "Indonesian,Chinese": 0.6022727272727273, + "Malay,English": 0.4090909090909091, + "Malay,Spanish": 0.5511363636363636, + "Malay,Filipino": 0.9147727272727273, + "Malay,Chinese": 0.5795454545454546, + "English,Spanish": 0.6079545454545454, + "English,Filipino": 0.4090909090909091, + "English,Chinese": 0.45454545454545453, + "Spanish,Filipino": 0.5511363636363636, + "Spanish,Chinese": 0.42045454545454547, + "Filipino,Chinese": 0.5909090909090909 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.6875, + "Vietnamese,Indonesian,English": 0.3409090909090909, + "Vietnamese,Indonesian,Spanish": 0.4602272727272727, + "Vietnamese,Indonesian,Filipino": 0.6931818181818182, + "Vietnamese,Indonesian,Chinese": 0.4659090909090909, + "Vietnamese,Malay,English": 0.3125, + "Vietnamese,Malay,Spanish": 0.4431818181818182, + "Vietnamese,Malay,Filipino": 0.6875, + "Vietnamese,Malay,Chinese": 0.44886363636363635, + "Vietnamese,English,Spanish": 0.32386363636363635, + "Vietnamese,English,Filipino": 0.32954545454545453, + "Vietnamese,English,Chinese": 0.2840909090909091, + "Vietnamese,Spanish,Filipino": 0.45454545454545453, + "Vietnamese,Spanish,Chinese": 0.3409090909090909, + "Vietnamese,Filipino,Chinese": 0.4659090909090909, + "Indonesian,Malay,English": 0.3977272727272727, + "Indonesian,Malay,Spanish": 0.5113636363636364, + "Indonesian,Malay,Filipino": 0.8579545454545454, + "Indonesian,Malay,Chinese": 0.5454545454545454, + "Indonesian,English,Spanish": 0.3181818181818182, + "Indonesian,English,Filipino": 0.4034090909090909, + "Indonesian,English,Chinese": 0.32386363636363635, + "Indonesian,Spanish,Filipino": 0.5170454545454546, + "Indonesian,Spanish,Chinese": 0.3522727272727273, + "Indonesian,Filipino,Chinese": 0.5625, + "Malay,English,Spanish": 0.3068181818181818, + "Malay,English,Filipino": 0.38636363636363635, + "Malay,English,Chinese": 0.2897727272727273, + "Malay,Spanish,Filipino": 0.5284090909090909, + "Malay,Spanish,Chinese": 0.3465909090909091, + "Malay,Filipino,Chinese": 0.5511363636363636, + "English,Spanish,Filipino": 0.3068181818181818, + "English,Spanish,Chinese": 0.2727272727272727, + "English,Filipino,Chinese": 0.30113636363636365, + "Spanish,Filipino,Chinese": 0.36363636363636365 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.3068181818181818, + "Vietnamese,Indonesian,Malay,Spanish": 0.4318181818181818, + "Vietnamese,Indonesian,Malay,Filipino": 0.6704545454545454, + "Vietnamese,Indonesian,Malay,Chinese": 0.4318181818181818, + "Vietnamese,Indonesian,English,Spanish": 0.26136363636363635, + "Vietnamese,Indonesian,English,Filipino": 0.32386363636363635, + "Vietnamese,Indonesian,English,Chinese": 0.25, + "Vietnamese,Indonesian,Spanish,Filipino": 0.4431818181818182, + "Vietnamese,Indonesian,Spanish,Chinese": 0.3125, + "Vietnamese,Indonesian,Filipino,Chinese": 0.44886363636363635, + "Vietnamese,Malay,English,Spanish": 0.24431818181818182, + "Vietnamese,Malay,English,Filipino": 0.3125, + "Vietnamese,Malay,English,Chinese": 0.22727272727272727, + "Vietnamese,Malay,Spanish,Filipino": 0.4375, + "Vietnamese,Malay,Spanish,Chinese": 0.30113636363636365, + "Vietnamese,Malay,Filipino,Chinese": 0.4431818181818182, + "Vietnamese,English,Spanish,Filipino": 0.26136363636363635, + "Vietnamese,English,Spanish,Chinese": 0.2159090909090909, + "Vietnamese,English,Filipino,Chinese": 0.24431818181818182, + "Vietnamese,Spanish,Filipino,Chinese": 0.3181818181818182, + "Indonesian,Malay,English,Spanish": 0.29545454545454547, + "Indonesian,Malay,English,Filipino": 0.3806818181818182, + "Indonesian,Malay,English,Chinese": 0.2840909090909091, + "Indonesian,Malay,Spanish,Filipino": 0.4943181818181818, + "Indonesian,Malay,Spanish,Chinese": 0.32386363636363635, + "Indonesian,Malay,Filipino,Chinese": 0.5227272727272727, + "Indonesian,English,Spanish,Filipino": 0.30113636363636365, + "Indonesian,English,Spanish,Chinese": 0.2215909090909091, + "Indonesian,English,Filipino,Chinese": 0.29545454545454547, + "Indonesian,Spanish,Filipino,Chinese": 0.3409090909090909, + "Malay,English,Spanish,Filipino": 0.2897727272727273, + "Malay,English,Spanish,Chinese": 0.20454545454545456, + "Malay,English,Filipino,Chinese": 0.2784090909090909, + "Malay,Spanish,Filipino,Chinese": 0.3409090909090909, + "English,Spanish,Filipino,Chinese": 0.2159090909090909 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.23863636363636365, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.3068181818181818, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.2215909090909091, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.42613636363636365, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.2897727272727273, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.42613636363636365, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.2556818181818182, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.19318181818181818, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.23863636363636365, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.3068181818181818, + "Vietnamese,Malay,English,Spanish,Filipino": 0.24431818181818182, + "Vietnamese,Malay,English,Spanish,Chinese": 0.17613636363636365, + "Vietnamese,Malay,English,Filipino,Chinese": 0.22727272727272727, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.30113636363636365, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.19318181818181818, + "Indonesian,Malay,English,Spanish,Filipino": 0.2840909090909091, + "Indonesian,Malay,English,Spanish,Chinese": 0.19886363636363635, + "Indonesian,Malay,English,Filipino,Chinese": 0.2727272727272727, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.3181818181818182, + "Indonesian,English,Spanish,Filipino,Chinese": 0.21022727272727273, + "Malay,English,Spanish,Filipino,Chinese": 0.19886363636363635 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.23863636363636365, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.2215909090909091, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.2897727272727273, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.1875, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.17613636363636365, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.19318181818181818 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.17045454545454544 + } + }, + "AC3_2": 0.35688970296516853, + "AC3_3": 0.3204362311755778, + "AC3_4": 0.28844756757666057, + "AC3_5": 0.2585762171428649, + "AC3_6": 0.23055914412213743, + "AC3_7": 0.20402398743672984 + }, + "prompt_3": { + "overall_acc": 0.24675324675324675, + "language_acc": { + "Vietnamese": 0.24431818181818182, + "Indonesian": 0.2215909090909091, + "Malay": 0.23863636363636365, + "English": 0.23863636363636365, + "Spanish": 0.2727272727272727, + "Filipino": 0.23863636363636365, + "Chinese": 0.2727272727272727 + }, + "consistency_score_2": 0.6260822510822509, + "consistency_score_3": 0.472564935064935, + "consistency_score_4": 0.37499999999999983, + "consistency_score_5": 0.30303030303030304, + "consistency_score_6": 0.24594155844155846, + "consistency_score_7": 0.19886363636363635, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.8125, + "Vietnamese,Malay": 0.7670454545454546, + "Vietnamese,English": 0.6022727272727273, + "Vietnamese,Spanish": 0.4943181818181818, + "Vietnamese,Filipino": 0.7784090909090909, + "Vietnamese,Chinese": 0.6079545454545454, + "Indonesian,Malay": 0.9090909090909091, + "Indonesian,English": 0.5625, + "Indonesian,Spanish": 0.4318181818181818, + "Indonesian,Filipino": 0.9375, + "Indonesian,Chinese": 0.6477272727272727, + "Malay,English": 0.5397727272727273, + "Malay,Spanish": 0.42613636363636365, + "Malay,Filipino": 0.9261363636363636, + "Malay,Chinese": 0.6306818181818182, + "English,Spanish": 0.5340909090909091, + "English,Filipino": 0.5340909090909091, + "English,Chinese": 0.5568181818181818, + "Spanish,Filipino": 0.42613636363636365, + "Spanish,Chinese": 0.3977272727272727, + "Filipino,Chinese": 0.625 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.75, + "Vietnamese,Indonesian,English": 0.5170454545454546, + "Vietnamese,Indonesian,Spanish": 0.39204545454545453, + "Vietnamese,Indonesian,Filipino": 0.7670454545454546, + "Vietnamese,Indonesian,Chinese": 0.5738636363636364, + "Vietnamese,Malay,English": 0.48295454545454547, + "Vietnamese,Malay,Spanish": 0.375, + "Vietnamese,Malay,Filipino": 0.7386363636363636, + "Vietnamese,Malay,Chinese": 0.5454545454545454, + "Vietnamese,English,Spanish": 0.3522727272727273, + "Vietnamese,English,Filipino": 0.48863636363636365, + "Vietnamese,English,Chinese": 0.4431818181818182, + "Vietnamese,Spanish,Filipino": 0.375, + "Vietnamese,Spanish,Chinese": 0.29545454545454547, + "Vietnamese,Filipino,Chinese": 0.5454545454545454, + "Indonesian,Malay,English": 0.5227272727272727, + "Indonesian,Malay,Spanish": 0.4034090909090909, + "Indonesian,Malay,Filipino": 0.8863636363636364, + "Indonesian,Malay,Chinese": 0.6079545454545454, + "Indonesian,English,Spanish": 0.3068181818181818, + "Indonesian,English,Filipino": 0.5284090909090909, + "Indonesian,English,Chinese": 0.44886363636363635, + "Indonesian,Spanish,Filipino": 0.4090909090909091, + "Indonesian,Spanish,Chinese": 0.3068181818181818, + "Indonesian,Filipino,Chinese": 0.6136363636363636, + "Malay,English,Spanish": 0.2897727272727273, + "Malay,English,Filipino": 0.5113636363636364, + "Malay,English,Chinese": 0.4375, + "Malay,Spanish,Filipino": 0.4090909090909091, + "Malay,Spanish,Chinese": 0.3068181818181818, + "Malay,Filipino,Chinese": 0.5965909090909091, + "English,Spanish,Filipino": 0.29545454545454547, + "English,Spanish,Chinese": 0.2840909090909091, + "English,Filipino,Chinese": 0.4318181818181818, + "Spanish,Filipino,Chinese": 0.30113636363636365 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.4772727272727273, + "Vietnamese,Indonesian,Malay,Spanish": 0.36363636363636365, + "Vietnamese,Indonesian,Malay,Filipino": 0.7272727272727273, + "Vietnamese,Indonesian,Malay,Chinese": 0.5397727272727273, + "Vietnamese,Indonesian,English,Spanish": 0.2897727272727273, + "Vietnamese,Indonesian,English,Filipino": 0.48295454545454547, + "Vietnamese,Indonesian,English,Chinese": 0.4147727272727273, + "Vietnamese,Indonesian,Spanish,Filipino": 0.3693181818181818, + "Vietnamese,Indonesian,Spanish,Chinese": 0.2784090909090909, + "Vietnamese,Indonesian,Filipino,Chinese": 0.5397727272727273, + "Vietnamese,Malay,English,Spanish": 0.2727272727272727, + "Vietnamese,Malay,English,Filipino": 0.4659090909090909, + "Vietnamese,Malay,English,Chinese": 0.4034090909090909, + "Vietnamese,Malay,Spanish,Filipino": 0.35795454545454547, + "Vietnamese,Malay,Spanish,Chinese": 0.2727272727272727, + "Vietnamese,Malay,Filipino,Chinese": 0.5227272727272727, + "Vietnamese,English,Spanish,Filipino": 0.2784090909090909, + "Vietnamese,English,Spanish,Chinese": 0.22727272727272727, + "Vietnamese,English,Filipino,Chinese": 0.3977272727272727, + "Vietnamese,Spanish,Filipino,Chinese": 0.26704545454545453, + "Indonesian,Malay,English,Spanish": 0.2840909090909091, + "Indonesian,Malay,English,Filipino": 0.5056818181818182, + "Indonesian,Malay,English,Chinese": 0.4318181818181818, + "Indonesian,Malay,Spanish,Filipino": 0.39204545454545453, + "Indonesian,Malay,Spanish,Chinese": 0.29545454545454547, + "Indonesian,Malay,Filipino,Chinese": 0.5852272727272727, + "Indonesian,English,Spanish,Filipino": 0.2897727272727273, + "Indonesian,English,Spanish,Chinese": 0.22727272727272727, + "Indonesian,English,Filipino,Chinese": 0.42613636363636365, + "Indonesian,Spanish,Filipino,Chinese": 0.2897727272727273, + "Malay,English,Spanish,Filipino": 0.2840909090909091, + "Malay,English,Spanish,Chinese": 0.22727272727272727, + "Malay,English,Filipino,Chinese": 0.42045454545454547, + "Malay,Spanish,Filipino,Chinese": 0.29545454545454547, + "English,Spanish,Filipino,Chinese": 0.2215909090909091 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.26704545454545453, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.4602272727272727, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.3977272727272727, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.3522727272727273, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.26704545454545453, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.5170454545454546, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.2727272727272727, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.21022727272727273, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.39204545454545453, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.26136363636363635, + "Vietnamese,Malay,English,Spanish,Filipino": 0.26704545454545453, + "Vietnamese,Malay,English,Spanish,Chinese": 0.21022727272727273, + "Vietnamese,Malay,English,Filipino,Chinese": 0.38636363636363635, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.26136363636363635, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.20454545454545456, + "Indonesian,Malay,English,Spanish,Filipino": 0.2784090909090909, + "Indonesian,Malay,English,Spanish,Chinese": 0.2215909090909091, + "Indonesian,Malay,English,Filipino,Chinese": 0.4147727272727273, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.2840909090909091, + "Indonesian,English,Spanish,Filipino,Chinese": 0.2159090909090909, + "Malay,English,Spanish,Filipino,Chinese": 0.2215909090909091 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.26136363636363635, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.20454545454545456, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.3806818181818182, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.2556818181818182, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.19886363636363635, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.20454545454545456, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.2159090909090909 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.19886363636363635 + } + }, + "AC3_2": 0.35399070856887505, + "AC3_3": 0.3242151664178935, + "AC3_4": 0.2976501305004301, + "AC3_5": 0.2720114530643392, + "AC3_6": 0.2463467339408857, + "AC3_7": 0.22023513821850263 + }, + "prompt_4": { + "overall_acc": 0.24756493506493507, + "language_acc": { + "Vietnamese": 0.24431818181818182, + "Indonesian": 0.23295454545454544, + "Malay": 0.25, + "English": 0.22727272727272727, + "Spanish": 0.26136363636363635, + "Filipino": 0.23863636363636365, + "Chinese": 0.2784090909090909 + }, + "consistency_score_2": 0.5116341991341992, + "consistency_score_3": 0.29999999999999993, + "consistency_score_4": 0.19334415584415585, + "consistency_score_5": 0.13501082251082253, + "consistency_score_6": 0.09902597402597402, + "consistency_score_7": 0.07386363636363637, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.4375, + "Vietnamese,Malay": 0.42045454545454547, + "Vietnamese,English": 0.6193181818181818, + "Vietnamese,Spanish": 0.5852272727272727, + "Vietnamese,Filipino": 0.4090909090909091, + "Vietnamese,Chinese": 0.48863636363636365, + "Indonesian,Malay": 0.9431818181818182, + "Indonesian,English": 0.3522727272727273, + "Indonesian,Spanish": 0.29545454545454547, + "Indonesian,Filipino": 0.9545454545454546, + "Indonesian,Chinese": 0.5284090909090909, + "Malay,English": 0.3352272727272727, + "Malay,Spanish": 0.2897727272727273, + "Malay,Filipino": 0.9431818181818182, + "Malay,Chinese": 0.5113636363636364, + "English,Spanish": 0.6363636363636364, + "English,Filipino": 0.3181818181818182, + "English,Chinese": 0.4943181818181818, + "Spanish,Filipino": 0.2727272727272727, + "Spanish,Chinese": 0.4090909090909091, + "Filipino,Chinese": 0.5 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.4090909090909091, + "Vietnamese,Indonesian,English": 0.23863636363636365, + "Vietnamese,Indonesian,Spanish": 0.1875, + "Vietnamese,Indonesian,Filipino": 0.4034090909090909, + "Vietnamese,Indonesian,Chinese": 0.29545454545454547, + "Vietnamese,Malay,English": 0.2215909090909091, + "Vietnamese,Malay,Spanish": 0.18181818181818182, + "Vietnamese,Malay,Filipino": 0.39204545454545453, + "Vietnamese,Malay,Chinese": 0.2840909090909091, + "Vietnamese,English,Spanish": 0.44886363636363635, + "Vietnamese,English,Filipino": 0.21022727272727273, + "Vietnamese,English,Chinese": 0.3352272727272727, + "Vietnamese,Spanish,Filipino": 0.16477272727272727, + "Vietnamese,Spanish,Chinese": 0.26704545454545453, + "Vietnamese,Filipino,Chinese": 0.26704545454545453, + "Indonesian,Malay,English": 0.32386363636363635, + "Indonesian,Malay,Spanish": 0.26704545454545453, + "Indonesian,Malay,Filipino": 0.9204545454545454, + "Indonesian,Malay,Chinese": 0.5, + "Indonesian,English,Spanish": 0.19318181818181818, + "Indonesian,English,Filipino": 0.3125, + "Indonesian,English,Chinese": 0.2556818181818182, + "Indonesian,Spanish,Filipino": 0.26136363636363635, + "Indonesian,Spanish,Chinese": 0.17613636363636365, + "Indonesian,Filipino,Chinese": 0.4943181818181818, + "Malay,English,Spanish": 0.18181818181818182, + "Malay,English,Filipino": 0.3068181818181818, + "Malay,English,Chinese": 0.24431818181818182, + "Malay,Spanish,Filipino": 0.2556818181818182, + "Malay,Spanish,Chinese": 0.16477272727272727, + "Malay,Filipino,Chinese": 0.48863636363636365, + "English,Spanish,Filipino": 0.16477272727272727, + "English,Spanish,Chinese": 0.3125, + "English,Filipino,Chinese": 0.2215909090909091, + "Spanish,Filipino,Chinese": 0.14772727272727273 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.2159090909090909, + "Vietnamese,Indonesian,Malay,Spanish": 0.17045454545454544, + "Vietnamese,Indonesian,Malay,Filipino": 0.39204545454545453, + "Vietnamese,Indonesian,Malay,Chinese": 0.2784090909090909, + "Vietnamese,Indonesian,English,Spanish": 0.13636363636363635, + "Vietnamese,Indonesian,English,Filipino": 0.20454545454545456, + "Vietnamese,Indonesian,English,Chinese": 0.17613636363636365, + "Vietnamese,Indonesian,Spanish,Filipino": 0.1590909090909091, + "Vietnamese,Indonesian,Spanish,Chinese": 0.125, + "Vietnamese,Indonesian,Filipino,Chinese": 0.26704545454545453, + "Vietnamese,Malay,English,Spanish": 0.125, + "Vietnamese,Malay,English,Filipino": 0.19886363636363635, + "Vietnamese,Malay,English,Chinese": 0.17045454545454544, + "Vietnamese,Malay,Spanish,Filipino": 0.1534090909090909, + "Vietnamese,Malay,Spanish,Chinese": 0.11931818181818182, + "Vietnamese,Malay,Filipino,Chinese": 0.26136363636363635, + "Vietnamese,English,Spanish,Filipino": 0.11363636363636363, + "Vietnamese,English,Spanish,Chinese": 0.23295454545454544, + "Vietnamese,English,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Spanish,Filipino,Chinese": 0.09659090909090909, + "Indonesian,Malay,English,Spanish": 0.17045454545454544, + "Indonesian,Malay,English,Filipino": 0.3068181818181818, + "Indonesian,Malay,English,Chinese": 0.23863636363636365, + "Indonesian,Malay,Spanish,Filipino": 0.25, + "Indonesian,Malay,Spanish,Chinese": 0.1534090909090909, + "Indonesian,Malay,Filipino,Chinese": 0.48295454545454547, + "Indonesian,English,Spanish,Filipino": 0.1590909090909091, + "Indonesian,English,Spanish,Chinese": 0.14204545454545456, + "Indonesian,English,Filipino,Chinese": 0.2215909090909091, + "Indonesian,Spanish,Filipino,Chinese": 0.14204545454545456, + "Malay,English,Spanish,Filipino": 0.1534090909090909, + "Malay,English,Spanish,Chinese": 0.13068181818181818, + "Malay,English,Filipino,Chinese": 0.2215909090909091, + "Malay,Spanish,Filipino,Chinese": 0.14204545454545456, + "English,Spanish,Filipino,Chinese": 0.10795454545454546 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.11931818181818182, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.19886363636363635, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.16477272727272727, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.1534090909090909, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.11363636363636363, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.26136363636363635, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.10795454545454546, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.10227272727272728, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.09659090909090909, + "Vietnamese,Malay,English,Spanish,Filipino": 0.10227272727272728, + "Vietnamese,Malay,English,Spanish,Chinese": 0.09659090909090909, + "Vietnamese,Malay,English,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.09659090909090909, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.07386363636363637, + "Indonesian,Malay,English,Spanish,Filipino": 0.1534090909090909, + "Indonesian,Malay,English,Spanish,Chinese": 0.125, + "Indonesian,Malay,English,Filipino,Chinese": 0.2215909090909091, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.13636363636363635, + "Indonesian,English,Spanish,Filipino,Chinese": 0.10795454545454546, + "Malay,English,Spanish,Filipino,Chinese": 0.10795454545454546 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.10227272727272728, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.09090909090909091, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.09659090909090909, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.07386363636363637, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.07386363636363637, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.10795454545454546 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.07386363636363637 + } + }, + "AC3_2": 0.333674477652266, + "AC3_3": 0.27127186475923404, + "AC3_4": 0.21712064624113733, + "AC3_5": 0.17473112102561852, + "AC3_6": 0.14146567713914657, + "AC3_7": 0.11377984385808208 + }, + "prompt_5": { + "overall_acc": 0.24675324675324672, + "language_acc": { + "Vietnamese": 0.25, + "Indonesian": 0.22727272727272727, + "Malay": 0.22727272727272727, + "English": 0.23295454545454544, + "Spanish": 0.2556818181818182, + "Filipino": 0.23863636363636365, + "Chinese": 0.29545454545454547 + }, + "consistency_score_2": 0.44237012987012975, + "consistency_score_3": 0.23620129870129872, + "consistency_score_4": 0.14464285714285716, + "consistency_score_5": 0.10010822510822512, + "consistency_score_6": 0.0762987012987013, + "consistency_score_7": 0.0625, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.39204545454545453, + "Vietnamese,Malay": 0.38636363636363635, + "Vietnamese,English": 0.5568181818181818, + "Vietnamese,Spanish": 0.5284090909090909, + "Vietnamese,Filipino": 0.3693181818181818, + "Vietnamese,Chinese": 0.35795454545454547, + "Indonesian,Malay": 0.8068181818181818, + "Indonesian,English": 0.3181818181818182, + "Indonesian,Spanish": 0.3352272727272727, + "Indonesian,Filipino": 0.8011363636363636, + "Indonesian,Chinese": 0.39204545454545453, + "Malay,English": 0.3068181818181818, + "Malay,Spanish": 0.3125, + "Malay,Filipino": 0.8977272727272727, + "Malay,Chinese": 0.375, + "English,Spanish": 0.5965909090909091, + "English,Filipino": 0.26136363636363635, + "English,Chinese": 0.2840909090909091, + "Spanish,Filipino": 0.2897727272727273, + "Spanish,Chinese": 0.375, + "Filipino,Chinese": 0.3465909090909091 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.32954545454545453, + "Vietnamese,Indonesian,English": 0.2159090909090909, + "Vietnamese,Indonesian,Spanish": 0.19318181818181818, + "Vietnamese,Indonesian,Filipino": 0.3068181818181818, + "Vietnamese,Indonesian,Chinese": 0.19318181818181818, + "Vietnamese,Malay,English": 0.19886363636363635, + "Vietnamese,Malay,Spanish": 0.16477272727272727, + "Vietnamese,Malay,Filipino": 0.3409090909090909, + "Vietnamese,Malay,Chinese": 0.1875, + "Vietnamese,English,Spanish": 0.39204545454545453, + "Vietnamese,English,Filipino": 0.17045454545454544, + "Vietnamese,English,Chinese": 0.20454545454545456, + "Vietnamese,Spanish,Filipino": 0.14204545454545456, + "Vietnamese,Spanish,Chinese": 0.19886363636363635, + "Vietnamese,Filipino,Chinese": 0.17613636363636365, + "Indonesian,Malay,English": 0.2556818181818182, + "Indonesian,Malay,Spanish": 0.26136363636363635, + "Indonesian,Malay,Filipino": 0.7556818181818182, + "Indonesian,Malay,Chinese": 0.3125, + "Indonesian,English,Spanish": 0.20454545454545456, + "Indonesian,English,Filipino": 0.2215909090909091, + "Indonesian,English,Chinese": 0.1590909090909091, + "Indonesian,Spanish,Filipino": 0.23863636363636365, + "Indonesian,Spanish,Chinese": 0.17045454545454544, + "Indonesian,Filipino,Chinese": 0.29545454545454547, + "Malay,English,Spanish": 0.1875, + "Malay,English,Filipino": 0.24431818181818182, + "Malay,English,Chinese": 0.14772727272727273, + "Malay,Spanish,Filipino": 0.2727272727272727, + "Malay,Spanish,Chinese": 0.16477272727272727, + "Malay,Filipino,Chinese": 0.32954545454545453, + "English,Spanish,Filipino": 0.1590909090909091, + "English,Spanish,Chinese": 0.19318181818181818, + "English,Filipino,Chinese": 0.125, + "Spanish,Filipino,Chinese": 0.1534090909090909 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.17045454545454544, + "Vietnamese,Indonesian,Malay,Spanish": 0.14772727272727273, + "Vietnamese,Indonesian,Malay,Filipino": 0.2897727272727273, + "Vietnamese,Indonesian,Malay,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,English,Spanish": 0.1590909090909091, + "Vietnamese,Indonesian,English,Filipino": 0.14204545454545456, + "Vietnamese,Indonesian,English,Chinese": 0.125, + "Vietnamese,Indonesian,Spanish,Filipino": 0.11931818181818182, + "Vietnamese,Indonesian,Spanish,Chinese": 0.10795454545454546, + "Vietnamese,Indonesian,Filipino,Chinese": 0.1590909090909091, + "Vietnamese,Malay,English,Spanish": 0.13068181818181818, + "Vietnamese,Malay,English,Filipino": 0.1534090909090909, + "Vietnamese,Malay,English,Chinese": 0.11363636363636363, + "Vietnamese,Malay,Spanish,Filipino": 0.13068181818181818, + "Vietnamese,Malay,Spanish,Chinese": 0.09659090909090909, + "Vietnamese,Malay,Filipino,Chinese": 0.1590909090909091, + "Vietnamese,English,Spanish,Filipino": 0.10795454545454546, + "Vietnamese,English,Spanish,Chinese": 0.14772727272727273, + "Vietnamese,English,Filipino,Chinese": 0.09659090909090909, + "Vietnamese,Spanish,Filipino,Chinese": 0.08522727272727272, + "Indonesian,Malay,English,Spanish": 0.1590909090909091, + "Indonesian,Malay,English,Filipino": 0.21022727272727273, + "Indonesian,Malay,English,Chinese": 0.13636363636363635, + "Indonesian,Malay,Spanish,Filipino": 0.22727272727272727, + "Indonesian,Malay,Spanish,Chinese": 0.14772727272727273, + "Indonesian,Malay,Filipino,Chinese": 0.2840909090909091, + "Indonesian,English,Spanish,Filipino": 0.13068181818181818, + "Indonesian,English,Spanish,Chinese": 0.10795454545454546, + "Indonesian,English,Filipino,Chinese": 0.11363636363636363, + "Indonesian,Spanish,Filipino,Chinese": 0.13068181818181818, + "Malay,English,Spanish,Filipino": 0.14772727272727273, + "Malay,English,Spanish,Chinese": 0.10227272727272728, + "Malay,English,Filipino,Chinese": 0.11363636363636363, + "Malay,Spanish,Filipino,Chinese": 0.14772727272727273, + "English,Spanish,Filipino,Chinese": 0.09090909090909091 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.11931818181818182, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.13068181818181818, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.10795454545454546, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.11363636363636363, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.09659090909090909, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.09090909090909091, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.09090909090909091, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.09090909090909091, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.07954545454545454, + "Vietnamese,Malay,English,Spanish,Filipino": 0.09659090909090909, + "Vietnamese,Malay,English,Spanish,Chinese": 0.07954545454545454, + "Vietnamese,Malay,English,Filipino,Chinese": 0.08522727272727272, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.07954545454545454, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.06818181818181818, + "Indonesian,Malay,English,Spanish,Filipino": 0.125, + "Indonesian,Malay,English,Spanish,Chinese": 0.09659090909090909, + "Indonesian,Malay,English,Filipino,Chinese": 0.10795454545454546, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.13068181818181818, + "Indonesian,English,Spanish,Filipino,Chinese": 0.07954545454545454, + "Malay,English,Spanish,Filipino,Chinese": 0.08522727272727272 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.08522727272727272, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.07954545454545454, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.08522727272727272, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.07954545454545454, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.0625, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.0625, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.07954545454545454 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.0625 + } + }, + "AC3_2": 0.31679745456067077, + "AC3_3": 0.2413619992952165, + "AC3_4": 0.1823783847374287, + "AC3_5": 0.14243167125176273, + "AC3_6": 0.11655680998806679, + "AC3_7": 0.09973753277614786 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2815533980582524 + }, + "prompt_2": { + "accuracy": 0.2815533980582524 + }, + "prompt_3": { + "accuracy": 0.2621359223300971 + }, + "prompt_4": { + "accuracy": 0.2524271844660194 + }, + "prompt_5": { + "accuracy": 0.22330097087378642 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2571428571428571 + }, + "prompt_2": { + "accuracy": 0.2571428571428571 + }, + "prompt_3": { + "accuracy": 0.26666666666666666 + }, + "prompt_4": { + "accuracy": 0.3523809523809524 + }, + "prompt_5": { + "accuracy": 0.29523809523809524 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2803738317757009 + }, + "prompt_2": { + "accuracy": 0.21495327102803738 + }, + "prompt_3": { + "accuracy": 0.22429906542056074 + }, + "prompt_4": { + "accuracy": 0.24299065420560748 + }, + "prompt_5": { + "accuracy": 0.2523364485981308 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2, + "category_acc": { + "brand": 0.3, + "demographics": 0.6, + "biology": 0.1, + "history": 0.26666666666666666, + "literature": 0.1, + "politics": 0.1, + "culture": 0.2, + "film": 0.3, + "law": 0.0, + "geography": 0.2 + } + }, + "prompt_2": { + "accuracy": 0.19, + "category_acc": { + "brand": 0.2, + "demographics": 0.4, + "biology": 0.2, + "history": 0.26666666666666666, + "literature": 0.1, + "politics": 0.1, + "culture": 0.2, + "film": 0.1, + "law": 0.2, + "geography": 0.2 + } + }, + "prompt_3": { + "accuracy": 0.16, + "category_acc": { + "brand": 0.1, + "demographics": 0.2, + "biology": 0.3, + "history": 0.26666666666666666, + "literature": 0.1, + "politics": 0.1, + "culture": 0.0, + "film": 0.2, + "law": 0.1, + "geography": 0.2 + } + }, + "prompt_4": { + "accuracy": 0.25, + "category_acc": { + "brand": 0.2, + "demographics": 0.4, + "biology": 0.2, + "history": 0.2, + "literature": 0.3, + "politics": 0.3, + "culture": 0.3, + "film": 0.2, + "law": 0.2, + "geography": 0.3 + } + }, + "prompt_5": { + "accuracy": 0.22, + "category_acc": { + "brand": 0.1, + "demographics": 0.6, + "biology": 0.0, + "history": 0.13333333333333333, + "literature": 0.2, + "politics": 0.5, + "culture": 0.2, + "film": 0.3, + "law": 0.2, + "geography": 0.2 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.055503673616135084 + }, + "prompt_2": { + "bleu_score": 0.05391560916890579 + }, + "prompt_3": { + "bleu_score": 0.054357373364412875 + }, + "prompt_4": { + "bleu_score": 0.05136450032379191 + }, + "prompt_5": { + "bleu_score": 0.04088471984093496 + } }, "indommlu": { "prompt_1": -1, @@ -11552,179 +100912,1369 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.07585317917349457 + }, + "prompt_2": { + "bleu_score": 0.06506413192248581 + }, + "prompt_3": { + "bleu_score": 0.0643938274362041 + }, + "prompt_4": { + "bleu_score": 0.11739459724583577 + }, + "prompt_5": { + "bleu_score": 0.036427882144920314 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.06846703826242148 + }, + "prompt_2": { + "bleu_score": 0.051860041446938294 + }, + "prompt_3": { + "bleu_score": 0.05265622408594447 + }, + "prompt_4": { + "bleu_score": 0.09344610758006208 + }, + "prompt_5": { + "bleu_score": 0.04748212974698807 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.047307622068576395 + }, + "prompt_2": { + "bleu_score": 0.04908898844339031 + }, + "prompt_3": { + "bleu_score": 0.0525572200769723 + }, + "prompt_4": { + "bleu_score": 0.09421045349075516 + }, + "prompt_5": { + "bleu_score": 0.04837140400422949 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.0757517984615548 + }, + "prompt_2": { + "bleu_score": 0.06748854822647792 + }, + "prompt_3": { + "bleu_score": 0.06545876293952489 + }, + "prompt_4": { + "bleu_score": 0.12308563446237895 + }, + "prompt_5": { + "bleu_score": 0.02796179524686788 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.26254375729288215 + }, + "prompt_2": { + "accuracy": 0.2998833138856476 + }, + "prompt_3": { + "accuracy": 0.26837806301050177 + }, + "prompt_4": { + "accuracy": 0.2660443407234539 + }, + "prompt_5": { + "accuracy": 0.2602100350058343 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.27415087593850557, + "category_acc": { + "high_school_european_history": 0.25, + "business_ethics": 0.18181818181818182, + "clinical_knowledge": 0.25, + "medical_genetics": 0.32323232323232326, + "high_school_us_history": 0.270935960591133, + "high_school_physics": 0.28, + "high_school_world_history": 0.21610169491525424, + "virology": 0.30303030303030304, + "high_school_microeconomics": 0.24050632911392406, + "econometrics": 0.2831858407079646, + "college_computer_science": 0.2727272727272727, + "high_school_biology": 0.2815533980582524, + "abstract_algebra": 0.18181818181818182, + "professional_accounting": 0.24199288256227758, + "philosophy": 0.26129032258064516, + "professional_medicine": 0.34317343173431736, + "nutrition": 0.3114754098360656, + "global_facts": 0.29292929292929293, + "machine_learning": 0.21621621621621623, + "security_studies": 0.26229508196721313, + "public_relations": 0.30275229357798167, + "professional_psychology": 0.24877250409165302, + "prehistory": 0.30959752321981426, + "anatomy": 0.29850746268656714, + "human_sexuality": 0.33076923076923076, + "college_medicine": 0.19767441860465115, + "high_school_government_and_politics": 0.359375, + "college_chemistry": 0.16161616161616163, + "logical_fallacies": 0.2777777777777778, + "high_school_geography": 0.3604060913705584, + "elementary_mathematics": 0.2519893899204244, + "human_aging": 0.25675675675675674, + "college_mathematics": 0.30303030303030304, + "high_school_psychology": 0.27941176470588236, + "formal_logic": 0.296, + "high_school_statistics": 0.32558139534883723, + "international_law": 0.3416666666666667, + "high_school_mathematics": 0.27137546468401486, + "high_school_computer_science": 0.30303030303030304, + "conceptual_physics": 0.27350427350427353, + "miscellaneous": 0.26342710997442453, + "high_school_chemistry": 0.25742574257425743, + "marketing": 0.2446351931330472, + "professional_law": 0.28636660143509457, + "management": 0.29411764705882354, + "college_physics": 0.21782178217821782, + "jurisprudence": 0.2803738317757009, + "world_religions": 0.29411764705882354, + "sociology": 0.24, + "us_foreign_policy": 0.25252525252525254, + "high_school_macroeconomics": 0.2776349614395887, + "computer_security": 0.2828282828282828, + "moral_scenarios": 0.24720357941834453, + "moral_disputes": 0.28405797101449276, + "electrical_engineering": 0.2916666666666667, + "astronomy": 0.2980132450331126, + "college_biology": 0.35664335664335667 + } + }, + "prompt_2": { + "accuracy": 0.26428316052913836, + "category_acc": { + "high_school_european_history": 0.2804878048780488, + "business_ethics": 0.25252525252525254, + "clinical_knowledge": 0.2689393939393939, + "medical_genetics": 0.2222222222222222, + "high_school_us_history": 0.24630541871921183, + "high_school_physics": 0.26666666666666666, + "high_school_world_history": 0.25, + "virology": 0.21212121212121213, + "high_school_microeconomics": 0.2869198312236287, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.16161616161616163, + "high_school_biology": 0.2750809061488673, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.2597864768683274, + "philosophy": 0.24838709677419354, + "professional_medicine": 0.3062730627306273, + "nutrition": 0.29180327868852457, + "global_facts": 0.18181818181818182, + "machine_learning": 0.32432432432432434, + "security_studies": 0.3360655737704918, + "public_relations": 0.26605504587155965, + "professional_psychology": 0.24713584288052373, + "prehistory": 0.21981424148606812, + "anatomy": 0.20149253731343283, + "human_sexuality": 0.26153846153846155, + "college_medicine": 0.2441860465116279, + "high_school_government_and_politics": 0.34375, + "college_chemistry": 0.26262626262626265, + "logical_fallacies": 0.2716049382716049, + "high_school_geography": 0.3248730964467005, + "elementary_mathematics": 0.23607427055702918, + "human_aging": 0.1981981981981982, + "college_mathematics": 0.26262626262626265, + "high_school_psychology": 0.35294117647058826, + "formal_logic": 0.312, + "high_school_statistics": 0.33488372093023255, + "international_law": 0.21666666666666667, + "high_school_mathematics": 0.2342007434944238, + "high_school_computer_science": 0.2222222222222222, + "conceptual_physics": 0.2564102564102564, + "miscellaneous": 0.23273657289002558, + "high_school_chemistry": 0.2623762376237624, + "marketing": 0.22746781115879827, + "professional_law": 0.2831050228310502, + "management": 0.3431372549019608, + "college_physics": 0.21782178217821782, + "jurisprudence": 0.24299065420560748, + "world_religions": 0.21764705882352942, + "sociology": 0.285, + "us_foreign_policy": 0.29292929292929293, + "high_school_macroeconomics": 0.2827763496143959, + "computer_security": 0.21212121212121213, + "moral_scenarios": 0.24496644295302014, + "moral_disputes": 0.2463768115942029, + "electrical_engineering": 0.2916666666666667, + "astronomy": 0.271523178807947, + "college_biology": 0.27972027972027974 + } + }, + "prompt_3": { + "accuracy": 0.26671433678941725, + "category_acc": { + "high_school_european_history": 0.2804878048780488, + "business_ethics": 0.24242424242424243, + "clinical_knowledge": 0.2765151515151515, + "medical_genetics": 0.2828282828282828, + "high_school_us_history": 0.24630541871921183, + "high_school_physics": 0.30666666666666664, + "high_school_world_history": 0.25, + "virology": 0.20606060606060606, + "high_school_microeconomics": 0.31645569620253167, + "econometrics": 0.3008849557522124, + "college_computer_science": 0.1919191919191919, + "high_school_biology": 0.3074433656957929, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.24555160142348753, + "philosophy": 0.26129032258064516, + "professional_medicine": 0.4022140221402214, + "nutrition": 0.28524590163934427, + "global_facts": 0.21212121212121213, + "machine_learning": 0.24324324324324326, + "security_studies": 0.36065573770491804, + "public_relations": 0.23853211009174313, + "professional_psychology": 0.2225859247135843, + "prehistory": 0.2476780185758514, + "anatomy": 0.17164179104477612, + "human_sexuality": 0.3, + "college_medicine": 0.29069767441860467, + "high_school_government_and_politics": 0.3697916666666667, + "college_chemistry": 0.24242424242424243, + "logical_fallacies": 0.24691358024691357, + "high_school_geography": 0.3197969543147208, + "elementary_mathematics": 0.246684350132626, + "human_aging": 0.1891891891891892, + "college_mathematics": 0.3434343434343434, + "high_school_psychology": 0.3180147058823529, + "formal_logic": 0.312, + "high_school_statistics": 0.35348837209302325, + "international_law": 0.175, + "high_school_mathematics": 0.23048327137546468, + "high_school_computer_science": 0.2222222222222222, + "conceptual_physics": 0.28205128205128205, + "miscellaneous": 0.2289002557544757, + "high_school_chemistry": 0.2623762376237624, + "marketing": 0.19313304721030042, + "professional_law": 0.2720156555772994, + "management": 0.3137254901960784, + "college_physics": 0.31683168316831684, + "jurisprudence": 0.205607476635514, + "world_religions": 0.18823529411764706, + "sociology": 0.23, + "us_foreign_policy": 0.29292929292929293, + "high_school_macroeconomics": 0.3393316195372751, + "computer_security": 0.21212121212121213, + "moral_scenarios": 0.24496644295302014, + "moral_disputes": 0.2492753623188406, + "electrical_engineering": 0.22916666666666666, + "astronomy": 0.2913907284768212, + "college_biology": 0.2867132867132867 + } + }, + "prompt_4": { + "accuracy": 0.27143367894172327, + "category_acc": { + "high_school_european_history": 0.24390243902439024, + "business_ethics": 0.30303030303030304, + "clinical_knowledge": 0.25, + "medical_genetics": 0.30303030303030304, + "high_school_us_history": 0.2660098522167488, + "high_school_physics": 0.24, + "high_school_world_history": 0.2584745762711864, + "virology": 0.3212121212121212, + "high_school_microeconomics": 0.2911392405063291, + "econometrics": 0.2920353982300885, + "college_computer_science": 0.21212121212121213, + "high_school_biology": 0.3074433656957929, + "abstract_algebra": 0.1919191919191919, + "professional_accounting": 0.23487544483985764, + "philosophy": 0.2967741935483871, + "professional_medicine": 0.31365313653136534, + "nutrition": 0.3180327868852459, + "global_facts": 0.2828282828282828, + "machine_learning": 0.23423423423423423, + "security_studies": 0.3155737704918033, + "public_relations": 0.3394495412844037, + "professional_psychology": 0.2733224222585925, + "prehistory": 0.28173374613003094, + "anatomy": 0.20149253731343283, + "human_sexuality": 0.2846153846153846, + "college_medicine": 0.23837209302325582, + "high_school_government_and_politics": 0.3697916666666667, + "college_chemistry": 0.1919191919191919, + "logical_fallacies": 0.24074074074074073, + "high_school_geography": 0.3197969543147208, + "elementary_mathematics": 0.246684350132626, + "human_aging": 0.24324324324324326, + "college_mathematics": 0.24242424242424243, + "high_school_psychology": 0.29411764705882354, + "formal_logic": 0.264, + "high_school_statistics": 0.3023255813953488, + "international_law": 0.25, + "high_school_mathematics": 0.26022304832713755, + "high_school_computer_science": 0.21212121212121213, + "conceptual_physics": 0.3076923076923077, + "miscellaneous": 0.2749360613810742, + "high_school_chemistry": 0.25742574257425743, + "marketing": 0.2446351931330472, + "professional_law": 0.25962165688193084, + "management": 0.2549019607843137, + "college_physics": 0.18811881188118812, + "jurisprudence": 0.308411214953271, + "world_religions": 0.27058823529411763, + "sociology": 0.325, + "us_foreign_policy": 0.2727272727272727, + "high_school_macroeconomics": 0.2827763496143959, + "computer_security": 0.2828282828282828, + "moral_scenarios": 0.24384787472035793, + "moral_disputes": 0.25507246376811593, + "electrical_engineering": 0.2777777777777778, + "astronomy": 0.271523178807947, + "college_biology": 0.2867132867132867 + } + }, + "prompt_5": { + "accuracy": 0.2729352878083661, + "category_acc": { + "high_school_european_history": 0.1951219512195122, + "business_ethics": 0.2222222222222222, + "clinical_knowledge": 0.2878787878787879, + "medical_genetics": 0.2828282828282828, + "high_school_us_history": 0.2857142857142857, + "high_school_physics": 0.29333333333333333, + "high_school_world_history": 0.21610169491525424, + "virology": 0.26666666666666666, + "high_school_microeconomics": 0.2911392405063291, + "econometrics": 0.2831858407079646, + "college_computer_science": 0.2222222222222222, + "high_school_biology": 0.3300970873786408, + "abstract_algebra": 0.25252525252525254, + "professional_accounting": 0.23487544483985764, + "philosophy": 0.27741935483870966, + "professional_medicine": 0.3247232472324723, + "nutrition": 0.3344262295081967, + "global_facts": 0.23232323232323232, + "machine_learning": 0.22522522522522523, + "security_studies": 0.36065573770491804, + "public_relations": 0.3669724770642202, + "professional_psychology": 0.24877250409165302, + "prehistory": 0.29102167182662536, + "anatomy": 0.2537313432835821, + "human_sexuality": 0.3153846153846154, + "college_medicine": 0.23255813953488372, + "high_school_government_and_politics": 0.3541666666666667, + "college_chemistry": 0.2222222222222222, + "logical_fallacies": 0.22839506172839505, + "high_school_geography": 0.34517766497461927, + "elementary_mathematics": 0.2546419098143236, + "human_aging": 0.19369369369369369, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.3272058823529412, + "formal_logic": 0.296, + "high_school_statistics": 0.3116279069767442, + "international_law": 0.25833333333333336, + "high_school_mathematics": 0.24907063197026022, + "high_school_computer_science": 0.23232323232323232, + "conceptual_physics": 0.26495726495726496, + "miscellaneous": 0.26342710997442453, + "high_school_chemistry": 0.25742574257425743, + "marketing": 0.2832618025751073, + "professional_law": 0.26353555120678407, + "management": 0.35294117647058826, + "college_physics": 0.1782178217821782, + "jurisprudence": 0.2523364485981308, + "world_religions": 0.2823529411764706, + "sociology": 0.28, + "us_foreign_policy": 0.24242424242424243, + "high_school_macroeconomics": 0.30848329048843187, + "computer_security": 0.2828282828282828, + "moral_scenarios": 0.2225950782997763, + "moral_disputes": 0.26666666666666666, + "electrical_engineering": 0.2847222222222222, + "astronomy": 0.33774834437086093, + "college_biology": 0.2727272727272727 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24888558692421991 + }, + "prompt_2": { + "accuracy": 0.24219910846953938 + }, + "prompt_3": { + "accuracy": 0.24219910846953938 + }, + "prompt_4": { + "accuracy": 0.23402674591381872 + }, + "prompt_5": { + "accuracy": 0.24962852897473997 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24346201743462018, + "category_acc": { + "computer_network": 0.041666666666666664, + "operating_system": 0.375, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.19047619047619047, + "college_physics": 0.08333333333333333, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.16666666666666666, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.11538461538461539, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.14285714285714285, + "college_economics": 0.2833333333333333, + "business_administration": 0.3157894736842105, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.27586206896551724, + "education_science": 0.3235294117647059, + "teacher_qualification": 0.16326530612244897, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.11764705882352941, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.2222222222222222, + "law": 0.13793103448275862, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.10714285714285714, + "high_school_chinese": 0.125, + "high_school_history": 0.36, + "middle_school_history": 0.18518518518518517, + "civil_servant": 0.25, + "sports_science": 0.2916666666666667, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.375, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.3333333333333333, + "accountant": 0.16666666666666666, + "fire_engineer": 0.16666666666666666, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.2222222222222222, + "physician": 0.25925925925925924 + } + }, + "prompt_2": { + "accuracy": 0.25093399750934, + "category_acc": { + "computer_network": 0.08333333333333333, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.16666666666666666, + "college_physics": 0.125, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.20689655172413793, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.25, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.38461538461538464, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.25, + "college_economics": 0.25, + "business_administration": 0.23684210526315788, + "marxism": 0.25, + "mao_zedong_thought": 0.27586206896551724, + "education_science": 0.29411764705882354, + "teacher_qualification": 0.2857142857142857, + "high_school_politics": 0.20833333333333334, + "high_school_geography": 0.125, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.125, + "logic": 0.18518518518518517, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.07142857142857142, + "high_school_chinese": 0.125, + "high_school_history": 0.32, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.19230769230769232, + "sports_science": 0.5, + "plant_protection": 0.18518518518518517, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.37254901960784315, + "accountant": 0.18518518518518517, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.19444444444444445, + "tax_accountant": 0.24074074074074073, + "physician": 0.2962962962962963 + } + }, + "prompt_3": { + "accuracy": 0.24906600249066002, + "category_acc": { + "computer_network": 0.125, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.16666666666666666, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.2413793103448276, + "high_school_mathematics": 0.391304347826087, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.25, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.2692307692307692, + "middle_school_physics": 0.125, + "middle_school_chemistry": 0.12, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.26666666666666666, + "business_administration": 0.18421052631578946, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.2413793103448276, + "education_science": 0.29411764705882354, + "teacher_qualification": 0.30612244897959184, + "high_school_politics": 0.25, + "high_school_geography": 0.125, + "middle_school_politics": 0.15384615384615385, + "middle_school_geography": 0.17647058823529413, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.18518518518518517, + "law": 0.1724137931034483, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.14285714285714285, + "high_school_chinese": 0.125, + "high_school_history": 0.36, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.2692307692307692, + "sports_science": 0.3333333333333333, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.29411764705882354, + "accountant": 0.2037037037037037, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.2222222222222222, + "physician": 0.2222222222222222 + } + }, + "prompt_4": { + "accuracy": 0.2465753424657534, + "category_acc": { + "computer_network": 0.08333333333333333, + "operating_system": 0.25, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.19047619047619047, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.375, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.08, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.25, + "business_administration": 0.2631578947368421, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.27586206896551724, + "education_science": 0.3235294117647059, + "teacher_qualification": 0.1836734693877551, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.17647058823529413, + "modern_chinese_history": 0.17857142857142858, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.14814814814814814, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.23684210526315788, + "professional_tour_guide": 0.17647058823529413, + "legal_professional": 0.03571428571428571, + "high_school_chinese": 0.125, + "high_school_history": 0.44, + "middle_school_history": 0.14814814814814814, + "civil_servant": 0.19230769230769232, + "sports_science": 0.16666666666666666, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.25, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.2037037037037037, + "fire_engineer": 0.4166666666666667, + "environmental_impact_assessment_engineer": 0.19444444444444445, + "tax_accountant": 0.2777777777777778, + "physician": 0.24074074074074073 + } + }, + "prompt_5": { + "accuracy": 0.24719800747198006, + "category_acc": { + "computer_network": 0.08333333333333333, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.23809523809523808, + "college_physics": 0.08333333333333333, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.13793103448275862, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.25, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.19230769230769232, + "middle_school_physics": 0.16666666666666666, + "middle_school_chemistry": 0.12, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.21666666666666667, + "business_administration": 0.3684210526315789, + "marxism": 0.20833333333333334, + "mao_zedong_thought": 0.20689655172413793, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.2653061224489796, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.16666666666666666, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.11764705882352941, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.14814814814814814, + "law": 0.1724137931034483, + "chinese_language_and_literature": 0.25, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.10714285714285714, + "high_school_chinese": 0.16666666666666666, + "high_school_history": 0.4, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.21153846153846154, + "sports_science": 0.16666666666666666, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.375, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.3137254901960784, + "accountant": 0.18518518518518517, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.2222222222222222, + "physician": 0.2777777777777778 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2903225806451613 + }, + "prompt_2": { + "accuracy": 0.2903225806451613 + }, + "prompt_3": { + "accuracy": 0.31899641577060933 + }, + "prompt_4": { + "accuracy": 0.26523297491039427 + }, + "prompt_5": { + "accuracy": 0.2724014336917563 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2562597133482991, + "category_acc": { + "agronomy": 0.23076923076923078, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.25, + "arts": 0.25625, + "astronomy": 0.2545454545454545, + "business_ethics": 0.2535885167464115, + "chinese_civil_service_exam": 0.24375, + "chinese_driving_rule": 0.2824427480916031, + "chinese_food_culture": 0.25735294117647056, + "chinese_foreign_policy": 0.2523364485981308, + "chinese_history": 0.25696594427244585, + "chinese_literature": 0.2647058823529412, + "chinese_teacher_qualification": 0.2681564245810056, + "clinical_knowledge": 0.2489451476793249, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.29906542056074764, + "college_engineering_hydrology": 0.22641509433962265, + "college_law": 0.24074074074074073, + "college_mathematics": 0.20952380952380953, + "college_medical_statistics": 0.25471698113207547, + "college_medicine": 0.25274725274725274, + "computer_science": 0.2696078431372549, + "computer_security": 0.23391812865497075, + "conceptual_physics": 0.2653061224489796, + "construction_project_management": 0.2589928057553957, + "economics": 0.25157232704402516, + "education": 0.25766871165644173, + "electrical_engineering": 0.2616279069767442, + "elementary_chinese": 0.2777777777777778, + "elementary_commonsense": 0.23737373737373738, + "elementary_information_and_technology": 0.27310924369747897, + "elementary_mathematics": 0.28695652173913044, + "ethnology": 0.26666666666666666, + "food_science": 0.27972027972027974, + "genetics": 0.2556818181818182, + "global_facts": 0.2483221476510067, + "high_school_biology": 0.28402366863905326, + "high_school_chemistry": 0.2803030303030303, + "high_school_geography": 0.2457627118644068, + "high_school_mathematics": 0.27439024390243905, + "high_school_physics": 0.23636363636363636, + "high_school_politics": 0.23776223776223776, + "human_sexuality": 0.23809523809523808, + "international_law": 0.22702702702702704, + "journalism": 0.23837209302325582, + "jurisprudence": 0.25790754257907544, + "legal_and_moral_basis": 0.2850467289719626, + "logical": 0.2845528455284553, + "machine_learning": 0.22950819672131148, + "management": 0.2571428571428571, + "marketing": 0.2111111111111111, + "marxist_theory": 0.25396825396825395, + "modern_chinese": 0.21551724137931033, + "nutrition": 0.27586206896551724, + "philosophy": 0.24761904761904763, + "professional_accounting": 0.26285714285714284, + "professional_law": 0.25118483412322273, + "professional_medicine": 0.2632978723404255, + "professional_psychology": 0.27155172413793105, + "public_relations": 0.28160919540229884, + "security_study": 0.2814814814814815, + "sociology": 0.23893805309734514, + "sports_science": 0.23030303030303031, + "traditional_chinese_medicine": 0.20540540540540542, + "virology": 0.27218934911242604, + "world_history": 0.21739130434782608, + "world_religions": 0.3 + } + }, + "prompt_2": { + "accuracy": 0.25979968917285445, + "category_acc": { + "agronomy": 0.25443786982248523, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.2621951219512195, + "arts": 0.28125, + "astronomy": 0.24848484848484848, + "business_ethics": 0.2822966507177033, + "chinese_civil_service_exam": 0.2125, + "chinese_driving_rule": 0.24427480916030533, + "chinese_food_culture": 0.23529411764705882, + "chinese_foreign_policy": 0.2897196261682243, + "chinese_history": 0.2631578947368421, + "chinese_literature": 0.28431372549019607, + "chinese_teacher_qualification": 0.2569832402234637, + "clinical_knowledge": 0.26582278481012656, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.2616822429906542, + "college_engineering_hydrology": 0.3018867924528302, + "college_law": 0.17592592592592593, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.25471698113207547, + "college_medicine": 0.21611721611721613, + "computer_science": 0.2549019607843137, + "computer_security": 0.2222222222222222, + "conceptual_physics": 0.2857142857142857, + "construction_project_management": 0.26618705035971224, + "economics": 0.2830188679245283, + "education": 0.26993865030674846, + "electrical_engineering": 0.2616279069767442, + "elementary_chinese": 0.2698412698412698, + "elementary_commonsense": 0.23737373737373738, + "elementary_information_and_technology": 0.2689075630252101, + "elementary_mathematics": 0.2782608695652174, + "ethnology": 0.26666666666666666, + "food_science": 0.2517482517482518, + "genetics": 0.2840909090909091, + "global_facts": 0.2550335570469799, + "high_school_biology": 0.27218934911242604, + "high_school_chemistry": 0.2878787878787879, + "high_school_geography": 0.2457627118644068, + "high_school_mathematics": 0.25, + "high_school_physics": 0.24545454545454545, + "high_school_politics": 0.24475524475524477, + "human_sexuality": 0.2698412698412698, + "international_law": 0.22702702702702704, + "journalism": 0.2616279069767442, + "jurisprudence": 0.25790754257907544, + "legal_and_moral_basis": 0.2850467289719626, + "logical": 0.23577235772357724, + "machine_learning": 0.2459016393442623, + "management": 0.24761904761904763, + "marketing": 0.25555555555555554, + "marxist_theory": 0.24338624338624337, + "modern_chinese": 0.25862068965517243, + "nutrition": 0.2689655172413793, + "philosophy": 0.2571428571428571, + "professional_accounting": 0.26285714285714284, + "professional_law": 0.2796208530805687, + "professional_medicine": 0.26861702127659576, + "professional_psychology": 0.2801724137931034, + "public_relations": 0.2988505747126437, + "security_study": 0.28888888888888886, + "sociology": 0.25663716814159293, + "sports_science": 0.2727272727272727, + "traditional_chinese_medicine": 0.23243243243243245, + "virology": 0.3076923076923077, + "world_history": 0.2422360248447205, + "world_religions": 0.2375 + } + }, + "prompt_3": { + "accuracy": 0.25720946295976516, + "category_acc": { + "agronomy": 0.22485207100591717, + "anatomy": 0.2702702702702703, + "ancient_chinese": 0.23780487804878048, + "arts": 0.25, + "astronomy": 0.23030303030303031, + "business_ethics": 0.27751196172248804, + "chinese_civil_service_exam": 0.21875, + "chinese_driving_rule": 0.25190839694656486, + "chinese_food_culture": 0.22794117647058823, + "chinese_foreign_policy": 0.27102803738317754, + "chinese_history": 0.2693498452012384, + "chinese_literature": 0.27450980392156865, + "chinese_teacher_qualification": 0.2737430167597765, + "clinical_knowledge": 0.2489451476793249, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.308411214953271, + "college_engineering_hydrology": 0.29245283018867924, + "college_law": 0.19444444444444445, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.25471698113207547, + "college_medicine": 0.2271062271062271, + "computer_science": 0.2696078431372549, + "computer_security": 0.23976608187134502, + "conceptual_physics": 0.2789115646258503, + "construction_project_management": 0.23741007194244604, + "economics": 0.25157232704402516, + "education": 0.25766871165644173, + "electrical_engineering": 0.2558139534883721, + "elementary_chinese": 0.2698412698412698, + "elementary_commonsense": 0.25757575757575757, + "elementary_information_and_technology": 0.2773109243697479, + "elementary_mathematics": 0.2565217391304348, + "ethnology": 0.2518518518518518, + "food_science": 0.3006993006993007, + "genetics": 0.25, + "global_facts": 0.2684563758389262, + "high_school_biology": 0.24260355029585798, + "high_school_chemistry": 0.2803030303030303, + "high_school_geography": 0.22033898305084745, + "high_school_mathematics": 0.2804878048780488, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.23776223776223776, + "human_sexuality": 0.2777777777777778, + "international_law": 0.2648648648648649, + "journalism": 0.2441860465116279, + "jurisprudence": 0.26277372262773724, + "legal_and_moral_basis": 0.2803738317757009, + "logical": 0.23577235772357724, + "machine_learning": 0.27049180327868855, + "management": 0.24761904761904763, + "marketing": 0.23333333333333334, + "marxist_theory": 0.2222222222222222, + "modern_chinese": 0.22413793103448276, + "nutrition": 0.2620689655172414, + "philosophy": 0.2571428571428571, + "professional_accounting": 0.26285714285714284, + "professional_law": 0.2559241706161137, + "professional_medicine": 0.26063829787234044, + "professional_psychology": 0.2974137931034483, + "public_relations": 0.26436781609195403, + "security_study": 0.28888888888888886, + "sociology": 0.25663716814159293, + "sports_science": 0.23636363636363636, + "traditional_chinese_medicine": 0.24324324324324326, + "virology": 0.30177514792899407, + "world_history": 0.2546583850931677, + "world_religions": 0.24375 + } + }, + "prompt_4": { + "accuracy": 0.25807287169746157, + "category_acc": { + "agronomy": 0.22485207100591717, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.2682926829268293, + "arts": 0.30625, + "astronomy": 0.3090909090909091, + "business_ethics": 0.2727272727272727, + "chinese_civil_service_exam": 0.24375, + "chinese_driving_rule": 0.2366412213740458, + "chinese_food_culture": 0.27941176470588236, + "chinese_foreign_policy": 0.27102803738317754, + "chinese_history": 0.2693498452012384, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.24581005586592178, + "clinical_knowledge": 0.2109704641350211, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.3177570093457944, + "college_engineering_hydrology": 0.1792452830188679, + "college_law": 0.28703703703703703, + "college_mathematics": 0.17142857142857143, + "college_medical_statistics": 0.2358490566037736, + "college_medicine": 0.23809523809523808, + "computer_science": 0.23039215686274508, + "computer_security": 0.22807017543859648, + "conceptual_physics": 0.2857142857142857, + "construction_project_management": 0.20863309352517986, + "economics": 0.2389937106918239, + "education": 0.27607361963190186, + "electrical_engineering": 0.23837209302325582, + "elementary_chinese": 0.24603174603174602, + "elementary_commonsense": 0.2828282828282828, + "elementary_information_and_technology": 0.27310924369747897, + "elementary_mathematics": 0.26956521739130435, + "ethnology": 0.23703703703703705, + "food_science": 0.2937062937062937, + "genetics": 0.2784090909090909, + "global_facts": 0.2684563758389262, + "high_school_biology": 0.21893491124260356, + "high_school_chemistry": 0.2803030303030303, + "high_school_geography": 0.2542372881355932, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.2517482517482518, + "human_sexuality": 0.23015873015873015, + "international_law": 0.25405405405405407, + "journalism": 0.2616279069767442, + "jurisprudence": 0.2895377128953771, + "legal_and_moral_basis": 0.2897196261682243, + "logical": 0.24390243902439024, + "machine_learning": 0.29508196721311475, + "management": 0.26666666666666666, + "marketing": 0.23333333333333334, + "marxist_theory": 0.291005291005291, + "modern_chinese": 0.21551724137931033, + "nutrition": 0.21379310344827587, + "philosophy": 0.3047619047619048, + "professional_accounting": 0.32571428571428573, + "professional_law": 0.24644549763033174, + "professional_medicine": 0.2765957446808511, + "professional_psychology": 0.28448275862068967, + "public_relations": 0.28735632183908044, + "security_study": 0.2518518518518518, + "sociology": 0.22123893805309736, + "sports_science": 0.24848484848484848, + "traditional_chinese_medicine": 0.23243243243243245, + "virology": 0.22485207100591717, + "world_history": 0.22981366459627328, + "world_religions": 0.29375 + } + }, + "prompt_5": { + "accuracy": 0.2542738732515973, + "category_acc": { + "agronomy": 0.24260355029585798, + "anatomy": 0.23648648648648649, + "ancient_chinese": 0.25, + "arts": 0.25625, + "astronomy": 0.2545454545454545, + "business_ethics": 0.22488038277511962, + "chinese_civil_service_exam": 0.2625, + "chinese_driving_rule": 0.2748091603053435, + "chinese_food_culture": 0.25735294117647056, + "chinese_foreign_policy": 0.2523364485981308, + "chinese_history": 0.24458204334365324, + "chinese_literature": 0.27450980392156865, + "chinese_teacher_qualification": 0.25139664804469275, + "clinical_knowledge": 0.24472573839662448, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.24299065420560748, + "college_engineering_hydrology": 0.2358490566037736, + "college_law": 0.2222222222222222, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.2358490566037736, + "college_medicine": 0.23443223443223443, + "computer_science": 0.25980392156862747, + "computer_security": 0.2222222222222222, + "conceptual_physics": 0.25170068027210885, + "construction_project_management": 0.302158273381295, + "economics": 0.24528301886792453, + "education": 0.26993865030674846, + "electrical_engineering": 0.27906976744186046, + "elementary_chinese": 0.27380952380952384, + "elementary_commonsense": 0.30303030303030304, + "elementary_information_and_technology": 0.226890756302521, + "elementary_mathematics": 0.2782608695652174, + "ethnology": 0.22962962962962963, + "food_science": 0.26573426573426573, + "genetics": 0.26704545454545453, + "global_facts": 0.2550335570469799, + "high_school_biology": 0.24260355029585798, + "high_school_chemistry": 0.2803030303030303, + "high_school_geography": 0.2627118644067797, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.23636363636363636, + "high_school_politics": 0.25874125874125875, + "human_sexuality": 0.23809523809523808, + "international_law": 0.24864864864864866, + "journalism": 0.2441860465116279, + "jurisprudence": 0.25060827250608275, + "legal_and_moral_basis": 0.27102803738317754, + "logical": 0.24390243902439024, + "machine_learning": 0.27049180327868855, + "management": 0.23809523809523808, + "marketing": 0.23333333333333334, + "marxist_theory": 0.23809523809523808, + "modern_chinese": 0.25, + "nutrition": 0.27586206896551724, + "philosophy": 0.2571428571428571, + "professional_accounting": 0.2571428571428571, + "professional_law": 0.27488151658767773, + "professional_medicine": 0.2526595744680851, + "professional_psychology": 0.3017241379310345, + "public_relations": 0.3045977011494253, + "security_study": 0.2962962962962963, + "sociology": 0.252212389380531, + "sports_science": 0.22424242424242424, + "traditional_chinese_medicine": 0.21081081081081082, + "virology": 0.24260355029585798, + "world_history": 0.22981366459627328, + "world_religions": 0.28125 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2727272727272727 + }, + "prompt_2": { + "accuracy": 0.24242424242424243 + }, + "prompt_3": { + "accuracy": 0.21212121212121213 + }, + "prompt_4": { + "accuracy": 0.15151515151515152 + }, + "prompt_5": { + "accuracy": 0.18181818181818182 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.17272727272727273 + }, + "prompt_2": { + "accuracy": 0.11818181818181818 + }, + "prompt_3": { + "accuracy": 0.11136363636363636 + }, + "prompt_4": { + "accuracy": 0.14545454545454545 + }, + "prompt_5": { + "accuracy": 0.16363636363636364 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3227118644067797 + }, + "prompt_2": { + "accuracy": 0.3264406779661017 + }, + "prompt_3": { + "accuracy": 0.3325423728813559 + }, + "prompt_4": { + "accuracy": 0.3345762711864407 + }, + "prompt_5": { + "accuracy": 0.31966101694915255 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.281226626776365 + }, + "prompt_2": { + "accuracy": 0.2737471952131638 + }, + "prompt_3": { + "accuracy": 0.2763649962602842 + }, + "prompt_4": { + "accuracy": 0.27599102468212416 + }, + "prompt_5": { + "accuracy": 0.2894540014958863 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3258206761391475 + }, + "prompt_2": { + "accuracy": 0.32533072023517884 + }, + "prompt_3": { + "accuracy": 0.32435080842724157 + }, + "prompt_4": { + "accuracy": 0.3375796178343949 + }, + "prompt_5": { + "accuracy": 0.33219010289073986 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.21071721731902832, + "rouge2": 0.06263208088632324, + "rougeL": 0.15873185260050474, + "avg_rouge": 0.14402705026861876 + }, + "prompt_2": { + "rouge1": 0.17477087124858154, + "rouge2": 0.05161058151166577, + "rougeL": 0.13780980003856205, + "avg_rouge": 0.12139708426626979 + }, + "prompt_3": { + "rouge1": 0.17836046630353639, + "rouge2": 0.05234500957536136, + "rougeL": 0.1420107915086774, + "avg_rouge": 0.12423875579585837 + }, + "prompt_4": { + "rouge1": 0.20612700441253587, + "rouge2": 0.06196281775998205, + "rougeL": 0.157844152573879, + "avg_rouge": 0.1419779915821323 + }, + "prompt_5": { + "rouge1": 0.19232902309680353, + "rouge2": 0.056889868658432464, + "rougeL": 0.1471035576604194, + "avg_rouge": 0.1321074831385518 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2095880648481376, + "rouge2": 0.05886625847491587, + "rougeL": 0.15351111164357276, + "avg_rouge": 0.1406551449888754 + }, + "prompt_2": { + "rouge1": 0.21071351003968464, + "rouge2": 0.05844581658785653, + "rougeL": 0.15474814840633067, + "avg_rouge": 0.14130249167795728 + }, + "prompt_3": { + "rouge1": 0.20990935287930465, + "rouge2": 0.05867884234976881, + "rougeL": 0.15469198105008658, + "avg_rouge": 0.14109339209305335 + }, + "prompt_4": { + "rouge1": 0.20494868093526838, + "rouge2": 0.058064620767554594, + "rougeL": 0.14976318600375157, + "avg_rouge": 0.1375921625688582 + }, + "prompt_5": { + "rouge1": 0.20954452685307218, + "rouge2": 0.057658269430444185, + "rougeL": 0.15468782985154383, + "avg_rouge": 0.14063020871168672 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5034403669724771 + }, + "prompt_2": { + "accuracy": 0.4805045871559633 + }, + "prompt_3": { + "accuracy": 0.4988532110091743 + }, + "prompt_4": { + "accuracy": 0.5011467889908257 + }, + "prompt_5": { + "accuracy": 0.5194954128440367 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5043144774688398 + }, + "prompt_2": { + "accuracy": 0.5349952061361457 + }, + "prompt_3": { + "accuracy": 0.5129434324065196 + }, + "prompt_4": { + "accuracy": 0.5043144774688398 + }, + "prompt_5": { + "accuracy": 0.5043144774688398 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.501 + }, + "prompt_2": { + "accuracy": 0.5135 + }, + "prompt_3": { + "accuracy": 0.54 + }, + "prompt_4": { + "accuracy": 0.497 + }, + "prompt_5": { + "accuracy": 0.5055 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3285 + }, + "prompt_2": { + "accuracy": 0.3205 + }, + "prompt_3": { + "accuracy": 0.32 + }, + "prompt_4": { + "accuracy": 0.336 + }, + "prompt_5": { + "accuracy": 0.3205 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5095 + }, + "prompt_2": { + "accuracy": 0.5 + }, + "prompt_3": { + "accuracy": 0.4975 + }, + "prompt_4": { + "accuracy": 0.4885 + }, + "prompt_5": { + "accuracy": 0.492 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5211267605633803 + }, + "prompt_2": { + "accuracy": 0.5352112676056338 + }, + "prompt_3": { + "accuracy": 0.5070422535211268 + }, + "prompt_4": { + "accuracy": 0.5211267605633803 + }, + "prompt_5": { + "accuracy": 0.5211267605633803 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4981949458483754 + }, + "prompt_2": { + "accuracy": 0.48014440433212996 + }, + "prompt_3": { + "accuracy": 0.49097472924187724 + }, + "prompt_4": { + "accuracy": 0.49097472924187724 + }, + "prompt_5": { + "accuracy": 0.48375451263537905 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.45588235294117646 + }, + "prompt_2": { + "accuracy": 0.4387254901960784 + }, + "prompt_3": { + "accuracy": 0.47058823529411764 + }, + "prompt_4": { + "accuracy": 0.3480392156862745 + }, + "prompt_5": { + "accuracy": 0.46568627450980393 + } } }, "five_shot": { @@ -11834,53 +102384,1733 @@ "model_link": "https://huggingface.co/aisingapore/sealion7b", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.24190476190476187, + "language_acc": { + "English": 0.24, + "Vietnamese": 0.2, + "Malay": 0.23333333333333334, + "Indonesian": 0.24, + "Spanish": 0.26666666666666666, + "Chinese": 0.22666666666666666, + "Filipino": 0.2866666666666667 + }, + "consistency_score_2": 0.5847619047619048, + "consistency_score_3": 0.418857142857143, + "consistency_score_4": 0.3253333333333334, + "consistency_score_5": 0.26412698412698404, + "consistency_score_6": 0.2219047619047619, + "consistency_score_7": 0.19333333333333333, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.6666666666666666, + "English,Malay": 0.5933333333333334, + "English,Indonesian": 0.6133333333333333, + "English,Spanish": 0.66, + "English,Chinese": 0.4533333333333333, + "English,Filipino": 0.43333333333333335, + "Vietnamese,Malay": 0.5866666666666667, + "Vietnamese,Indonesian": 0.6933333333333334, + "Vietnamese,Spanish": 0.6666666666666666, + "Vietnamese,Chinese": 0.5, + "Vietnamese,Filipino": 0.5133333333333333, + "Malay,Indonesian": 0.8, + "Malay,Spanish": 0.6066666666666667, + "Malay,Chinese": 0.52, + "Malay,Filipino": 0.56, + "Indonesian,Spanish": 0.6733333333333333, + "Indonesian,Chinese": 0.5466666666666666, + "Indonesian,Filipino": 0.6266666666666667, + "Spanish,Chinese": 0.47333333333333333, + "Spanish,Filipino": 0.56, + "Chinese,Filipino": 0.5333333333333333 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.4533333333333333, + "English,Vietnamese,Indonesian": 0.5133333333333333, + "English,Vietnamese,Spanish": 0.52, + "English,Vietnamese,Chinese": 0.36666666666666664, + "English,Vietnamese,Filipino": 0.34, + "English,Malay,Indonesian": 0.52, + "English,Malay,Spanish": 0.4666666666666667, + "English,Malay,Chinese": 0.3333333333333333, + "English,Malay,Filipino": 0.34, + "English,Indonesian,Spanish": 0.5, + "English,Indonesian,Chinese": 0.36666666666666664, + "English,Indonesian,Filipino": 0.38, + "English,Spanish,Chinese": 0.36, + "English,Spanish,Filipino": 0.36666666666666664, + "English,Chinese,Filipino": 0.29333333333333333, + "Vietnamese,Malay,Indonesian": 0.56, + "Vietnamese,Malay,Spanish": 0.4666666666666667, + "Vietnamese,Malay,Chinese": 0.35333333333333333, + "Vietnamese,Malay,Filipino": 0.3933333333333333, + "Vietnamese,Indonesian,Spanish": 0.5333333333333333, + "Vietnamese,Indonesian,Chinese": 0.41333333333333333, + "Vietnamese,Indonesian,Filipino": 0.4666666666666667, + "Vietnamese,Spanish,Chinese": 0.37333333333333335, + "Vietnamese,Spanish,Filipino": 0.41333333333333333, + "Vietnamese,Chinese,Filipino": 0.3466666666666667, + "Malay,Indonesian,Spanish": 0.56, + "Malay,Indonesian,Chinese": 0.46, + "Malay,Indonesian,Filipino": 0.5066666666666667, + "Malay,Spanish,Chinese": 0.3466666666666667, + "Malay,Spanish,Filipino": 0.41333333333333333, + "Malay,Chinese,Filipino": 0.36, + "Indonesian,Spanish,Chinese": 0.38666666666666666, + "Indonesian,Spanish,Filipino": 0.46, + "Indonesian,Chinese,Filipino": 0.4, + "Spanish,Chinese,Filipino": 0.32666666666666666 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.4266666666666667, + "English,Vietnamese,Malay,Spanish": 0.38666666666666666, + "English,Vietnamese,Malay,Chinese": 0.2866666666666667, + "English,Vietnamese,Malay,Filipino": 0.28, + "English,Vietnamese,Indonesian,Spanish": 0.4266666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.32666666666666666, + "English,Vietnamese,Indonesian,Filipino": 0.32, + "English,Vietnamese,Spanish,Chinese": 0.32, + "English,Vietnamese,Spanish,Filipino": 0.30666666666666664, + "English,Vietnamese,Chinese,Filipino": 0.25333333333333335, + "English,Malay,Indonesian,Spanish": 0.4266666666666667, + "English,Malay,Indonesian,Chinese": 0.32, + "English,Malay,Indonesian,Filipino": 0.3333333333333333, + "English,Malay,Spanish,Chinese": 0.2866666666666667, + "English,Malay,Spanish,Filipino": 0.3, + "English,Malay,Chinese,Filipino": 0.24, + "English,Indonesian,Spanish,Chinese": 0.31333333333333335, + "English,Indonesian,Spanish,Filipino": 0.32666666666666666, + "English,Indonesian,Chinese,Filipino": 0.2733333333333333, + "English,Spanish,Chinese,Filipino": 0.24666666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.44, + "Vietnamese,Malay,Indonesian,Chinese": 0.3466666666666667, + "Vietnamese,Malay,Indonesian,Filipino": 0.38666666666666666, + "Vietnamese,Malay,Spanish,Chinese": 0.28, + "Vietnamese,Malay,Spanish,Filipino": 0.34, + "Vietnamese,Malay,Chinese,Filipino": 0.2733333333333333, + "Vietnamese,Indonesian,Spanish,Chinese": 0.32666666666666666, + "Vietnamese,Indonesian,Spanish,Filipino": 0.38, + "Vietnamese,Indonesian,Chinese,Filipino": 0.32666666666666666, + "Vietnamese,Spanish,Chinese,Filipino": 0.2733333333333333, + "Malay,Indonesian,Spanish,Chinese": 0.3333333333333333, + "Malay,Indonesian,Spanish,Filipino": 0.4, + "Malay,Indonesian,Chinese,Filipino": 0.3333333333333333, + "Malay,Spanish,Chinese,Filipino": 0.25333333333333335, + "Indonesian,Spanish,Chinese,Filipino": 0.29333333333333333 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.36, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.28, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.2733333333333333, + "English,Vietnamese,Malay,Spanish,Chinese": 0.25333333333333335, + "English,Vietnamese,Malay,Spanish,Filipino": 0.26, + "English,Vietnamese,Malay,Chinese,Filipino": 0.21333333333333335, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.2866666666666667, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.2866666666666667, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.24666666666666667, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.22666666666666666, + "English,Malay,Indonesian,Spanish,Chinese": 0.2733333333333333, + "English,Malay,Indonesian,Spanish,Filipino": 0.29333333333333333, + "English,Malay,Indonesian,Chinese,Filipino": 0.24, + "English,Malay,Spanish,Chinese,Filipino": 0.20666666666666667, + "English,Indonesian,Spanish,Chinese,Filipino": 0.23333333333333334, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.2733333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.3333333333333333, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.2733333333333333, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.22, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.26, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.25333333333333335 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.24666666666666667, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.25333333333333335, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.21333333333333335, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.19333333333333333, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.22, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.20666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.22 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.19333333333333333 + } + }, + "AC3_2": 0.34223392578699663, + "AC3_3": 0.306686982616354, + "AC3_4": 0.2774837698190019, + "AC3_5": 0.2525279320765228, + "AC3_6": 0.23147355035588157, + "AC3_7": 0.2149088256253127 + }, + "prompt_2": { + "overall_acc": 0.23904761904761904, + "language_acc": { + "English": 0.24666666666666667, + "Vietnamese": 0.31333333333333335, + "Malay": 0.22, + "Indonesian": 0.19333333333333333, + "Spanish": 0.26, + "Chinese": 0.2, + "Filipino": 0.24 + }, + "consistency_score_2": 0.4520634920634921, + "consistency_score_3": 0.2531428571428571, + "consistency_score_4": 0.16266666666666665, + "consistency_score_5": 0.11492063492063491, + "consistency_score_6": 0.08666666666666667, + "consistency_score_7": 0.06666666666666667, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.38666666666666666, + "English,Malay": 0.4533333333333333, + "English,Indonesian": 0.4666666666666667, + "English,Spanish": 0.5, + "English,Chinese": 0.44, + "English,Filipino": 0.4666666666666667, + "Vietnamese,Malay": 0.49333333333333335, + "Vietnamese,Indonesian": 0.44666666666666666, + "Vietnamese,Spanish": 0.5, + "Vietnamese,Chinese": 0.36666666666666664, + "Vietnamese,Filipino": 0.4533333333333333, + "Malay,Indonesian": 0.56, + "Malay,Spanish": 0.44, + "Malay,Chinese": 0.4, + "Malay,Filipino": 0.52, + "Indonesian,Spanish": 0.46, + "Indonesian,Chinese": 0.4066666666666667, + "Indonesian,Filipino": 0.5, + "Spanish,Chinese": 0.42, + "Spanish,Filipino": 0.42, + "Chinese,Filipino": 0.3933333333333333 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.23333333333333334, + "English,Vietnamese,Indonesian": 0.22666666666666666, + "English,Vietnamese,Spanish": 0.28, + "English,Vietnamese,Chinese": 0.18666666666666668, + "English,Vietnamese,Filipino": 0.24, + "English,Malay,Indonesian": 0.3, + "English,Malay,Spanish": 0.2733333333333333, + "English,Malay,Chinese": 0.22, + "English,Malay,Filipino": 0.28, + "English,Indonesian,Spanish": 0.28, + "English,Indonesian,Chinese": 0.22, + "English,Indonesian,Filipino": 0.28, + "English,Spanish,Chinese": 0.24666666666666667, + "English,Spanish,Filipino": 0.26666666666666666, + "English,Chinese,Filipino": 0.23333333333333334, + "Vietnamese,Malay,Indonesian": 0.30666666666666664, + "Vietnamese,Malay,Spanish": 0.2866666666666667, + "Vietnamese,Malay,Chinese": 0.21333333333333335, + "Vietnamese,Malay,Filipino": 0.29333333333333333, + "Vietnamese,Indonesian,Spanish": 0.29333333333333333, + "Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "Vietnamese,Indonesian,Filipino": 0.28, + "Vietnamese,Spanish,Chinese": 0.23333333333333334, + "Vietnamese,Spanish,Filipino": 0.2733333333333333, + "Vietnamese,Chinese,Filipino": 0.18666666666666668, + "Malay,Indonesian,Spanish": 0.28, + "Malay,Indonesian,Chinese": 0.25333333333333335, + "Malay,Indonesian,Filipino": 0.3333333333333333, + "Malay,Spanish,Chinese": 0.22, + "Malay,Spanish,Filipino": 0.2733333333333333, + "Malay,Chinese,Filipino": 0.22666666666666666, + "Indonesian,Spanish,Chinese": 0.22, + "Indonesian,Spanish,Filipino": 0.26666666666666666, + "Indonesian,Chinese,Filipino": 0.22666666666666666, + "Spanish,Chinese,Filipino": 0.22 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.16, + "English,Vietnamese,Malay,Spanish": 0.18, + "English,Vietnamese,Malay,Chinese": 0.13333333333333333, + "English,Vietnamese,Malay,Filipino": 0.16, + "English,Vietnamese,Indonesian,Spanish": 0.19333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.12, + "English,Vietnamese,Indonesian,Filipino": 0.16666666666666666, + "English,Vietnamese,Spanish,Chinese": 0.14666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.18, + "English,Vietnamese,Chinese,Filipino": 0.11333333333333333, + "English,Malay,Indonesian,Spanish": 0.2, + "English,Malay,Indonesian,Chinese": 0.16, + "English,Malay,Indonesian,Filipino": 0.2, + "English,Malay,Spanish,Chinese": 0.16, + "English,Malay,Spanish,Filipino": 0.17333333333333334, + "English,Malay,Chinese,Filipino": 0.14666666666666667, + "English,Indonesian,Spanish,Chinese": 0.14666666666666667, + "English,Indonesian,Spanish,Filipino": 0.18, + "English,Indonesian,Chinese,Filipino": 0.15333333333333332, + "English,Spanish,Chinese,Filipino": 0.16, + "Vietnamese,Malay,Indonesian,Spanish": 0.2, + "Vietnamese,Malay,Indonesian,Chinese": 0.14666666666666667, + "Vietnamese,Malay,Indonesian,Filipino": 0.21333333333333335, + "Vietnamese,Malay,Spanish,Chinese": 0.15333333333333332, + "Vietnamese,Malay,Spanish,Filipino": 0.18666666666666668, + "Vietnamese,Malay,Chinese,Filipino": 0.12666666666666668, + "Vietnamese,Indonesian,Spanish,Chinese": 0.16, + "Vietnamese,Indonesian,Spanish,Filipino": 0.21333333333333335, + "Vietnamese,Indonesian,Chinese,Filipino": 0.12666666666666668, + "Vietnamese,Spanish,Chinese,Filipino": 0.12666666666666668, + "Malay,Indonesian,Spanish,Chinese": 0.14666666666666667, + "Malay,Indonesian,Spanish,Filipino": 0.19333333333333333, + "Malay,Indonesian,Chinese,Filipino": 0.17333333333333334, + "Malay,Spanish,Chinese,Filipino": 0.14666666666666667, + "Indonesian,Spanish,Chinese,Filipino": 0.14666666666666667 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.14, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.1, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.12666666666666668, + "English,Vietnamese,Malay,Spanish,Chinese": 0.11333333333333333, + "English,Vietnamese,Malay,Spanish,Filipino": 0.12, + "English,Vietnamese,Malay,Chinese,Filipino": 0.08666666666666667, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.11333333333333333, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.14666666666666667, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.08666666666666667, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.09333333333333334, + "English,Malay,Indonesian,Spanish,Chinese": 0.12, + "English,Malay,Indonesian,Spanish,Filipino": 0.13333333333333333, + "English,Malay,Indonesian,Chinese,Filipino": 0.12, + "English,Malay,Spanish,Chinese,Filipino": 0.11333333333333333, + "English,Indonesian,Spanish,Chinese,Filipino": 0.11333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.12, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.15333333333333332, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.1, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.09333333333333334, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.10666666666666667, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.11333333333333333 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.09333333333333334, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.10666666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.07333333333333333, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.07333333333333333, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.08, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.09333333333333334, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.08666666666666667 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.06666666666666667 + } + }, + "AC3_2": 0.31272743176348644, + "AC3_3": 0.24589340994561815, + "AC3_4": 0.1935957009159296, + "AC3_5": 0.15522015797451286, + "AC3_6": 0.1272124755944718, + "AC3_7": 0.10425752852249104 + }, + "prompt_3": { + "overall_acc": 0.2285714285714286, + "language_acc": { + "English": 0.22666666666666666, + "Vietnamese": 0.25333333333333335, + "Malay": 0.21333333333333335, + "Indonesian": 0.21333333333333335, + "Spanish": 0.28, + "Chinese": 0.18666666666666668, + "Filipino": 0.22666666666666666 + }, + "consistency_score_2": 0.3920634920634921, + "consistency_score_3": 0.18171428571428572, + "consistency_score_4": 0.0939047619047619, + "consistency_score_5": 0.05269841269841269, + "consistency_score_6": 0.03142857142857143, + "consistency_score_7": 0.02, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.48, + "English,Malay": 0.36666666666666664, + "English,Indonesian": 0.38666666666666666, + "English,Spanish": 0.44, + "English,Chinese": 0.42, + "English,Filipino": 0.2733333333333333, + "Vietnamese,Malay": 0.38666666666666666, + "Vietnamese,Indonesian": 0.43333333333333335, + "Vietnamese,Spanish": 0.41333333333333333, + "Vietnamese,Chinese": 0.38, + "Vietnamese,Filipino": 0.32666666666666666, + "Malay,Indonesian": 0.5266666666666666, + "Malay,Spanish": 0.44, + "Malay,Chinese": 0.38, + "Malay,Filipino": 0.4, + "Indonesian,Spanish": 0.44, + "Indonesian,Chinese": 0.38666666666666666, + "Indonesian,Filipino": 0.31333333333333335, + "Spanish,Chinese": 0.4, + "Spanish,Filipino": 0.28, + "Chinese,Filipino": 0.36 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.19333333333333333, + "English,Vietnamese,Indonesian": 0.24, + "English,Vietnamese,Spanish": 0.24666666666666667, + "English,Vietnamese,Chinese": 0.21333333333333335, + "English,Vietnamese,Filipino": 0.11333333333333333, + "English,Malay,Indonesian": 0.21333333333333335, + "English,Malay,Spanish": 0.18666666666666668, + "English,Malay,Chinese": 0.19333333333333333, + "English,Malay,Filipino": 0.15333333333333332, + "English,Indonesian,Spanish": 0.21333333333333335, + "English,Indonesian,Chinese": 0.18, + "English,Indonesian,Filipino": 0.1, + "English,Spanish,Chinese": 0.21333333333333335, + "English,Spanish,Filipino": 0.11333333333333333, + "English,Chinese,Filipino": 0.13333333333333333, + "Vietnamese,Malay,Indonesian": 0.25333333333333335, + "Vietnamese,Malay,Spanish": 0.2, + "Vietnamese,Malay,Chinese": 0.18666666666666668, + "Vietnamese,Malay,Filipino": 0.14666666666666667, + "Vietnamese,Indonesian,Spanish": 0.22, + "Vietnamese,Indonesian,Chinese": 0.18666666666666668, + "Vietnamese,Indonesian,Filipino": 0.13333333333333333, + "Vietnamese,Spanish,Chinese": 0.19333333333333333, + "Vietnamese,Spanish,Filipino": 0.12666666666666668, + "Vietnamese,Chinese,Filipino": 0.14, + "Malay,Indonesian,Spanish": 0.2733333333333333, + "Malay,Indonesian,Chinese": 0.22, + "Malay,Indonesian,Filipino": 0.19333333333333333, + "Malay,Spanish,Chinese": 0.21333333333333335, + "Malay,Spanish,Filipino": 0.17333333333333334, + "Malay,Chinese,Filipino": 0.17333333333333334, + "Indonesian,Spanish,Chinese": 0.21333333333333335, + "Indonesian,Spanish,Filipino": 0.14, + "Indonesian,Chinese,Filipino": 0.13333333333333333, + "Spanish,Chinese,Filipino": 0.13333333333333333 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.14, + "English,Vietnamese,Malay,Spanish": 0.10666666666666667, + "English,Vietnamese,Malay,Chinese": 0.10666666666666667, + "English,Vietnamese,Malay,Filipino": 0.06, + "English,Vietnamese,Indonesian,Spanish": 0.14, + "English,Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.06, + "English,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "English,Vietnamese,Spanish,Filipino": 0.04666666666666667, + "English,Vietnamese,Chinese,Filipino": 0.06666666666666667, + "English,Malay,Indonesian,Spanish": 0.12, + "English,Malay,Indonesian,Chinese": 0.10666666666666667, + "English,Malay,Indonesian,Filipino": 0.06666666666666667, + "English,Malay,Spanish,Chinese": 0.11333333333333333, + "English,Malay,Spanish,Filipino": 0.08666666666666667, + "English,Malay,Chinese,Filipino": 0.08666666666666667, + "English,Indonesian,Spanish,Chinese": 0.12666666666666668, + "English,Indonesian,Spanish,Filipino": 0.06, + "English,Indonesian,Chinese,Filipino": 0.04666666666666667, + "English,Spanish,Chinese,Filipino": 0.07333333333333333, + "Vietnamese,Malay,Indonesian,Spanish": 0.13333333333333333, + "Vietnamese,Malay,Indonesian,Chinese": 0.12666666666666668, + "Vietnamese,Malay,Indonesian,Filipino": 0.06666666666666667, + "Vietnamese,Malay,Spanish,Chinese": 0.11333333333333333, + "Vietnamese,Malay,Spanish,Filipino": 0.08666666666666667, + "Vietnamese,Malay,Chinese,Filipino": 0.07333333333333333, + "Vietnamese,Indonesian,Spanish,Chinese": 0.13333333333333333, + "Vietnamese,Indonesian,Spanish,Filipino": 0.07333333333333333, + "Vietnamese,Indonesian,Chinese,Filipino": 0.06, + "Vietnamese,Spanish,Chinese,Filipino": 0.06, + "Malay,Indonesian,Spanish,Chinese": 0.14666666666666667, + "Malay,Indonesian,Spanish,Filipino": 0.10666666666666667, + "Malay,Indonesian,Chinese,Filipino": 0.08, + "Malay,Spanish,Chinese,Filipino": 0.09333333333333334, + "Indonesian,Spanish,Chinese,Filipino": 0.08 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.08, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.07333333333333333, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.03333333333333333, + "English,Vietnamese,Malay,Spanish,Chinese": 0.07333333333333333, + "English,Vietnamese,Malay,Spanish,Filipino": 0.03333333333333333, + "English,Vietnamese,Malay,Chinese,Filipino": 0.04, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.09333333333333334, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.03333333333333333, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.03333333333333333, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.03333333333333333, + "English,Malay,Indonesian,Spanish,Chinese": 0.08, + "English,Malay,Indonesian,Spanish,Filipino": 0.04666666666666667, + "English,Malay,Indonesian,Chinese,Filipino": 0.03333333333333333, + "English,Malay,Spanish,Chinese,Filipino": 0.05333333333333334, + "English,Indonesian,Spanish,Chinese,Filipino": 0.04, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.08666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.04666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.03333333333333333, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.04666666666666667, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.04666666666666667, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.06666666666666667 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.06, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.02, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.02, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.02666666666666667, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.02666666666666667, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.03333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.03333333333333333 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.02 + } + }, + "AC3_2": 0.2887833393761953, + "AC3_3": 0.2024671706629482, + "AC3_4": 0.13311956792766733, + "AC3_5": 0.08564979035974558, + "AC3_6": 0.05525902666634465, + "AC3_7": 0.0367816091806051 + }, + "prompt_4": { + "overall_acc": 0.24666666666666665, + "language_acc": { + "English": 0.28, + "Vietnamese": 0.22, + "Malay": 0.24, + "Indonesian": 0.24, + "Spanish": 0.25333333333333335, + "Chinese": 0.22666666666666666, + "Filipino": 0.26666666666666666 + }, + "consistency_score_2": 0.5425396825396825, + "consistency_score_3": 0.35790476190476195, + "consistency_score_4": 0.25695238095238093, + "consistency_score_5": 0.19460317460317458, + "consistency_score_6": 0.1542857142857143, + "consistency_score_7": 0.12666666666666668, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5133333333333333, + "English,Malay": 0.49333333333333335, + "English,Indonesian": 0.5066666666666667, + "English,Spanish": 0.48, + "English,Chinese": 0.42, + "English,Filipino": 0.49333333333333335, + "Vietnamese,Malay": 0.6666666666666666, + "Vietnamese,Indonesian": 0.6266666666666667, + "Vietnamese,Spanish": 0.72, + "Vietnamese,Chinese": 0.4866666666666667, + "Vietnamese,Filipino": 0.5266666666666666, + "Malay,Indonesian": 0.72, + "Malay,Spanish": 0.6, + "Malay,Chinese": 0.5066666666666667, + "Malay,Filipino": 0.5933333333333334, + "Indonesian,Spanish": 0.48, + "Indonesian,Chinese": 0.5066666666666667, + "Indonesian,Filipino": 0.5266666666666666, + "Spanish,Chinese": 0.4066666666666667, + "Spanish,Filipino": 0.5266666666666666, + "Chinese,Filipino": 0.5933333333333334 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.36666666666666664, + "English,Vietnamese,Indonesian": 0.36666666666666664, + "English,Vietnamese,Spanish": 0.4, + "English,Vietnamese,Chinese": 0.26, + "English,Vietnamese,Filipino": 0.32, + "English,Malay,Indonesian": 0.38666666666666666, + "English,Malay,Spanish": 0.34, + "English,Malay,Chinese": 0.24666666666666667, + "English,Malay,Filipino": 0.34, + "English,Indonesian,Spanish": 0.29333333333333333, + "English,Indonesian,Chinese": 0.2733333333333333, + "English,Indonesian,Filipino": 0.31333333333333335, + "English,Spanish,Chinese": 0.23333333333333334, + "English,Spanish,Filipino": 0.31333333333333335, + "English,Chinese,Filipino": 0.30666666666666664, + "Vietnamese,Malay,Indonesian": 0.52, + "Vietnamese,Malay,Spanish": 0.5133333333333333, + "Vietnamese,Malay,Chinese": 0.35333333333333333, + "Vietnamese,Malay,Filipino": 0.42, + "Vietnamese,Indonesian,Spanish": 0.44, + "Vietnamese,Indonesian,Chinese": 0.3466666666666667, + "Vietnamese,Indonesian,Filipino": 0.38, + "Vietnamese,Spanish,Chinese": 0.35333333333333333, + "Vietnamese,Spanish,Filipino": 0.4, + "Vietnamese,Chinese,Filipino": 0.37333333333333335, + "Malay,Indonesian,Spanish": 0.43333333333333335, + "Malay,Indonesian,Chinese": 0.4, + "Malay,Indonesian,Filipino": 0.44666666666666666, + "Malay,Spanish,Chinese": 0.3, + "Malay,Spanish,Filipino": 0.4, + "Malay,Chinese,Filipino": 0.4066666666666667, + "Indonesian,Spanish,Chinese": 0.26666666666666666, + "Indonesian,Spanish,Filipino": 0.31333333333333335, + "Indonesian,Chinese,Filipino": 0.38, + "Spanish,Chinese,Filipino": 0.32 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.3, + "English,Vietnamese,Malay,Spanish": 0.3, + "English,Vietnamese,Malay,Chinese": 0.19333333333333333, + "English,Vietnamese,Malay,Filipino": 0.24666666666666667, + "English,Vietnamese,Indonesian,Spanish": 0.28, + "English,Vietnamese,Indonesian,Chinese": 0.20666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.24, + "English,Vietnamese,Spanish,Chinese": 0.22666666666666666, + "English,Vietnamese,Spanish,Filipino": 0.25333333333333335, + "English,Vietnamese,Chinese,Filipino": 0.21333333333333335, + "English,Malay,Indonesian,Spanish": 0.24666666666666667, + "English,Malay,Indonesian,Chinese": 0.21333333333333335, + "English,Malay,Indonesian,Filipino": 0.25333333333333335, + "English,Malay,Spanish,Chinese": 0.18, + "English,Malay,Spanish,Filipino": 0.24666666666666667, + "English,Malay,Chinese,Filipino": 0.21333333333333335, + "English,Indonesian,Spanish,Chinese": 0.18, + "English,Indonesian,Spanish,Filipino": 0.19333333333333333, + "English,Indonesian,Chinese,Filipino": 0.22, + "English,Spanish,Chinese,Filipino": 0.18666666666666668, + "Vietnamese,Malay,Indonesian,Spanish": 0.3933333333333333, + "Vietnamese,Malay,Indonesian,Chinese": 0.28, + "Vietnamese,Malay,Indonesian,Filipino": 0.32, + "Vietnamese,Malay,Spanish,Chinese": 0.2866666666666667, + "Vietnamese,Malay,Spanish,Filipino": 0.3333333333333333, + "Vietnamese,Malay,Chinese,Filipino": 0.31333333333333335, + "Vietnamese,Indonesian,Spanish,Chinese": 0.26, + "Vietnamese,Indonesian,Spanish,Filipino": 0.28, + "Vietnamese,Indonesian,Chinese,Filipino": 0.29333333333333333, + "Vietnamese,Spanish,Chinese,Filipino": 0.2866666666666667, + "Malay,Indonesian,Spanish,Chinese": 0.23333333333333334, + "Malay,Indonesian,Spanish,Filipino": 0.2866666666666667, + "Malay,Indonesian,Chinese,Filipino": 0.32666666666666666, + "Malay,Spanish,Chinese,Filipino": 0.2733333333333333, + "Indonesian,Spanish,Chinese,Filipino": 0.23333333333333334 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.23333333333333334, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.16, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.19333333333333333, + "English,Vietnamese,Malay,Spanish,Chinese": 0.18, + "English,Vietnamese,Malay,Spanish,Filipino": 0.20666666666666667, + "English,Vietnamese,Malay,Chinese,Filipino": 0.17333333333333334, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.18, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.18, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.17333333333333334, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.18, + "English,Malay,Indonesian,Spanish,Chinese": 0.14666666666666667, + "English,Malay,Indonesian,Spanish,Filipino": 0.16666666666666666, + "English,Malay,Indonesian,Chinese,Filipino": 0.18, + "English,Malay,Spanish,Chinese,Filipino": 0.16, + "English,Indonesian,Spanish,Chinese,Filipino": 0.14666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.22666666666666666, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.25333333333333335, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.24666666666666667, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.26, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.22666666666666666, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.21333333333333335 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.14666666666666667, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.15333333333333332, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.14, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.16, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.14666666666666667, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.12666666666666668, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.20666666666666667 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.12666666666666668 + } + }, + "AC3_2": 0.3391418610457912, + "AC3_3": 0.2920520898487732, + "AC3_4": 0.2517044880992896, + "AC3_5": 0.21756354911136747, + "AC3_6": 0.1898337291688063, + "AC3_7": 0.1673809523361182 + }, + "prompt_5": { + "overall_acc": 0.2314285714285714, + "language_acc": { + "English": 0.2733333333333333, + "Vietnamese": 0.22, + "Malay": 0.22, + "Indonesian": 0.22666666666666666, + "Spanish": 0.22, + "Chinese": 0.24, + "Filipino": 0.22 + }, + "consistency_score_2": 0.4095238095238095, + "consistency_score_3": 0.211047619047619, + "consistency_score_4": 0.11885714285714286, + "consistency_score_5": 0.06888888888888889, + "consistency_score_6": 0.039047619047619046, + "consistency_score_7": 0.02, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.49333333333333335, + "English,Malay": 0.31333333333333335, + "English,Indonesian": 0.3333333333333333, + "English,Spanish": 0.5, + "English,Chinese": 0.26666666666666666, + "English,Filipino": 0.36666666666666664, + "Vietnamese,Malay": 0.5266666666666666, + "Vietnamese,Indonesian": 0.46, + "Vietnamese,Spanish": 0.6, + "Vietnamese,Chinese": 0.37333333333333335, + "Vietnamese,Filipino": 0.3466666666666667, + "Malay,Indonesian": 0.5333333333333333, + "Malay,Spanish": 0.4533333333333333, + "Malay,Chinese": 0.38666666666666666, + "Malay,Filipino": 0.4, + "Indonesian,Spanish": 0.37333333333333335, + "Indonesian,Chinese": 0.4066666666666667, + "Indonesian,Filipino": 0.44666666666666666, + "Spanish,Chinese": 0.31333333333333335, + "Spanish,Filipino": 0.3333333333333333, + "Chinese,Filipino": 0.37333333333333335 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.21333333333333335, + "English,Vietnamese,Indonesian": 0.21333333333333335, + "English,Vietnamese,Spanish": 0.3466666666666667, + "English,Vietnamese,Chinese": 0.18666666666666668, + "English,Vietnamese,Filipino": 0.20666666666666667, + "English,Malay,Indonesian": 0.16666666666666666, + "English,Malay,Spanish": 0.2, + "English,Malay,Chinese": 0.09333333333333334, + "English,Malay,Filipino": 0.16666666666666666, + "English,Indonesian,Spanish": 0.18666666666666668, + "English,Indonesian,Chinese": 0.14, + "English,Indonesian,Filipino": 0.18, + "English,Spanish,Chinese": 0.16, + "English,Spanish,Filipino": 0.20666666666666667, + "English,Chinese,Filipino": 0.12666666666666668, + "Vietnamese,Malay,Indonesian": 0.30666666666666664, + "Vietnamese,Malay,Spanish": 0.34, + "Vietnamese,Malay,Chinese": 0.22666666666666666, + "Vietnamese,Malay,Filipino": 0.22666666666666666, + "Vietnamese,Indonesian,Spanish": 0.28, + "Vietnamese,Indonesian,Chinese": 0.22666666666666666, + "Vietnamese,Indonesian,Filipino": 0.22, + "Vietnamese,Spanish,Chinese": 0.22666666666666666, + "Vietnamese,Spanish,Filipino": 0.23333333333333334, + "Vietnamese,Chinese,Filipino": 0.2, + "Malay,Indonesian,Spanish": 0.24, + "Malay,Indonesian,Chinese": 0.25333333333333335, + "Malay,Indonesian,Filipino": 0.26666666666666666, + "Malay,Spanish,Chinese": 0.17333333333333334, + "Malay,Spanish,Filipino": 0.18666666666666668, + "Malay,Chinese,Filipino": 0.21333333333333335, + "Indonesian,Spanish,Chinese": 0.18, + "Indonesian,Spanish,Filipino": 0.18, + "Indonesian,Chinese,Filipino": 0.24, + "Spanish,Chinese,Filipino": 0.17333333333333334 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.12666666666666668, + "English,Vietnamese,Malay,Spanish": 0.14, + "English,Vietnamese,Malay,Chinese": 0.07333333333333333, + "English,Vietnamese,Malay,Filipino": 0.11333333333333333, + "English,Vietnamese,Indonesian,Spanish": 0.13333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.1, + "English,Vietnamese,Indonesian,Filipino": 0.11333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.12, + "English,Vietnamese,Spanish,Filipino": 0.15333333333333332, + "English,Vietnamese,Chinese,Filipino": 0.1, + "English,Malay,Indonesian,Spanish": 0.09333333333333334, + "English,Malay,Indonesian,Chinese": 0.06, + "English,Malay,Indonesian,Filipino": 0.1, + "English,Malay,Spanish,Chinese": 0.06, + "English,Malay,Spanish,Filipino": 0.11333333333333333, + "English,Malay,Chinese,Filipino": 0.06666666666666667, + "English,Indonesian,Spanish,Chinese": 0.08666666666666667, + "English,Indonesian,Spanish,Filipino": 0.1, + "English,Indonesian,Chinese,Filipino": 0.08666666666666667, + "English,Spanish,Chinese,Filipino": 0.08666666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.2, + "Vietnamese,Malay,Indonesian,Chinese": 0.15333333333333332, + "Vietnamese,Malay,Indonesian,Filipino": 0.16666666666666666, + "Vietnamese,Malay,Spanish,Chinese": 0.13333333333333333, + "Vietnamese,Malay,Spanish,Filipino": 0.14666666666666667, + "Vietnamese,Malay,Chinese,Filipino": 0.14666666666666667, + "Vietnamese,Indonesian,Spanish,Chinese": 0.14, + "Vietnamese,Indonesian,Spanish,Filipino": 0.14, + "Vietnamese,Indonesian,Chinese,Filipino": 0.15333333333333332, + "Vietnamese,Spanish,Chinese,Filipino": 0.13333333333333333, + "Malay,Indonesian,Spanish,Chinese": 0.11333333333333333, + "Malay,Indonesian,Spanish,Filipino": 0.11333333333333333, + "Malay,Indonesian,Chinese,Filipino": 0.16, + "Malay,Spanish,Chinese,Filipino": 0.11333333333333333, + "Indonesian,Spanish,Chinese,Filipino": 0.12 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.06666666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.04666666666666667, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.07333333333333333, + "English,Vietnamese,Malay,Spanish,Chinese": 0.04, + "English,Vietnamese,Malay,Spanish,Filipino": 0.08, + "English,Vietnamese,Malay,Chinese,Filipino": 0.05333333333333334, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.06, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.07333333333333333, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.06666666666666667, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.06666666666666667, + "English,Malay,Indonesian,Spanish,Chinese": 0.03333333333333333, + "English,Malay,Indonesian,Spanish,Filipino": 0.05333333333333334, + "English,Malay,Indonesian,Chinese,Filipino": 0.04666666666666667, + "English,Malay,Spanish,Chinese,Filipino": 0.04666666666666667, + "English,Indonesian,Spanish,Chinese,Filipino": 0.05333333333333334, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.09333333333333334, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.1, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.12, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.09333333333333334, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.1, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.08 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.02, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.04, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.04, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.03333333333333333, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.04, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.02666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.07333333333333333 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.02 + } + }, + "AC3_2": 0.29573338989230225, + "AC3_3": 0.22076871036151985, + "AC3_4": 0.15705429965220424, + "AC3_5": 0.106173361486845, + "AC3_6": 0.066820925528615, + "AC3_7": 0.03681818180353822 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.2759740259740259, + "language_acc": { + "Vietnamese": 0.2727272727272727, + "Indonesian": 0.30113636363636365, + "Malay": 0.2840909090909091, + "English": 0.23863636363636365, + "Spanish": 0.2784090909090909, + "Filipino": 0.2784090909090909, + "Chinese": 0.2784090909090909 + }, + "consistency_score_2": 0.41260822510822515, + "consistency_score_3": 0.20974025974025978, + "consistency_score_4": 0.11931818181818184, + "consistency_score_5": 0.0725108225108225, + "consistency_score_6": 0.045454545454545456, + "consistency_score_7": 0.028409090909090908, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.5738636363636364, + "Vietnamese,Malay": 0.4318181818181818, + "Vietnamese,English": 0.5681818181818182, + "Vietnamese,Spanish": 0.4090909090909091, + "Vietnamese,Filipino": 0.30113636363636365, + "Vietnamese,Chinese": 0.4772727272727273, + "Indonesian,Malay": 0.5, + "Indonesian,English": 0.48863636363636365, + "Indonesian,Spanish": 0.4431818181818182, + "Indonesian,Filipino": 0.32386363636363635, + "Indonesian,Chinese": 0.3977272727272727, + "Malay,English": 0.3409090909090909, + "Malay,Spanish": 0.4772727272727273, + "Malay,Filipino": 0.4034090909090909, + "Malay,Chinese": 0.3522727272727273, + "English,Spanish": 0.39204545454545453, + "English,Filipino": 0.14204545454545456, + "English,Chinese": 0.5284090909090909, + "Spanish,Filipino": 0.45454545454545453, + "Spanish,Chinese": 0.38636363636363635, + "Filipino,Chinese": 0.2727272727272727 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.32386363636363635, + "Vietnamese,Indonesian,English": 0.36363636363636365, + "Vietnamese,Indonesian,Spanish": 0.2897727272727273, + "Vietnamese,Indonesian,Filipino": 0.19318181818181818, + "Vietnamese,Indonesian,Chinese": 0.2897727272727273, + "Vietnamese,Malay,English": 0.23863636363636365, + "Vietnamese,Malay,Spanish": 0.22727272727272727, + "Vietnamese,Malay,Filipino": 0.16477272727272727, + "Vietnamese,Malay,Chinese": 0.23295454545454544, + "Vietnamese,English,Spanish": 0.22727272727272727, + "Vietnamese,English,Filipino": 0.09090909090909091, + "Vietnamese,English,Chinese": 0.3465909090909091, + "Vietnamese,Spanish,Filipino": 0.19886363636363635, + "Vietnamese,Spanish,Chinese": 0.2159090909090909, + "Vietnamese,Filipino,Chinese": 0.13068181818181818, + "Indonesian,Malay,English": 0.23863636363636365, + "Indonesian,Malay,Spanish": 0.2727272727272727, + "Indonesian,Malay,Filipino": 0.20454545454545456, + "Indonesian,Malay,Chinese": 0.23295454545454544, + "Indonesian,English,Spanish": 0.22727272727272727, + "Indonesian,English,Filipino": 0.09090909090909091, + "Indonesian,English,Chinese": 0.2897727272727273, + "Indonesian,Spanish,Filipino": 0.23295454545454544, + "Indonesian,Spanish,Chinese": 0.20454545454545456, + "Indonesian,Filipino,Chinese": 0.13636363636363635, + "Malay,English,Spanish": 0.17045454545454544, + "Malay,English,Filipino": 0.09090909090909091, + "Malay,English,Chinese": 0.2215909090909091, + "Malay,Spanish,Filipino": 0.2784090909090909, + "Malay,Spanish,Chinese": 0.19886363636363635, + "Malay,Filipino,Chinese": 0.13068181818181818, + "English,Spanish,Filipino": 0.10795454545454546, + "English,Spanish,Chinese": 0.23295454545454544, + "English,Filipino,Chinese": 0.08522727272727272, + "Spanish,Filipino,Chinese": 0.1590909090909091 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.18181818181818182, + "Vietnamese,Indonesian,Malay,Spanish": 0.19886363636363635, + "Vietnamese,Indonesian,Malay,Filipino": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,Chinese": 0.18181818181818182, + "Vietnamese,Indonesian,English,Spanish": 0.17613636363636365, + "Vietnamese,Indonesian,English,Filipino": 0.07386363636363637, + "Vietnamese,Indonesian,English,Chinese": 0.2159090909090909, + "Vietnamese,Indonesian,Spanish,Filipino": 0.1534090909090909, + "Vietnamese,Indonesian,Spanish,Chinese": 0.1590909090909091, + "Vietnamese,Indonesian,Filipino,Chinese": 0.09659090909090909, + "Vietnamese,Malay,English,Spanish": 0.10227272727272728, + "Vietnamese,Malay,English,Filipino": 0.05113636363636364, + "Vietnamese,Malay,English,Chinese": 0.17045454545454544, + "Vietnamese,Malay,Spanish,Filipino": 0.13068181818181818, + "Vietnamese,Malay,Spanish,Chinese": 0.11931818181818182, + "Vietnamese,Malay,Filipino,Chinese": 0.07386363636363637, + "Vietnamese,English,Spanish,Filipino": 0.06818181818181818, + "Vietnamese,English,Spanish,Chinese": 0.1534090909090909, + "Vietnamese,English,Filipino,Chinese": 0.0625, + "Vietnamese,Spanish,Filipino,Chinese": 0.09659090909090909, + "Indonesian,Malay,English,Spanish": 0.125, + "Indonesian,Malay,English,Filipino": 0.0625, + "Indonesian,Malay,English,Chinese": 0.17045454545454544, + "Indonesian,Malay,Spanish,Filipino": 0.1590909090909091, + "Indonesian,Malay,Spanish,Chinese": 0.14204545454545456, + "Indonesian,Malay,Filipino,Chinese": 0.08522727272727272, + "Indonesian,English,Spanish,Filipino": 0.07954545454545454, + "Indonesian,English,Spanish,Chinese": 0.1590909090909091, + "Indonesian,English,Filipino,Chinese": 0.07386363636363637, + "Indonesian,Spanish,Filipino,Chinese": 0.09090909090909091, + "Malay,English,Spanish,Filipino": 0.07386363636363637, + "Malay,English,Spanish,Chinese": 0.10795454545454546, + "Malay,English,Filipino,Chinese": 0.056818181818181816, + "Malay,Spanish,Filipino,Chinese": 0.10795454545454546, + "English,Spanish,Filipino,Chinese": 0.07386363636363637 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.09659090909090909, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.05113636363636364, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.11931818181818182, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.10795454545454546, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.06818181818181818, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.0625, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.11931818181818182, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.056818181818181816, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.07386363636363637, + "Vietnamese,Malay,English,Spanish,Filipino": 0.03977272727272727, + "Vietnamese,Malay,English,Spanish,Chinese": 0.06818181818181818, + "Vietnamese,Malay,English,Filipino,Chinese": 0.03977272727272727, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.0625, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.05113636363636364, + "Indonesian,Malay,English,Spanish,Filipino": 0.05113636363636364, + "Indonesian,Malay,English,Spanish,Chinese": 0.09659090909090909, + "Indonesian,Malay,English,Filipino,Chinese": 0.05113636363636364, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.06818181818181818, + "Indonesian,English,Spanish,Filipino,Chinese": 0.0625, + "Malay,English,Spanish,Filipino,Chinese": 0.045454545454545456 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.03977272727272727, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.06818181818181818, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.03977272727272727, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.056818181818181816, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.045454545454545456, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.028409090909090908, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.03977272727272727 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.028409090909090908 + } + }, + "AC3_2": 0.3307350801958898, + "AC3_3": 0.23834120420122493, + "AC3_4": 0.16660444274299377, + "AC3_5": 0.11484633375747726, + "AC3_6": 0.07805325984715844, + "AC3_7": 0.05151515149822707 + }, + "prompt_2": { + "overall_acc": 0.27029220779220775, + "language_acc": { + "Vietnamese": 0.2727272727272727, + "Indonesian": 0.25, + "Malay": 0.2556818181818182, + "English": 0.2727272727272727, + "Spanish": 0.25, + "Filipino": 0.25, + "Chinese": 0.3409090909090909 + }, + "consistency_score_2": 0.3468614718614719, + "consistency_score_3": 0.13214285714285717, + "consistency_score_4": 0.05064935064935065, + "consistency_score_5": 0.017586580086580077, + "consistency_score_6": 0.004058441558441558, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.4090909090909091, + "Vietnamese,Malay": 0.3125, + "Vietnamese,English": 0.4375, + "Vietnamese,Spanish": 0.36363636363636365, + "Vietnamese,Filipino": 0.25, + "Vietnamese,Chinese": 0.36363636363636365, + "Indonesian,Malay": 0.44886363636363635, + "Indonesian,English": 0.3977272727272727, + "Indonesian,Spanish": 0.4715909090909091, + "Indonesian,Filipino": 0.3806818181818182, + "Indonesian,Chinese": 0.25, + "Malay,English": 0.16477272727272727, + "Malay,Spanish": 0.4943181818181818, + "Malay,Filipino": 0.5227272727272727, + "Malay,Chinese": 0.13636363636363635, + "English,Spanish": 0.3522727272727273, + "English,Filipino": 0.19318181818181818, + "English,Chinese": 0.4715909090909091, + "Spanish,Filipino": 0.4431818181818182, + "Spanish,Chinese": 0.23295454545454544, + "Filipino,Chinese": 0.1875 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.17613636363636365, + "Vietnamese,Indonesian,English": 0.20454545454545456, + "Vietnamese,Indonesian,Spanish": 0.21022727272727273, + "Vietnamese,Indonesian,Filipino": 0.13636363636363635, + "Vietnamese,Indonesian,Chinese": 0.11931818181818182, + "Vietnamese,Malay,English": 0.08522727272727272, + "Vietnamese,Malay,Spanish": 0.18181818181818182, + "Vietnamese,Malay,Filipino": 0.14204545454545456, + "Vietnamese,Malay,Chinese": 0.05113636363636364, + "Vietnamese,English,Spanish": 0.1534090909090909, + "Vietnamese,English,Filipino": 0.07386363636363637, + "Vietnamese,English,Chinese": 0.23863636363636365, + "Vietnamese,Spanish,Filipino": 0.11363636363636363, + "Vietnamese,Spanish,Chinese": 0.09659090909090909, + "Vietnamese,Filipino,Chinese": 0.06818181818181818, + "Indonesian,Malay,English": 0.10795454545454546, + "Indonesian,Malay,Spanish": 0.26136363636363635, + "Indonesian,Malay,Filipino": 0.2727272727272727, + "Indonesian,Malay,Chinese": 0.0625, + "Indonesian,English,Spanish": 0.21022727272727273, + "Indonesian,English,Filipino": 0.09659090909090909, + "Indonesian,English,Chinese": 0.17045454545454544, + "Indonesian,Spanish,Filipino": 0.2159090909090909, + "Indonesian,Spanish,Chinese": 0.125, + "Indonesian,Filipino,Chinese": 0.06818181818181818, + "Malay,English,Spanish": 0.09659090909090909, + "Malay,English,Filipino": 0.09090909090909091, + "Malay,English,Chinese": 0.011363636363636364, + "Malay,Spanish,Filipino": 0.30113636363636365, + "Malay,Spanish,Chinese": 0.0625, + "Malay,Filipino,Chinese": 0.07386363636363637, + "English,Spanish,Filipino": 0.09659090909090909, + "English,Spanish,Chinese": 0.125, + "English,Filipino,Chinese": 0.0625, + "Spanish,Filipino,Chinese": 0.0625 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.06818181818181818, + "Vietnamese,Indonesian,Malay,Spanish": 0.10795454545454546, + "Vietnamese,Indonesian,Malay,Filipino": 0.09659090909090909, + "Vietnamese,Indonesian,Malay,Chinese": 0.022727272727272728, + "Vietnamese,Indonesian,English,Spanish": 0.10795454545454546, + "Vietnamese,Indonesian,English,Filipino": 0.045454545454545456, + "Vietnamese,Indonesian,English,Chinese": 0.08522727272727272, + "Vietnamese,Indonesian,Spanish,Filipino": 0.06818181818181818, + "Vietnamese,Indonesian,Spanish,Chinese": 0.06818181818181818, + "Vietnamese,Indonesian,Filipino,Chinese": 0.028409090909090908, + "Vietnamese,Malay,English,Spanish": 0.05113636363636364, + "Vietnamese,Malay,English,Filipino": 0.045454545454545456, + "Vietnamese,Malay,English,Chinese": 0.005681818181818182, + "Vietnamese,Malay,Spanish,Filipino": 0.07386363636363637, + "Vietnamese,Malay,Spanish,Chinese": 0.022727272727272728, + "Vietnamese,Malay,Filipino,Chinese": 0.028409090909090908, + "Vietnamese,English,Spanish,Filipino": 0.03409090909090909, + "Vietnamese,English,Spanish,Chinese": 0.056818181818181816, + "Vietnamese,English,Filipino,Chinese": 0.03409090909090909, + "Vietnamese,Spanish,Filipino,Chinese": 0.028409090909090908, + "Indonesian,Malay,English,Spanish": 0.06818181818181818, + "Indonesian,Malay,English,Filipino": 0.056818181818181816, + "Indonesian,Malay,English,Chinese": 0.005681818181818182, + "Indonesian,Malay,Spanish,Filipino": 0.16477272727272727, + "Indonesian,Malay,Spanish,Chinese": 0.03409090909090909, + "Indonesian,Malay,Filipino,Chinese": 0.045454545454545456, + "Indonesian,English,Spanish,Filipino": 0.056818181818181816, + "Indonesian,English,Spanish,Chinese": 0.07954545454545454, + "Indonesian,English,Filipino,Chinese": 0.028409090909090908, + "Indonesian,Spanish,Filipino,Chinese": 0.03977272727272727, + "Malay,English,Spanish,Filipino": 0.045454545454545456, + "Malay,English,Spanish,Chinese": 0.0, + "Malay,English,Filipino,Chinese": 0.011363636363636364, + "Malay,Spanish,Filipino,Chinese": 0.03409090909090909, + "English,Spanish,Filipino,Chinese": 0.022727272727272728 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.03977272727272727, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.03409090909090909, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.0, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.045454545454545456, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.011363636363636364, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.017045454545454544, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.022727272727272728, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.045454545454545456, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.011363636363636364, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.017045454545454544, + "Vietnamese,Malay,English,Spanish,Filipino": 0.017045454545454544, + "Vietnamese,Malay,English,Spanish,Chinese": 0.0, + "Vietnamese,Malay,English,Filipino,Chinese": 0.005681818181818182, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.011363636363636364, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.017045454545454544, + "Indonesian,Malay,English,Spanish,Filipino": 0.028409090909090908, + "Indonesian,Malay,English,Spanish,Chinese": 0.0, + "Indonesian,Malay,English,Filipino,Chinese": 0.005681818181818182, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.022727272727272728, + "Indonesian,English,Spanish,Filipino,Chinese": 0.017045454545454544, + "Malay,English,Spanish,Filipino,Chinese": 0.0 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.011363636363636364, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.0, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.0, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.005681818181818182, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.011363636363636364, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.0, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.0 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.0 + } + }, + "AC3_2": 0.3038269270788805, + "AC3_3": 0.1775053304462973, + "AC3_4": 0.08531225979250648, + "AC3_5": 0.03302442387291463, + "AC3_6": 0.007996810878511451, + "AC3_7": 0.0 + }, + "prompt_3": { + "overall_acc": 0.25243506493506496, + "language_acc": { + "Vietnamese": 0.23295454545454544, + "Indonesian": 0.26136363636363635, + "Malay": 0.23295454545454544, + "English": 0.23295454545454544, + "Spanish": 0.23863636363636365, + "Filipino": 0.2840909090909091, + "Chinese": 0.2840909090909091 + }, + "consistency_score_2": 0.34063852813852813, + "consistency_score_3": 0.13360389610389609, + "consistency_score_4": 0.05681818181818183, + "consistency_score_5": 0.024080086580086577, + "consistency_score_6": 0.008116883116883118, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.39204545454545453, + "Vietnamese,Malay": 0.25, + "Vietnamese,English": 0.5795454545454546, + "Vietnamese,Spanish": 0.3465909090909091, + "Vietnamese,Filipino": 0.24431818181818182, + "Vietnamese,Chinese": 0.48863636363636365, + "Indonesian,Malay": 0.44886363636363635, + "Indonesian,English": 0.3693181818181818, + "Indonesian,Spanish": 0.4602272727272727, + "Indonesian,Filipino": 0.3352272727272727, + "Indonesian,Chinese": 0.2897727272727273, + "Malay,English": 0.14772727272727273, + "Malay,Spanish": 0.375, + "Malay,Filipino": 0.3806818181818182, + "Malay,Chinese": 0.13068181818181818, + "English,Spanish": 0.3409090909090909, + "English,Filipino": 0.2159090909090909, + "English,Chinese": 0.5340909090909091, + "Spanish,Filipino": 0.3181818181818182, + "Spanish,Chinese": 0.3068181818181818, + "Filipino,Chinese": 0.19886363636363635 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.1534090909090909, + "Vietnamese,Indonesian,English": 0.26136363636363635, + "Vietnamese,Indonesian,Spanish": 0.19886363636363635, + "Vietnamese,Indonesian,Filipino": 0.10795454545454546, + "Vietnamese,Indonesian,Chinese": 0.21022727272727273, + "Vietnamese,Malay,English": 0.09090909090909091, + "Vietnamese,Malay,Spanish": 0.125, + "Vietnamese,Malay,Filipino": 0.09090909090909091, + "Vietnamese,Malay,Chinese": 0.07954545454545454, + "Vietnamese,English,Spanish": 0.2215909090909091, + "Vietnamese,English,Filipino": 0.125, + "Vietnamese,English,Chinese": 0.3352272727272727, + "Vietnamese,Spanish,Filipino": 0.08522727272727272, + "Vietnamese,Spanish,Chinese": 0.18181818181818182, + "Vietnamese,Filipino,Chinese": 0.09659090909090909, + "Indonesian,Malay,English": 0.10227272727272728, + "Indonesian,Malay,Spanish": 0.2215909090909091, + "Indonesian,Malay,Filipino": 0.17045454545454544, + "Indonesian,Malay,Chinese": 0.06818181818181818, + "Indonesian,English,Spanish": 0.18181818181818182, + "Indonesian,English,Filipino": 0.11931818181818182, + "Indonesian,English,Chinese": 0.20454545454545456, + "Indonesian,Spanish,Filipino": 0.1534090909090909, + "Indonesian,Spanish,Chinese": 0.1534090909090909, + "Indonesian,Filipino,Chinese": 0.09090909090909091, + "Malay,English,Spanish": 0.07954545454545454, + "Malay,English,Filipino": 0.05113636363636364, + "Malay,English,Chinese": 0.03977272727272727, + "Malay,Spanish,Filipino": 0.13636363636363635, + "Malay,Spanish,Chinese": 0.07954545454545454, + "Malay,Filipino,Chinese": 0.045454545454545456, + "English,Spanish,Filipino": 0.056818181818181816, + "English,Spanish,Chinese": 0.19886363636363635, + "English,Filipino,Chinese": 0.08522727272727272, + "Spanish,Filipino,Chinese": 0.07386363636363637 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.06818181818181818, + "Vietnamese,Indonesian,Malay,Spanish": 0.08522727272727272, + "Vietnamese,Indonesian,Malay,Filipino": 0.045454545454545456, + "Vietnamese,Indonesian,Malay,Chinese": 0.05113636363636364, + "Vietnamese,Indonesian,English,Spanish": 0.13636363636363635, + "Vietnamese,Indonesian,English,Filipino": 0.06818181818181818, + "Vietnamese,Indonesian,English,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,Spanish,Filipino": 0.056818181818181816, + "Vietnamese,Indonesian,Spanish,Chinese": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino,Chinese": 0.0625, + "Vietnamese,Malay,English,Spanish": 0.0625, + "Vietnamese,Malay,English,Filipino": 0.028409090909090908, + "Vietnamese,Malay,English,Chinese": 0.03409090909090909, + "Vietnamese,Malay,Spanish,Filipino": 0.045454545454545456, + "Vietnamese,Malay,Spanish,Chinese": 0.05113636363636364, + "Vietnamese,Malay,Filipino,Chinese": 0.028409090909090908, + "Vietnamese,English,Spanish,Filipino": 0.03409090909090909, + "Vietnamese,English,Spanish,Chinese": 0.13636363636363635, + "Vietnamese,English,Filipino,Chinese": 0.056818181818181816, + "Vietnamese,Spanish,Filipino,Chinese": 0.045454545454545456, + "Indonesian,Malay,English,Spanish": 0.0625, + "Indonesian,Malay,English,Filipino": 0.03409090909090909, + "Indonesian,Malay,English,Chinese": 0.028409090909090908, + "Indonesian,Malay,Spanish,Filipino": 0.07386363636363637, + "Indonesian,Malay,Spanish,Chinese": 0.045454545454545456, + "Indonesian,Malay,Filipino,Chinese": 0.022727272727272728, + "Indonesian,English,Spanish,Filipino": 0.03977272727272727, + "Indonesian,English,Spanish,Chinese": 0.10227272727272728, + "Indonesian,English,Filipino,Chinese": 0.056818181818181816, + "Indonesian,Spanish,Filipino,Chinese": 0.045454545454545456, + "Malay,English,Spanish,Filipino": 0.011363636363636364, + "Malay,English,Spanish,Chinese": 0.03409090909090909, + "Malay,English,Filipino,Chinese": 0.011363636363636364, + "Malay,Spanish,Filipino,Chinese": 0.022727272727272728, + "English,Spanish,Filipino,Chinese": 0.028409090909090908 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.05113636363636364, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.017045454545454544, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.022727272727272728, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.028409090909090908, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.03409090909090909, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.017045454545454544, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.022727272727272728, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.07954545454545454, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.045454545454545456, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.028409090909090908, + "Vietnamese,Malay,English,Spanish,Filipino": 0.011363636363636364, + "Vietnamese,Malay,English,Spanish,Chinese": 0.028409090909090908, + "Vietnamese,Malay,English,Filipino,Chinese": 0.011363636363636364, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.017045454545454544, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.022727272727272728, + "Indonesian,Malay,English,Spanish,Filipino": 0.005681818181818182, + "Indonesian,Malay,English,Spanish,Chinese": 0.022727272727272728, + "Indonesian,Malay,English,Filipino,Chinese": 0.005681818181818182, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.011363636363636364, + "Indonesian,English,Spanish,Filipino,Chinese": 0.017045454545454544, + "Malay,English,Spanish,Filipino,Chinese": 0.005681818181818182 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.005681818181818182, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.017045454545454544, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.005681818181818182, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.011363636363636364, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.011363636363636364, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.005681818181818182, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.0 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.0 + } + }, + "AC3_2": 0.28997787107633105, + "AC3_3": 0.17473007433788187, + "AC3_4": 0.09275829154719495, + "AC3_5": 0.043966185462027046, + "AC3_6": 0.015728041422939577, + "AC3_7": 0.0 + }, + "prompt_4": { + "overall_acc": 0.26055194805194803, + "language_acc": { + "Vietnamese": 0.26704545454545453, + "Indonesian": 0.2784090909090909, + "Malay": 0.2840909090909091, + "English": 0.2215909090909091, + "Spanish": 0.2556818181818182, + "Filipino": 0.2556818181818182, + "Chinese": 0.26136363636363635 + }, + "consistency_score_2": 0.34983766233766234, + "consistency_score_3": 0.14545454545454548, + "consistency_score_4": 0.0732142857142857, + "consistency_score_5": 0.04626623376623377, + "consistency_score_6": 0.0349025974025974, + "consistency_score_7": 0.028409090909090908, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.3522727272727273, + "Vietnamese,Malay": 0.4659090909090909, + "Vietnamese,English": 0.45454545454545453, + "Vietnamese,Spanish": 0.48295454545454547, + "Vietnamese,Filipino": 0.24431818181818182, + "Vietnamese,Chinese": 0.24431818181818182, + "Indonesian,Malay": 0.4090909090909091, + "Indonesian,English": 0.38636363636363635, + "Indonesian,Spanish": 0.29545454545454547, + "Indonesian,Filipino": 0.4147727272727273, + "Indonesian,Chinese": 0.4034090909090909, + "Malay,English": 0.38636363636363635, + "Malay,Spanish": 0.35795454545454547, + "Malay,Filipino": 0.3125, + "Malay,Chinese": 0.2556818181818182, + "English,Spanish": 0.4090909090909091, + "English,Filipino": 0.2727272727272727, + "English,Chinese": 0.3352272727272727, + "Spanish,Filipino": 0.2159090909090909, + "Spanish,Chinese": 0.25, + "Filipino,Chinese": 0.3977272727272727 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.2159090909090909, + "Vietnamese,Indonesian,English": 0.17613636363636365, + "Vietnamese,Indonesian,Spanish": 0.18181818181818182, + "Vietnamese,Indonesian,Filipino": 0.13068181818181818, + "Vietnamese,Indonesian,Chinese": 0.11363636363636363, + "Vietnamese,Malay,English": 0.2215909090909091, + "Vietnamese,Malay,Spanish": 0.25, + "Vietnamese,Malay,Filipino": 0.125, + "Vietnamese,Malay,Chinese": 0.10227272727272728, + "Vietnamese,English,Spanish": 0.2727272727272727, + "Vietnamese,English,Filipino": 0.11363636363636363, + "Vietnamese,English,Chinese": 0.13636363636363635, + "Vietnamese,Spanish,Filipino": 0.10227272727272728, + "Vietnamese,Spanish,Chinese": 0.10795454545454546, + "Vietnamese,Filipino,Chinese": 0.10795454545454546, + "Indonesian,Malay,English": 0.17613636363636365, + "Indonesian,Malay,Spanish": 0.16477272727272727, + "Indonesian,Malay,Filipino": 0.17045454545454544, + "Indonesian,Malay,Chinese": 0.1534090909090909, + "Indonesian,English,Spanish": 0.1590909090909091, + "Indonesian,English,Filipino": 0.14772727272727273, + "Indonesian,English,Chinese": 0.1590909090909091, + "Indonesian,Spanish,Filipino": 0.10795454545454546, + "Indonesian,Spanish,Chinese": 0.10795454545454546, + "Indonesian,Filipino,Chinese": 0.19318181818181818, + "Malay,English,Spanish": 0.1875, + "Malay,English,Filipino": 0.13068181818181818, + "Malay,English,Chinese": 0.14772727272727273, + "Malay,Spanish,Filipino": 0.09090909090909091, + "Malay,Spanish,Chinese": 0.09659090909090909, + "Malay,Filipino,Chinese": 0.13068181818181818, + "English,Spanish,Filipino": 0.07386363636363637, + "English,Spanish,Chinese": 0.09659090909090909, + "English,Filipino,Chinese": 0.13068181818181818, + "Spanish,Filipino,Chinese": 0.10795454545454546 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.11931818181818182, + "Vietnamese,Indonesian,Malay,Spanish": 0.125, + "Vietnamese,Indonesian,Malay,Filipino": 0.07954545454545454, + "Vietnamese,Indonesian,Malay,Chinese": 0.07386363636363637, + "Vietnamese,Indonesian,English,Spanish": 0.13068181818181818, + "Vietnamese,Indonesian,English,Filipino": 0.0625, + "Vietnamese,Indonesian,English,Chinese": 0.06818181818181818, + "Vietnamese,Indonesian,Spanish,Filipino": 0.0625, + "Vietnamese,Indonesian,Spanish,Chinese": 0.056818181818181816, + "Vietnamese,Indonesian,Filipino,Chinese": 0.07386363636363637, + "Vietnamese,Malay,English,Spanish": 0.1534090909090909, + "Vietnamese,Malay,English,Filipino": 0.08522727272727272, + "Vietnamese,Malay,English,Chinese": 0.06818181818181818, + "Vietnamese,Malay,Spanish,Filipino": 0.0625, + "Vietnamese,Malay,Spanish,Chinese": 0.0625, + "Vietnamese,Malay,Filipino,Chinese": 0.07386363636363637, + "Vietnamese,English,Spanish,Filipino": 0.0625, + "Vietnamese,English,Spanish,Chinese": 0.07954545454545454, + "Vietnamese,English,Filipino,Chinese": 0.05113636363636364, + "Vietnamese,Spanish,Filipino,Chinese": 0.03977272727272727, + "Indonesian,Malay,English,Spanish": 0.10227272727272728, + "Indonesian,Malay,English,Filipino": 0.06818181818181818, + "Indonesian,Malay,English,Chinese": 0.07386363636363637, + "Indonesian,Malay,Spanish,Filipino": 0.06818181818181818, + "Indonesian,Malay,Spanish,Chinese": 0.06818181818181818, + "Indonesian,Malay,Filipino,Chinese": 0.08522727272727272, + "Indonesian,English,Spanish,Filipino": 0.05113636363636364, + "Indonesian,English,Spanish,Chinese": 0.05113636363636364, + "Indonesian,English,Filipino,Chinese": 0.07386363636363637, + "Indonesian,Spanish,Filipino,Chinese": 0.0625, + "Malay,English,Spanish,Filipino": 0.045454545454545456, + "Malay,English,Spanish,Chinese": 0.056818181818181816, + "Malay,English,Filipino,Chinese": 0.07386363636363637, + "Malay,Spanish,Filipino,Chinese": 0.05113636363636364, + "English,Spanish,Filipino,Chinese": 0.03977272727272727 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.09659090909090909, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.05113636363636364, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.05113636363636364, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.05113636363636364, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.045454545454545456, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.056818181818181816, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.045454545454545456, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.05113636363636364, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.03977272727272727, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.03409090909090909, + "Vietnamese,Malay,English,Spanish,Filipino": 0.045454545454545456, + "Vietnamese,Malay,English,Spanish,Chinese": 0.05113636363636364, + "Vietnamese,Malay,English,Filipino,Chinese": 0.05113636363636364, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.03977272727272727, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.03409090909090909, + "Indonesian,Malay,English,Spanish,Filipino": 0.03977272727272727, + "Indonesian,Malay,English,Spanish,Chinese": 0.03977272727272727, + "Indonesian,Malay,English,Filipino,Chinese": 0.03977272727272727, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.045454545454545456, + "Indonesian,English,Spanish,Filipino,Chinese": 0.028409090909090908, + "Malay,English,Spanish,Filipino,Chinese": 0.03409090909090909 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.03977272727272727, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.03977272727272727, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.03977272727272727, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.03409090909090909, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.028409090909090908, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.03409090909090909, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.028409090909090908 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.028409090909090908 + } + }, + "AC3_2": 0.29866459997870176, + "AC3_3": 0.18668896073334287, + "AC3_4": 0.11430829624146072, + "AC3_5": 0.07857915891069081, + "AC3_6": 0.061558976716712815, + "AC3_7": 0.05123212459922631 + }, + "prompt_5": { + "overall_acc": 0.2556818181818182, + "language_acc": { + "Vietnamese": 0.2556818181818182, + "Indonesian": 0.25, + "Malay": 0.26136363636363635, + "English": 0.23863636363636365, + "Spanish": 0.2784090909090909, + "Filipino": 0.23295454545454544, + "Chinese": 0.2727272727272727 + }, + "consistency_score_2": 0.3484848484848484, + "consistency_score_3": 0.1431818181818182, + "consistency_score_4": 0.06542207792207791, + "consistency_score_5": 0.03246753246753246, + "consistency_score_6": 0.015422077922077924, + "consistency_score_7": 0.005681818181818182, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.2840909090909091, + "Vietnamese,Malay": 0.4772727272727273, + "Vietnamese,English": 0.4659090909090909, + "Vietnamese,Spanish": 0.5227272727272727, + "Vietnamese,Filipino": 0.14772727272727273, + "Vietnamese,Chinese": 0.19318181818181818, + "Indonesian,Malay": 0.3352272727272727, + "Indonesian,English": 0.42045454545454547, + "Indonesian,Spanish": 0.2727272727272727, + "Indonesian,Filipino": 0.4318181818181818, + "Indonesian,Chinese": 0.3977272727272727, + "Malay,English": 0.3693181818181818, + "Malay,Spanish": 0.45454545454545453, + "Malay,Filipino": 0.29545454545454547, + "Malay,Chinese": 0.24431818181818182, + "English,Spanish": 0.4659090909090909, + "English,Filipino": 0.30113636363636365, + "English,Chinese": 0.3693181818181818, + "Spanish,Filipino": 0.2215909090909091, + "Spanish,Chinese": 0.25, + "Filipino,Chinese": 0.3977272727272727 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.16477272727272727, + "Vietnamese,Indonesian,English": 0.17613636363636365, + "Vietnamese,Indonesian,Spanish": 0.14204545454545456, + "Vietnamese,Indonesian,Filipino": 0.07954545454545454, + "Vietnamese,Indonesian,Chinese": 0.07954545454545454, + "Vietnamese,Malay,English": 0.25, + "Vietnamese,Malay,Spanish": 0.30113636363636365, + "Vietnamese,Malay,Filipino": 0.07386363636363637, + "Vietnamese,Malay,Chinese": 0.06818181818181818, + "Vietnamese,English,Spanish": 0.2897727272727273, + "Vietnamese,English,Filipino": 0.08522727272727272, + "Vietnamese,English,Chinese": 0.13068181818181818, + "Vietnamese,Spanish,Filipino": 0.07386363636363637, + "Vietnamese,Spanish,Chinese": 0.09659090909090909, + "Vietnamese,Filipino,Chinese": 0.06818181818181818, + "Indonesian,Malay,English": 0.14204545454545456, + "Indonesian,Malay,Spanish": 0.16477272727272727, + "Indonesian,Malay,Filipino": 0.13068181818181818, + "Indonesian,Malay,Chinese": 0.10227272727272728, + "Indonesian,English,Spanish": 0.16477272727272727, + "Indonesian,English,Filipino": 0.20454545454545456, + "Indonesian,English,Chinese": 0.19886363636363635, + "Indonesian,Spanish,Filipino": 0.10227272727272728, + "Indonesian,Spanish,Chinese": 0.09090909090909091, + "Indonesian,Filipino,Chinese": 0.23863636363636365, + "Malay,English,Spanish": 0.24431818181818182, + "Malay,English,Filipino": 0.11363636363636363, + "Malay,English,Chinese": 0.11931818181818182, + "Malay,Spanish,Filipino": 0.11931818181818182, + "Malay,Spanish,Chinese": 0.10795454545454546, + "Malay,Filipino,Chinese": 0.13068181818181818, + "English,Spanish,Filipino": 0.11363636363636363, + "English,Spanish,Chinese": 0.14772727272727273, + "English,Filipino,Chinese": 0.1875, + "Spanish,Filipino,Chinese": 0.10795454545454546 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.09659090909090909, + "Vietnamese,Indonesian,Malay,Spanish": 0.10795454545454546, + "Vietnamese,Indonesian,Malay,Filipino": 0.03977272727272727, + "Vietnamese,Indonesian,Malay,Chinese": 0.028409090909090908, + "Vietnamese,Indonesian,English,Spanish": 0.09090909090909091, + "Vietnamese,Indonesian,English,Filipino": 0.0625, + "Vietnamese,Indonesian,English,Chinese": 0.056818181818181816, + "Vietnamese,Indonesian,Spanish,Filipino": 0.045454545454545456, + "Vietnamese,Indonesian,Spanish,Chinese": 0.028409090909090908, + "Vietnamese,Indonesian,Filipino,Chinese": 0.03977272727272727, + "Vietnamese,Malay,English,Spanish": 0.17613636363636365, + "Vietnamese,Malay,English,Filipino": 0.045454545454545456, + "Vietnamese,Malay,English,Chinese": 0.045454545454545456, + "Vietnamese,Malay,Spanish,Filipino": 0.045454545454545456, + "Vietnamese,Malay,Spanish,Chinese": 0.045454545454545456, + "Vietnamese,Malay,Filipino,Chinese": 0.03977272727272727, + "Vietnamese,English,Spanish,Filipino": 0.05113636363636364, + "Vietnamese,English,Spanish,Chinese": 0.07386363636363637, + "Vietnamese,English,Filipino,Chinese": 0.045454545454545456, + "Vietnamese,Spanish,Filipino,Chinese": 0.03977272727272727, + "Indonesian,Malay,English,Spanish": 0.10795454545454546, + "Indonesian,Malay,English,Filipino": 0.056818181818181816, + "Indonesian,Malay,English,Chinese": 0.056818181818181816, + "Indonesian,Malay,Spanish,Filipino": 0.0625, + "Indonesian,Malay,Spanish,Chinese": 0.05113636363636364, + "Indonesian,Malay,Filipino,Chinese": 0.06818181818181818, + "Indonesian,English,Spanish,Filipino": 0.06818181818181818, + "Indonesian,English,Spanish,Chinese": 0.0625, + "Indonesian,English,Filipino,Chinese": 0.13636363636363635, + "Indonesian,Spanish,Filipino,Chinese": 0.06818181818181818, + "Malay,English,Spanish,Filipino": 0.06818181818181818, + "Malay,English,Spanish,Chinese": 0.07386363636363637, + "Malay,English,Filipino,Chinese": 0.07386363636363637, + "Malay,Spanish,Filipino,Chinese": 0.056818181818181816, + "English,Spanish,Filipino,Chinese": 0.07386363636363637 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.07386363636363637, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.028409090909090908, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.017045454545454544, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.03409090909090909, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.017045454545454544, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.022727272727272728, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.03409090909090909, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.017045454545454544, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.028409090909090908, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.022727272727272728, + "Vietnamese,Malay,English,Spanish,Filipino": 0.03409090909090909, + "Vietnamese,Malay,English,Spanish,Chinese": 0.03409090909090909, + "Vietnamese,Malay,English,Filipino,Chinese": 0.022727272727272728, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.028409090909090908, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.028409090909090908, + "Indonesian,Malay,English,Spanish,Filipino": 0.03977272727272727, + "Indonesian,Malay,English,Spanish,Chinese": 0.03409090909090909, + "Indonesian,Malay,English,Filipino,Chinese": 0.03977272727272727, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.03977272727272727, + "Indonesian,English,Spanish,Filipino,Chinese": 0.045454545454545456, + "Malay,English,Spanish,Filipino,Chinese": 0.03977272727272727 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.022727272727272728, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.005681818181818182, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.011363636363636364, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.017045454545454544, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.011363636363636364, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.017045454545454544, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.022727272727272728 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.005681818181818182 + } + }, + "AC3_2": 0.29495582782236807, + "AC3_3": 0.1835664335204112, + "AC3_4": 0.1041858166786486, + "AC3_5": 0.05761843788013199, + "AC3_6": 0.029089548165644484, + "AC3_7": 0.011116600786260526 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.32038834951456313 + }, + "prompt_2": { + "accuracy": 0.2912621359223301 + }, + "prompt_3": { + "accuracy": 0.30097087378640774 + }, + "prompt_4": { + "accuracy": 0.27184466019417475 + }, + "prompt_5": { + "accuracy": 0.2815533980582524 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.26666666666666666 + }, + "prompt_2": { + "accuracy": 0.2 + }, + "prompt_3": { + "accuracy": 0.3047619047619048 + }, + "prompt_4": { + "accuracy": 0.22857142857142856 + }, + "prompt_5": { + "accuracy": 0.2857142857142857 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.29906542056074764 + }, + "prompt_2": { + "accuracy": 0.2523364485981308 + }, + "prompt_3": { + "accuracy": 0.2523364485981308 + }, + "prompt_4": { + "accuracy": 0.22429906542056074 + }, + "prompt_5": { + "accuracy": 0.2616822429906542 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.18, + "category_acc": { + "brand": 0.1, + "demographics": 0.2, + "biology": 0.1, + "history": 0.3333333333333333, + "literature": 0.0, + "politics": 0.1, + "culture": 0.2, + "film": 0.3, + "law": 0.2, + "geography": 0.2 + } + }, + "prompt_2": { + "accuracy": 0.27, + "category_acc": { + "brand": 0.3, + "demographics": 0.6, + "biology": 0.3, + "history": 0.13333333333333333, + "literature": 0.2, + "politics": 0.2, + "culture": 0.2, + "film": 0.3, + "law": 0.4, + "geography": 0.3 + } + }, + "prompt_3": { + "accuracy": 0.28, + "category_acc": { + "brand": 0.5, + "demographics": 0.6, + "biology": 0.0, + "history": 0.13333333333333333, + "literature": 0.4, + "politics": 0.1, + "culture": 0.2, + "film": 0.4, + "law": 0.5, + "geography": 0.2 + } + }, + "prompt_4": { + "accuracy": 0.24, + "category_acc": { + "brand": 0.1, + "demographics": 0.6, + "biology": 0.4, + "history": 0.2, + "literature": 0.1, + "politics": 0.1, + "culture": 0.2, + "film": 0.3, + "law": 0.3, + "geography": 0.3 + } + }, + "prompt_5": { + "accuracy": 0.11, + "category_acc": { + "brand": 0.0, + "demographics": 0.0, + "biology": 0.2, + "history": 0.13333333333333333, + "literature": 0.1, + "politics": 0.0, + "culture": 0.1, + "film": 0.1, + "law": 0.2, + "geography": 0.2 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.07248912975789758 + }, + "prompt_2": { + "bleu_score": 0.06497362991930647 + }, + "prompt_3": { + "bleu_score": 0.06198404186724674 + }, + "prompt_4": { + "bleu_score": 0.05405168319767091 + }, + "prompt_5": { + "bleu_score": 0.04584217954886731 + } }, "indommlu": { "prompt_1": -1, @@ -11890,244 +104120,1689 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.07692003672500233 + }, + "prompt_2": { + "bleu_score": 0.06718051530104757 + }, + "prompt_3": { + "bleu_score": 0.0671747898873427 + }, + "prompt_4": { + "bleu_score": 0.07826796506401824 + }, + "prompt_5": { + "bleu_score": 0.039087715748873174 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.06453957664021964 + }, + "prompt_2": { + "bleu_score": 0.053163767071868546 + }, + "prompt_3": { + "bleu_score": 0.05176664146658031 + }, + "prompt_4": { + "bleu_score": 0.06817467987602832 + }, + "prompt_5": { + "bleu_score": 0.042225809505108826 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.039762617080741594 + }, + "prompt_2": { + "bleu_score": 0.05593395687214599 + }, + "prompt_3": { + "bleu_score": 0.054601907354379434 + }, + "prompt_4": { + "bleu_score": 0.05472829487488319 + }, + "prompt_5": { + "bleu_score": 0.052523096185344835 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.07753538344405285 + }, + "prompt_2": { + "bleu_score": 0.06734087090013723 + }, + "prompt_3": { + "bleu_score": 0.0660582091155768 + }, + "prompt_4": { + "bleu_score": 0.07723223967687388 + }, + "prompt_5": { + "bleu_score": 0.042509582336678946 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2707117852975496 + }, + "prompt_2": { + "accuracy": 0.24970828471411902 + }, + "prompt_3": { + "accuracy": 0.26254375729288215 + }, + "prompt_4": { + "accuracy": 0.2532088681446908 + }, + "prompt_5": { + "accuracy": 0.27887981330221706 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.27179120486235253, + "category_acc": { + "high_school_european_history": 0.25, + "business_ethics": 0.2727272727272727, + "clinical_knowledge": 0.2537878787878788, + "medical_genetics": 0.26262626262626265, + "high_school_us_history": 0.21674876847290642, + "high_school_physics": 0.37333333333333335, + "high_school_world_history": 0.2330508474576271, + "virology": 0.24242424242424243, + "high_school_microeconomics": 0.28270042194092826, + "econometrics": 0.22123893805309736, + "college_computer_science": 0.3838383838383838, + "high_school_biology": 0.2783171521035599, + "abstract_algebra": 0.18181818181818182, + "professional_accounting": 0.25622775800711745, + "philosophy": 0.2967741935483871, + "professional_medicine": 0.3726937269372694, + "nutrition": 0.2721311475409836, + "global_facts": 0.25252525252525254, + "machine_learning": 0.1891891891891892, + "security_studies": 0.35655737704918034, + "public_relations": 0.22018348623853212, + "professional_psychology": 0.2553191489361702, + "prehistory": 0.2755417956656347, + "anatomy": 0.3880597014925373, + "human_sexuality": 0.34615384615384615, + "college_medicine": 0.29651162790697677, + "high_school_government_and_politics": 0.296875, + "college_chemistry": 0.32323232323232326, + "logical_fallacies": 0.30246913580246915, + "high_school_geography": 0.27918781725888325, + "elementary_mathematics": 0.23342175066312998, + "human_aging": 0.17567567567567569, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.24448529411764705, + "formal_logic": 0.28, + "high_school_statistics": 0.3488372093023256, + "international_law": 0.36666666666666664, + "high_school_mathematics": 0.241635687732342, + "high_school_computer_science": 0.26262626262626265, + "conceptual_physics": 0.21794871794871795, + "miscellaneous": 0.2608695652173913, + "high_school_chemistry": 0.2722772277227723, + "marketing": 0.2575107296137339, + "professional_law": 0.27071102413568165, + "management": 0.2647058823529412, + "college_physics": 0.21782178217821782, + "jurisprudence": 0.205607476635514, + "world_religions": 0.2647058823529412, + "sociology": 0.25, + "us_foreign_policy": 0.3838383838383838, + "high_school_macroeconomics": 0.2750642673521851, + "computer_security": 0.20202020202020202, + "moral_scenarios": 0.2740492170022371, + "moral_disputes": 0.25507246376811593, + "electrical_engineering": 0.3125, + "astronomy": 0.3708609271523179, + "college_biology": 0.25874125874125875 + } + }, + "prompt_2": { + "accuracy": 0.26235252055774044, + "category_acc": { + "high_school_european_history": 0.25, + "business_ethics": 0.2828282828282828, + "clinical_knowledge": 0.24242424242424243, + "medical_genetics": 0.35353535353535354, + "high_school_us_history": 0.22167487684729065, + "high_school_physics": 0.2733333333333333, + "high_school_world_history": 0.2457627118644068, + "virology": 0.30303030303030304, + "high_school_microeconomics": 0.2320675105485232, + "econometrics": 0.21238938053097345, + "college_computer_science": 0.3434343434343434, + "high_school_biology": 0.2686084142394822, + "abstract_algebra": 0.20202020202020202, + "professional_accounting": 0.24199288256227758, + "philosophy": 0.27741935483870966, + "professional_medicine": 0.23616236162361623, + "nutrition": 0.26229508196721313, + "global_facts": 0.21212121212121213, + "machine_learning": 0.26126126126126126, + "security_studies": 0.36475409836065575, + "public_relations": 0.1743119266055046, + "professional_psychology": 0.26677577741407527, + "prehistory": 0.29411764705882354, + "anatomy": 0.27611940298507465, + "human_sexuality": 0.2692307692307692, + "college_medicine": 0.29651162790697677, + "high_school_government_and_politics": 0.2864583333333333, + "college_chemistry": 0.24242424242424243, + "logical_fallacies": 0.2777777777777778, + "high_school_geography": 0.24873096446700507, + "elementary_mathematics": 0.23076923076923078, + "human_aging": 0.23423423423423423, + "college_mathematics": 0.29292929292929293, + "high_school_psychology": 0.20955882352941177, + "formal_logic": 0.272, + "high_school_statistics": 0.32558139534883723, + "international_law": 0.3416666666666667, + "high_school_mathematics": 0.26394052044609667, + "high_school_computer_science": 0.31313131313131315, + "conceptual_physics": 0.24358974358974358, + "miscellaneous": 0.25191815856777494, + "high_school_chemistry": 0.24752475247524752, + "marketing": 0.296137339055794, + "professional_law": 0.27071102413568165, + "management": 0.20588235294117646, + "college_physics": 0.24752475247524752, + "jurisprudence": 0.3644859813084112, + "world_religions": 0.25882352941176473, + "sociology": 0.255, + "us_foreign_policy": 0.31313131313131315, + "high_school_macroeconomics": 0.2467866323907455, + "computer_security": 0.2828282828282828, + "moral_scenarios": 0.25838926174496646, + "moral_disputes": 0.2579710144927536, + "electrical_engineering": 0.2152777777777778, + "astronomy": 0.2847682119205298, + "college_biology": 0.24475524475524477 + } + }, + "prompt_3": { + "accuracy": 0.2635681086878799, + "category_acc": { + "high_school_european_history": 0.2865853658536585, + "business_ethics": 0.25252525252525254, + "clinical_knowledge": 0.25757575757575757, + "medical_genetics": 0.31313131313131315, + "high_school_us_history": 0.22660098522167488, + "high_school_physics": 0.32666666666666666, + "high_school_world_history": 0.2754237288135593, + "virology": 0.21818181818181817, + "high_school_microeconomics": 0.2616033755274262, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.3333333333333333, + "high_school_biology": 0.2750809061488673, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.2206405693950178, + "philosophy": 0.3064516129032258, + "professional_medicine": 0.24723247232472326, + "nutrition": 0.2819672131147541, + "global_facts": 0.29292929292929293, + "machine_learning": 0.22522522522522523, + "security_studies": 0.36885245901639346, + "public_relations": 0.1926605504587156, + "professional_psychology": 0.23731587561374795, + "prehistory": 0.30030959752321984, + "anatomy": 0.29850746268656714, + "human_sexuality": 0.3, + "college_medicine": 0.2558139534883721, + "high_school_government_and_politics": 0.2864583333333333, + "college_chemistry": 0.25252525252525254, + "logical_fallacies": 0.30246913580246915, + "high_school_geography": 0.28426395939086296, + "elementary_mathematics": 0.20689655172413793, + "human_aging": 0.2072072072072072, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.21323529411764705, + "formal_logic": 0.296, + "high_school_statistics": 0.3581395348837209, + "international_law": 0.275, + "high_school_mathematics": 0.24907063197026022, + "high_school_computer_science": 0.31313131313131315, + "conceptual_physics": 0.26495726495726496, + "miscellaneous": 0.25191815856777494, + "high_school_chemistry": 0.2623762376237624, + "marketing": 0.296137339055794, + "professional_law": 0.26157860404435745, + "management": 0.27450980392156865, + "college_physics": 0.21782178217821782, + "jurisprudence": 0.29906542056074764, + "world_religions": 0.3176470588235294, + "sociology": 0.235, + "us_foreign_policy": 0.32323232323232326, + "high_school_macroeconomics": 0.2467866323907455, + "computer_security": 0.20202020202020202, + "moral_scenarios": 0.23937360178970918, + "moral_disputes": 0.2608695652173913, + "electrical_engineering": 0.2569444444444444, + "astronomy": 0.3509933774834437, + "college_biology": 0.3006993006993007 + } + }, + "prompt_4": { + "accuracy": 0.2664998212370397, + "category_acc": { + "high_school_european_history": 0.2621951219512195, + "business_ethics": 0.2828282828282828, + "clinical_knowledge": 0.23484848484848486, + "medical_genetics": 0.30303030303030304, + "high_school_us_history": 0.22167487684729065, + "high_school_physics": 0.28, + "high_school_world_history": 0.25, + "virology": 0.24848484848484848, + "high_school_microeconomics": 0.25738396624472576, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.36363636363636365, + "high_school_biology": 0.24271844660194175, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.25622775800711745, + "philosophy": 0.2903225806451613, + "professional_medicine": 0.25461254612546125, + "nutrition": 0.3081967213114754, + "global_facts": 0.29292929292929293, + "machine_learning": 0.2702702702702703, + "security_studies": 0.29098360655737704, + "public_relations": 0.23853211009174313, + "professional_psychology": 0.24058919803600654, + "prehistory": 0.29102167182662536, + "anatomy": 0.34328358208955223, + "human_sexuality": 0.34615384615384615, + "college_medicine": 0.26744186046511625, + "high_school_government_and_politics": 0.3020833333333333, + "college_chemistry": 0.30303030303030304, + "logical_fallacies": 0.2839506172839506, + "high_school_geography": 0.26903553299492383, + "elementary_mathematics": 0.22811671087533156, + "human_aging": 0.21621621621621623, + "college_mathematics": 0.32323232323232326, + "high_school_psychology": 0.2665441176470588, + "formal_logic": 0.288, + "high_school_statistics": 0.26976744186046514, + "international_law": 0.3, + "high_school_mathematics": 0.25650557620817843, + "high_school_computer_science": 0.2828282828282828, + "conceptual_physics": 0.24786324786324787, + "miscellaneous": 0.2672634271099744, + "high_school_chemistry": 0.27722772277227725, + "marketing": 0.3090128755364807, + "professional_law": 0.2628832354859752, + "management": 0.2549019607843137, + "college_physics": 0.19801980198019803, + "jurisprudence": 0.24299065420560748, + "world_religions": 0.24705882352941178, + "sociology": 0.25, + "us_foreign_policy": 0.35353535353535354, + "high_school_macroeconomics": 0.2827763496143959, + "computer_security": 0.18181818181818182, + "moral_scenarios": 0.2494407158836689, + "moral_disputes": 0.24057971014492754, + "electrical_engineering": 0.2986111111111111, + "astronomy": 0.36423841059602646, + "college_biology": 0.3006993006993007 + } + }, + "prompt_5": { + "accuracy": 0.2679299249195567, + "category_acc": { + "high_school_european_history": 0.2621951219512195, + "business_ethics": 0.2727272727272727, + "clinical_knowledge": 0.24621212121212122, + "medical_genetics": 0.2828282828282828, + "high_school_us_history": 0.20689655172413793, + "high_school_physics": 0.23333333333333334, + "high_school_world_history": 0.2754237288135593, + "virology": 0.21818181818181817, + "high_school_microeconomics": 0.28270042194092826, + "econometrics": 0.26548672566371684, + "college_computer_science": 0.3838383838383838, + "high_school_biology": 0.2621359223300971, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.2313167259786477, + "philosophy": 0.3, + "professional_medicine": 0.30996309963099633, + "nutrition": 0.3081967213114754, + "global_facts": 0.2222222222222222, + "machine_learning": 0.1981981981981982, + "security_studies": 0.2786885245901639, + "public_relations": 0.29357798165137616, + "professional_psychology": 0.26677577741407527, + "prehistory": 0.28173374613003094, + "anatomy": 0.373134328358209, + "human_sexuality": 0.3230769230769231, + "college_medicine": 0.26744186046511625, + "high_school_government_and_politics": 0.3020833333333333, + "college_chemistry": 0.3333333333333333, + "logical_fallacies": 0.32098765432098764, + "high_school_geography": 0.28426395939086296, + "elementary_mathematics": 0.20954907161803712, + "human_aging": 0.18468468468468469, + "college_mathematics": 0.30303030303030304, + "high_school_psychology": 0.2610294117647059, + "formal_logic": 0.24, + "high_school_statistics": 0.2558139534883721, + "international_law": 0.2833333333333333, + "high_school_mathematics": 0.2527881040892193, + "high_school_computer_science": 0.3434343434343434, + "conceptual_physics": 0.25213675213675213, + "miscellaneous": 0.2544757033248082, + "high_school_chemistry": 0.30198019801980197, + "marketing": 0.30042918454935624, + "professional_law": 0.25962165688193084, + "management": 0.27450980392156865, + "college_physics": 0.19801980198019803, + "jurisprudence": 0.24299065420560748, + "world_religions": 0.25882352941176473, + "sociology": 0.26, + "us_foreign_policy": 0.3333333333333333, + "high_school_macroeconomics": 0.2930591259640103, + "computer_security": 0.21212121212121213, + "moral_scenarios": 0.27181208053691275, + "moral_disputes": 0.2318840579710145, + "electrical_engineering": 0.2986111111111111, + "astronomy": 0.32450331125827814, + "college_biology": 0.2727272727272727 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.26448736998514116 + }, + "prompt_2": { + "accuracy": 0.26300148588410105 + }, + "prompt_3": { + "accuracy": 0.2578008915304606 + }, + "prompt_4": { + "accuracy": 0.2578008915304606 + }, + "prompt_5": { + "accuracy": 0.24665676077265974 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2627646326276463, + "category_acc": { + "computer_network": 0.125, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.21428571428571427, + "college_physics": 0.375, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.43478260869565216, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.40476190476190477, + "metrology_engineer": 0.3103448275862069, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.08333333333333333, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.08333333333333333, + "middle_school_biology": 0.4230769230769231, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.25, + "business_administration": 0.23684210526315788, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.2413793103448276, + "education_science": 0.17647058823529413, + "teacher_qualification": 0.16326530612244897, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.375, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.2857142857142857, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.2962962962962963, + "law": 0.13793103448275862, + "chinese_language_and_literature": 0.17857142857142858, + "art_studies": 0.21052631578947367, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.14285714285714285, + "high_school_chinese": 0.16666666666666666, + "high_school_history": 0.12, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.2692307692307692, + "sports_science": 0.20833333333333334, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.5416666666666666, + "clinical_medicine": 0.18518518518518517, + "urban_and_rural_planner": 0.29411764705882354, + "accountant": 0.24074074074074073, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.16666666666666666, + "physician": 0.2962962962962963 + } + }, + "prompt_2": { + "accuracy": 0.2640099626400996, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.5416666666666666, + "computer_architecture": 0.23076923076923078, + "college_programming": 0.11904761904761904, + "college_physics": 0.375, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.391304347826087, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.21428571428571427, + "metrology_engineer": 0.2413793103448276, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.125, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.10714285714285714, + "college_economics": 0.16666666666666666, + "business_administration": 0.2631578947368421, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.13793103448275862, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.16326530612244897, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.23076923076923078, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.2962962962962963, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.25, + "art_studies": 0.2894736842105263, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.2, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.23076923076923078, + "sports_science": 0.2916666666666667, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.6666666666666666, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.21568627450980393, + "accountant": 0.37037037037037035, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.18518518518518517, + "physician": 0.2222222222222222 + } + }, + "prompt_3": { + "accuracy": 0.25093399750934, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.07692307692307693, + "college_programming": 0.09523809523809523, + "college_physics": 0.375, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.047619047619047616, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.08333333333333333, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.16666666666666666, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.2692307692307692, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.25, + "college_economics": 0.18333333333333332, + "business_administration": 0.2894736842105263, + "marxism": 0.25, + "mao_zedong_thought": 0.20689655172413793, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.16326530612244897, + "high_school_politics": 0.5, + "high_school_geography": 0.16666666666666666, + "middle_school_politics": 0.23076923076923078, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.25925925925925924, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.21428571428571427, + "art_studies": 0.21052631578947367, + "professional_tour_guide": 0.23529411764705882, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.375, + "high_school_history": 0.28, + "middle_school_history": 0.18518518518518517, + "civil_servant": 0.3269230769230769, + "sports_science": 0.2916666666666667, + "plant_protection": 0.18518518518518517, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.19607843137254902, + "accountant": 0.2962962962962963, + "fire_engineer": 0.19444444444444445, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.24074074074074073, + "physician": 0.3148148148148148 + } + }, + "prompt_4": { + "accuracy": 0.2590286425902864, + "category_acc": { + "computer_network": 0.041666666666666664, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.21428571428571427, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.1724137931034483, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.125, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.16666666666666666, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.2, + "business_administration": 0.18421052631578946, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.06896551724137931, + "education_science": 0.20588235294117646, + "teacher_qualification": 0.24489795918367346, + "high_school_politics": 0.08333333333333333, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.4230769230769231, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.3333333333333333, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.39285714285714285, + "art_studies": 0.21052631578947367, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.25, + "high_school_chinese": 0.25, + "high_school_history": 0.04, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.19230769230769232, + "sports_science": 0.25, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.5, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.19607843137254902, + "accountant": 0.24074074074074073, + "fire_engineer": 0.19444444444444445, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.2962962962962963, + "physician": 0.3148148148148148 + } + }, + "prompt_5": { + "accuracy": 0.263387297633873, + "category_acc": { + "computer_network": 0.125, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.23809523809523808, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.1724137931034483, + "high_school_mathematics": 0.43478260869565216, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.16666666666666666, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.14285714285714285, + "college_economics": 0.23333333333333334, + "business_administration": 0.18421052631578946, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.20588235294117646, + "teacher_qualification": 0.2857142857142857, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.25, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.39285714285714285, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.3333333333333333, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.21052631578947367, + "professional_tour_guide": 0.23529411764705882, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.25, + "high_school_history": 0.08, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.2692307692307692, + "sports_science": 0.25, + "plant_protection": 0.14814814814814814, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.25925925925925924, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.2777777777777778, + "physician": 0.2777777777777778 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.23297491039426524 + }, + "prompt_2": { + "accuracy": 0.21863799283154123 + }, + "prompt_3": { + "accuracy": 0.2616487455197133 + }, + "prompt_4": { + "accuracy": 0.2114695340501792 + }, + "prompt_5": { + "accuracy": 0.21505376344086022 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.25194266965981693, + "category_acc": { + "agronomy": 0.2485207100591716, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.23780487804878048, + "arts": 0.24375, + "astronomy": 0.23636363636363636, + "business_ethics": 0.22966507177033493, + "chinese_civil_service_exam": 0.25, + "chinese_driving_rule": 0.26717557251908397, + "chinese_food_culture": 0.22794117647058823, + "chinese_foreign_policy": 0.21495327102803738, + "chinese_history": 0.25077399380804954, + "chinese_literature": 0.24019607843137256, + "chinese_teacher_qualification": 0.27932960893854747, + "clinical_knowledge": 0.25316455696202533, + "college_actuarial_science": 0.19811320754716982, + "college_education": 0.21495327102803738, + "college_engineering_hydrology": 0.22641509433962265, + "college_law": 0.28703703703703703, + "college_mathematics": 0.1523809523809524, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.25274725274725274, + "computer_science": 0.2647058823529412, + "computer_security": 0.23976608187134502, + "conceptual_physics": 0.24489795918367346, + "construction_project_management": 0.22302158273381295, + "economics": 0.3018867924528302, + "education": 0.26380368098159507, + "electrical_engineering": 0.27906976744186046, + "elementary_chinese": 0.2222222222222222, + "elementary_commonsense": 0.30303030303030304, + "elementary_information_and_technology": 0.25210084033613445, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.23703703703703705, + "food_science": 0.3146853146853147, + "genetics": 0.2159090909090909, + "global_facts": 0.24161073825503357, + "high_school_biology": 0.22485207100591717, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.19491525423728814, + "high_school_mathematics": 0.24390243902439024, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.17482517482517482, + "human_sexuality": 0.2698412698412698, + "international_law": 0.2702702702702703, + "journalism": 0.26744186046511625, + "jurisprudence": 0.26520681265206814, + "legal_and_moral_basis": 0.27102803738317754, + "logical": 0.22764227642276422, + "machine_learning": 0.29508196721311475, + "management": 0.28095238095238095, + "marketing": 0.3, + "marxist_theory": 0.2804232804232804, + "modern_chinese": 0.23275862068965517, + "nutrition": 0.30344827586206896, + "philosophy": 0.2761904761904762, + "professional_accounting": 0.21714285714285714, + "professional_law": 0.22274881516587677, + "professional_medicine": 0.2553191489361702, + "professional_psychology": 0.1939655172413793, + "public_relations": 0.2413793103448276, + "security_study": 0.28888888888888886, + "sociology": 0.2743362831858407, + "sports_science": 0.2545454545454545, + "traditional_chinese_medicine": 0.25405405405405407, + "virology": 0.28402366863905326, + "world_history": 0.22981366459627328, + "world_religions": 0.25625 + } + }, + "prompt_2": { + "accuracy": 0.2562597133482991, + "category_acc": { + "agronomy": 0.28994082840236685, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.21341463414634146, + "arts": 0.2875, + "astronomy": 0.22424242424242424, + "business_ethics": 0.28708133971291866, + "chinese_civil_service_exam": 0.24375, + "chinese_driving_rule": 0.26717557251908397, + "chinese_food_culture": 0.22058823529411764, + "chinese_foreign_policy": 0.27102803738317754, + "chinese_history": 0.2693498452012384, + "chinese_literature": 0.24019607843137256, + "chinese_teacher_qualification": 0.27932960893854747, + "clinical_knowledge": 0.24050632911392406, + "college_actuarial_science": 0.18867924528301888, + "college_education": 0.21495327102803738, + "college_engineering_hydrology": 0.14150943396226415, + "college_law": 0.28703703703703703, + "college_mathematics": 0.2, + "college_medical_statistics": 0.25471698113207547, + "college_medicine": 0.2783882783882784, + "computer_science": 0.24019607843137256, + "computer_security": 0.2982456140350877, + "conceptual_physics": 0.2857142857142857, + "construction_project_management": 0.2589928057553957, + "economics": 0.3081761006289308, + "education": 0.2392638036809816, + "electrical_engineering": 0.2558139534883721, + "elementary_chinese": 0.24603174603174602, + "elementary_commonsense": 0.2676767676767677, + "elementary_information_and_technology": 0.27310924369747897, + "elementary_mathematics": 0.2608695652173913, + "ethnology": 0.3037037037037037, + "food_science": 0.25874125874125875, + "genetics": 0.19318181818181818, + "global_facts": 0.28859060402684567, + "high_school_biology": 0.2781065088757396, + "high_school_chemistry": 0.25757575757575757, + "high_school_geography": 0.19491525423728814, + "high_school_mathematics": 0.27439024390243905, + "high_school_physics": 0.2636363636363636, + "high_school_politics": 0.22377622377622378, + "human_sexuality": 0.2857142857142857, + "international_law": 0.24864864864864866, + "journalism": 0.26744186046511625, + "jurisprudence": 0.25790754257907544, + "legal_and_moral_basis": 0.2616822429906542, + "logical": 0.2032520325203252, + "machine_learning": 0.26229508196721313, + "management": 0.2714285714285714, + "marketing": 0.2611111111111111, + "marxist_theory": 0.2804232804232804, + "modern_chinese": 0.21551724137931033, + "nutrition": 0.2620689655172414, + "philosophy": 0.2761904761904762, + "professional_accounting": 0.26285714285714284, + "professional_law": 0.24170616113744076, + "professional_medicine": 0.23404255319148937, + "professional_psychology": 0.25, + "public_relations": 0.25862068965517243, + "security_study": 0.2814814814814815, + "sociology": 0.27876106194690264, + "sports_science": 0.23636363636363636, + "traditional_chinese_medicine": 0.2648648648648649, + "virology": 0.27218934911242604, + "world_history": 0.2484472049689441, + "world_religions": 0.24375 + } + }, + "prompt_3": { + "accuracy": 0.2522880331548955, + "category_acc": { + "agronomy": 0.2485207100591716, + "anatomy": 0.21621621621621623, + "ancient_chinese": 0.23170731707317074, + "arts": 0.28125, + "astronomy": 0.2545454545454545, + "business_ethics": 0.2631578947368421, + "chinese_civil_service_exam": 0.225, + "chinese_driving_rule": 0.2748091603053435, + "chinese_food_culture": 0.25, + "chinese_foreign_policy": 0.3177570093457944, + "chinese_history": 0.2724458204334365, + "chinese_literature": 0.25980392156862747, + "chinese_teacher_qualification": 0.2737430167597765, + "clinical_knowledge": 0.23628691983122363, + "college_actuarial_science": 0.19811320754716982, + "college_education": 0.19626168224299065, + "college_engineering_hydrology": 0.19811320754716982, + "college_law": 0.3055555555555556, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.26373626373626374, + "computer_science": 0.2549019607843137, + "computer_security": 0.25146198830409355, + "conceptual_physics": 0.2585034013605442, + "construction_project_management": 0.23741007194244604, + "economics": 0.27044025157232704, + "education": 0.2147239263803681, + "electrical_engineering": 0.2558139534883721, + "elementary_chinese": 0.23809523809523808, + "elementary_commonsense": 0.23737373737373738, + "elementary_information_and_technology": 0.2857142857142857, + "elementary_mathematics": 0.21739130434782608, + "ethnology": 0.2222222222222222, + "food_science": 0.2937062937062937, + "genetics": 0.22727272727272727, + "global_facts": 0.2483221476510067, + "high_school_biology": 0.27218934911242604, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.19491525423728814, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.26573426573426573, + "human_sexuality": 0.23015873015873015, + "international_law": 0.25405405405405407, + "journalism": 0.27906976744186046, + "jurisprudence": 0.25790754257907544, + "legal_and_moral_basis": 0.2803738317757009, + "logical": 0.2032520325203252, + "machine_learning": 0.26229508196721313, + "management": 0.26666666666666666, + "marketing": 0.26666666666666666, + "marxist_theory": 0.20634920634920634, + "modern_chinese": 0.3017241379310345, + "nutrition": 0.25517241379310346, + "philosophy": 0.23809523809523808, + "professional_accounting": 0.24, + "professional_law": 0.2890995260663507, + "professional_medicine": 0.2074468085106383, + "professional_psychology": 0.28879310344827586, + "public_relations": 0.2988505747126437, + "security_study": 0.3037037037037037, + "sociology": 0.2743362831858407, + "sports_science": 0.2545454545454545, + "traditional_chinese_medicine": 0.23783783783783785, + "virology": 0.24260355029585798, + "world_history": 0.2422360248447205, + "world_religions": 0.20625 + } + }, + "prompt_4": { + "accuracy": 0.2506475565532723, + "category_acc": { + "agronomy": 0.2603550295857988, + "anatomy": 0.22972972972972974, + "ancient_chinese": 0.2073170731707317, + "arts": 0.225, + "astronomy": 0.296969696969697, + "business_ethics": 0.2535885167464115, + "chinese_civil_service_exam": 0.225, + "chinese_driving_rule": 0.2595419847328244, + "chinese_food_culture": 0.22058823529411764, + "chinese_foreign_policy": 0.2803738317757009, + "chinese_history": 0.2260061919504644, + "chinese_literature": 0.2696078431372549, + "chinese_teacher_qualification": 0.2849162011173184, + "clinical_knowledge": 0.2742616033755274, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.22429906542056074, + "college_engineering_hydrology": 0.2169811320754717, + "college_law": 0.3148148148148148, + "college_mathematics": 0.1619047619047619, + "college_medical_statistics": 0.24528301886792453, + "college_medicine": 0.2490842490842491, + "computer_science": 0.2549019607843137, + "computer_security": 0.29239766081871343, + "conceptual_physics": 0.23809523809523808, + "construction_project_management": 0.26618705035971224, + "economics": 0.27672955974842767, + "education": 0.22085889570552147, + "electrical_engineering": 0.22674418604651161, + "elementary_chinese": 0.19444444444444445, + "elementary_commonsense": 0.2828282828282828, + "elementary_information_and_technology": 0.2605042016806723, + "elementary_mathematics": 0.30869565217391304, + "ethnology": 0.25925925925925924, + "food_science": 0.2727272727272727, + "genetics": 0.23295454545454544, + "global_facts": 0.21476510067114093, + "high_school_biology": 0.27218934911242604, + "high_school_chemistry": 0.20454545454545456, + "high_school_geography": 0.22033898305084745, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.24545454545454545, + "high_school_politics": 0.21678321678321677, + "human_sexuality": 0.2698412698412698, + "international_law": 0.25405405405405407, + "journalism": 0.25, + "jurisprudence": 0.25304136253041365, + "legal_and_moral_basis": 0.24299065420560748, + "logical": 0.24390243902439024, + "machine_learning": 0.3114754098360656, + "management": 0.23333333333333334, + "marketing": 0.2611111111111111, + "marxist_theory": 0.2857142857142857, + "modern_chinese": 0.20689655172413793, + "nutrition": 0.27586206896551724, + "philosophy": 0.23809523809523808, + "professional_accounting": 0.26285714285714284, + "professional_law": 0.2843601895734597, + "professional_medicine": 0.23670212765957446, + "professional_psychology": 0.21551724137931033, + "public_relations": 0.22988505747126436, + "security_study": 0.23703703703703705, + "sociology": 0.26548672566371684, + "sports_science": 0.2787878787878788, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.26627218934911245, + "world_history": 0.22981366459627328, + "world_religions": 0.26875 + } + }, + "prompt_5": { + "accuracy": 0.2526333966499741, + "category_acc": { + "agronomy": 0.28402366863905326, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.22560975609756098, + "arts": 0.225, + "astronomy": 0.22424242424242424, + "business_ethics": 0.2631578947368421, + "chinese_civil_service_exam": 0.28125, + "chinese_driving_rule": 0.24427480916030533, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.2523364485981308, + "chinese_history": 0.23529411764705882, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.25139664804469275, + "clinical_knowledge": 0.25738396624472576, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.17757009345794392, + "college_engineering_hydrology": 0.22641509433962265, + "college_law": 0.3055555555555556, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.2358490566037736, + "college_medicine": 0.23443223443223443, + "computer_science": 0.23529411764705882, + "computer_security": 0.2807017543859649, + "conceptual_physics": 0.2653061224489796, + "construction_project_management": 0.2517985611510791, + "economics": 0.27672955974842767, + "education": 0.2883435582822086, + "electrical_engineering": 0.23255813953488372, + "elementary_chinese": 0.17857142857142858, + "elementary_commonsense": 0.26262626262626265, + "elementary_information_and_technology": 0.25630252100840334, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.26666666666666666, + "food_science": 0.24475524475524477, + "genetics": 0.2556818181818182, + "global_facts": 0.22818791946308725, + "high_school_biology": 0.25443786982248523, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.211864406779661, + "high_school_mathematics": 0.22560975609756098, + "high_school_physics": 0.20909090909090908, + "high_school_politics": 0.25874125874125875, + "human_sexuality": 0.29365079365079366, + "international_law": 0.2648648648648649, + "journalism": 0.2441860465116279, + "jurisprudence": 0.2725060827250608, + "legal_and_moral_basis": 0.2616822429906542, + "logical": 0.2764227642276423, + "machine_learning": 0.3524590163934426, + "management": 0.23333333333333334, + "marketing": 0.22777777777777777, + "marxist_theory": 0.2804232804232804, + "modern_chinese": 0.19827586206896552, + "nutrition": 0.2827586206896552, + "philosophy": 0.2857142857142857, + "professional_accounting": 0.2742857142857143, + "professional_law": 0.27488151658767773, + "professional_medicine": 0.24468085106382978, + "professional_psychology": 0.23706896551724138, + "public_relations": 0.25862068965517243, + "security_study": 0.2740740740740741, + "sociology": 0.24778761061946902, + "sports_science": 0.23636363636363636, + "traditional_chinese_medicine": 0.2756756756756757, + "virology": 0.26627218934911245, + "world_history": 0.2670807453416149, + "world_religions": 0.26875 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24242424242424243 + }, + "prompt_2": { + "accuracy": 0.18181818181818182 + }, + "prompt_3": { + "accuracy": 0.12121212121212122 + }, + "prompt_4": { + "accuracy": 0.2727272727272727 + }, + "prompt_5": { + "accuracy": 0.24242424242424243 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.15454545454545454 + }, + "prompt_2": { + "accuracy": 0.11136363636363636 + }, + "prompt_3": { + "accuracy": 0.12045454545454545 + }, + "prompt_4": { + "accuracy": 0.15454545454545454 + }, + "prompt_5": { + "accuracy": 0.15454545454545454 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3430508474576271 + }, + "prompt_2": { + "accuracy": 0.3325423728813559 + }, + "prompt_3": { + "accuracy": 0.3213559322033898 + }, + "prompt_4": { + "accuracy": 0.34 + }, + "prompt_5": { + "accuracy": 0.34440677966101696 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.27786088257292446 + }, + "prompt_2": { + "accuracy": 0.29768137621540763 + }, + "prompt_3": { + "accuracy": 0.29468960359012714 + }, + "prompt_4": { + "accuracy": 0.27299925205684367 + }, + "prompt_5": { + "accuracy": 0.2819745699326851 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3596276335129838 + }, + "prompt_2": { + "accuracy": 0.3444390004899559 + }, + "prompt_3": { + "accuracy": 0.3512983831455169 + }, + "prompt_4": { + "accuracy": 0.35864772170504655 + }, + "prompt_5": { + "accuracy": 0.3468887800097991 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.20708500246015654, + "rouge2": 0.06559723811622607, + "rougeL": 0.16037315643415484, + "avg_rouge": 0.1443517990035125 + }, + "prompt_2": { + "rouge1": 0.18954507712597332, + "rouge2": 0.059050234501540864, + "rougeL": 0.14566119018411847, + "avg_rouge": 0.13141883393721088 + }, + "prompt_3": { + "rouge1": 0.153825837106142, + "rouge2": 0.045061029430159404, + "rougeL": 0.12129155012650203, + "avg_rouge": 0.10672613888760114 + }, + "prompt_4": { + "rouge1": 0.20094662523236545, + "rouge2": 0.06376961391715008, + "rougeL": 0.15574785708691388, + "avg_rouge": 0.14015469874547648 + }, + "prompt_5": { + "rouge1": 0.17822855705326188, + "rouge2": 0.05576964640513887, + "rougeL": 0.1400374512637407, + "avg_rouge": 0.12467855157404716 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.20007688039955962, + "rouge2": 0.05335804975779555, + "rougeL": 0.14785790977677035, + "avg_rouge": 0.13376427997804186 + }, + "prompt_2": { + "rouge1": 0.21120851600214696, + "rouge2": 0.057245357337426646, + "rougeL": 0.15535965185988396, + "avg_rouge": 0.14127117506648587 + }, + "prompt_3": { + "rouge1": 0.2112716856729785, + "rouge2": 0.05770946168395422, + "rougeL": 0.15538268808417005, + "avg_rouge": 0.14145461181370092 + }, + "prompt_4": { + "rouge1": 0.2086362444037295, + "rouge2": 0.057526481552556304, + "rougeL": 0.15419209708250917, + "avg_rouge": 0.140118274346265 + }, + "prompt_5": { + "rouge1": 0.17118387735342586, + "rouge2": 0.04314537927614836, + "rougeL": 0.13291313913576316, + "avg_rouge": 0.11574746525511247 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4541284403669725 + }, + "prompt_2": { + "accuracy": 0.49770642201834864 + }, + "prompt_3": { + "accuracy": 0.5286697247706422 + }, + "prompt_4": { + "accuracy": 0.5298165137614679 + }, + "prompt_5": { + "accuracy": 0.5745412844036697 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5023969319271333 + }, + "prompt_2": { + "accuracy": 0.5186960690316395 + }, + "prompt_3": { + "accuracy": 0.5292425695110259 + }, + "prompt_4": { + "accuracy": 0.6395014381591563 + }, + "prompt_5": { + "accuracy": 0.6903163950143816 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4955 + }, + "prompt_2": { + "accuracy": 0.524 + }, + "prompt_3": { + "accuracy": 0.506 + }, + "prompt_4": { + "accuracy": 0.492 + }, + "prompt_5": { + "accuracy": 0.465 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3665 + }, + "prompt_2": { + "accuracy": 0.3335 + }, + "prompt_3": { + "accuracy": 0.354 + }, + "prompt_4": { + "accuracy": 0.3425 + }, + "prompt_5": { + "accuracy": 0.354 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4935 + }, + "prompt_2": { + "accuracy": 0.5165 + }, + "prompt_3": { + "accuracy": 0.5295 + }, + "prompt_4": { + "accuracy": 0.509 + }, + "prompt_5": { + "accuracy": 0.494 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.36619718309859156 + }, + "prompt_2": { + "accuracy": 0.39436619718309857 + }, + "prompt_3": { + "accuracy": 0.4225352112676056 + }, + "prompt_4": { + "accuracy": 0.43661971830985913 + }, + "prompt_5": { + "accuracy": 0.4647887323943662 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.51985559566787 + }, + "prompt_2": { + "accuracy": 0.516245487364621 + }, + "prompt_3": { + "accuracy": 0.5126353790613718 + }, + "prompt_4": { + "accuracy": 0.5018050541516246 + }, + "prompt_5": { + "accuracy": 0.516245487364621 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5343137254901961 + }, + "prompt_2": { + "accuracy": 0.5514705882352942 + }, + "prompt_3": { + "accuracy": 0.5318627450980392 + }, + "prompt_4": { + "accuracy": 0.41911764705882354 + }, + "prompt_5": { + "accuracy": 0.5245098039215687 + } } }, "five_shot": { "cross_mmlu": { - "prompt_1": -1 + "prompt_1": { + "overall_acc": 0.259047619047619, + "language_acc": { + "English": 0.20666666666666667, + "Vietnamese": 0.26, + "Malay": 0.29333333333333333, + "Indonesian": 0.24666666666666667, + "Spanish": 0.32666666666666666, + "Chinese": 0.23333333333333334, + "Filipino": 0.24666666666666667 + }, + "consistency_score_2": 0.3809523809523809, + "consistency_score_3": 0.17752380952380956, + "consistency_score_4": 0.09276190476190477, + "consistency_score_5": 0.054603174603174605, + "consistency_score_6": 0.036190476190476197, + "consistency_score_7": 0.02666666666666667, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.38, + "English,Malay": 0.36666666666666664, + "English,Indonesian": 0.44666666666666666, + "English,Spanish": 0.4, + "English,Chinese": 0.38, + "English,Filipino": 0.36, + "Vietnamese,Malay": 0.38, + "Vietnamese,Indonesian": 0.42, + "Vietnamese,Spanish": 0.32666666666666666, + "Vietnamese,Chinese": 0.4, + "Vietnamese,Filipino": 0.4266666666666667, + "Malay,Indonesian": 0.4066666666666667, + "Malay,Spanish": 0.44666666666666666, + "Malay,Chinese": 0.38, + "Malay,Filipino": 0.4, + "Indonesian,Spanish": 0.32666666666666666, + "Indonesian,Chinese": 0.37333333333333335, + "Indonesian,Filipino": 0.32666666666666666, + "Spanish,Chinese": 0.37333333333333335, + "Spanish,Filipino": 0.3333333333333333, + "Chinese,Filipino": 0.3466666666666667 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.15333333333333332, + "English,Vietnamese,Indonesian": 0.2, + "English,Vietnamese,Spanish": 0.14666666666666667, + "English,Vietnamese,Chinese": 0.18666666666666668, + "English,Vietnamese,Filipino": 0.2, + "English,Malay,Indonesian": 0.2, + "English,Malay,Spanish": 0.20666666666666667, + "English,Malay,Chinese": 0.18, + "English,Malay,Filipino": 0.18, + "English,Indonesian,Spanish": 0.18666666666666668, + "English,Indonesian,Chinese": 0.18666666666666668, + "English,Indonesian,Filipino": 0.17333333333333334, + "English,Spanish,Chinese": 0.17333333333333334, + "English,Spanish,Filipino": 0.16666666666666666, + "English,Chinese,Filipino": 0.15333333333333332, + "Vietnamese,Malay,Indonesian": 0.21333333333333335, + "Vietnamese,Malay,Spanish": 0.18, + "Vietnamese,Malay,Chinese": 0.17333333333333334, + "Vietnamese,Malay,Filipino": 0.22, + "Vietnamese,Indonesian,Spanish": 0.16, + "Vietnamese,Indonesian,Chinese": 0.22, + "Vietnamese,Indonesian,Filipino": 0.19333333333333333, + "Vietnamese,Spanish,Chinese": 0.16, + "Vietnamese,Spanish,Filipino": 0.16666666666666666, + "Vietnamese,Chinese,Filipino": 0.16666666666666666, + "Malay,Indonesian,Spanish": 0.18, + "Malay,Indonesian,Chinese": 0.19333333333333333, + "Malay,Indonesian,Filipino": 0.19333333333333333, + "Malay,Spanish,Chinese": 0.2, + "Malay,Spanish,Filipino": 0.20666666666666667, + "Malay,Chinese,Filipino": 0.16, + "Indonesian,Spanish,Chinese": 0.14666666666666667, + "Indonesian,Spanish,Filipino": 0.12666666666666668, + "Indonesian,Chinese,Filipino": 0.14, + "Spanish,Chinese,Filipino": 0.12 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.09333333333333334, + "English,Vietnamese,Malay,Spanish": 0.08666666666666667, + "English,Vietnamese,Malay,Chinese": 0.08, + "English,Vietnamese,Malay,Filipino": 0.11333333333333333, + "English,Vietnamese,Indonesian,Spanish": 0.09333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.11333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.08, + "English,Vietnamese,Spanish,Filipino": 0.08666666666666667, + "English,Vietnamese,Chinese,Filipino": 0.08666666666666667, + "English,Malay,Indonesian,Spanish": 0.1, + "English,Malay,Indonesian,Chinese": 0.11333333333333333, + "English,Malay,Indonesian,Filipino": 0.10666666666666667, + "English,Malay,Spanish,Chinese": 0.09333333333333334, + "English,Malay,Spanish,Filipino": 0.11333333333333333, + "English,Malay,Chinese,Filipino": 0.08666666666666667, + "English,Indonesian,Spanish,Chinese": 0.08, + "English,Indonesian,Spanish,Filipino": 0.08666666666666667, + "English,Indonesian,Chinese,Filipino": 0.08, + "English,Spanish,Chinese,Filipino": 0.06666666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.09333333333333334, + "Vietnamese,Malay,Indonesian,Chinese": 0.10666666666666667, + "Vietnamese,Malay,Indonesian,Filipino": 0.12666666666666668, + "Vietnamese,Malay,Spanish,Chinese": 0.09333333333333334, + "Vietnamese,Malay,Spanish,Filipino": 0.11333333333333333, + "Vietnamese,Malay,Chinese,Filipino": 0.1, + "Vietnamese,Indonesian,Spanish,Chinese": 0.1, + "Vietnamese,Indonesian,Spanish,Filipino": 0.08666666666666667, + "Vietnamese,Indonesian,Chinese,Filipino": 0.08666666666666667, + "Vietnamese,Spanish,Chinese,Filipino": 0.07333333333333333, + "Malay,Indonesian,Spanish,Chinese": 0.1, + "Malay,Indonesian,Spanish,Filipino": 0.09333333333333334, + "Malay,Indonesian,Chinese,Filipino": 0.08, + "Malay,Spanish,Chinese,Filipino": 0.07333333333333333, + "Indonesian,Spanish,Chinese,Filipino": 0.04666666666666667 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.05333333333333334, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.06, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.07333333333333333, + "English,Vietnamese,Malay,Spanish,Chinese": 0.04666666666666667, + "English,Vietnamese,Malay,Spanish,Filipino": 0.06666666666666667, + "English,Vietnamese,Malay,Chinese,Filipino": 0.06, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.05333333333333334, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.06, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.06, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.04, + "English,Malay,Indonesian,Spanish,Chinese": 0.05333333333333334, + "English,Malay,Indonesian,Spanish,Filipino": 0.06666666666666667, + "English,Malay,Indonesian,Chinese,Filipino": 0.05333333333333334, + "English,Malay,Spanish,Chinese,Filipino": 0.04666666666666667, + "English,Indonesian,Spanish,Chinese,Filipino": 0.03333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.06, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.06666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.06, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.05333333333333334, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.04666666666666667, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.03333333333333333 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.03333333333333333, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.04666666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.04666666666666667, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.03333333333333333, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.03333333333333333, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.02666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.03333333333333333 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.02666666666666667 + } + }, + "AC3_2": 0.30839002262755094, + "AC3_3": 0.2106739798403831, + "AC3_4": 0.1366065949543381, + "AC3_5": 0.0901947175343822, + "AC3_6": 0.06350844851919568, + "AC3_7": 0.04835555553863111 + } }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.14563106796116504 + } }, "cn_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.2 + } }, "us_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.22429906542056074 + } }, "ph_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.34, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.4, + "history": 0.26666666666666666, + "literature": 0.3, + "politics": 0.5, + "culture": 0.2, + "film": 0.4, + "law": 0.3, + "geography": 0.3 + } + } }, "sing2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.07241748883928695 + } }, "indommlu": { "prompt_1": -1 }, "flores_ind2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.07369708717535596 + } }, "flores_vie2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.06007383514043186 + } }, "flores_zho2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.06655056356394193 + } }, "flores_zsm2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.077662612518808 + } }, "mmlu": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.25204200700116686 + } }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.25705794947994054 + } }, "c_eval_full": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.25529265255292655, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.25, + "computer_architecture": 0.15384615384615385, + "college_programming": 0.21428571428571427, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.43478260869565216, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.14285714285714285, + "metrology_engineer": 0.034482758620689655, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.16666666666666666, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.125, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.12, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.26666666666666666, + "business_administration": 0.3684210526315789, + "marxism": 0.375, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.17647058823529413, + "teacher_qualification": 0.20408163265306123, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.16666666666666666, + "middle_school_politics": 0.19230769230769232, + "middle_school_geography": 0.17647058823529413, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.25, + "logic": 0.18518518518518517, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.2631578947368421, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.36, + "middle_school_history": 0.2222222222222222, + "civil_servant": 0.23076923076923078, + "sports_science": 0.3333333333333333, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.18518518518518517, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.37037037037037035, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.12962962962962962, + "physician": 0.2777777777777778 + } + } }, "cmmlu": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.23297491039426524 + } }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.2727272727272727 + } }, "ind_emotion": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.175 + } }, "ocnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.3338983050847458 + } }, "c3": { "prompt_1": -1 @@ -12142,28 +105817,44 @@ "prompt_1": -1 }, "sst2": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.5 + } }, "cola": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.348993288590604 + } }, "qqp": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.543 + } }, "mnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.347 + } }, "qnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.498 + } }, "wnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.5915492957746479 + } }, "rte": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.5090252707581228 + } }, "mrpc": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.4215686274509804 + } } } }, @@ -12172,53 +105863,1733 @@ "model_link": "https://huggingface.co/aisingapore/sealion7b-instruct-nc", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.2780952380952381, + "language_acc": { + "English": 0.25333333333333335, + "Vietnamese": 0.30666666666666664, + "Malay": 0.26, + "Indonesian": 0.2733333333333333, + "Spanish": 0.29333333333333333, + "Chinese": 0.29333333333333333, + "Filipino": 0.26666666666666666 + }, + "consistency_score_2": 0.4965079365079366, + "consistency_score_3": 0.2963809523809524, + "consistency_score_4": 0.19314285714285712, + "consistency_score_5": 0.133968253968254, + "consistency_score_6": 0.09714285714285716, + "consistency_score_7": 0.07333333333333333, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.49333333333333335, + "English,Malay": 0.49333333333333335, + "English,Indonesian": 0.5733333333333334, + "English,Spanish": 0.58, + "English,Chinese": 0.4666666666666667, + "English,Filipino": 0.4866666666666667, + "Vietnamese,Malay": 0.44, + "Vietnamese,Indonesian": 0.54, + "Vietnamese,Spanish": 0.5133333333333333, + "Vietnamese,Chinese": 0.4533333333333333, + "Vietnamese,Filipino": 0.48, + "Malay,Indonesian": 0.5533333333333333, + "Malay,Spanish": 0.47333333333333333, + "Malay,Chinese": 0.46, + "Malay,Filipino": 0.4533333333333333, + "Indonesian,Spanish": 0.5733333333333334, + "Indonesian,Chinese": 0.47333333333333333, + "Indonesian,Filipino": 0.5266666666666666, + "Spanish,Chinese": 0.4066666666666667, + "Spanish,Filipino": 0.49333333333333335, + "Chinese,Filipino": 0.49333333333333335 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.2733333333333333, + "English,Vietnamese,Indonesian": 0.35333333333333333, + "English,Vietnamese,Spanish": 0.34, + "English,Vietnamese,Chinese": 0.29333333333333333, + "English,Vietnamese,Filipino": 0.2733333333333333, + "English,Malay,Indonesian": 0.36, + "English,Malay,Spanish": 0.34, + "English,Malay,Chinese": 0.26, + "English,Malay,Filipino": 0.28, + "English,Indonesian,Spanish": 0.38666666666666666, + "English,Indonesian,Chinese": 0.29333333333333333, + "English,Indonesian,Filipino": 0.3333333333333333, + "English,Spanish,Chinese": 0.2733333333333333, + "English,Spanish,Filipino": 0.30666666666666664, + "English,Chinese,Filipino": 0.2733333333333333, + "Vietnamese,Malay,Indonesian": 0.30666666666666664, + "Vietnamese,Malay,Spanish": 0.26666666666666666, + "Vietnamese,Malay,Chinese": 0.22666666666666666, + "Vietnamese,Malay,Filipino": 0.24666666666666667, + "Vietnamese,Indonesian,Spanish": 0.36, + "Vietnamese,Indonesian,Chinese": 0.3, + "Vietnamese,Indonesian,Filipino": 0.32666666666666666, + "Vietnamese,Spanish,Chinese": 0.26, + "Vietnamese,Spanish,Filipino": 0.30666666666666664, + "Vietnamese,Chinese,Filipino": 0.28, + "Malay,Indonesian,Spanish": 0.35333333333333333, + "Malay,Indonesian,Chinese": 0.29333333333333333, + "Malay,Indonesian,Filipino": 0.3, + "Malay,Spanish,Chinese": 0.23333333333333334, + "Malay,Spanish,Filipino": 0.25333333333333335, + "Malay,Chinese,Filipino": 0.26, + "Indonesian,Spanish,Chinese": 0.28, + "Indonesian,Spanish,Filipino": 0.3333333333333333, + "Indonesian,Chinese,Filipino": 0.2866666666666667, + "Spanish,Chinese,Filipino": 0.26 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.22, + "English,Vietnamese,Malay,Spanish": 0.20666666666666667, + "English,Vietnamese,Malay,Chinese": 0.16666666666666666, + "English,Vietnamese,Malay,Filipino": 0.18, + "English,Vietnamese,Indonesian,Spanish": 0.23333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.21333333333333335, + "English,Vietnamese,Indonesian,Filipino": 0.22, + "English,Vietnamese,Spanish,Chinese": 0.2, + "English,Vietnamese,Spanish,Filipino": 0.19333333333333333, + "English,Vietnamese,Chinese,Filipino": 0.18666666666666668, + "English,Malay,Indonesian,Spanish": 0.26666666666666666, + "English,Malay,Indonesian,Chinese": 0.18666666666666668, + "English,Malay,Indonesian,Filipino": 0.22, + "English,Malay,Spanish,Chinese": 0.17333333333333334, + "English,Malay,Spanish,Filipino": 0.18666666666666668, + "English,Malay,Chinese,Filipino": 0.16666666666666666, + "English,Indonesian,Spanish,Chinese": 0.20666666666666667, + "English,Indonesian,Spanish,Filipino": 0.22666666666666666, + "English,Indonesian,Chinese,Filipino": 0.18666666666666668, + "English,Spanish,Chinese,Filipino": 0.17333333333333334, + "Vietnamese,Malay,Indonesian,Spanish": 0.22, + "Vietnamese,Malay,Indonesian,Chinese": 0.17333333333333334, + "Vietnamese,Malay,Indonesian,Filipino": 0.19333333333333333, + "Vietnamese,Malay,Spanish,Chinese": 0.12666666666666668, + "Vietnamese,Malay,Spanish,Filipino": 0.16, + "Vietnamese,Malay,Chinese,Filipino": 0.15333333333333332, + "Vietnamese,Indonesian,Spanish,Chinese": 0.19333333333333333, + "Vietnamese,Indonesian,Spanish,Filipino": 0.25333333333333335, + "Vietnamese,Indonesian,Chinese,Filipino": 0.2, + "Vietnamese,Spanish,Chinese,Filipino": 0.19333333333333333, + "Malay,Indonesian,Spanish,Chinese": 0.18, + "Malay,Indonesian,Spanish,Filipino": 0.19333333333333333, + "Malay,Indonesian,Chinese,Filipino": 0.17333333333333334, + "Malay,Spanish,Chinese,Filipino": 0.14, + "Indonesian,Spanish,Chinese,Filipino": 0.19333333333333333 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.16666666666666666, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.13333333333333333, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.15333333333333332, + "English,Vietnamese,Malay,Spanish,Chinese": 0.11333333333333333, + "English,Vietnamese,Malay,Spanish,Filipino": 0.12666666666666668, + "English,Vietnamese,Malay,Chinese,Filipino": 0.12, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.14666666666666667, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.16, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.14666666666666667, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.14, + "English,Malay,Indonesian,Spanish,Chinese": 0.14, + "English,Malay,Indonesian,Spanish,Filipino": 0.15333333333333332, + "English,Malay,Indonesian,Chinese,Filipino": 0.12666666666666668, + "English,Malay,Spanish,Chinese,Filipino": 0.10666666666666667, + "English,Indonesian,Spanish,Chinese,Filipino": 0.14, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.10666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.14666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.12, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.09333333333333334, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.16, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.11333333333333333 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.09333333333333334, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.11333333333333333, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.1, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.08, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.11333333333333333, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.09333333333333334, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.08666666666666667 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.07333333333333333 + } + }, + "AC3_2": 0.3565089773154117, + "AC3_3": 0.28694707586264384, + "AC3_4": 0.2279616583417393, + "AC3_5": 0.18082617942636417, + "AC3_6": 0.1439883973510401, + "AC3_7": 0.11606142724791386 + }, + "prompt_2": { + "overall_acc": 0.2552380952380952, + "language_acc": { + "English": 0.3, + "Vietnamese": 0.25333333333333335, + "Malay": 0.2733333333333333, + "Indonesian": 0.25333333333333335, + "Spanish": 0.20666666666666667, + "Chinese": 0.2, + "Filipino": 0.3 + }, + "consistency_score_2": 0.29333333333333333, + "consistency_score_3": 0.10266666666666666, + "consistency_score_4": 0.04095238095238096, + "consistency_score_5": 0.016825396825396827, + "consistency_score_6": 0.005714285714285714, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4, + "English,Malay": 0.25333333333333335, + "English,Indonesian": 0.32, + "English,Spanish": 0.22, + "English,Chinese": 0.26, + "English,Filipino": 0.32, + "Vietnamese,Malay": 0.2866666666666667, + "Vietnamese,Indonesian": 0.36, + "Vietnamese,Spanish": 0.29333333333333333, + "Vietnamese,Chinese": 0.29333333333333333, + "Vietnamese,Filipino": 0.30666666666666664, + "Malay,Indonesian": 0.36666666666666664, + "Malay,Spanish": 0.21333333333333335, + "Malay,Chinese": 0.26666666666666666, + "Malay,Filipino": 0.29333333333333333, + "Indonesian,Spanish": 0.26, + "Indonesian,Chinese": 0.32, + "Indonesian,Filipino": 0.38666666666666666, + "Spanish,Chinese": 0.23333333333333334, + "Spanish,Filipino": 0.19333333333333333, + "Chinese,Filipino": 0.31333333333333335 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.10666666666666667, + "English,Vietnamese,Indonesian": 0.16, + "English,Vietnamese,Spanish": 0.10666666666666667, + "English,Vietnamese,Chinese": 0.13333333333333333, + "English,Vietnamese,Filipino": 0.13333333333333333, + "English,Malay,Indonesian": 0.12, + "English,Malay,Spanish": 0.04, + "English,Malay,Chinese": 0.07333333333333333, + "English,Malay,Filipino": 0.07333333333333333, + "English,Indonesian,Spanish": 0.08666666666666667, + "English,Indonesian,Chinese": 0.10666666666666667, + "English,Indonesian,Filipino": 0.15333333333333332, + "English,Spanish,Chinese": 0.06666666666666667, + "English,Spanish,Filipino": 0.06666666666666667, + "English,Chinese,Filipino": 0.12, + "Vietnamese,Malay,Indonesian": 0.15333333333333332, + "Vietnamese,Malay,Spanish": 0.07333333333333333, + "Vietnamese,Malay,Chinese": 0.1, + "Vietnamese,Malay,Filipino": 0.12, + "Vietnamese,Indonesian,Spanish": 0.11333333333333333, + "Vietnamese,Indonesian,Chinese": 0.12666666666666668, + "Vietnamese,Indonesian,Filipino": 0.16666666666666666, + "Vietnamese,Spanish,Chinese": 0.08, + "Vietnamese,Spanish,Filipino": 0.04666666666666667, + "Vietnamese,Chinese,Filipino": 0.12666666666666668, + "Malay,Indonesian,Spanish": 0.08666666666666667, + "Malay,Indonesian,Chinese": 0.11333333333333333, + "Malay,Indonesian,Filipino": 0.14, + "Malay,Spanish,Chinese": 0.03333333333333333, + "Malay,Spanish,Filipino": 0.05333333333333334, + "Malay,Chinese,Filipino": 0.10666666666666667, + "Indonesian,Spanish,Chinese": 0.09333333333333334, + "Indonesian,Spanish,Filipino": 0.09333333333333334, + "Indonesian,Chinese,Filipino": 0.16666666666666666, + "Spanish,Chinese,Filipino": 0.05333333333333334 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.05333333333333334, + "English,Vietnamese,Malay,Spanish": 0.013333333333333334, + "English,Vietnamese,Malay,Chinese": 0.04, + "English,Vietnamese,Malay,Filipino": 0.04, + "English,Vietnamese,Indonesian,Spanish": 0.05333333333333334, + "English,Vietnamese,Indonesian,Chinese": 0.06, + "English,Vietnamese,Indonesian,Filipino": 0.08, + "English,Vietnamese,Spanish,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.03333333333333333, + "English,Vietnamese,Chinese,Filipino": 0.06666666666666667, + "English,Malay,Indonesian,Spanish": 0.03333333333333333, + "English,Malay,Indonesian,Chinese": 0.04, + "English,Malay,Indonesian,Filipino": 0.04666666666666667, + "English,Malay,Spanish,Chinese": 0.006666666666666667, + "English,Malay,Spanish,Filipino": 0.013333333333333334, + "English,Malay,Chinese,Filipino": 0.03333333333333333, + "English,Indonesian,Spanish,Chinese": 0.04, + "English,Indonesian,Spanish,Filipino": 0.04, + "English,Indonesian,Chinese,Filipino": 0.06666666666666667, + "English,Spanish,Chinese,Filipino": 0.03333333333333333, + "Vietnamese,Malay,Indonesian,Spanish": 0.04, + "Vietnamese,Malay,Indonesian,Chinese": 0.06, + "Vietnamese,Malay,Indonesian,Filipino": 0.07333333333333333, + "Vietnamese,Malay,Spanish,Chinese": 0.013333333333333334, + "Vietnamese,Malay,Spanish,Filipino": 0.013333333333333334, + "Vietnamese,Malay,Chinese,Filipino": 0.05333333333333334, + "Vietnamese,Indonesian,Spanish,Chinese": 0.03333333333333333, + "Vietnamese,Indonesian,Spanish,Filipino": 0.04, + "Vietnamese,Indonesian,Chinese,Filipino": 0.08666666666666667, + "Vietnamese,Spanish,Chinese,Filipino": 0.02666666666666667, + "Malay,Indonesian,Spanish,Chinese": 0.006666666666666667, + "Malay,Indonesian,Spanish,Filipino": 0.03333333333333333, + "Malay,Indonesian,Chinese,Filipino": 0.07333333333333333, + "Malay,Spanish,Chinese,Filipino": 0.0, + "Indonesian,Spanish,Chinese,Filipino": 0.04 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.013333333333333334, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.02, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.02666666666666667, + "English,Vietnamese,Malay,Spanish,Chinese": 0.0, + "English,Vietnamese,Malay,Spanish,Filipino": 0.006666666666666667, + "English,Vietnamese,Malay,Chinese,Filipino": 0.02, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.02666666666666667, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.02666666666666667, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.04, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.02666666666666667, + "English,Malay,Indonesian,Spanish,Chinese": 0.006666666666666667, + "English,Malay,Indonesian,Spanish,Filipino": 0.013333333333333334, + "English,Malay,Indonesian,Chinese,Filipino": 0.02, + "English,Malay,Spanish,Chinese,Filipino": 0.0, + "English,Indonesian,Spanish,Chinese,Filipino": 0.02666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.0, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.013333333333333334, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.04666666666666667, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.0, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.02, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.0 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.0, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.006666666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.013333333333333334, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.0, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.02, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.0, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.0 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.0 + } + }, + "AC3_2": 0.2729629629132041, + "AC3_3": 0.14643249951558787, + "AC3_4": 0.07058030927030631, + "AC3_5": 0.03156970605051767, + "AC3_6": 0.011178310736070876, + "AC3_7": 0.0 + }, + "prompt_3": { + "overall_acc": 0.24952380952380954, + "language_acc": { + "English": 0.24666666666666667, + "Vietnamese": 0.25333333333333335, + "Malay": 0.26666666666666666, + "Indonesian": 0.2866666666666667, + "Spanish": 0.2733333333333333, + "Chinese": 0.23333333333333334, + "Filipino": 0.18666666666666668 + }, + "consistency_score_2": 0.2565079365079364, + "consistency_score_3": 0.07409523809523808, + "consistency_score_4": 0.02876190476190476, + "consistency_score_5": 0.01492063492063492, + "consistency_score_6": 0.009523809523809523, + "consistency_score_7": 0.006666666666666667, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.32, + "English,Malay": 0.20666666666666667, + "English,Indonesian": 0.21333333333333335, + "English,Spanish": 0.26666666666666666, + "English,Chinese": 0.24, + "English,Filipino": 0.22, + "Vietnamese,Malay": 0.24, + "Vietnamese,Indonesian": 0.28, + "Vietnamese,Spanish": 0.30666666666666664, + "Vietnamese,Chinese": 0.23333333333333334, + "Vietnamese,Filipino": 0.22666666666666666, + "Malay,Indonesian": 0.24, + "Malay,Spanish": 0.25333333333333335, + "Malay,Chinese": 0.24, + "Malay,Filipino": 0.3, + "Indonesian,Spanish": 0.25333333333333335, + "Indonesian,Chinese": 0.29333333333333333, + "Indonesian,Filipino": 0.29333333333333333, + "Spanish,Chinese": 0.2733333333333333, + "Spanish,Filipino": 0.21333333333333335, + "Chinese,Filipino": 0.2733333333333333 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.06, + "English,Vietnamese,Indonesian": 0.07333333333333333, + "English,Vietnamese,Spanish": 0.10666666666666667, + "English,Vietnamese,Chinese": 0.08, + "English,Vietnamese,Filipino": 0.07333333333333333, + "English,Malay,Indonesian": 0.05333333333333334, + "English,Malay,Spanish": 0.04, + "English,Malay,Chinese": 0.06666666666666667, + "English,Malay,Filipino": 0.05333333333333334, + "English,Indonesian,Spanish": 0.07333333333333333, + "English,Indonesian,Chinese": 0.06666666666666667, + "English,Indonesian,Filipino": 0.04666666666666667, + "English,Spanish,Chinese": 0.06, + "English,Spanish,Filipino": 0.06, + "English,Chinese,Filipino": 0.06, + "Vietnamese,Malay,Indonesian": 0.08666666666666667, + "Vietnamese,Malay,Spanish": 0.05333333333333334, + "Vietnamese,Malay,Chinese": 0.04666666666666667, + "Vietnamese,Malay,Filipino": 0.08666666666666667, + "Vietnamese,Indonesian,Spanish": 0.08666666666666667, + "Vietnamese,Indonesian,Chinese": 0.08666666666666667, + "Vietnamese,Indonesian,Filipino": 0.07333333333333333, + "Vietnamese,Spanish,Chinese": 0.10666666666666667, + "Vietnamese,Spanish,Filipino": 0.06, + "Vietnamese,Chinese,Filipino": 0.07333333333333333, + "Malay,Indonesian,Spanish": 0.07333333333333333, + "Malay,Indonesian,Chinese": 0.06666666666666667, + "Malay,Indonesian,Filipino": 0.09333333333333334, + "Malay,Spanish,Chinese": 0.08, + "Malay,Spanish,Filipino": 0.08666666666666667, + "Malay,Chinese,Filipino": 0.10666666666666667, + "Indonesian,Spanish,Chinese": 0.08666666666666667, + "Indonesian,Spanish,Filipino": 0.08, + "Indonesian,Chinese,Filipino": 0.09333333333333334, + "Spanish,Chinese,Filipino": 0.09333333333333334 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.03333333333333333, + "English,Vietnamese,Malay,Spanish": 0.02, + "English,Vietnamese,Malay,Chinese": 0.02, + "English,Vietnamese,Malay,Filipino": 0.02666666666666667, + "English,Vietnamese,Indonesian,Spanish": 0.02666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.02, + "English,Vietnamese,Indonesian,Filipino": 0.02, + "English,Vietnamese,Spanish,Chinese": 0.04666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.02666666666666667, + "English,Vietnamese,Chinese,Filipino": 0.04, + "English,Malay,Indonesian,Spanish": 0.013333333333333334, + "English,Malay,Indonesian,Chinese": 0.02666666666666667, + "English,Malay,Indonesian,Filipino": 0.013333333333333334, + "English,Malay,Spanish,Chinese": 0.013333333333333334, + "English,Malay,Spanish,Filipino": 0.02, + "English,Malay,Chinese,Filipino": 0.03333333333333333, + "English,Indonesian,Spanish,Chinese": 0.02, + "English,Indonesian,Spanish,Filipino": 0.02, + "English,Indonesian,Chinese,Filipino": 0.02, + "English,Spanish,Chinese,Filipino": 0.02, + "Vietnamese,Malay,Indonesian,Spanish": 0.02666666666666667, + "Vietnamese,Malay,Indonesian,Chinese": 0.02, + "Vietnamese,Malay,Indonesian,Filipino": 0.04, + "Vietnamese,Malay,Spanish,Chinese": 0.03333333333333333, + "Vietnamese,Malay,Spanish,Filipino": 0.02666666666666667, + "Vietnamese,Malay,Chinese,Filipino": 0.03333333333333333, + "Vietnamese,Indonesian,Spanish,Chinese": 0.03333333333333333, + "Vietnamese,Indonesian,Spanish,Filipino": 0.03333333333333333, + "Vietnamese,Indonesian,Chinese,Filipino": 0.02666666666666667, + "Vietnamese,Spanish,Chinese,Filipino": 0.04, + "Malay,Indonesian,Spanish,Chinese": 0.04, + "Malay,Indonesian,Spanish,Filipino": 0.03333333333333333, + "Malay,Indonesian,Chinese,Filipino": 0.04666666666666667, + "Malay,Spanish,Chinese,Filipino": 0.05333333333333334, + "Indonesian,Spanish,Chinese,Filipino": 0.04 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.013333333333333334, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.006666666666666667, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.006666666666666667, + "English,Vietnamese,Malay,Spanish,Chinese": 0.013333333333333334, + "English,Vietnamese,Malay,Spanish,Filipino": 0.013333333333333334, + "English,Vietnamese,Malay,Chinese,Filipino": 0.02, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.013333333333333334, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.013333333333333334, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.006666666666666667, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.02, + "English,Malay,Indonesian,Spanish,Chinese": 0.006666666666666667, + "English,Malay,Indonesian,Spanish,Filipino": 0.006666666666666667, + "English,Malay,Indonesian,Chinese,Filipino": 0.013333333333333334, + "English,Malay,Spanish,Chinese,Filipino": 0.013333333333333334, + "English,Indonesian,Spanish,Chinese,Filipino": 0.006666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.02, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.02, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.02, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.02666666666666667, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.02, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.03333333333333333 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.006666666666666667, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.006666666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.006666666666666667, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.013333333333333334, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.006666666666666667, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.006666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.02 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.006666666666666667 + } + }, + "AC3_2": 0.252967676355578, + "AC3_3": 0.11426104988201102, + "AC3_4": 0.051578501334094286, + "AC3_5": 0.02815754872313141, + "AC3_6": 0.018347338928491615, + "AC3_7": 0.012986369263828122 + }, + "prompt_4": { + "overall_acc": 0.24380952380952378, + "language_acc": { + "English": 0.2866666666666667, + "Vietnamese": 0.26, + "Malay": 0.18666666666666668, + "Indonesian": 0.23333333333333334, + "Spanish": 0.26666666666666666, + "Chinese": 0.26666666666666666, + "Filipino": 0.20666666666666667 + }, + "consistency_score_2": 0.33523809523809533, + "consistency_score_3": 0.14457142857142857, + "consistency_score_4": 0.07752380952380955, + "consistency_score_5": 0.05047619047619048, + "consistency_score_6": 0.039047619047619046, + "consistency_score_7": 0.03333333333333333, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.48, + "English,Malay": 0.28, + "English,Indonesian": 0.32, + "English,Spanish": 0.3333333333333333, + "English,Chinese": 0.4066666666666667, + "English,Filipino": 0.37333333333333335, + "Vietnamese,Malay": 0.3333333333333333, + "Vietnamese,Indonesian": 0.30666666666666664, + "Vietnamese,Spanish": 0.34, + "Vietnamese,Chinese": 0.38, + "Vietnamese,Filipino": 0.34, + "Malay,Indonesian": 0.31333333333333335, + "Malay,Spanish": 0.35333333333333333, + "Malay,Chinese": 0.32666666666666666, + "Malay,Filipino": 0.28, + "Indonesian,Spanish": 0.30666666666666664, + "Indonesian,Chinese": 0.36, + "Indonesian,Filipino": 0.28, + "Spanish,Chinese": 0.32666666666666666, + "Spanish,Filipino": 0.28, + "Chinese,Filipino": 0.32 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.17333333333333334, + "English,Vietnamese,Indonesian": 0.16666666666666666, + "English,Vietnamese,Spanish": 0.20666666666666667, + "English,Vietnamese,Chinese": 0.24666666666666667, + "English,Vietnamese,Filipino": 0.2, + "English,Malay,Indonesian": 0.12, + "English,Malay,Spanish": 0.13333333333333333, + "English,Malay,Chinese": 0.15333333333333332, + "English,Malay,Filipino": 0.12, + "English,Indonesian,Spanish": 0.12, + "English,Indonesian,Chinese": 0.17333333333333334, + "English,Indonesian,Filipino": 0.13333333333333333, + "English,Spanish,Chinese": 0.17333333333333334, + "English,Spanish,Filipino": 0.13333333333333333, + "English,Chinese,Filipino": 0.18666666666666668, + "Vietnamese,Malay,Indonesian": 0.14, + "Vietnamese,Malay,Spanish": 0.14, + "Vietnamese,Malay,Chinese": 0.13333333333333333, + "Vietnamese,Malay,Filipino": 0.12666666666666668, + "Vietnamese,Indonesian,Spanish": 0.12666666666666668, + "Vietnamese,Indonesian,Chinese": 0.14666666666666667, + "Vietnamese,Indonesian,Filipino": 0.11333333333333333, + "Vietnamese,Spanish,Chinese": 0.14666666666666667, + "Vietnamese,Spanish,Filipino": 0.13333333333333333, + "Vietnamese,Chinese,Filipino": 0.18, + "Malay,Indonesian,Spanish": 0.13333333333333333, + "Malay,Indonesian,Chinese": 0.14, + "Malay,Indonesian,Filipino": 0.08666666666666667, + "Malay,Spanish,Chinese": 0.13333333333333333, + "Malay,Spanish,Filipino": 0.12, + "Malay,Chinese,Filipino": 0.13333333333333333, + "Indonesian,Spanish,Chinese": 0.14, + "Indonesian,Spanish,Filipino": 0.10666666666666667, + "Indonesian,Chinese,Filipino": 0.12666666666666668, + "Spanish,Chinese,Filipino": 0.11333333333333333 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.08666666666666667, + "English,Vietnamese,Malay,Spanish": 0.08, + "English,Vietnamese,Malay,Chinese": 0.10666666666666667, + "English,Vietnamese,Malay,Filipino": 0.08666666666666667, + "English,Vietnamese,Indonesian,Spanish": 0.08666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.11333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.06666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.12666666666666668, + "English,Vietnamese,Spanish,Filipino": 0.09333333333333334, + "English,Vietnamese,Chinese,Filipino": 0.12666666666666668, + "English,Malay,Indonesian,Spanish": 0.07333333333333333, + "English,Malay,Indonesian,Chinese": 0.08666666666666667, + "English,Malay,Indonesian,Filipino": 0.06666666666666667, + "English,Malay,Spanish,Chinese": 0.08, + "English,Malay,Spanish,Filipino": 0.06, + "English,Malay,Chinese,Filipino": 0.08666666666666667, + "English,Indonesian,Spanish,Chinese": 0.08, + "English,Indonesian,Spanish,Filipino": 0.06, + "English,Indonesian,Chinese,Filipino": 0.09333333333333334, + "English,Spanish,Chinese,Filipino": 0.06666666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.08, + "Vietnamese,Malay,Indonesian,Chinese": 0.07333333333333333, + "Vietnamese,Malay,Indonesian,Filipino": 0.06, + "Vietnamese,Malay,Spanish,Chinese": 0.06666666666666667, + "Vietnamese,Malay,Spanish,Filipino": 0.06, + "Vietnamese,Malay,Chinese,Filipino": 0.08, + "Vietnamese,Indonesian,Spanish,Chinese": 0.07333333333333333, + "Vietnamese,Indonesian,Spanish,Filipino": 0.06, + "Vietnamese,Indonesian,Chinese,Filipino": 0.07333333333333333, + "Vietnamese,Spanish,Chinese,Filipino": 0.06666666666666667, + "Malay,Indonesian,Spanish,Chinese": 0.06, + "Malay,Indonesian,Spanish,Filipino": 0.06, + "Malay,Indonesian,Chinese,Filipino": 0.06, + "Malay,Spanish,Chinese,Filipino": 0.06, + "Indonesian,Spanish,Chinese,Filipino": 0.05333333333333334 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.05333333333333334, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.06666666666666667, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.05333333333333334, + "English,Vietnamese,Malay,Spanish,Chinese": 0.06, + "English,Vietnamese,Malay,Spanish,Filipino": 0.04666666666666667, + "English,Vietnamese,Malay,Chinese,Filipino": 0.07333333333333333, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.06666666666666667, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.04, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.06, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.05333333333333334, + "English,Malay,Indonesian,Spanish,Chinese": 0.05333333333333334, + "English,Malay,Indonesian,Spanish,Filipino": 0.04666666666666667, + "English,Malay,Indonesian,Chinese,Filipino": 0.05333333333333334, + "English,Malay,Spanish,Chinese,Filipino": 0.04, + "English,Indonesian,Spanish,Chinese,Filipino": 0.04, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.04666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.04666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.04666666666666667, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.04, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.04, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.03333333333333333 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.04666666666666667, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.04, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.04666666666666667, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.04, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.03333333333333333, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.03333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.03333333333333333 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.03333333333333333 + } + }, + "AC3_2": 0.2823057643622741, + "AC3_3": 0.18151194558487657, + "AC3_4": 0.11764134695861926, + "AC3_5": 0.08363692399685323, + "AC3_6": 0.06731441395728262, + "AC3_7": 0.05864833903954842 + }, + "prompt_5": { + "overall_acc": 0.25999999999999995, + "language_acc": { + "English": 0.22666666666666666, + "Vietnamese": 0.29333333333333333, + "Malay": 0.26666666666666666, + "Indonesian": 0.22, + "Spanish": 0.2866666666666667, + "Chinese": 0.24666666666666667, + "Filipino": 0.28 + }, + "consistency_score_2": 0.3022222222222222, + "consistency_score_3": 0.10114285714285716, + "consistency_score_4": 0.035809523809523805, + "consistency_score_5": 0.013015873015873012, + "consistency_score_6": 0.00380952380952381, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.22666666666666666, + "English,Malay": 0.3, + "English,Indonesian": 0.29333333333333333, + "English,Spanish": 0.35333333333333333, + "English,Chinese": 0.31333333333333335, + "English,Filipino": 0.31333333333333335, + "Vietnamese,Malay": 0.21333333333333335, + "Vietnamese,Indonesian": 0.23333333333333334, + "Vietnamese,Spanish": 0.4266666666666667, + "Vietnamese,Chinese": 0.37333333333333335, + "Vietnamese,Filipino": 0.42, + "Malay,Indonesian": 0.26, + "Malay,Spanish": 0.26666666666666666, + "Malay,Chinese": 0.23333333333333334, + "Malay,Filipino": 0.22, + "Indonesian,Spanish": 0.22666666666666666, + "Indonesian,Chinese": 0.23333333333333334, + "Indonesian,Filipino": 0.26666666666666666, + "Spanish,Chinese": 0.41333333333333333, + "Spanish,Filipino": 0.41333333333333333, + "Chinese,Filipino": 0.3466666666666667 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.06666666666666667, + "English,Vietnamese,Indonesian": 0.04666666666666667, + "English,Vietnamese,Spanish": 0.1, + "English,Vietnamese,Chinese": 0.08666666666666667, + "English,Vietnamese,Filipino": 0.10666666666666667, + "English,Malay,Indonesian": 0.09333333333333334, + "English,Malay,Spanish": 0.11333333333333333, + "English,Malay,Chinese": 0.1, + "English,Malay,Filipino": 0.09333333333333334, + "English,Indonesian,Spanish": 0.08666666666666667, + "English,Indonesian,Chinese": 0.08, + "English,Indonesian,Filipino": 0.08666666666666667, + "English,Spanish,Chinese": 0.17333333333333334, + "English,Spanish,Filipino": 0.15333333333333332, + "English,Chinese,Filipino": 0.08666666666666667, + "Vietnamese,Malay,Indonesian": 0.06, + "Vietnamese,Malay,Spanish": 0.1, + "Vietnamese,Malay,Chinese": 0.09333333333333334, + "Vietnamese,Malay,Filipino": 0.06, + "Vietnamese,Indonesian,Spanish": 0.08666666666666667, + "Vietnamese,Indonesian,Chinese": 0.09333333333333334, + "Vietnamese,Indonesian,Filipino": 0.07333333333333333, + "Vietnamese,Spanish,Chinese": 0.2, + "Vietnamese,Spanish,Filipino": 0.22666666666666666, + "Vietnamese,Chinese,Filipino": 0.18666666666666668, + "Malay,Indonesian,Spanish": 0.07333333333333333, + "Malay,Indonesian,Chinese": 0.06, + "Malay,Indonesian,Filipino": 0.06666666666666667, + "Malay,Spanish,Chinese": 0.09333333333333334, + "Malay,Spanish,Filipino": 0.1, + "Malay,Chinese,Filipino": 0.06, + "Indonesian,Spanish,Chinese": 0.08666666666666667, + "Indonesian,Spanish,Filipino": 0.08666666666666667, + "Indonesian,Chinese,Filipino": 0.08, + "Spanish,Chinese,Filipino": 0.18 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.02, + "English,Vietnamese,Malay,Spanish": 0.04, + "English,Vietnamese,Malay,Chinese": 0.03333333333333333, + "English,Vietnamese,Malay,Filipino": 0.02, + "English,Vietnamese,Indonesian,Spanish": 0.02666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.02666666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.006666666666666667, + "English,Vietnamese,Spanish,Chinese": 0.05333333333333334, + "English,Vietnamese,Spanish,Filipino": 0.05333333333333334, + "English,Vietnamese,Chinese,Filipino": 0.03333333333333333, + "English,Malay,Indonesian,Spanish": 0.03333333333333333, + "English,Malay,Indonesian,Chinese": 0.03333333333333333, + "English,Malay,Indonesian,Filipino": 0.03333333333333333, + "English,Malay,Spanish,Chinese": 0.04666666666666667, + "English,Malay,Spanish,Filipino": 0.04666666666666667, + "English,Malay,Chinese,Filipino": 0.02, + "English,Indonesian,Spanish,Chinese": 0.03333333333333333, + "English,Indonesian,Spanish,Filipino": 0.02666666666666667, + "English,Indonesian,Chinese,Filipino": 0.013333333333333334, + "English,Spanish,Chinese,Filipino": 0.06, + "Vietnamese,Malay,Indonesian,Spanish": 0.03333333333333333, + "Vietnamese,Malay,Indonesian,Chinese": 0.03333333333333333, + "Vietnamese,Malay,Indonesian,Filipino": 0.006666666666666667, + "Vietnamese,Malay,Spanish,Chinese": 0.05333333333333334, + "Vietnamese,Malay,Spanish,Filipino": 0.02666666666666667, + "Vietnamese,Malay,Chinese,Filipino": 0.03333333333333333, + "Vietnamese,Indonesian,Spanish,Chinese": 0.05333333333333334, + "Vietnamese,Indonesian,Spanish,Filipino": 0.04, + "Vietnamese,Indonesian,Chinese,Filipino": 0.04666666666666667, + "Vietnamese,Spanish,Chinese,Filipino": 0.12, + "Malay,Indonesian,Spanish,Chinese": 0.03333333333333333, + "Malay,Indonesian,Spanish,Filipino": 0.02666666666666667, + "Malay,Indonesian,Chinese,Filipino": 0.02, + "Malay,Spanish,Chinese,Filipino": 0.02666666666666667, + "Indonesian,Spanish,Chinese,Filipino": 0.04 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.02, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.02, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.0, + "English,Vietnamese,Malay,Spanish,Chinese": 0.02666666666666667, + "English,Vietnamese,Malay,Spanish,Filipino": 0.006666666666666667, + "English,Vietnamese,Malay,Chinese,Filipino": 0.006666666666666667, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.02, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.0, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.0, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.02666666666666667, + "English,Malay,Indonesian,Spanish,Chinese": 0.02, + "English,Malay,Indonesian,Spanish,Filipino": 0.013333333333333334, + "English,Malay,Indonesian,Chinese,Filipino": 0.006666666666666667, + "English,Malay,Spanish,Chinese,Filipino": 0.006666666666666667, + "English,Indonesian,Spanish,Chinese,Filipino": 0.0, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.02666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.006666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.006666666666666667, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.013333333333333334, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.03333333333333333, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.013333333333333334 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.02, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.0, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.0, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.0, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.0, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.0, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.006666666666666667 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.0 + } + }, + "AC3_2": 0.27952569164988667, + "AC3_3": 0.1456329113520795, + "AC3_4": 0.06294913069346536, + "AC3_5": 0.02479069766533828, + "AC3_6": 0.007509025267911742, + "AC3_7": 0.0 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.2564935064935065, + "language_acc": { + "Vietnamese": 0.2840909090909091, + "Indonesian": 0.26704545454545453, + "Malay": 0.24431818181818182, + "English": 0.2215909090909091, + "Spanish": 0.25, + "Filipino": 0.26136363636363635, + "Chinese": 0.26704545454545453 + }, + "consistency_score_2": 0.38041125541125537, + "consistency_score_3": 0.17905844155844156, + "consistency_score_4": 0.09740259740259744, + "consistency_score_5": 0.05979437229437228, + "consistency_score_6": 0.041396103896103896, + "consistency_score_7": 0.03409090909090909, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.3977272727272727, + "Vietnamese,Malay": 0.44886363636363635, + "Vietnamese,English": 0.3806818181818182, + "Vietnamese,Spanish": 0.3352272727272727, + "Vietnamese,Filipino": 0.39204545454545453, + "Vietnamese,Chinese": 0.36363636363636365, + "Indonesian,Malay": 0.4943181818181818, + "Indonesian,English": 0.42045454545454547, + "Indonesian,Spanish": 0.35795454545454547, + "Indonesian,Filipino": 0.3977272727272727, + "Indonesian,Chinese": 0.375, + "Malay,English": 0.4375, + "Malay,Spanish": 0.4034090909090909, + "Malay,Filipino": 0.42613636363636365, + "Malay,Chinese": 0.3465909090909091, + "English,Spanish": 0.3125, + "English,Filipino": 0.3181818181818182, + "English,Chinese": 0.4431818181818182, + "Spanish,Filipino": 0.36363636363636365, + "Spanish,Chinese": 0.30113636363636365, + "Filipino,Chinese": 0.2727272727272727 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.25, + "Vietnamese,Indonesian,English": 0.17613636363636365, + "Vietnamese,Indonesian,Spanish": 0.14772727272727273, + "Vietnamese,Indonesian,Filipino": 0.20454545454545456, + "Vietnamese,Indonesian,Chinese": 0.17613636363636365, + "Vietnamese,Malay,English": 0.22727272727272727, + "Vietnamese,Malay,Spanish": 0.19886363636363635, + "Vietnamese,Malay,Filipino": 0.24431818181818182, + "Vietnamese,Malay,Chinese": 0.17045454545454544, + "Vietnamese,English,Spanish": 0.14204545454545456, + "Vietnamese,English,Filipino": 0.14204545454545456, + "Vietnamese,English,Chinese": 0.19318181818181818, + "Vietnamese,Spanish,Filipino": 0.19318181818181818, + "Vietnamese,Spanish,Chinese": 0.10227272727272728, + "Vietnamese,Filipino,Chinese": 0.13636363636363635, + "Indonesian,Malay,English": 0.26136363636363635, + "Indonesian,Malay,Spanish": 0.2159090909090909, + "Indonesian,Malay,Filipino": 0.24431818181818182, + "Indonesian,Malay,Chinese": 0.19318181818181818, + "Indonesian,English,Spanish": 0.17045454545454544, + "Indonesian,English,Filipino": 0.17613636363636365, + "Indonesian,English,Chinese": 0.2159090909090909, + "Indonesian,Spanish,Filipino": 0.17045454545454544, + "Indonesian,Spanish,Chinese": 0.125, + "Indonesian,Filipino,Chinese": 0.14204545454545456, + "Malay,English,Spanish": 0.1875, + "Malay,English,Filipino": 0.19886363636363635, + "Malay,English,Chinese": 0.19886363636363635, + "Malay,Spanish,Filipino": 0.1875, + "Malay,Spanish,Chinese": 0.14772727272727273, + "Malay,Filipino,Chinese": 0.1590909090909091, + "English,Spanish,Filipino": 0.14204545454545456, + "English,Spanish,Chinese": 0.1590909090909091, + "English,Filipino,Chinese": 0.14772727272727273, + "Spanish,Filipino,Chinese": 0.11931818181818182 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.13636363636363635, + "Vietnamese,Indonesian,Malay,Spanish": 0.10795454545454546, + "Vietnamese,Indonesian,Malay,Filipino": 0.16477272727272727, + "Vietnamese,Indonesian,Malay,Chinese": 0.09659090909090909, + "Vietnamese,Indonesian,English,Spanish": 0.07386363636363637, + "Vietnamese,Indonesian,English,Filipino": 0.09090909090909091, + "Vietnamese,Indonesian,English,Chinese": 0.10227272727272728, + "Vietnamese,Indonesian,Spanish,Filipino": 0.10795454545454546, + "Vietnamese,Indonesian,Spanish,Chinese": 0.03977272727272727, + "Vietnamese,Indonesian,Filipino,Chinese": 0.07954545454545454, + "Vietnamese,Malay,English,Spanish": 0.10795454545454546, + "Vietnamese,Malay,English,Filipino": 0.125, + "Vietnamese,Malay,English,Chinese": 0.11931818181818182, + "Vietnamese,Malay,Spanish,Filipino": 0.13636363636363635, + "Vietnamese,Malay,Spanish,Chinese": 0.07386363636363637, + "Vietnamese,Malay,Filipino,Chinese": 0.09090909090909091, + "Vietnamese,English,Spanish,Filipino": 0.07954545454545454, + "Vietnamese,English,Spanish,Chinese": 0.056818181818181816, + "Vietnamese,English,Filipino,Chinese": 0.07954545454545454, + "Vietnamese,Spanish,Filipino,Chinese": 0.056818181818181816, + "Indonesian,Malay,English,Spanish": 0.11931818181818182, + "Indonesian,Malay,English,Filipino": 0.125, + "Indonesian,Malay,English,Chinese": 0.125, + "Indonesian,Malay,Spanish,Filipino": 0.11363636363636363, + "Indonesian,Malay,Spanish,Chinese": 0.08522727272727272, + "Indonesian,Malay,Filipino,Chinese": 0.09090909090909091, + "Indonesian,English,Spanish,Filipino": 0.08522727272727272, + "Indonesian,English,Spanish,Chinese": 0.09659090909090909, + "Indonesian,English,Filipino,Chinese": 0.09090909090909091, + "Indonesian,Spanish,Filipino,Chinese": 0.06818181818181818, + "Malay,English,Spanish,Filipino": 0.10227272727272728, + "Malay,English,Spanish,Chinese": 0.09659090909090909, + "Malay,English,Filipino,Chinese": 0.11363636363636363, + "Malay,Spanish,Filipino,Chinese": 0.07954545454545454, + "English,Spanish,Filipino,Chinese": 0.09090909090909091 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.06818181818181818, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.08522727272727272, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.07386363636363637, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.09090909090909091, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.03409090909090909, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.056818181818181816, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.05113636363636364, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.03409090909090909, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.05113636363636364, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.03409090909090909, + "Vietnamese,Malay,English,Spanish,Filipino": 0.07386363636363637, + "Vietnamese,Malay,English,Spanish,Chinese": 0.05113636363636364, + "Vietnamese,Malay,English,Filipino,Chinese": 0.07386363636363637, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.05113636363636364, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.045454545454545456, + "Indonesian,Malay,English,Spanish,Filipino": 0.0625, + "Indonesian,Malay,English,Spanish,Chinese": 0.06818181818181818, + "Indonesian,Malay,English,Filipino,Chinese": 0.06818181818181818, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.05113636363636364, + "Indonesian,English,Spanish,Filipino,Chinese": 0.0625, + "Malay,English,Spanish,Filipino,Chinese": 0.06818181818181818 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.05113636363636364, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.03409090909090909, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.045454545454545456, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.03409090909090909, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.03409090909090909, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.045454545454545456, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.045454545454545456 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.03409090909090909 + } + }, + "AC3_2": 0.3063975106824536, + "AC3_3": 0.21089253641251043, + "AC3_4": 0.14118908610331898, + "AC3_5": 0.09698043612855935, + "AC3_6": 0.07128702357916922, + "AC3_7": 0.06018283390513971 + }, + "prompt_2": { + "overall_acc": 0.2524350649350649, + "language_acc": { + "Vietnamese": 0.2215909090909091, + "Indonesian": 0.23863636363636365, + "Malay": 0.2556818181818182, + "English": 0.2727272727272727, + "Spanish": 0.2727272727272727, + "Filipino": 0.2556818181818182, + "Chinese": 0.25 + }, + "consistency_score_2": 0.3728354978354978, + "consistency_score_3": 0.17581168831168834, + "consistency_score_4": 0.09123376623376624, + "consistency_score_5": 0.04734848484848484, + "consistency_score_6": 0.021915584415584416, + "consistency_score_7": 0.005681818181818182, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.38636363636363635, + "Vietnamese,Malay": 0.5, + "Vietnamese,English": 0.42045454545454547, + "Vietnamese,Spanish": 0.25, + "Vietnamese,Filipino": 0.3522727272727273, + "Vietnamese,Chinese": 0.3409090909090909, + "Indonesian,Malay": 0.42613636363636365, + "Indonesian,English": 0.4318181818181818, + "Indonesian,Spanish": 0.32954545454545453, + "Indonesian,Filipino": 0.3181818181818182, + "Indonesian,Chinese": 0.36363636363636365, + "Malay,English": 0.4772727272727273, + "Malay,Spanish": 0.3181818181818182, + "Malay,Filipino": 0.42613636363636365, + "Malay,Chinese": 0.3693181818181818, + "English,Spanish": 0.32954545454545453, + "English,Filipino": 0.3977272727272727, + "English,Chinese": 0.42045454545454547, + "Spanish,Filipino": 0.3465909090909091, + "Spanish,Chinese": 0.3125, + "Filipino,Chinese": 0.3125 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.26136363636363635, + "Vietnamese,Indonesian,English": 0.21022727272727273, + "Vietnamese,Indonesian,Spanish": 0.11931818181818182, + "Vietnamese,Indonesian,Filipino": 0.17045454545454544, + "Vietnamese,Indonesian,Chinese": 0.1875, + "Vietnamese,Malay,English": 0.3125, + "Vietnamese,Malay,Spanish": 0.16477272727272727, + "Vietnamese,Malay,Filipino": 0.24431818181818182, + "Vietnamese,Malay,Chinese": 0.20454545454545456, + "Vietnamese,English,Spanish": 0.13068181818181818, + "Vietnamese,English,Filipino": 0.1875, + "Vietnamese,English,Chinese": 0.19318181818181818, + "Vietnamese,Spanish,Filipino": 0.11363636363636363, + "Vietnamese,Spanish,Chinese": 0.10227272727272728, + "Vietnamese,Filipino,Chinese": 0.13068181818181818, + "Indonesian,Malay,English": 0.2727272727272727, + "Indonesian,Malay,Spanish": 0.17045454545454544, + "Indonesian,Malay,Filipino": 0.21022727272727273, + "Indonesian,Malay,Chinese": 0.2159090909090909, + "Indonesian,English,Spanish": 0.14772727272727273, + "Indonesian,English,Filipino": 0.14772727272727273, + "Indonesian,English,Chinese": 0.2159090909090909, + "Indonesian,Spanish,Filipino": 0.11931818181818182, + "Indonesian,Spanish,Chinese": 0.14772727272727273, + "Indonesian,Filipino,Chinese": 0.13068181818181818, + "Malay,English,Spanish": 0.16477272727272727, + "Malay,English,Filipino": 0.24431818181818182, + "Malay,English,Chinese": 0.22727272727272727, + "Malay,Spanish,Filipino": 0.1590909090909091, + "Malay,Spanish,Chinese": 0.13068181818181818, + "Malay,Filipino,Chinese": 0.1590909090909091, + "English,Spanish,Filipino": 0.14204545454545456, + "English,Spanish,Chinese": 0.13636363636363635, + "English,Filipino,Chinese": 0.1534090909090909, + "Spanish,Filipino,Chinese": 0.125 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.19318181818181818, + "Vietnamese,Indonesian,Malay,Spanish": 0.09090909090909091, + "Vietnamese,Indonesian,Malay,Filipino": 0.13068181818181818, + "Vietnamese,Indonesian,Malay,Chinese": 0.13636363636363635, + "Vietnamese,Indonesian,English,Spanish": 0.0625, + "Vietnamese,Indonesian,English,Filipino": 0.10227272727272728, + "Vietnamese,Indonesian,English,Chinese": 0.11931818181818182, + "Vietnamese,Indonesian,Spanish,Filipino": 0.0625, + "Vietnamese,Indonesian,Spanish,Chinese": 0.0625, + "Vietnamese,Indonesian,Filipino,Chinese": 0.07954545454545454, + "Vietnamese,Malay,English,Spanish": 0.10227272727272728, + "Vietnamese,Malay,English,Filipino": 0.1534090909090909, + "Vietnamese,Malay,English,Chinese": 0.14772727272727273, + "Vietnamese,Malay,Spanish,Filipino": 0.09090909090909091, + "Vietnamese,Malay,Spanish,Chinese": 0.0625, + "Vietnamese,Malay,Filipino,Chinese": 0.09659090909090909, + "Vietnamese,English,Spanish,Filipino": 0.0625, + "Vietnamese,English,Spanish,Chinese": 0.0625, + "Vietnamese,English,Filipino,Chinese": 0.08522727272727272, + "Vietnamese,Spanish,Filipino,Chinese": 0.05113636363636364, + "Indonesian,Malay,English,Spanish": 0.09659090909090909, + "Indonesian,Malay,English,Filipino": 0.13068181818181818, + "Indonesian,Malay,English,Chinese": 0.1590909090909091, + "Indonesian,Malay,Spanish,Filipino": 0.06818181818181818, + "Indonesian,Malay,Spanish,Chinese": 0.08522727272727272, + "Indonesian,Malay,Filipino,Chinese": 0.09659090909090909, + "Indonesian,English,Spanish,Filipino": 0.045454545454545456, + "Indonesian,English,Spanish,Chinese": 0.06818181818181818, + "Indonesian,English,Filipino,Chinese": 0.07386363636363637, + "Indonesian,Spanish,Filipino,Chinese": 0.056818181818181816, + "Malay,English,Spanish,Filipino": 0.07954545454545454, + "Malay,English,Spanish,Chinese": 0.07386363636363637, + "Malay,English,Filipino,Chinese": 0.09659090909090909, + "Malay,Spanish,Filipino,Chinese": 0.0625, + "English,Spanish,Filipino,Chinese": 0.045454545454545456 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.056818181818181816, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.09659090909090909, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.11363636363636363, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.045454545454545456, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.03409090909090909, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.0625, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.03409090909090909, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.028409090909090908, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.056818181818181816, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.028409090909090908, + "Vietnamese,Malay,English,Spanish,Filipino": 0.05113636363636364, + "Vietnamese,Malay,English,Spanish,Chinese": 0.045454545454545456, + "Vietnamese,Malay,English,Filipino,Chinese": 0.06818181818181818, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.03409090909090909, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.028409090909090908, + "Indonesian,Malay,English,Spanish,Filipino": 0.03409090909090909, + "Indonesian,Malay,English,Spanish,Chinese": 0.045454545454545456, + "Indonesian,Malay,English,Filipino,Chinese": 0.0625, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.028409090909090908, + "Indonesian,English,Spanish,Filipino,Chinese": 0.011363636363636364, + "Malay,English,Spanish,Filipino,Chinese": 0.028409090909090908 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.028409090909090908, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.022727272727272728, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.05113636363636364, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.011363636363636364, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.011363636363636364, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.022727272727272728, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.005681818181818182 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.005681818181818182 + } + }, + "AC3_2": 0.301043288121927, + "AC3_3": 0.20726851801428098, + "AC3_4": 0.1340278757224619, + "AC3_5": 0.0797403183193147, + "AC3_6": 0.04032986243054694, + "AC3_7": 0.011113493420508574 + }, + "prompt_3": { + "overall_acc": 0.2483766233766234, + "language_acc": { + "Vietnamese": 0.24431818181818182, + "Indonesian": 0.29545454545454547, + "Malay": 0.22727272727272727, + "English": 0.2556818181818182, + "Spanish": 0.2727272727272727, + "Filipino": 0.23863636363636365, + "Chinese": 0.20454545454545456 + }, + "consistency_score_2": 0.2532467532467533, + "consistency_score_3": 0.06461038961038959, + "consistency_score_4": 0.017045454545454537, + "consistency_score_5": 0.003787878787878787, + "consistency_score_6": 0.0, + "consistency_score_7": 0.0, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.23295454545454544, + "Vietnamese,Malay": 0.24431818181818182, + "Vietnamese,English": 0.25, + "Vietnamese,Spanish": 0.26136363636363635, + "Vietnamese,Filipino": 0.2727272727272727, + "Vietnamese,Chinese": 0.2897727272727273, + "Indonesian,Malay": 0.25, + "Indonesian,English": 0.24431818181818182, + "Indonesian,Spanish": 0.24431818181818182, + "Indonesian,Filipino": 0.2727272727272727, + "Indonesian,Chinese": 0.25, + "Malay,English": 0.24431818181818182, + "Malay,Spanish": 0.24431818181818182, + "Malay,Filipino": 0.23863636363636365, + "Malay,Chinese": 0.23295454545454544, + "English,Spanish": 0.3181818181818182, + "English,Filipino": 0.1875, + "English,Chinese": 0.26136363636363635, + "Spanish,Filipino": 0.3068181818181818, + "Spanish,Chinese": 0.1875, + "Filipino,Chinese": 0.2840909090909091 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.05113636363636364, + "Vietnamese,Indonesian,English": 0.05113636363636364, + "Vietnamese,Indonesian,Spanish": 0.0625, + "Vietnamese,Indonesian,Filipino": 0.07386363636363637, + "Vietnamese,Indonesian,Chinese": 0.045454545454545456, + "Vietnamese,Malay,English": 0.07386363636363637, + "Vietnamese,Malay,Spanish": 0.056818181818181816, + "Vietnamese,Malay,Filipino": 0.056818181818181816, + "Vietnamese,Malay,Chinese": 0.056818181818181816, + "Vietnamese,English,Spanish": 0.07386363636363637, + "Vietnamese,English,Filipino": 0.045454545454545456, + "Vietnamese,English,Chinese": 0.0625, + "Vietnamese,Spanish,Filipino": 0.08522727272727272, + "Vietnamese,Spanish,Chinese": 0.09659090909090909, + "Vietnamese,Filipino,Chinese": 0.08522727272727272, + "Indonesian,Malay,English": 0.03409090909090909, + "Indonesian,Malay,Spanish": 0.0625, + "Indonesian,Malay,Filipino": 0.0625, + "Indonesian,Malay,Chinese": 0.06818181818181818, + "Indonesian,English,Spanish": 0.06818181818181818, + "Indonesian,English,Filipino": 0.05113636363636364, + "Indonesian,English,Chinese": 0.056818181818181816, + "Indonesian,Spanish,Filipino": 0.07954545454545454, + "Indonesian,Spanish,Chinese": 0.045454545454545456, + "Indonesian,Filipino,Chinese": 0.06818181818181818, + "Malay,English,Spanish": 0.10795454545454546, + "Malay,English,Filipino": 0.06818181818181818, + "Malay,English,Chinese": 0.06818181818181818, + "Malay,Spanish,Filipino": 0.06818181818181818, + "Malay,Spanish,Chinese": 0.03977272727272727, + "Malay,Filipino,Chinese": 0.0625, + "English,Spanish,Filipino": 0.09090909090909091, + "English,Spanish,Chinese": 0.0625, + "English,Filipino,Chinese": 0.056818181818181816, + "Spanish,Filipino,Chinese": 0.0625 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.017045454545454544, + "Vietnamese,Indonesian,Malay,Spanish": 0.011363636363636364, + "Vietnamese,Indonesian,Malay,Filipino": 0.005681818181818182, + "Vietnamese,Indonesian,Malay,Chinese": 0.011363636363636364, + "Vietnamese,Indonesian,English,Spanish": 0.005681818181818182, + "Vietnamese,Indonesian,English,Filipino": 0.005681818181818182, + "Vietnamese,Indonesian,English,Chinese": 0.005681818181818182, + "Vietnamese,Indonesian,Spanish,Filipino": 0.017045454545454544, + "Vietnamese,Indonesian,Spanish,Chinese": 0.017045454545454544, + "Vietnamese,Indonesian,Filipino,Chinese": 0.011363636363636364, + "Vietnamese,Malay,English,Spanish": 0.03409090909090909, + "Vietnamese,Malay,English,Filipino": 0.028409090909090908, + "Vietnamese,Malay,English,Chinese": 0.022727272727272728, + "Vietnamese,Malay,Spanish,Filipino": 0.017045454545454544, + "Vietnamese,Malay,Spanish,Chinese": 0.011363636363636364, + "Vietnamese,Malay,Filipino,Chinese": 0.011363636363636364, + "Vietnamese,English,Spanish,Filipino": 0.028409090909090908, + "Vietnamese,English,Spanish,Chinese": 0.028409090909090908, + "Vietnamese,English,Filipino,Chinese": 0.017045454545454544, + "Vietnamese,Spanish,Filipino,Chinese": 0.028409090909090908, + "Indonesian,Malay,English,Spanish": 0.022727272727272728, + "Indonesian,Malay,English,Filipino": 0.011363636363636364, + "Indonesian,Malay,English,Chinese": 0.011363636363636364, + "Indonesian,Malay,Spanish,Filipino": 0.022727272727272728, + "Indonesian,Malay,Spanish,Chinese": 0.017045454545454544, + "Indonesian,Malay,Filipino,Chinese": 0.011363636363636364, + "Indonesian,English,Spanish,Filipino": 0.017045454545454544, + "Indonesian,English,Spanish,Chinese": 0.005681818181818182, + "Indonesian,English,Filipino,Chinese": 0.011363636363636364, + "Indonesian,Spanish,Filipino,Chinese": 0.017045454545454544, + "Malay,English,Spanish,Filipino": 0.03409090909090909, + "Malay,English,Spanish,Chinese": 0.022727272727272728, + "Malay,English,Filipino,Chinese": 0.017045454545454544, + "Malay,Spanish,Filipino,Chinese": 0.017045454545454544, + "English,Spanish,Filipino,Chinese": 0.022727272727272728 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.005681818181818182, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.005681818181818182, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.005681818181818182, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.0, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.005681818181818182, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.0, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.0, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.0, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.0, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.0, + "Vietnamese,Malay,English,Spanish,Filipino": 0.011363636363636364, + "Vietnamese,Malay,English,Spanish,Chinese": 0.005681818181818182, + "Vietnamese,Malay,English,Filipino,Chinese": 0.005681818181818182, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.0, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.011363636363636364, + "Indonesian,Malay,English,Spanish,Filipino": 0.005681818181818182, + "Indonesian,Malay,English,Spanish,Chinese": 0.005681818181818182, + "Indonesian,Malay,English,Filipino,Chinese": 0.0, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.005681818181818182, + "Indonesian,English,Spanish,Filipino,Chinese": 0.0, + "Malay,English,Spanish,Filipino,Chinese": 0.005681818181818182 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.0, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.0, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.0, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.0, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.0, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.0, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.0 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.0 + } + }, + "AC3_2": 0.2507880468545565, + "AC3_3": 0.10254553534472205, + "AC3_4": 0.03190158464185903, + "AC3_5": 0.007461958639256987, + "AC3_6": 0.0, + "AC3_7": 0.0 + }, + "prompt_4": { + "overall_acc": 0.2564935064935065, + "language_acc": { + "Vietnamese": 0.23863636363636365, + "Indonesian": 0.29545454545454547, + "Malay": 0.25, + "English": 0.25, + "Spanish": 0.2556818181818182, + "Filipino": 0.2784090909090909, + "Chinese": 0.22727272727272727 + }, + "consistency_score_2": 0.3027597402597402, + "consistency_score_3": 0.11185064935064933, + "consistency_score_4": 0.05016233766233765, + "consistency_score_5": 0.02732683982683982, + "consistency_score_6": 0.017045454545454544, + "consistency_score_7": 0.011363636363636364, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.3068181818181818, + "Vietnamese,Malay": 0.3125, + "Vietnamese,English": 0.3465909090909091, + "Vietnamese,Spanish": 0.30113636363636365, + "Vietnamese,Filipino": 0.32954545454545453, + "Vietnamese,Chinese": 0.3181818181818182, + "Indonesian,Malay": 0.4034090909090909, + "Indonesian,English": 0.26136363636363635, + "Indonesian,Spanish": 0.2897727272727273, + "Indonesian,Filipino": 0.24431818181818182, + "Indonesian,Chinese": 0.2727272727272727, + "Malay,English": 0.3409090909090909, + "Malay,Spanish": 0.2784090909090909, + "Malay,Filipino": 0.30113636363636365, + "Malay,Chinese": 0.30113636363636365, + "English,Spanish": 0.23863636363636365, + "English,Filipino": 0.2897727272727273, + "English,Chinese": 0.375, + "Spanish,Filipino": 0.3181818181818182, + "Spanish,Chinese": 0.22727272727272727, + "Filipino,Chinese": 0.30113636363636365 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.1534090909090909, + "Vietnamese,Indonesian,English": 0.11931818181818182, + "Vietnamese,Indonesian,Spanish": 0.10227272727272728, + "Vietnamese,Indonesian,Filipino": 0.10227272727272728, + "Vietnamese,Indonesian,Chinese": 0.13636363636363635, + "Vietnamese,Malay,English": 0.1590909090909091, + "Vietnamese,Malay,Spanish": 0.10227272727272728, + "Vietnamese,Malay,Filipino": 0.125, + "Vietnamese,Malay,Chinese": 0.11931818181818182, + "Vietnamese,English,Spanish": 0.11363636363636363, + "Vietnamese,English,Filipino": 0.11363636363636363, + "Vietnamese,English,Chinese": 0.1534090909090909, + "Vietnamese,Spanish,Filipino": 0.11931818181818182, + "Vietnamese,Spanish,Chinese": 0.10795454545454546, + "Vietnamese,Filipino,Chinese": 0.11931818181818182, + "Indonesian,Malay,English": 0.13636363636363635, + "Indonesian,Malay,Spanish": 0.13068181818181818, + "Indonesian,Malay,Filipino": 0.11931818181818182, + "Indonesian,Malay,Chinese": 0.13068181818181818, + "Indonesian,English,Spanish": 0.07386363636363637, + "Indonesian,English,Filipino": 0.06818181818181818, + "Indonesian,English,Chinese": 0.11363636363636363, + "Indonesian,Spanish,Filipino": 0.09090909090909091, + "Indonesian,Spanish,Chinese": 0.07386363636363637, + "Indonesian,Filipino,Chinese": 0.09090909090909091, + "Malay,English,Spanish": 0.10227272727272728, + "Malay,English,Filipino": 0.11931818181818182, + "Malay,English,Chinese": 0.13068181818181818, + "Malay,Spanish,Filipino": 0.10227272727272728, + "Malay,Spanish,Chinese": 0.07954545454545454, + "Malay,Filipino,Chinese": 0.10795454545454546, + "English,Spanish,Filipino": 0.09659090909090909, + "English,Spanish,Chinese": 0.10795454545454546, + "English,Filipino,Chinese": 0.10227272727272728, + "Spanish,Filipino,Chinese": 0.09090909090909091 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.07386363636363637, + "Vietnamese,Indonesian,Malay,Spanish": 0.056818181818181816, + "Vietnamese,Indonesian,Malay,Filipino": 0.056818181818181816, + "Vietnamese,Indonesian,Malay,Chinese": 0.0625, + "Vietnamese,Indonesian,English,Spanish": 0.045454545454545456, + "Vietnamese,Indonesian,English,Filipino": 0.03409090909090909, + "Vietnamese,Indonesian,English,Chinese": 0.06818181818181818, + "Vietnamese,Indonesian,Spanish,Filipino": 0.05113636363636364, + "Vietnamese,Indonesian,Spanish,Chinese": 0.045454545454545456, + "Vietnamese,Indonesian,Filipino,Chinese": 0.0625, + "Vietnamese,Malay,English,Spanish": 0.0625, + "Vietnamese,Malay,English,Filipino": 0.056818181818181816, + "Vietnamese,Malay,English,Chinese": 0.06818181818181818, + "Vietnamese,Malay,Spanish,Filipino": 0.05113636363636364, + "Vietnamese,Malay,Spanish,Chinese": 0.056818181818181816, + "Vietnamese,Malay,Filipino,Chinese": 0.056818181818181816, + "Vietnamese,English,Spanish,Filipino": 0.05113636363636364, + "Vietnamese,English,Spanish,Chinese": 0.056818181818181816, + "Vietnamese,English,Filipino,Chinese": 0.03977272727272727, + "Vietnamese,Spanish,Filipino,Chinese": 0.045454545454545456, + "Indonesian,Malay,English,Spanish": 0.05113636363636364, + "Indonesian,Malay,English,Filipino": 0.045454545454545456, + "Indonesian,Malay,English,Chinese": 0.056818181818181816, + "Indonesian,Malay,Spanish,Filipino": 0.045454545454545456, + "Indonesian,Malay,Spanish,Chinese": 0.045454545454545456, + "Indonesian,Malay,Filipino,Chinese": 0.05113636363636364, + "Indonesian,English,Spanish,Filipino": 0.028409090909090908, + "Indonesian,English,Spanish,Chinese": 0.03409090909090909, + "Indonesian,English,Filipino,Chinese": 0.028409090909090908, + "Indonesian,Spanish,Filipino,Chinese": 0.03977272727272727, + "Malay,English,Spanish,Filipino": 0.05113636363636364, + "Malay,English,Spanish,Chinese": 0.05113636363636364, + "Malay,English,Filipino,Chinese": 0.05113636363636364, + "Malay,Spanish,Filipino,Chinese": 0.03409090909090909, + "English,Spanish,Filipino,Chinese": 0.03977272727272727 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.03409090909090909, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.028409090909090908, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.03409090909090909, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.028409090909090908, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.028409090909090908, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.03409090909090909, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.022727272727272728, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.022727272727272728, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.022727272727272728, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.028409090909090908, + "Vietnamese,Malay,English,Spanish,Filipino": 0.03977272727272727, + "Vietnamese,Malay,English,Spanish,Chinese": 0.03409090909090909, + "Vietnamese,Malay,English,Filipino,Chinese": 0.028409090909090908, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.028409090909090908, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.017045454545454544, + "Indonesian,Malay,English,Spanish,Filipino": 0.028409090909090908, + "Indonesian,Malay,English,Spanish,Chinese": 0.028409090909090908, + "Indonesian,Malay,English,Filipino,Chinese": 0.022727272727272728, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.022727272727272728, + "Indonesian,English,Spanish,Filipino,Chinese": 0.017045454545454544, + "Malay,English,Spanish,Filipino,Chinese": 0.022727272727272728 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.022727272727272728, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.017045454545454544, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.017045454545454544, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.017045454545454544, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.011363636363636364, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.017045454545454544, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.017045454545454544 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.011363636363636364 + } + }, + "AC3_2": 0.2777128531348934, + "AC3_3": 0.15577260989515027, + "AC3_4": 0.08391370405574175, + "AC3_5": 0.04939150456952641, + "AC3_6": 0.03196654975901778, + "AC3_7": 0.02176308539132415 + }, + "prompt_5": { + "overall_acc": 0.25405844155844154, + "language_acc": { + "Vietnamese": 0.2784090909090909, + "Indonesian": 0.2159090909090909, + "Malay": 0.2215909090909091, + "English": 0.2727272727272727, + "Spanish": 0.24431818181818182, + "Filipino": 0.3068181818181818, + "Chinese": 0.23863636363636365 + }, + "consistency_score_2": 0.2827380952380952, + "consistency_score_3": 0.08912337662337663, + "consistency_score_4": 0.03165584415584415, + "consistency_score_5": 0.013257575757575756, + "consistency_score_6": 0.007305194805194807, + "consistency_score_7": 0.005681818181818182, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.2840909090909091, + "Vietnamese,Malay": 0.22727272727272727, + "Vietnamese,English": 0.2215909090909091, + "Vietnamese,Spanish": 0.3068181818181818, + "Vietnamese,Filipino": 0.23295454545454544, + "Vietnamese,Chinese": 0.29545454545454547, + "Indonesian,Malay": 0.2897727272727273, + "Indonesian,English": 0.26704545454545453, + "Indonesian,Spanish": 0.2840909090909091, + "Indonesian,Filipino": 0.25, + "Indonesian,Chinese": 0.3068181818181818, + "Malay,English": 0.2727272727272727, + "Malay,Spanish": 0.25, + "Malay,Filipino": 0.2840909090909091, + "Malay,Chinese": 0.3465909090909091, + "English,Spanish": 0.2727272727272727, + "English,Filipino": 0.2784090909090909, + "English,Chinese": 0.3409090909090909, + "Spanish,Filipino": 0.3522727272727273, + "Spanish,Chinese": 0.3352272727272727, + "Filipino,Chinese": 0.23863636363636365 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.0625, + "Vietnamese,Indonesian,English": 0.06818181818181818, + "Vietnamese,Indonesian,Spanish": 0.07386363636363637, + "Vietnamese,Indonesian,Filipino": 0.07954545454545454, + "Vietnamese,Indonesian,Chinese": 0.08522727272727272, + "Vietnamese,Malay,English": 0.028409090909090908, + "Vietnamese,Malay,Spanish": 0.0625, + "Vietnamese,Malay,Filipino": 0.07954545454545454, + "Vietnamese,Malay,Chinese": 0.07386363636363637, + "Vietnamese,English,Spanish": 0.06818181818181818, + "Vietnamese,English,Filipino": 0.056818181818181816, + "Vietnamese,English,Chinese": 0.07386363636363637, + "Vietnamese,Spanish,Filipino": 0.10227272727272728, + "Vietnamese,Spanish,Chinese": 0.11931818181818182, + "Vietnamese,Filipino,Chinese": 0.06818181818181818, + "Indonesian,Malay,English": 0.09659090909090909, + "Indonesian,Malay,Spanish": 0.07386363636363637, + "Indonesian,Malay,Filipino": 0.08522727272727272, + "Indonesian,Malay,Chinese": 0.09659090909090909, + "Indonesian,English,Spanish": 0.09659090909090909, + "Indonesian,English,Filipino": 0.07954545454545454, + "Indonesian,English,Chinese": 0.09659090909090909, + "Indonesian,Spanish,Filipino": 0.11363636363636363, + "Indonesian,Spanish,Chinese": 0.11931818181818182, + "Indonesian,Filipino,Chinese": 0.06818181818181818, + "Malay,English,Spanish": 0.07954545454545454, + "Malay,English,Filipino": 0.07954545454545454, + "Malay,English,Chinese": 0.14204545454545456, + "Malay,Spanish,Filipino": 0.10795454545454546, + "Malay,Spanish,Chinese": 0.11931818181818182, + "Malay,Filipino,Chinese": 0.11931818181818182, + "English,Spanish,Filipino": 0.11931818181818182, + "English,Spanish,Chinese": 0.11363636363636363, + "English,Filipino,Chinese": 0.08522727272727272, + "Spanish,Filipino,Chinese": 0.125 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.017045454545454544, + "Vietnamese,Indonesian,Malay,Spanish": 0.017045454545454544, + "Vietnamese,Indonesian,Malay,Filipino": 0.028409090909090908, + "Vietnamese,Indonesian,Malay,Chinese": 0.017045454545454544, + "Vietnamese,Indonesian,English,Spanish": 0.028409090909090908, + "Vietnamese,Indonesian,English,Filipino": 0.022727272727272728, + "Vietnamese,Indonesian,English,Chinese": 0.028409090909090908, + "Vietnamese,Indonesian,Spanish,Filipino": 0.03409090909090909, + "Vietnamese,Indonesian,Spanish,Chinese": 0.017045454545454544, + "Vietnamese,Indonesian,Filipino,Chinese": 0.022727272727272728, + "Vietnamese,Malay,English,Spanish": 0.011363636363636364, + "Vietnamese,Malay,English,Filipino": 0.011363636363636364, + "Vietnamese,Malay,English,Chinese": 0.017045454545454544, + "Vietnamese,Malay,Spanish,Filipino": 0.022727272727272728, + "Vietnamese,Malay,Spanish,Chinese": 0.03409090909090909, + "Vietnamese,Malay,Filipino,Chinese": 0.017045454545454544, + "Vietnamese,English,Spanish,Filipino": 0.03409090909090909, + "Vietnamese,English,Spanish,Chinese": 0.028409090909090908, + "Vietnamese,English,Filipino,Chinese": 0.011363636363636364, + "Vietnamese,Spanish,Filipino,Chinese": 0.03409090909090909, + "Indonesian,Malay,English,Spanish": 0.028409090909090908, + "Indonesian,Malay,English,Filipino": 0.022727272727272728, + "Indonesian,Malay,English,Chinese": 0.03977272727272727, + "Indonesian,Malay,Spanish,Filipino": 0.03977272727272727, + "Indonesian,Malay,Spanish,Chinese": 0.045454545454545456, + "Indonesian,Malay,Filipino,Chinese": 0.028409090909090908, + "Indonesian,English,Spanish,Filipino": 0.045454545454545456, + "Indonesian,English,Spanish,Chinese": 0.05113636363636364, + "Indonesian,English,Filipino,Chinese": 0.028409090909090908, + "Indonesian,Spanish,Filipino,Chinese": 0.03977272727272727, + "Malay,English,Spanish,Filipino": 0.045454545454545456, + "Malay,English,Spanish,Chinese": 0.056818181818181816, + "Malay,English,Filipino,Chinese": 0.056818181818181816, + "Malay,Spanish,Filipino,Chinese": 0.06818181818181818, + "English,Spanish,Filipino,Chinese": 0.056818181818181816 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.011363636363636364, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.011363636363636364, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.011363636363636364, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.011363636363636364, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.011363636363636364, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.005681818181818182, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.017045454545454544, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.011363636363636364, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.005681818181818182, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.005681818181818182, + "Vietnamese,Malay,English,Spanish,Filipino": 0.005681818181818182, + "Vietnamese,Malay,English,Spanish,Chinese": 0.011363636363636364, + "Vietnamese,Malay,English,Filipino,Chinese": 0.005681818181818182, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.011363636363636364, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.011363636363636364, + "Indonesian,Malay,English,Spanish,Filipino": 0.011363636363636364, + "Indonesian,Malay,English,Spanish,Chinese": 0.022727272727272728, + "Indonesian,Malay,English,Filipino,Chinese": 0.011363636363636364, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.022727272727272728, + "Indonesian,English,Spanish,Filipino,Chinese": 0.022727272727272728, + "Malay,English,Spanish,Filipino,Chinese": 0.03977272727272727 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.005681818181818182, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.011363636363636364, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.005681818181818182, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.005681818181818182, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.005681818181818182, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.005681818181818182, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.011363636363636364 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.005681818181818182 + } + }, + "AC3_2": 0.26763212840636386, + "AC3_3": 0.13195656043040851, + "AC3_4": 0.05629704100745069, + "AC3_5": 0.025200128809122774, + "AC3_6": 0.014202024677957328, + "AC3_7": 0.011115056813902522 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2524271844660194 + }, + "prompt_2": { + "accuracy": 0.18446601941747573 + }, + "prompt_3": { + "accuracy": 0.24271844660194175 + }, + "prompt_4": { + "accuracy": 0.22330097087378642 + }, + "prompt_5": { + "accuracy": 0.33980582524271846 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.21904761904761905 + }, + "prompt_2": { + "accuracy": 0.18095238095238095 + }, + "prompt_3": { + "accuracy": 0.2 + }, + "prompt_4": { + "accuracy": 0.22857142857142856 + }, + "prompt_5": { + "accuracy": 0.24761904761904763 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.22429906542056074 + }, + "prompt_2": { + "accuracy": 0.27102803738317754 + }, + "prompt_3": { + "accuracy": 0.14953271028037382 + }, + "prompt_4": { + "accuracy": 0.14953271028037382 + }, + "prompt_5": { + "accuracy": 0.2336448598130841 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.34, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.2, + "history": 0.26666666666666666, + "literature": 0.5, + "politics": 0.8, + "culture": 0.3, + "film": 0.2, + "law": 0.3, + "geography": 0.2 + } + }, + "prompt_2": { + "accuracy": 0.33, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.3, + "history": 0.13333333333333333, + "literature": 0.3, + "politics": 0.6, + "culture": 0.3, + "film": 0.5, + "law": 0.3, + "geography": 0.4 + } + }, + "prompt_3": { + "accuracy": 0.33, + "category_acc": { + "brand": 0.4, + "demographics": 0.6, + "biology": 0.2, + "history": 0.26666666666666666, + "literature": 0.4, + "politics": 0.7, + "culture": 0.1, + "film": 0.2, + "law": 0.3, + "geography": 0.3 + } + }, + "prompt_4": { + "accuracy": 0.34, + "category_acc": { + "brand": 0.3, + "demographics": 0.6, + "biology": 0.2, + "history": 0.2, + "literature": 0.5, + "politics": 0.8, + "culture": 0.1, + "film": 0.3, + "law": 0.3, + "geography": 0.3 + } + }, + "prompt_5": { + "accuracy": 0.33, + "category_acc": { + "brand": 0.0, + "demographics": 0.8, + "biology": 0.4, + "history": 0.3333333333333333, + "literature": 0.3, + "politics": 0.3, + "culture": 0.3, + "film": 0.3, + "law": 0.3, + "geography": 0.5 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.045582081500857526 + }, + "prompt_2": { + "bleu_score": 0.03992758448540713 + }, + "prompt_3": { + "bleu_score": 0.04189605777722635 + }, + "prompt_4": { + "bleu_score": 0.03961742495964162 + }, + "prompt_5": { + "bleu_score": 0.0331185986101603 + } }, "indommlu": { "prompt_1": -1, @@ -12228,244 +107599,1689 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.012295758955687148 + }, + "prompt_2": { + "bleu_score": 0.027254623395354057 + }, + "prompt_3": { + "bleu_score": 0.023408287560585238 + }, + "prompt_4": { + "bleu_score": 0.014959513603702445 + }, + "prompt_5": { + "bleu_score": 0.008048362232418494 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.02536214269191094 + }, + "prompt_2": { + "bleu_score": 0.01489915453686534 + }, + "prompt_3": { + "bleu_score": 0.026511302577627252 + }, + "prompt_4": { + "bleu_score": 0.0314255368257811 + }, + "prompt_5": { + "bleu_score": 0.009915369171731282 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.009232091353875814 + }, + "prompt_2": { + "bleu_score": 0.011782292357984833 + }, + "prompt_3": { + "bleu_score": 0.00861814381182431 + }, + "prompt_4": { + "bleu_score": 0.016777357644487248 + }, + "prompt_5": { + "bleu_score": 0.016872138788912104 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.01426890101006039 + }, + "prompt_2": { + "bleu_score": 0.02897043239551227 + }, + "prompt_3": { + "bleu_score": 0.02771466694077164 + }, + "prompt_4": { + "bleu_score": 0.020710729870485292 + }, + "prompt_5": { + "bleu_score": 0.0066293122653927755 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.23337222870478413 + }, + "prompt_2": { + "accuracy": 0.23453908984830804 + }, + "prompt_3": { + "accuracy": 0.2660443407234539 + }, + "prompt_4": { + "accuracy": 0.2637106184364061 + }, + "prompt_5": { + "accuracy": 0.2707117852975496 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24233106900250267, + "category_acc": { + "high_school_european_history": 0.23170731707317074, + "business_ethics": 0.24242424242424243, + "clinical_knowledge": 0.26136363636363635, + "medical_genetics": 0.2727272727272727, + "high_school_us_history": 0.21674876847290642, + "high_school_physics": 0.25333333333333335, + "high_school_world_history": 0.22033898305084745, + "virology": 0.24848484848484848, + "high_school_microeconomics": 0.24050632911392406, + "econometrics": 0.19469026548672566, + "college_computer_science": 0.21212121212121213, + "high_school_biology": 0.28802588996763756, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.23843416370106763, + "philosophy": 0.23870967741935484, + "professional_medicine": 0.24354243542435425, + "nutrition": 0.24262295081967214, + "global_facts": 0.2222222222222222, + "machine_learning": 0.18018018018018017, + "security_studies": 0.26229508196721313, + "public_relations": 0.22018348623853212, + "professional_psychology": 0.24549918166939444, + "prehistory": 0.22910216718266255, + "anatomy": 0.208955223880597, + "human_sexuality": 0.27692307692307694, + "college_medicine": 0.23837209302325582, + "high_school_government_and_politics": 0.23958333333333334, + "college_chemistry": 0.25252525252525254, + "logical_fallacies": 0.2345679012345679, + "high_school_geography": 0.19796954314720813, + "elementary_mathematics": 0.23607427055702918, + "human_aging": 0.21171171171171171, + "college_mathematics": 0.23232323232323232, + "high_school_psychology": 0.21875, + "formal_logic": 0.272, + "high_school_statistics": 0.24651162790697675, + "international_law": 0.35833333333333334, + "high_school_mathematics": 0.2379182156133829, + "high_school_computer_science": 0.31313131313131315, + "conceptual_physics": 0.24358974358974358, + "miscellaneous": 0.24680306905370844, + "high_school_chemistry": 0.23267326732673269, + "marketing": 0.2532188841201717, + "professional_law": 0.24722765818656228, + "management": 0.19607843137254902, + "college_physics": 0.24752475247524752, + "jurisprudence": 0.2897196261682243, + "world_religions": 0.27058823529411763, + "sociology": 0.24, + "us_foreign_policy": 0.2727272727272727, + "high_school_macroeconomics": 0.2416452442159383, + "computer_security": 0.2828282828282828, + "moral_scenarios": 0.2225950782997763, + "moral_disputes": 0.25217391304347825, + "electrical_engineering": 0.25, + "astronomy": 0.2781456953642384, + "college_biology": 0.21678321678321677 + } + }, + "prompt_2": { + "accuracy": 0.2426170897390061, + "category_acc": { + "high_school_european_history": 0.31097560975609756, + "business_ethics": 0.26262626262626265, + "clinical_knowledge": 0.24621212121212122, + "medical_genetics": 0.3434343434343434, + "high_school_us_history": 0.270935960591133, + "high_school_physics": 0.20666666666666667, + "high_school_world_history": 0.2542372881355932, + "virology": 0.296969696969697, + "high_school_microeconomics": 0.22362869198312235, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.2222222222222222, + "high_school_biology": 0.22330097087378642, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.21352313167259787, + "philosophy": 0.22903225806451613, + "professional_medicine": 0.25830258302583026, + "nutrition": 0.3114754098360656, + "global_facts": 0.24242424242424243, + "machine_learning": 0.26126126126126126, + "security_studies": 0.1680327868852459, + "public_relations": 0.21100917431192662, + "professional_psychology": 0.2635024549918167, + "prehistory": 0.23219814241486067, + "anatomy": 0.17164179104477612, + "human_sexuality": 0.2230769230769231, + "college_medicine": 0.2441860465116279, + "high_school_government_and_politics": 0.24479166666666666, + "college_chemistry": 0.21212121212121213, + "logical_fallacies": 0.2962962962962963, + "high_school_geography": 0.20812182741116753, + "elementary_mathematics": 0.22811671087533156, + "human_aging": 0.23423423423423423, + "college_mathematics": 0.20202020202020202, + "high_school_psychology": 0.20772058823529413, + "formal_logic": 0.312, + "high_school_statistics": 0.17209302325581396, + "international_law": 0.36666666666666664, + "high_school_mathematics": 0.2825278810408922, + "high_school_computer_science": 0.24242424242424243, + "conceptual_physics": 0.24786324786324787, + "miscellaneous": 0.23657289002557544, + "high_school_chemistry": 0.19801980198019803, + "marketing": 0.2532188841201717, + "professional_law": 0.25440313111545987, + "management": 0.23529411764705882, + "college_physics": 0.15841584158415842, + "jurisprudence": 0.29906542056074764, + "world_religions": 0.3058823529411765, + "sociology": 0.29, + "us_foreign_policy": 0.2222222222222222, + "high_school_macroeconomics": 0.1928020565552699, + "computer_security": 0.26262626262626265, + "moral_scenarios": 0.22818791946308725, + "moral_disputes": 0.2608695652173913, + "electrical_engineering": 0.25, + "astronomy": 0.2119205298013245, + "college_biology": 0.25874125874125875 + } + }, + "prompt_3": { + "accuracy": 0.24562030747229174, + "category_acc": { + "high_school_european_history": 0.23780487804878048, + "business_ethics": 0.2828282828282828, + "clinical_knowledge": 0.22348484848484848, + "medical_genetics": 0.3333333333333333, + "high_school_us_history": 0.2413793103448276, + "high_school_physics": 0.22, + "high_school_world_history": 0.288135593220339, + "virology": 0.3090909090909091, + "high_school_microeconomics": 0.19831223628691982, + "econometrics": 0.19469026548672566, + "college_computer_science": 0.21212121212121213, + "high_school_biology": 0.2168284789644013, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.22419928825622776, + "philosophy": 0.22258064516129034, + "professional_medicine": 0.23247232472324722, + "nutrition": 0.33114754098360655, + "global_facts": 0.2222222222222222, + "machine_learning": 0.3153153153153153, + "security_studies": 0.18032786885245902, + "public_relations": 0.1743119266055046, + "professional_psychology": 0.281505728314239, + "prehistory": 0.23219814241486067, + "anatomy": 0.1865671641791045, + "human_sexuality": 0.23076923076923078, + "college_medicine": 0.23255813953488372, + "high_school_government_and_politics": 0.22395833333333334, + "college_chemistry": 0.21212121212121213, + "logical_fallacies": 0.29012345679012347, + "high_school_geography": 0.2233502538071066, + "elementary_mathematics": 0.23872679045092837, + "human_aging": 0.23873873873873874, + "college_mathematics": 0.1919191919191919, + "high_school_psychology": 0.20955882352941177, + "formal_logic": 0.304, + "high_school_statistics": 0.20930232558139536, + "international_law": 0.3, + "high_school_mathematics": 0.29739776951672864, + "high_school_computer_science": 0.30303030303030304, + "conceptual_physics": 0.24358974358974358, + "miscellaneous": 0.2557544757033248, + "high_school_chemistry": 0.21782178217821782, + "marketing": 0.2446351931330472, + "professional_law": 0.2576647097195042, + "management": 0.20588235294117646, + "college_physics": 0.2079207920792079, + "jurisprudence": 0.27102803738317754, + "world_religions": 0.2823529411764706, + "sociology": 0.295, + "us_foreign_policy": 0.23232323232323232, + "high_school_macroeconomics": 0.2133676092544987, + "computer_security": 0.25252525252525254, + "moral_scenarios": 0.25838926174496646, + "moral_disputes": 0.24057971014492754, + "electrical_engineering": 0.24305555555555555, + "astronomy": 0.2119205298013245, + "college_biology": 0.23076923076923078 + } + }, + "prompt_4": { + "accuracy": 0.25434393993564536, + "category_acc": { + "high_school_european_history": 0.21341463414634146, + "business_ethics": 0.3333333333333333, + "clinical_knowledge": 0.24621212121212122, + "medical_genetics": 0.2727272727272727, + "high_school_us_history": 0.2019704433497537, + "high_school_physics": 0.29333333333333333, + "high_school_world_history": 0.25, + "virology": 0.2545454545454545, + "high_school_microeconomics": 0.23628691983122363, + "econometrics": 0.2743362831858407, + "college_computer_science": 0.2727272727272727, + "high_school_biology": 0.27184466019417475, + "abstract_algebra": 0.3434343434343434, + "professional_accounting": 0.23843416370106763, + "philosophy": 0.2032258064516129, + "professional_medicine": 0.24354243542435425, + "nutrition": 0.2721311475409836, + "global_facts": 0.24242424242424243, + "machine_learning": 0.2882882882882883, + "security_studies": 0.25, + "public_relations": 0.30275229357798167, + "professional_psychology": 0.2635024549918167, + "prehistory": 0.24148606811145512, + "anatomy": 0.23880597014925373, + "human_sexuality": 0.26153846153846155, + "college_medicine": 0.18604651162790697, + "high_school_government_and_politics": 0.2604166666666667, + "college_chemistry": 0.2828282828282828, + "logical_fallacies": 0.24074074074074073, + "high_school_geography": 0.2233502538071066, + "elementary_mathematics": 0.23342175066312998, + "human_aging": 0.3108108108108108, + "college_mathematics": 0.1717171717171717, + "high_school_psychology": 0.23897058823529413, + "formal_logic": 0.224, + "high_school_statistics": 0.3116279069767442, + "international_law": 0.25, + "high_school_mathematics": 0.26765799256505574, + "high_school_computer_science": 0.26262626262626265, + "conceptual_physics": 0.25213675213675213, + "miscellaneous": 0.2544757033248082, + "high_school_chemistry": 0.2376237623762376, + "marketing": 0.22746781115879827, + "professional_law": 0.2465753424657534, + "management": 0.21568627450980393, + "college_physics": 0.24752475247524752, + "jurisprudence": 0.29906542056074764, + "world_religions": 0.28823529411764703, + "sociology": 0.27, + "us_foreign_policy": 0.36363636363636365, + "high_school_macroeconomics": 0.2262210796915167, + "computer_security": 0.25252525252525254, + "moral_scenarios": 0.26286353467561524, + "moral_disputes": 0.26666666666666666, + "electrical_engineering": 0.2986111111111111, + "astronomy": 0.33112582781456956, + "college_biology": 0.25874125874125875 + } + }, + "prompt_5": { + "accuracy": 0.25126921701823385, + "category_acc": { + "high_school_european_history": 0.2682926829268293, + "business_ethics": 0.3333333333333333, + "clinical_knowledge": 0.25757575757575757, + "medical_genetics": 0.32323232323232326, + "high_school_us_history": 0.2512315270935961, + "high_school_physics": 0.36666666666666664, + "high_school_world_history": 0.2796610169491525, + "virology": 0.23030303030303031, + "high_school_microeconomics": 0.25316455696202533, + "econometrics": 0.24778761061946902, + "college_computer_science": 0.31313131313131315, + "high_school_biology": 0.2621359223300971, + "abstract_algebra": 0.23232323232323232, + "professional_accounting": 0.2206405693950178, + "philosophy": 0.24516129032258063, + "professional_medicine": 0.22140221402214022, + "nutrition": 0.22295081967213115, + "global_facts": 0.20202020202020202, + "machine_learning": 0.18018018018018017, + "security_studies": 0.23770491803278687, + "public_relations": 0.3119266055045872, + "professional_psychology": 0.2618657937806874, + "prehistory": 0.2476780185758514, + "anatomy": 0.3208955223880597, + "human_sexuality": 0.2153846153846154, + "college_medicine": 0.2441860465116279, + "high_school_government_and_politics": 0.265625, + "college_chemistry": 0.23232323232323232, + "logical_fallacies": 0.16666666666666666, + "high_school_geography": 0.20304568527918782, + "elementary_mathematics": 0.27055702917771884, + "human_aging": 0.24774774774774774, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.24448529411764705, + "formal_logic": 0.248, + "high_school_statistics": 0.27906976744186046, + "international_law": 0.25, + "high_school_mathematics": 0.275092936802974, + "high_school_computer_science": 0.24242424242424243, + "conceptual_physics": 0.23076923076923078, + "miscellaneous": 0.26342710997442453, + "high_school_chemistry": 0.26732673267326734, + "marketing": 0.19742489270386265, + "professional_law": 0.2609262883235486, + "management": 0.27450980392156865, + "college_physics": 0.32673267326732675, + "jurisprudence": 0.2336448598130841, + "world_religions": 0.3176470588235294, + "sociology": 0.245, + "us_foreign_policy": 0.24242424242424243, + "high_school_macroeconomics": 0.2442159383033419, + "computer_security": 0.16161616161616163, + "moral_scenarios": 0.21923937360178972, + "moral_disputes": 0.263768115942029, + "electrical_engineering": 0.24305555555555555, + "astronomy": 0.23841059602649006, + "college_biology": 0.23076923076923078 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2540861812778603 + }, + "prompt_2": { + "accuracy": 0.25037147102526003 + }, + "prompt_3": { + "accuracy": 0.2563150074294205 + }, + "prompt_4": { + "accuracy": 0.25928677563150077 + }, + "prompt_5": { + "accuracy": 0.25185735512630014 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24968866749688667, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.19047619047619047, + "college_physics": 0.25, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.09523809523809523, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.10344827586206896, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.25, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.23076923076923078, + "middle_school_physics": 0.25, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.25, + "college_economics": 0.3, + "business_administration": 0.23684210526315788, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.20588235294117646, + "teacher_qualification": 0.16326530612244897, + "high_school_politics": 0.20833333333333334, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.17857142857142858, + "ideological_and_moral_cultivation": 0.25, + "logic": 0.1111111111111111, + "law": 0.13793103448275862, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.125, + "high_school_history": 0.44, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.21153846153846154, + "sports_science": 0.2916666666666667, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.3137254901960784, + "accountant": 0.3148148148148148, + "fire_engineer": 0.16666666666666666, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.24074074074074073, + "physician": 0.24074074074074073 + } + }, + "prompt_2": { + "accuracy": 0.2590286425902864, + "category_acc": { + "computer_network": 0.125, + "operating_system": 0.25, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.2619047619047619, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.09523809523809523, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.2692307692307692, + "middle_school_physics": 0.25, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.14285714285714285, + "college_economics": 0.2833333333333333, + "business_administration": 0.15789473684210525, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.3793103448275862, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.22448979591836735, + "high_school_politics": 0.20833333333333334, + "high_school_geography": 0.375, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.17857142857142858, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.1111111111111111, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.39473684210526316, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.25, + "high_school_chinese": 0.25, + "high_school_history": 0.4, + "middle_school_history": 0.18518518518518517, + "civil_servant": 0.11538461538461539, + "sports_science": 0.2916666666666667, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.18518518518518517, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.2777777777777778, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.2222222222222222, + "physician": 0.25925925925925924 + } + }, + "prompt_3": { + "accuracy": 0.22291407222914073, + "category_acc": { + "computer_network": 0.08333333333333333, + "operating_system": 0.20833333333333334, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.2619047619047619, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.125, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.10344827586206896, + "high_school_mathematics": 0.08695652173913043, + "high_school_physics": 0.041666666666666664, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.125, + "middle_school_biology": 0.2692307692307692, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.21666666666666667, + "business_administration": 0.07894736842105263, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.2413793103448276, + "education_science": 0.14705882352941177, + "teacher_qualification": 0.20408163265306123, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.17647058823529413, + "modern_chinese_history": 0.14285714285714285, + "ideological_and_moral_cultivation": 0.3333333333333333, + "logic": 0.2962962962962963, + "law": 0.13793103448275862, + "chinese_language_and_literature": 0.14285714285714285, + "art_studies": 0.23684210526315788, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.32, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.11538461538461539, + "sports_science": 0.20833333333333334, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.20833333333333334, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.17647058823529413, + "accountant": 0.3148148148148148, + "fire_engineer": 0.16666666666666666, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.2222222222222222, + "physician": 0.3148148148148148 + } + }, + "prompt_4": { + "accuracy": 0.23848069738480698, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.16666666666666666, + "computer_architecture": 0.15384615384615385, + "college_programming": 0.21428571428571427, + "college_physics": 0.25, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.42857142857142855, + "electrical_engineer": 0.21428571428571427, + "metrology_engineer": 0.3448275862068966, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.125, + "high_school_chemistry": 0.25, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.15384615384615385, + "middle_school_physics": 0.16666666666666666, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.31666666666666665, + "business_administration": 0.21052631578947367, + "marxism": 0.25, + "mao_zedong_thought": 0.1724137931034483, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.20408163265306123, + "high_school_politics": 0.041666666666666664, + "high_school_geography": 0.25, + "middle_school_politics": 0.19230769230769232, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.2857142857142857, + "ideological_and_moral_cultivation": 0.08333333333333333, + "logic": 0.25925925925925924, + "law": 0.1724137931034483, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.23529411764705882, + "legal_professional": 0.25, + "high_school_chinese": 0.125, + "high_school_history": 0.28, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.21153846153846154, + "sports_science": 0.2916666666666667, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.375, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.29411764705882354, + "accountant": 0.24074074074074073, + "fire_engineer": 0.1111111111111111, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.14814814814814814, + "physician": 0.25925925925925924 + } + }, + "prompt_5": { + "accuracy": 0.24782067247820672, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.25, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.2857142857142857, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.09523809523809523, + "electrical_engineer": 0.35714285714285715, + "metrology_engineer": 0.13793103448275862, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.25, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.16666666666666666, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.16666666666666666, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.17857142857142858, + "college_economics": 0.25, + "business_administration": 0.13157894736842105, + "marxism": 0.375, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.1836734693877551, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.375, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.17857142857142858, + "ideological_and_moral_cultivation": 0.25, + "logic": 0.1111111111111111, + "law": 0.1724137931034483, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.4, + "middle_school_history": 0.2222222222222222, + "civil_servant": 0.19230769230769232, + "sports_science": 0.20833333333333334, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.2222222222222222, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.25925925925925924, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.2222222222222222, + "physician": 0.2777777777777778 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3154121863799283 + }, + "prompt_2": { + "accuracy": 0.3010752688172043 + }, + "prompt_3": { + "accuracy": 0.2867383512544803 + }, + "prompt_4": { + "accuracy": 0.21863799283154123 + }, + "prompt_5": { + "accuracy": 0.3118279569892473 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2628216197547919, + "category_acc": { + "agronomy": 0.27218934911242604, + "anatomy": 0.2972972972972973, + "ancient_chinese": 0.2804878048780488, + "arts": 0.23125, + "astronomy": 0.23636363636363636, + "business_ethics": 0.2679425837320574, + "chinese_civil_service_exam": 0.25625, + "chinese_driving_rule": 0.26717557251908397, + "chinese_food_culture": 0.2647058823529412, + "chinese_foreign_policy": 0.27102803738317754, + "chinese_history": 0.23219814241486067, + "chinese_literature": 0.24019607843137256, + "chinese_teacher_qualification": 0.3128491620111732, + "clinical_knowledge": 0.2869198312236287, + "college_actuarial_science": 0.3018867924528302, + "college_education": 0.27102803738317754, + "college_engineering_hydrology": 0.3113207547169811, + "college_law": 0.2037037037037037, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.27472527472527475, + "computer_science": 0.28921568627450983, + "computer_security": 0.2573099415204678, + "conceptual_physics": 0.3129251700680272, + "construction_project_management": 0.2517985611510791, + "economics": 0.3018867924528302, + "education": 0.2822085889570552, + "electrical_engineering": 0.3081395348837209, + "elementary_chinese": 0.26587301587301587, + "elementary_commonsense": 0.23232323232323232, + "elementary_information_and_technology": 0.23529411764705882, + "elementary_mathematics": 0.2608695652173913, + "ethnology": 0.26666666666666666, + "food_science": 0.36363636363636365, + "genetics": 0.23295454545454544, + "global_facts": 0.28187919463087246, + "high_school_biology": 0.22485207100591717, + "high_school_chemistry": 0.23484848484848486, + "high_school_geography": 0.211864406779661, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.26573426573426573, + "human_sexuality": 0.24603174603174602, + "international_law": 0.2648648648648649, + "journalism": 0.2616279069767442, + "jurisprudence": 0.24817518248175183, + "legal_and_moral_basis": 0.2897196261682243, + "logical": 0.2032520325203252, + "machine_learning": 0.1885245901639344, + "management": 0.2523809523809524, + "marketing": 0.26666666666666666, + "marxist_theory": 0.2698412698412698, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.2827586206896552, + "philosophy": 0.2857142857142857, + "professional_accounting": 0.2857142857142857, + "professional_law": 0.1943127962085308, + "professional_medicine": 0.2579787234042553, + "professional_psychology": 0.23706896551724138, + "public_relations": 0.28160919540229884, + "security_study": 0.2962962962962963, + "sociology": 0.2831858407079646, + "sports_science": 0.2909090909090909, + "traditional_chinese_medicine": 0.22702702702702704, + "virology": 0.3136094674556213, + "world_history": 0.2484472049689441, + "world_religions": 0.21875 + } + }, + "prompt_2": { + "accuracy": 0.2629079606285616, + "category_acc": { + "agronomy": 0.2603550295857988, + "anatomy": 0.2702702702702703, + "ancient_chinese": 0.3048780487804878, + "arts": 0.25, + "astronomy": 0.21818181818181817, + "business_ethics": 0.2583732057416268, + "chinese_civil_service_exam": 0.21875, + "chinese_driving_rule": 0.2748091603053435, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.308411214953271, + "chinese_history": 0.2476780185758514, + "chinese_literature": 0.2647058823529412, + "chinese_teacher_qualification": 0.3128491620111732, + "clinical_knowledge": 0.2742616033755274, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.2803738317757009, + "college_engineering_hydrology": 0.24528301886792453, + "college_law": 0.16666666666666666, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.22641509433962265, + "college_medicine": 0.28205128205128205, + "computer_science": 0.28921568627450983, + "computer_security": 0.29239766081871343, + "conceptual_physics": 0.35374149659863946, + "construction_project_management": 0.2805755395683453, + "economics": 0.2641509433962264, + "education": 0.25766871165644173, + "electrical_engineering": 0.3023255813953488, + "elementary_chinese": 0.23809523809523808, + "elementary_commonsense": 0.23232323232323232, + "elementary_information_and_technology": 0.21008403361344538, + "elementary_mathematics": 0.2217391304347826, + "ethnology": 0.2518518518518518, + "food_science": 0.3706293706293706, + "genetics": 0.23863636363636365, + "global_facts": 0.26174496644295303, + "high_school_biology": 0.21301775147928995, + "high_school_chemistry": 0.25757575757575757, + "high_school_geography": 0.2796610169491525, + "high_school_mathematics": 0.2621951219512195, + "high_school_physics": 0.2636363636363636, + "high_school_politics": 0.27972027972027974, + "human_sexuality": 0.25396825396825395, + "international_law": 0.2594594594594595, + "journalism": 0.27325581395348836, + "jurisprudence": 0.23357664233576642, + "legal_and_moral_basis": 0.2897196261682243, + "logical": 0.2601626016260163, + "machine_learning": 0.19672131147540983, + "management": 0.23809523809523808, + "marketing": 0.25555555555555554, + "marxist_theory": 0.2962962962962963, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.2620689655172414, + "philosophy": 0.2857142857142857, + "professional_accounting": 0.30857142857142855, + "professional_law": 0.1943127962085308, + "professional_medicine": 0.2579787234042553, + "professional_psychology": 0.25862068965517243, + "public_relations": 0.28735632183908044, + "security_study": 0.31851851851851853, + "sociology": 0.3008849557522124, + "sports_science": 0.2545454545454545, + "traditional_chinese_medicine": 0.23783783783783785, + "virology": 0.3254437869822485, + "world_history": 0.2422360248447205, + "world_religions": 0.29375 + } + }, + "prompt_3": { + "accuracy": 0.2515973061647384, + "category_acc": { + "agronomy": 0.25443786982248523, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.13414634146341464, + "arts": 0.18125, + "astronomy": 0.24242424242424243, + "business_ethics": 0.2535885167464115, + "chinese_civil_service_exam": 0.275, + "chinese_driving_rule": 0.24427480916030533, + "chinese_food_culture": 0.27941176470588236, + "chinese_foreign_policy": 0.2523364485981308, + "chinese_history": 0.2476780185758514, + "chinese_literature": 0.2647058823529412, + "chinese_teacher_qualification": 0.3128491620111732, + "clinical_knowledge": 0.23628691983122363, + "college_actuarial_science": 0.22641509433962265, + "college_education": 0.2803738317757009, + "college_engineering_hydrology": 0.2169811320754717, + "college_law": 0.2222222222222222, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.24528301886792453, + "college_medicine": 0.2564102564102564, + "computer_science": 0.22058823529411764, + "computer_security": 0.21052631578947367, + "conceptual_physics": 0.23809523809523808, + "construction_project_management": 0.20863309352517986, + "economics": 0.27044025157232704, + "education": 0.2822085889570552, + "electrical_engineering": 0.21511627906976744, + "elementary_chinese": 0.23412698412698413, + "elementary_commonsense": 0.24242424242424243, + "elementary_information_and_technology": 0.2773109243697479, + "elementary_mathematics": 0.2782608695652174, + "ethnology": 0.24444444444444444, + "food_science": 0.3006993006993007, + "genetics": 0.2727272727272727, + "global_facts": 0.2483221476510067, + "high_school_biology": 0.22485207100591717, + "high_school_chemistry": 0.25, + "high_school_geography": 0.288135593220339, + "high_school_mathematics": 0.2804878048780488, + "high_school_physics": 0.2636363636363636, + "high_school_politics": 0.2517482517482518, + "human_sexuality": 0.2619047619047619, + "international_law": 0.25405405405405407, + "journalism": 0.2558139534883721, + "jurisprudence": 0.22871046228710462, + "legal_and_moral_basis": 0.26635514018691586, + "logical": 0.1951219512195122, + "machine_learning": 0.23770491803278687, + "management": 0.24285714285714285, + "marketing": 0.2722222222222222, + "marxist_theory": 0.2857142857142857, + "modern_chinese": 0.2672413793103448, + "nutrition": 0.27586206896551724, + "philosophy": 0.29523809523809524, + "professional_accounting": 0.3314285714285714, + "professional_law": 0.20853080568720378, + "professional_medicine": 0.27393617021276595, + "professional_psychology": 0.21551724137931033, + "public_relations": 0.25287356321839083, + "security_study": 0.2518518518518518, + "sociology": 0.2610619469026549, + "sports_science": 0.2545454545454545, + "traditional_chinese_medicine": 0.24324324324324326, + "virology": 0.28994082840236685, + "world_history": 0.2670807453416149, + "world_religions": 0.275 + } + }, + "prompt_4": { + "accuracy": 0.25116560179589015, + "category_acc": { + "agronomy": 0.26627218934911245, + "anatomy": 0.2972972972972973, + "ancient_chinese": 0.25609756097560976, + "arts": 0.25, + "astronomy": 0.23636363636363636, + "business_ethics": 0.23444976076555024, + "chinese_civil_service_exam": 0.24375, + "chinese_driving_rule": 0.22900763358778625, + "chinese_food_culture": 0.25735294117647056, + "chinese_foreign_policy": 0.2803738317757009, + "chinese_history": 0.25696594427244585, + "chinese_literature": 0.23529411764705882, + "chinese_teacher_qualification": 0.2849162011173184, + "clinical_knowledge": 0.19831223628691982, + "college_actuarial_science": 0.20754716981132076, + "college_education": 0.21495327102803738, + "college_engineering_hydrology": 0.25471698113207547, + "college_law": 0.24074074074074073, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.1792452830188679, + "college_medicine": 0.2271062271062271, + "computer_science": 0.25980392156862747, + "computer_security": 0.2807017543859649, + "conceptual_physics": 0.25170068027210885, + "construction_project_management": 0.2589928057553957, + "economics": 0.20754716981132076, + "education": 0.2147239263803681, + "electrical_engineering": 0.29651162790697677, + "elementary_chinese": 0.2698412698412698, + "elementary_commonsense": 0.2474747474747475, + "elementary_information_and_technology": 0.28991596638655465, + "elementary_mathematics": 0.25217391304347825, + "ethnology": 0.2814814814814815, + "food_science": 0.2517482517482518, + "genetics": 0.24431818181818182, + "global_facts": 0.24161073825503357, + "high_school_biology": 0.23076923076923078, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.2457627118644068, + "high_school_mathematics": 0.2073170731707317, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.2727272727272727, + "human_sexuality": 0.1984126984126984, + "international_law": 0.2648648648648649, + "journalism": 0.2441860465116279, + "jurisprudence": 0.25060827250608275, + "legal_and_moral_basis": 0.27102803738317754, + "logical": 0.1951219512195122, + "machine_learning": 0.319672131147541, + "management": 0.23333333333333334, + "marketing": 0.2777777777777778, + "marxist_theory": 0.2751322751322751, + "modern_chinese": 0.2413793103448276, + "nutrition": 0.1724137931034483, + "philosophy": 0.18095238095238095, + "professional_accounting": 0.21142857142857144, + "professional_law": 0.2559241706161137, + "professional_medicine": 0.26861702127659576, + "professional_psychology": 0.2801724137931034, + "public_relations": 0.27011494252873564, + "security_study": 0.2, + "sociology": 0.26548672566371684, + "sports_science": 0.22424242424242424, + "traditional_chinese_medicine": 0.2864864864864865, + "virology": 0.2958579881656805, + "world_history": 0.2546583850931677, + "world_religions": 0.2875 + } + }, + "prompt_5": { + "accuracy": 0.26264893800725264, + "category_acc": { + "agronomy": 0.2603550295857988, + "anatomy": 0.28378378378378377, + "ancient_chinese": 0.25, + "arts": 0.20625, + "astronomy": 0.24242424242424243, + "business_ethics": 0.2822966507177033, + "chinese_civil_service_exam": 0.275, + "chinese_driving_rule": 0.2748091603053435, + "chinese_food_culture": 0.2647058823529412, + "chinese_foreign_policy": 0.2523364485981308, + "chinese_history": 0.24148606811145512, + "chinese_literature": 0.24509803921568626, + "chinese_teacher_qualification": 0.3128491620111732, + "clinical_knowledge": 0.28270042194092826, + "college_actuarial_science": 0.19811320754716982, + "college_education": 0.32710280373831774, + "college_engineering_hydrology": 0.22641509433962265, + "college_law": 0.16666666666666666, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.33962264150943394, + "college_medicine": 0.30036630036630035, + "computer_science": 0.24509803921568626, + "computer_security": 0.28654970760233917, + "conceptual_physics": 0.29931972789115646, + "construction_project_management": 0.23741007194244604, + "economics": 0.31446540880503143, + "education": 0.27607361963190186, + "electrical_engineering": 0.3313953488372093, + "elementary_chinese": 0.24206349206349206, + "elementary_commonsense": 0.23737373737373738, + "elementary_information_and_technology": 0.24789915966386555, + "elementary_mathematics": 0.21739130434782608, + "ethnology": 0.24444444444444444, + "food_science": 0.35664335664335667, + "genetics": 0.24431818181818182, + "global_facts": 0.3087248322147651, + "high_school_biology": 0.23076923076923078, + "high_school_chemistry": 0.26515151515151514, + "high_school_geography": 0.2288135593220339, + "high_school_mathematics": 0.25, + "high_school_physics": 0.24545454545454545, + "high_school_politics": 0.2727272727272727, + "human_sexuality": 0.2222222222222222, + "international_law": 0.24324324324324326, + "journalism": 0.3023255813953488, + "jurisprudence": 0.24574209245742093, + "legal_and_moral_basis": 0.29906542056074764, + "logical": 0.2032520325203252, + "machine_learning": 0.20491803278688525, + "management": 0.2523809523809524, + "marketing": 0.2722222222222222, + "marxist_theory": 0.2857142857142857, + "modern_chinese": 0.3017241379310345, + "nutrition": 0.2620689655172414, + "philosophy": 0.26666666666666666, + "professional_accounting": 0.26857142857142857, + "professional_law": 0.1943127962085308, + "professional_medicine": 0.26063829787234044, + "professional_psychology": 0.24568965517241378, + "public_relations": 0.28160919540229884, + "security_study": 0.3037037037037037, + "sociology": 0.30973451327433627, + "sports_science": 0.2727272727272727, + "traditional_chinese_medicine": 0.23243243243243245, + "virology": 0.3076923076923077, + "world_history": 0.2670807453416149, + "world_religions": 0.25 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.18181818181818182 + }, + "prompt_2": { + "accuracy": 0.12121212121212122 + }, + "prompt_3": { + "accuracy": 0.24242424242424243 + }, + "prompt_4": { + "accuracy": 0.21212121212121213 + }, + "prompt_5": { + "accuracy": 0.21212121212121213 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.12954545454545455 + }, + "prompt_2": { + "accuracy": 0.125 + }, + "prompt_3": { + "accuracy": 0.1159090909090909 + }, + "prompt_4": { + "accuracy": 0.16590909090909092 + }, + "prompt_5": { + "accuracy": 0.1340909090909091 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3142372881355932 + }, + "prompt_2": { + "accuracy": 0.3030508474576271 + }, + "prompt_3": { + "accuracy": 0.3403389830508475 + }, + "prompt_4": { + "accuracy": 0.31559322033898307 + }, + "prompt_5": { + "accuracy": 0.3298305084745763 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.27860882572924456 + }, + "prompt_2": { + "accuracy": 0.2857142857142857 + }, + "prompt_3": { + "accuracy": 0.28534031413612565 + }, + "prompt_4": { + "accuracy": 0.2924457741211668 + }, + "prompt_5": { + "accuracy": 0.2905759162303665 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3248407643312102 + }, + "prompt_2": { + "accuracy": 0.3214110730034297 + }, + "prompt_3": { + "accuracy": 0.3248407643312102 + }, + "prompt_4": { + "accuracy": 0.32288094071533563 + }, + "prompt_5": { + "accuracy": 0.3410093091621754 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.10927578644609584, + "rouge2": 0.03267428844189666, + "rougeL": 0.08261344758471074, + "avg_rouge": 0.07485450749090107 + }, + "prompt_2": { + "rouge1": 0.19821357380394702, + "rouge2": 0.05949694497264629, + "rougeL": 0.14784694335746582, + "avg_rouge": 0.13518582071135307 + }, + "prompt_3": { + "rouge1": 0.09414896883478109, + "rouge2": 0.027370283089220106, + "rougeL": 0.0702945967469367, + "avg_rouge": 0.0639379495569793 + }, + "prompt_4": { + "rouge1": 0.014248922879908522, + "rouge2": 0.004032354888082322, + "rougeL": 0.010532953412338281, + "avg_rouge": 0.009604743726776374 + }, + "prompt_5": { + "rouge1": 0.18536107512245148, + "rouge2": 0.05349312454503364, + "rougeL": 0.13827660510865072, + "avg_rouge": 0.12571026825871193 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2104900397138977, + "rouge2": 0.05827456634560005, + "rougeL": 0.15515524029138283, + "avg_rouge": 0.14130661545029352 + }, + "prompt_2": { + "rouge1": 0.17074081333421356, + "rouge2": 0.04444955559742033, + "rougeL": 0.12665748647069391, + "avg_rouge": 0.11394928513410928 + }, + "prompt_3": { + "rouge1": 0.18877164525277712, + "rouge2": 0.04946218720642109, + "rougeL": 0.1366722666675921, + "avg_rouge": 0.1249686997089301 + }, + "prompt_4": { + "rouge1": 0.18614619283187378, + "rouge2": 0.05124393688371859, + "rougeL": 0.13855193344689934, + "avg_rouge": 0.1253140210541639 + }, + "prompt_5": { + "rouge1": 0.19005379563336916, + "rouge2": 0.05137801790434311, + "rougeL": 0.14095274778551378, + "avg_rouge": 0.12746152044107537 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.45871559633027525 + }, + "prompt_2": { + "accuracy": 0.49311926605504586 + }, + "prompt_3": { + "accuracy": 0.4873853211009174 + }, + "prompt_4": { + "accuracy": 0.49770642201834864 + }, + "prompt_5": { + "accuracy": 0.5045871559633027 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.46596356663470756 + }, + "prompt_2": { + "accuracy": 0.49089165867689355 + }, + "prompt_3": { + "accuracy": 0.4899328859060403 + }, + "prompt_4": { + "accuracy": 0.5215723873441994 + }, + "prompt_5": { + "accuracy": 0.3566634707574305 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.538 + }, + "prompt_2": { + "accuracy": 0.505 + }, + "prompt_3": { + "accuracy": 0.598 + }, + "prompt_4": { + "accuracy": 0.489 + }, + "prompt_5": { + "accuracy": 0.5 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.316 + }, + "prompt_2": { + "accuracy": 0.346 + }, + "prompt_3": { + "accuracy": 0.344 + }, + "prompt_4": { + "accuracy": 0.33 + }, + "prompt_5": { + "accuracy": 0.342 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5005 + }, + "prompt_2": { + "accuracy": 0.488 + }, + "prompt_3": { + "accuracy": 0.486 + }, + "prompt_4": { + "accuracy": 0.494 + }, + "prompt_5": { + "accuracy": 0.499 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5070422535211268 + }, + "prompt_2": { + "accuracy": 0.5492957746478874 + }, + "prompt_3": { + "accuracy": 0.5633802816901409 + }, + "prompt_4": { + "accuracy": 0.5352112676056338 + }, + "prompt_5": { + "accuracy": 0.5352112676056338 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49097472924187724 + }, + "prompt_2": { + "accuracy": 0.48014440433212996 + }, + "prompt_3": { + "accuracy": 0.47653429602888087 + }, + "prompt_4": { + "accuracy": 0.5018050541516246 + }, + "prompt_5": { + "accuracy": 0.49097472924187724 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49264705882352944 + }, + "prompt_2": { + "accuracy": 0.3799019607843137 + }, + "prompt_3": { + "accuracy": 0.3431372549019608 + }, + "prompt_4": { + "accuracy": 0.4877450980392157 + }, + "prompt_5": { + "accuracy": 0.5049019607843137 + } } }, "five_shot": { "cross_mmlu": { - "prompt_1": -1 + "prompt_1": { + "overall_acc": 0.2533333333333333, + "language_acc": { + "English": 0.24666666666666667, + "Vietnamese": 0.25333333333333335, + "Malay": 0.28, + "Indonesian": 0.26, + "Spanish": 0.24, + "Chinese": 0.22666666666666666, + "Filipino": 0.26666666666666666 + }, + "consistency_score_2": 0.6292063492063492, + "consistency_score_3": 0.4807619047619048, + "consistency_score_4": 0.3803809523809523, + "consistency_score_5": 0.30571428571428577, + "consistency_score_6": 0.2495238095238095, + "consistency_score_7": 0.20666666666666667, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5933333333333334, + "English,Malay": 0.5266666666666666, + "English,Indonesian": 0.6333333333333333, + "English,Spanish": 0.6133333333333333, + "English,Chinese": 0.64, + "English,Filipino": 0.6533333333333333, + "Vietnamese,Malay": 0.5733333333333334, + "Vietnamese,Indonesian": 0.66, + "Vietnamese,Spanish": 0.6733333333333333, + "Vietnamese,Chinese": 0.64, + "Vietnamese,Filipino": 0.66, + "Malay,Indonesian": 0.5866666666666667, + "Malay,Spanish": 0.62, + "Malay,Chinese": 0.5466666666666666, + "Malay,Filipino": 0.6066666666666667, + "Indonesian,Spanish": 0.62, + "Indonesian,Chinese": 0.68, + "Indonesian,Filipino": 0.6733333333333333, + "Spanish,Chinese": 0.6466666666666666, + "Spanish,Filipino": 0.6733333333333333, + "Chinese,Filipino": 0.6933333333333334 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.3933333333333333, + "English,Vietnamese,Indonesian": 0.48, + "English,Vietnamese,Spanish": 0.48, + "English,Vietnamese,Chinese": 0.49333333333333335, + "English,Vietnamese,Filipino": 0.4866666666666667, + "English,Malay,Indonesian": 0.42, + "English,Malay,Spanish": 0.44, + "English,Malay,Chinese": 0.4066666666666667, + "English,Malay,Filipino": 0.44666666666666666, + "English,Indonesian,Spanish": 0.4666666666666667, + "English,Indonesian,Chinese": 0.5133333333333333, + "English,Indonesian,Filipino": 0.5066666666666667, + "English,Spanish,Chinese": 0.47333333333333333, + "English,Spanish,Filipino": 0.5066666666666667, + "English,Chinese,Filipino": 0.52, + "Vietnamese,Malay,Indonesian": 0.44666666666666666, + "Vietnamese,Malay,Spanish": 0.48, + "Vietnamese,Malay,Chinese": 0.41333333333333333, + "Vietnamese,Malay,Filipino": 0.44666666666666666, + "Vietnamese,Indonesian,Spanish": 0.5066666666666667, + "Vietnamese,Indonesian,Chinese": 0.52, + "Vietnamese,Indonesian,Filipino": 0.52, + "Vietnamese,Spanish,Chinese": 0.5133333333333333, + "Vietnamese,Spanish,Filipino": 0.5466666666666666, + "Vietnamese,Chinese,Filipino": 0.5266666666666666, + "Malay,Indonesian,Spanish": 0.4666666666666667, + "Malay,Indonesian,Chinese": 0.44, + "Malay,Indonesian,Filipino": 0.47333333333333333, + "Malay,Spanish,Chinese": 0.44666666666666666, + "Malay,Spanish,Filipino": 0.49333333333333335, + "Malay,Chinese,Filipino": 0.4666666666666667, + "Indonesian,Spanish,Chinese": 0.4866666666666667, + "Indonesian,Spanish,Filipino": 0.5133333333333333, + "Indonesian,Chinese,Filipino": 0.5466666666666666, + "Spanish,Chinese,Filipino": 0.54 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.32666666666666666, + "English,Vietnamese,Malay,Spanish": 0.3466666666666667, + "English,Vietnamese,Malay,Chinese": 0.31333333333333335, + "English,Vietnamese,Malay,Filipino": 0.3466666666666667, + "English,Vietnamese,Indonesian,Spanish": 0.38, + "English,Vietnamese,Indonesian,Chinese": 0.4066666666666667, + "English,Vietnamese,Indonesian,Filipino": 0.4, + "English,Vietnamese,Spanish,Chinese": 0.3933333333333333, + "English,Vietnamese,Spanish,Filipino": 0.41333333333333333, + "English,Vietnamese,Chinese,Filipino": 0.4066666666666667, + "English,Malay,Indonesian,Spanish": 0.36, + "English,Malay,Indonesian,Chinese": 0.3333333333333333, + "English,Malay,Indonesian,Filipino": 0.36666666666666664, + "English,Malay,Spanish,Chinese": 0.34, + "English,Malay,Spanish,Filipino": 0.38666666666666666, + "English,Malay,Chinese,Filipino": 0.36666666666666664, + "English,Indonesian,Spanish,Chinese": 0.37333333333333335, + "English,Indonesian,Spanish,Filipino": 0.3933333333333333, + "English,Indonesian,Chinese,Filipino": 0.42, + "English,Spanish,Chinese,Filipino": 0.4066666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.38, + "Vietnamese,Malay,Indonesian,Chinese": 0.35333333333333333, + "Vietnamese,Malay,Indonesian,Filipino": 0.37333333333333335, + "Vietnamese,Malay,Spanish,Chinese": 0.35333333333333333, + "Vietnamese,Malay,Spanish,Filipino": 0.4, + "Vietnamese,Malay,Chinese,Filipino": 0.36, + "Vietnamese,Indonesian,Spanish,Chinese": 0.4, + "Vietnamese,Indonesian,Spanish,Filipino": 0.42, + "Vietnamese,Indonesian,Chinese,Filipino": 0.4266666666666667, + "Vietnamese,Spanish,Chinese,Filipino": 0.44, + "Malay,Indonesian,Spanish,Chinese": 0.35333333333333333, + "Malay,Indonesian,Spanish,Filipino": 0.3933333333333333, + "Malay,Indonesian,Chinese,Filipino": 0.38, + "Malay,Spanish,Chinese,Filipino": 0.38666666666666666, + "Indonesian,Spanish,Chinese,Filipino": 0.41333333333333333 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.2866666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.26666666666666666, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.29333333333333333, + "English,Vietnamese,Malay,Spanish,Chinese": 0.2733333333333333, + "English,Vietnamese,Malay,Spanish,Filipino": 0.31333333333333335, + "English,Vietnamese,Malay,Chinese,Filipino": 0.2866666666666667, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.31333333333333335, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.32666666666666666, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.3333333333333333, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.34, + "English,Malay,Indonesian,Spanish,Chinese": 0.2733333333333333, + "English,Malay,Indonesian,Spanish,Filipino": 0.31333333333333335, + "English,Malay,Indonesian,Chinese,Filipino": 0.29333333333333333, + "English,Malay,Spanish,Chinese,Filipino": 0.30666666666666664, + "English,Indonesian,Spanish,Chinese,Filipino": 0.31333333333333335, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.29333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.32666666666666666, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.30666666666666664, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.31333333333333335, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.34, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.30666666666666664 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.22666666666666666, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.26, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.24, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.25333333333333335, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.26666666666666666, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.24, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.26 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.20666666666666667 + } + }, + "AC3_2": 0.36122781770487283, + "AC3_3": 0.33181802451802334, + "AC3_4": 0.3041218314318212, + "AC3_5": 0.27706984662846285, + "AC3_6": 0.25141414136414425, + "AC3_7": 0.22763285019206048 + } }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.14563106796116504 + } }, "cn_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.22857142857142856 + } }, "us_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.21495327102803738 + } }, "ph_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.3, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.3, + "history": 0.26666666666666666, + "literature": 0.3, + "politics": 0.5, + "culture": 0.2, + "film": 0.4, + "law": 0.1, + "geography": 0.3 + } + } }, "sing2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.01012215205693314 + } }, "indommlu": { "prompt_1": -1 }, "flores_ind2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.019360186688278762 + } }, "flores_vie2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.02686115849874378 + } }, "flores_zho2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.012846813544731085 + } }, "flores_zsm2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.021966847435259312 + } }, "mmlu": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.2660443407234539 + } }, "mmlu_full": { "prompt_1": -1 }, "c_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.24442793462109955 + } }, "c_eval_full": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.22851805728518057, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.20833333333333334, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.23809523809523808, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.06896551724137931, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.42857142857142855, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.10344827586206896, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.25, + "high_school_chemistry": 0.16666666666666666, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.19230769230769232, + "middle_school_physics": 0.16666666666666666, + "middle_school_chemistry": 0.08, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.25, + "business_administration": 0.23684210526315788, + "marxism": 0.25, + "mao_zedong_thought": 0.2413793103448276, + "education_science": 0.17647058823529413, + "teacher_qualification": 0.2653061224489796, + "high_school_politics": 0.25, + "high_school_geography": 0.16666666666666666, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.11764705882352941, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.14814814814814814, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.17857142857142858, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.14285714285714285, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.28, + "middle_school_history": 0.18518518518518517, + "civil_servant": 0.23076923076923078, + "sports_science": 0.25, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.08333333333333333, + "clinical_medicine": 0.18518518518518517, + "urban_and_rural_planner": 0.2549019607843137, + "accountant": 0.24074074074074073, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.19444444444444445, + "tax_accountant": 0.18518518518518517, + "physician": 0.2777777777777778 + } + } }, "cmmlu": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.2867383512544803 + } }, "cmmlu_full": { "prompt_1": -1 }, "zbench": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.12121212121212122 + } }, "ind_emotion": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.15 + } }, "ocnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.3311864406779661 + } }, "c3": { "prompt_1": -1 @@ -12480,28 +109296,44 @@ "prompt_1": -1 }, "sst2": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.5057339449541285 + } }, "cola": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.3969319271332694 + } }, "qqp": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.545 + } }, "mnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.3405 + } }, "qnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.4935 + } }, "wnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.5352112676056338 + } }, "rte": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.49458483754512633 + } }, "mrpc": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.4166666666666667 + } } } }, @@ -12510,53 +109342,1733 @@ "model_link": "https://huggingface.co/microsoft/phi-2", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3009523809523809, + "language_acc": { + "English": 0.36, + "Vietnamese": 0.29333333333333333, + "Malay": 0.25333333333333335, + "Indonesian": 0.26, + "Spanish": 0.31333333333333335, + "Chinese": 0.32, + "Filipino": 0.30666666666666664 + }, + "consistency_score_2": 0.5882539682539683, + "consistency_score_3": 0.42228571428571426, + "consistency_score_4": 0.31942857142857145, + "consistency_score_5": 0.24698412698412694, + "consistency_score_6": 0.19333333333333336, + "consistency_score_7": 0.15333333333333332, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.5266666666666666, + "English,Malay": 0.6733333333333333, + "English,Indonesian": 0.6666666666666666, + "English,Spanish": 0.6, + "English,Chinese": 0.43333333333333335, + "English,Filipino": 0.6, + "Vietnamese,Malay": 0.58, + "Vietnamese,Indonesian": 0.6066666666666667, + "Vietnamese,Spanish": 0.54, + "Vietnamese,Chinese": 0.44666666666666666, + "Vietnamese,Filipino": 0.5733333333333334, + "Malay,Indonesian": 0.8733333333333333, + "Malay,Spanish": 0.7133333333333334, + "Malay,Chinese": 0.41333333333333333, + "Malay,Filipino": 0.7333333333333333, + "Indonesian,Spanish": 0.7333333333333333, + "Indonesian,Chinese": 0.44666666666666666, + "Indonesian,Filipino": 0.7266666666666667, + "Spanish,Chinese": 0.46, + "Spanish,Filipino": 0.6066666666666667, + "Chinese,Filipino": 0.4 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.43333333333333335, + "English,Vietnamese,Indonesian": 0.44, + "English,Vietnamese,Spanish": 0.38, + "English,Vietnamese,Chinese": 0.26666666666666666, + "English,Vietnamese,Filipino": 0.4, + "English,Malay,Indonesian": 0.62, + "English,Malay,Spanish": 0.5266666666666666, + "English,Malay,Chinese": 0.30666666666666664, + "English,Malay,Filipino": 0.5333333333333333, + "English,Indonesian,Spanish": 0.5333333333333333, + "English,Indonesian,Chinese": 0.32666666666666666, + "English,Indonesian,Filipino": 0.52, + "English,Spanish,Chinese": 0.32, + "English,Spanish,Filipino": 0.44666666666666666, + "English,Chinese,Filipino": 0.28, + "Vietnamese,Malay,Indonesian": 0.54, + "Vietnamese,Malay,Spanish": 0.44666666666666666, + "Vietnamese,Malay,Chinese": 0.2733333333333333, + "Vietnamese,Malay,Filipino": 0.46, + "Vietnamese,Indonesian,Spanish": 0.47333333333333333, + "Vietnamese,Indonesian,Chinese": 0.30666666666666664, + "Vietnamese,Indonesian,Filipino": 0.47333333333333333, + "Vietnamese,Spanish,Chinese": 0.29333333333333333, + "Vietnamese,Spanish,Filipino": 0.4, + "Vietnamese,Chinese,Filipino": 0.2733333333333333, + "Malay,Indonesian,Spanish": 0.6733333333333333, + "Malay,Indonesian,Chinese": 0.38666666666666666, + "Malay,Indonesian,Filipino": 0.6666666666666666, + "Malay,Spanish,Chinese": 0.3466666666666667, + "Malay,Spanish,Filipino": 0.5466666666666666, + "Malay,Chinese,Filipino": 0.32, + "Indonesian,Spanish,Chinese": 0.36666666666666664, + "Indonesian,Spanish,Filipino": 0.56, + "Indonesian,Chinese,Filipino": 0.34, + "Spanish,Chinese,Filipino": 0.3 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.4066666666666667, + "English,Vietnamese,Malay,Spanish": 0.3466666666666667, + "English,Vietnamese,Malay,Chinese": 0.20666666666666667, + "English,Vietnamese,Malay,Filipino": 0.36, + "English,Vietnamese,Indonesian,Spanish": 0.35333333333333333, + "English,Vietnamese,Indonesian,Chinese": 0.22, + "English,Vietnamese,Indonesian,Filipino": 0.35333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.29333333333333333, + "English,Vietnamese,Chinese,Filipino": 0.19333333333333333, + "English,Malay,Indonesian,Spanish": 0.5066666666666667, + "English,Malay,Indonesian,Chinese": 0.3, + "English,Malay,Indonesian,Filipino": 0.5, + "English,Malay,Spanish,Chinese": 0.2733333333333333, + "English,Malay,Spanish,Filipino": 0.4266666666666667, + "English,Malay,Chinese,Filipino": 0.25333333333333335, + "English,Indonesian,Spanish,Chinese": 0.28, + "English,Indonesian,Spanish,Filipino": 0.42, + "English,Indonesian,Chinese,Filipino": 0.25333333333333335, + "English,Spanish,Chinese,Filipino": 0.24, + "Vietnamese,Malay,Indonesian,Spanish": 0.4266666666666667, + "Vietnamese,Malay,Indonesian,Chinese": 0.26666666666666666, + "Vietnamese,Malay,Indonesian,Filipino": 0.4266666666666667, + "Vietnamese,Malay,Spanish,Chinese": 0.24666666666666667, + "Vietnamese,Malay,Spanish,Filipino": 0.35333333333333333, + "Vietnamese,Malay,Chinese,Filipino": 0.22666666666666666, + "Vietnamese,Indonesian,Spanish,Chinese": 0.2733333333333333, + "Vietnamese,Indonesian,Spanish,Filipino": 0.38, + "Vietnamese,Indonesian,Chinese,Filipino": 0.24666666666666667, + "Vietnamese,Spanish,Chinese,Filipino": 0.22666666666666666, + "Malay,Indonesian,Spanish,Chinese": 0.3333333333333333, + "Malay,Indonesian,Spanish,Filipino": 0.52, + "Malay,Indonesian,Chinese,Filipino": 0.30666666666666664, + "Malay,Spanish,Chinese,Filipino": 0.26666666666666666, + "Indonesian,Spanish,Chinese,Filipino": 0.2866666666666667 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.3333333333333333, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.2, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.3333333333333333, + "English,Vietnamese,Malay,Spanish,Chinese": 0.19333333333333333, + "English,Vietnamese,Malay,Spanish,Filipino": 0.28, + "English,Vietnamese,Malay,Chinese,Filipino": 0.17333333333333334, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.2, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.28, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.17333333333333334, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.16666666666666666, + "English,Malay,Indonesian,Spanish,Chinese": 0.26666666666666666, + "English,Malay,Indonesian,Spanish,Filipino": 0.4066666666666667, + "English,Malay,Indonesian,Chinese,Filipino": 0.24666666666666667, + "English,Malay,Spanish,Chinese,Filipino": 0.22666666666666666, + "English,Indonesian,Spanish,Chinese,Filipino": 0.22666666666666666, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.24, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.34, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.22, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.2, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.22, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.26 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.18666666666666668, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.26666666666666666, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.16666666666666666, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.16, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.16, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.22, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.19333333333333333 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.15333333333333332 + } + }, + "AC3_2": 0.39818976211499246, + "AC3_3": 0.35144136343385574, + "AC3_4": 0.30991534711439367, + "AC3_5": 0.2713104893838018, + "AC3_6": 0.23542710335635225, + "AC3_7": 0.20315863028372116 + }, + "prompt_2": { + "overall_acc": 0.31047619047619046, + "language_acc": { + "English": 0.38666666666666666, + "Vietnamese": 0.2866666666666667, + "Malay": 0.2733333333333333, + "Indonesian": 0.26666666666666666, + "Spanish": 0.31333333333333335, + "Chinese": 0.35333333333333333, + "Filipino": 0.29333333333333333 + }, + "consistency_score_2": 0.5819047619047619, + "consistency_score_3": 0.41942857142857154, + "consistency_score_4": 0.3219047619047619, + "consistency_score_5": 0.2539682539682539, + "consistency_score_6": 0.20380952380952383, + "consistency_score_7": 0.16666666666666666, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.44, + "English,Malay": 0.5933333333333334, + "English,Indonesian": 0.56, + "English,Spanish": 0.56, + "English,Chinese": 0.42, + "English,Filipino": 0.54, + "Vietnamese,Malay": 0.64, + "Vietnamese,Indonesian": 0.62, + "Vietnamese,Spanish": 0.5466666666666666, + "Vietnamese,Chinese": 0.44, + "Vietnamese,Filipino": 0.6, + "Malay,Indonesian": 0.8466666666666667, + "Malay,Spanish": 0.74, + "Malay,Chinese": 0.44666666666666666, + "Malay,Filipino": 0.7266666666666667, + "Indonesian,Spanish": 0.7466666666666667, + "Indonesian,Chinese": 0.49333333333333335, + "Indonesian,Filipino": 0.7133333333333334, + "Spanish,Chinese": 0.48, + "Spanish,Filipino": 0.62, + "Chinese,Filipino": 0.44666666666666666 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.36666666666666664, + "English,Vietnamese,Indonesian": 0.35333333333333333, + "English,Vietnamese,Spanish": 0.3333333333333333, + "English,Vietnamese,Chinese": 0.25333333333333335, + "English,Vietnamese,Filipino": 0.32666666666666666, + "English,Malay,Indonesian": 0.52, + "English,Malay,Spanish": 0.48, + "English,Malay,Chinese": 0.2866666666666667, + "English,Malay,Filipino": 0.46, + "English,Indonesian,Spanish": 0.47333333333333333, + "English,Indonesian,Chinese": 0.3, + "English,Indonesian,Filipino": 0.44, + "English,Spanish,Chinese": 0.3, + "English,Spanish,Filipino": 0.4, + "English,Chinese,Filipino": 0.2733333333333333, + "Vietnamese,Malay,Indonesian": 0.5666666666666667, + "Vietnamese,Malay,Spanish": 0.49333333333333335, + "Vietnamese,Malay,Chinese": 0.3333333333333333, + "Vietnamese,Malay,Filipino": 0.5066666666666667, + "Vietnamese,Indonesian,Spanish": 0.4866666666666667, + "Vietnamese,Indonesian,Chinese": 0.34, + "Vietnamese,Indonesian,Filipino": 0.49333333333333335, + "Vietnamese,Spanish,Chinese": 0.32, + "Vietnamese,Spanish,Filipino": 0.42, + "Vietnamese,Chinese,Filipino": 0.32, + "Malay,Indonesian,Spanish": 0.6933333333333334, + "Malay,Indonesian,Chinese": 0.42, + "Malay,Indonesian,Filipino": 0.66, + "Malay,Spanish,Chinese": 0.4, + "Malay,Spanish,Filipino": 0.5733333333333334, + "Malay,Chinese,Filipino": 0.36, + "Indonesian,Spanish,Chinese": 0.41333333333333333, + "Indonesian,Spanish,Filipino": 0.58, + "Indonesian,Chinese,Filipino": 0.38666666666666666, + "Spanish,Chinese,Filipino": 0.3466666666666667 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.3333333333333333, + "English,Vietnamese,Malay,Spanish": 0.31333333333333335, + "English,Vietnamese,Malay,Chinese": 0.22, + "English,Vietnamese,Malay,Filipino": 0.29333333333333333, + "English,Vietnamese,Indonesian,Spanish": 0.31333333333333335, + "English,Vietnamese,Indonesian,Chinese": 0.21333333333333335, + "English,Vietnamese,Indonesian,Filipino": 0.2866666666666667, + "English,Vietnamese,Spanish,Chinese": 0.20666666666666667, + "English,Vietnamese,Spanish,Filipino": 0.26666666666666666, + "English,Vietnamese,Chinese,Filipino": 0.2, + "English,Malay,Indonesian,Spanish": 0.44666666666666666, + "English,Malay,Indonesian,Chinese": 0.2733333333333333, + "English,Malay,Indonesian,Filipino": 0.4266666666666667, + "English,Malay,Spanish,Chinese": 0.26, + "English,Malay,Spanish,Filipino": 0.37333333333333335, + "English,Malay,Chinese,Filipino": 0.24666666666666667, + "English,Indonesian,Spanish,Chinese": 0.26666666666666666, + "English,Indonesian,Spanish,Filipino": 0.37333333333333335, + "English,Indonesian,Chinese,Filipino": 0.24666666666666667, + "English,Spanish,Chinese,Filipino": 0.23333333333333334, + "Vietnamese,Malay,Indonesian,Spanish": 0.47333333333333333, + "Vietnamese,Malay,Indonesian,Chinese": 0.32, + "Vietnamese,Malay,Indonesian,Filipino": 0.4666666666666667, + "Vietnamese,Malay,Spanish,Chinese": 0.30666666666666664, + "Vietnamese,Malay,Spanish,Filipino": 0.4066666666666667, + "Vietnamese,Malay,Chinese,Filipino": 0.28, + "Vietnamese,Indonesian,Spanish,Chinese": 0.3, + "Vietnamese,Indonesian,Spanish,Filipino": 0.4066666666666667, + "Vietnamese,Indonesian,Chinese,Filipino": 0.2866666666666667, + "Vietnamese,Spanish,Chinese,Filipino": 0.26, + "Malay,Indonesian,Spanish,Chinese": 0.38666666666666666, + "Malay,Indonesian,Spanish,Filipino": 0.56, + "Malay,Indonesian,Chinese,Filipino": 0.35333333333333333, + "Malay,Spanish,Chinese,Filipino": 0.32666666666666666, + "Indonesian,Spanish,Chinese,Filipino": 0.34 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.3, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.20666666666666667, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.28, + "English,Vietnamese,Malay,Spanish,Chinese": 0.2, + "English,Vietnamese,Malay,Spanish,Filipino": 0.25333333333333335, + "English,Vietnamese,Malay,Chinese,Filipino": 0.18666666666666668, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.19333333333333333, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.25333333333333335, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.18, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.17333333333333334, + "English,Malay,Indonesian,Spanish,Chinese": 0.25333333333333335, + "English,Malay,Indonesian,Spanish,Filipino": 0.36, + "English,Malay,Indonesian,Chinese,Filipino": 0.24, + "English,Malay,Spanish,Chinese,Filipino": 0.22, + "English,Indonesian,Spanish,Chinese,Filipino": 0.22666666666666666, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.3, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.4, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.2733333333333333, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.25333333333333335, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.25333333333333335, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.32666666666666666 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.19333333333333333, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.24666666666666667, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.18, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.16666666666666666, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.16666666666666666, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.22, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.25333333333333335 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.16666666666666666 + } + }, + "AC3_2": 0.40491131773680794, + "AC3_3": 0.3568207574816837, + "AC3_4": 0.31608720591674033, + "AC3_5": 0.27939364717316967, + "AC3_6": 0.24608112869994633, + "AC3_7": 0.21689953421934574 + }, + "prompt_3": { + "overall_acc": 0.29904761904761906, + "language_acc": { + "English": 0.38666666666666666, + "Vietnamese": 0.26666666666666666, + "Malay": 0.26666666666666666, + "Indonesian": 0.26, + "Spanish": 0.2866666666666667, + "Chinese": 0.32, + "Filipino": 0.30666666666666664 + }, + "consistency_score_2": 0.6123809523809525, + "consistency_score_3": 0.46095238095238095, + "consistency_score_4": 0.36933333333333335, + "consistency_score_5": 0.30571428571428566, + "consistency_score_6": 0.26, + "consistency_score_7": 0.22666666666666666, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4866666666666667, + "English,Malay": 0.62, + "English,Indonesian": 0.6133333333333333, + "English,Spanish": 0.6066666666666667, + "English,Chinese": 0.44666666666666666, + "English,Filipino": 0.54, + "Vietnamese,Malay": 0.6933333333333334, + "Vietnamese,Indonesian": 0.6666666666666666, + "Vietnamese,Spanish": 0.6266666666666667, + "Vietnamese,Chinese": 0.4866666666666667, + "Vietnamese,Filipino": 0.6, + "Malay,Indonesian": 0.9, + "Malay,Spanish": 0.76, + "Malay,Chinese": 0.4533333333333333, + "Malay,Filipino": 0.7466666666666667, + "Indonesian,Spanish": 0.7933333333333333, + "Indonesian,Chinese": 0.4866666666666667, + "Indonesian,Filipino": 0.7066666666666667, + "Spanish,Chinese": 0.5266666666666666, + "Spanish,Filipino": 0.64, + "Chinese,Filipino": 0.46 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.43333333333333335, + "English,Vietnamese,Indonesian": 0.42, + "English,Vietnamese,Spanish": 0.42, + "English,Vietnamese,Chinese": 0.29333333333333333, + "English,Vietnamese,Filipino": 0.36, + "English,Malay,Indonesian": 0.5866666666666667, + "English,Malay,Spanish": 0.5266666666666666, + "English,Malay,Chinese": 0.3333333333333333, + "English,Malay,Filipino": 0.47333333333333333, + "English,Indonesian,Spanish": 0.5333333333333333, + "English,Indonesian,Chinese": 0.34, + "English,Indonesian,Filipino": 0.46, + "English,Spanish,Chinese": 0.34, + "English,Spanish,Filipino": 0.44, + "English,Chinese,Filipino": 0.31333333333333335, + "Vietnamese,Malay,Indonesian": 0.6333333333333333, + "Vietnamese,Malay,Spanish": 0.5666666666666667, + "Vietnamese,Malay,Chinese": 0.36666666666666664, + "Vietnamese,Malay,Filipino": 0.5466666666666666, + "Vietnamese,Indonesian,Spanish": 0.58, + "Vietnamese,Indonesian,Chinese": 0.37333333333333335, + "Vietnamese,Indonesian,Filipino": 0.52, + "Vietnamese,Spanish,Chinese": 0.38, + "Vietnamese,Spanish,Filipino": 0.4866666666666667, + "Vietnamese,Chinese,Filipino": 0.35333333333333333, + "Malay,Indonesian,Spanish": 0.74, + "Malay,Indonesian,Chinese": 0.44, + "Malay,Indonesian,Filipino": 0.68, + "Malay,Spanish,Chinese": 0.4066666666666667, + "Malay,Spanish,Filipino": 0.6, + "Malay,Chinese,Filipino": 0.38, + "Indonesian,Spanish,Chinese": 0.43333333333333335, + "Indonesian,Spanish,Filipino": 0.6066666666666667, + "Indonesian,Chinese,Filipino": 0.3933333333333333, + "Spanish,Chinese,Filipino": 0.37333333333333335 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.41333333333333333, + "English,Vietnamese,Malay,Spanish": 0.4, + "English,Vietnamese,Malay,Chinese": 0.2733333333333333, + "English,Vietnamese,Malay,Filipino": 0.3466666666666667, + "English,Vietnamese,Indonesian,Spanish": 0.4, + "English,Vietnamese,Indonesian,Chinese": 0.26666666666666666, + "English,Vietnamese,Indonesian,Filipino": 0.3333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.28, + "English,Vietnamese,Spanish,Filipino": 0.34, + "English,Vietnamese,Chinese,Filipino": 0.24666666666666667, + "English,Malay,Indonesian,Spanish": 0.5133333333333333, + "English,Malay,Indonesian,Chinese": 0.32, + "English,Malay,Indonesian,Filipino": 0.44666666666666666, + "English,Malay,Spanish,Chinese": 0.30666666666666664, + "English,Malay,Spanish,Filipino": 0.41333333333333333, + "English,Malay,Chinese,Filipino": 0.28, + "English,Indonesian,Spanish,Chinese": 0.31333333333333335, + "English,Indonesian,Spanish,Filipino": 0.41333333333333333, + "English,Indonesian,Chinese,Filipino": 0.28, + "English,Spanish,Chinese,Filipino": 0.28, + "Vietnamese,Malay,Indonesian,Spanish": 0.56, + "Vietnamese,Malay,Indonesian,Chinese": 0.35333333333333333, + "Vietnamese,Malay,Indonesian,Filipino": 0.5, + "Vietnamese,Malay,Spanish,Chinese": 0.34, + "Vietnamese,Malay,Spanish,Filipino": 0.4666666666666667, + "Vietnamese,Malay,Chinese,Filipino": 0.32, + "Vietnamese,Indonesian,Spanish,Chinese": 0.35333333333333333, + "Vietnamese,Indonesian,Spanish,Filipino": 0.48, + "Vietnamese,Indonesian,Chinese,Filipino": 0.32666666666666666, + "Vietnamese,Spanish,Chinese,Filipino": 0.31333333333333335, + "Malay,Indonesian,Spanish,Chinese": 0.4, + "Malay,Indonesian,Spanish,Filipino": 0.58, + "Malay,Indonesian,Chinese,Filipino": 0.36666666666666664, + "Malay,Spanish,Chinese,Filipino": 0.34, + "Indonesian,Spanish,Chinese,Filipino": 0.36 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.3933333333333333, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.26, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.32666666666666666, + "English,Vietnamese,Malay,Spanish,Chinese": 0.26666666666666666, + "English,Vietnamese,Malay,Spanish,Filipino": 0.3333333333333333, + "English,Vietnamese,Malay,Chinese,Filipino": 0.24, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.26666666666666666, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.3333333333333333, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.23333333333333334, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.24, + "English,Malay,Indonesian,Spanish,Chinese": 0.3, + "English,Malay,Indonesian,Spanish,Filipino": 0.4, + "English,Malay,Indonesian,Chinese,Filipino": 0.26666666666666666, + "English,Malay,Spanish,Chinese,Filipino": 0.26, + "English,Indonesian,Spanish,Chinese,Filipino": 0.26666666666666666, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.3333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.46, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.30666666666666664, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.29333333333333333, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.30666666666666664, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.3333333333333333 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.26, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.32666666666666666, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.22666666666666666, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.23333333333333334, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.23333333333333334, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.25333333333333335, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.2866666666666667 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.22666666666666666 + } + }, + "AC3_2": 0.40185500319022294, + "AC3_3": 0.3627545052631654, + "AC3_4": 0.33049491778089396, + "AC3_5": 0.3023442069241343, + "AC3_6": 0.278160136236445, + "AC3_7": 0.25787439608621354 + }, + "prompt_4": { + "overall_acc": 0.3152380952380952, + "language_acc": { + "English": 0.44, + "Vietnamese": 0.30666666666666664, + "Malay": 0.26, + "Indonesian": 0.26666666666666666, + "Spanish": 0.30666666666666664, + "Chinese": 0.3466666666666667, + "Filipino": 0.28 + }, + "consistency_score_2": 0.5669841269841269, + "consistency_score_3": 0.3958095238095237, + "consistency_score_4": 0.29561904761904767, + "consistency_score_5": 0.22920634920634922, + "consistency_score_6": 0.18380952380952378, + "consistency_score_7": 0.15333333333333332, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4866666666666667, + "English,Malay": 0.6266666666666667, + "English,Indonesian": 0.58, + "English,Spanish": 0.58, + "English,Chinese": 0.4066666666666667, + "English,Filipino": 0.5666666666666667, + "Vietnamese,Malay": 0.5933333333333334, + "Vietnamese,Indonesian": 0.54, + "Vietnamese,Spanish": 0.5466666666666666, + "Vietnamese,Chinese": 0.4266666666666667, + "Vietnamese,Filipino": 0.58, + "Malay,Indonesian": 0.8666666666666667, + "Malay,Spanish": 0.7333333333333333, + "Malay,Chinese": 0.36, + "Malay,Filipino": 0.7466666666666667, + "Indonesian,Spanish": 0.74, + "Indonesian,Chinese": 0.4, + "Indonesian,Filipino": 0.7, + "Spanish,Chinese": 0.42, + "Spanish,Filipino": 0.6, + "Chinese,Filipino": 0.4066666666666667 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.38666666666666666, + "English,Vietnamese,Indonesian": 0.35333333333333333, + "English,Vietnamese,Spanish": 0.36, + "English,Vietnamese,Chinese": 0.24666666666666667, + "English,Vietnamese,Filipino": 0.36, + "English,Malay,Indonesian": 0.56, + "English,Malay,Spanish": 0.5066666666666667, + "English,Malay,Chinese": 0.26, + "English,Malay,Filipino": 0.5, + "English,Indonesian,Spanish": 0.49333333333333335, + "English,Indonesian,Chinese": 0.26666666666666666, + "English,Indonesian,Filipino": 0.4533333333333333, + "English,Spanish,Chinese": 0.28, + "English,Spanish,Filipino": 0.42, + "English,Chinese,Filipino": 0.25333333333333335, + "Vietnamese,Malay,Indonesian": 0.5066666666666667, + "Vietnamese,Malay,Spanish": 0.4666666666666667, + "Vietnamese,Malay,Chinese": 0.26, + "Vietnamese,Malay,Filipino": 0.48, + "Vietnamese,Indonesian,Spanish": 0.46, + "Vietnamese,Indonesian,Chinese": 0.26666666666666666, + "Vietnamese,Indonesian,Filipino": 0.4266666666666667, + "Vietnamese,Spanish,Chinese": 0.26666666666666666, + "Vietnamese,Spanish,Filipino": 0.3933333333333333, + "Vietnamese,Chinese,Filipino": 0.2733333333333333, + "Malay,Indonesian,Spanish": 0.6933333333333334, + "Malay,Indonesian,Chinese": 0.34, + "Malay,Indonesian,Filipino": 0.66, + "Malay,Spanish,Chinese": 0.31333333333333335, + "Malay,Spanish,Filipino": 0.5666666666666667, + "Malay,Chinese,Filipino": 0.3, + "Indonesian,Spanish,Chinese": 0.3333333333333333, + "Indonesian,Spanish,Filipino": 0.5533333333333333, + "Indonesian,Chinese,Filipino": 0.31333333333333335, + "Spanish,Chinese,Filipino": 0.28 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.34, + "English,Vietnamese,Malay,Spanish": 0.3333333333333333, + "English,Vietnamese,Malay,Chinese": 0.19333333333333333, + "English,Vietnamese,Malay,Filipino": 0.32666666666666666, + "English,Vietnamese,Indonesian,Spanish": 0.32666666666666666, + "English,Vietnamese,Indonesian,Chinese": 0.19333333333333333, + "English,Vietnamese,Indonesian,Filipino": 0.29333333333333333, + "English,Vietnamese,Spanish,Chinese": 0.2, + "English,Vietnamese,Spanish,Filipino": 0.29333333333333333, + "English,Vietnamese,Chinese,Filipino": 0.19333333333333333, + "English,Malay,Indonesian,Spanish": 0.48, + "English,Malay,Indonesian,Chinese": 0.24666666666666667, + "English,Malay,Indonesian,Filipino": 0.44, + "English,Malay,Spanish,Chinese": 0.23333333333333334, + "English,Malay,Spanish,Filipino": 0.4, + "English,Malay,Chinese,Filipino": 0.22, + "English,Indonesian,Spanish,Chinese": 0.24666666666666667, + "English,Indonesian,Spanish,Filipino": 0.38666666666666666, + "English,Indonesian,Chinese,Filipino": 0.22, + "English,Spanish,Chinese,Filipino": 0.20666666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.43333333333333335, + "Vietnamese,Malay,Indonesian,Chinese": 0.24, + "Vietnamese,Malay,Indonesian,Filipino": 0.4066666666666667, + "Vietnamese,Malay,Spanish,Chinese": 0.23333333333333334, + "Vietnamese,Malay,Spanish,Filipino": 0.38, + "Vietnamese,Malay,Chinese,Filipino": 0.21333333333333335, + "Vietnamese,Indonesian,Spanish,Chinese": 0.24666666666666667, + "Vietnamese,Indonesian,Spanish,Filipino": 0.36666666666666664, + "Vietnamese,Indonesian,Chinese,Filipino": 0.21333333333333335, + "Vietnamese,Spanish,Chinese,Filipino": 0.2, + "Malay,Indonesian,Spanish,Chinese": 0.30666666666666664, + "Malay,Indonesian,Spanish,Filipino": 0.5333333333333333, + "Malay,Indonesian,Chinese,Filipino": 0.28, + "Malay,Spanish,Chinese,Filipino": 0.25333333333333335, + "Indonesian,Spanish,Chinese,Filipino": 0.26666666666666666 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.31333333333333335, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.18, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.28, + "English,Vietnamese,Malay,Spanish,Chinese": 0.18, + "English,Vietnamese,Malay,Spanish,Filipino": 0.28, + "English,Vietnamese,Malay,Chinese,Filipino": 0.16666666666666666, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.19333333333333333, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.2733333333333333, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.16666666666666666, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.16666666666666666, + "English,Malay,Indonesian,Spanish,Chinese": 0.23333333333333334, + "English,Malay,Indonesian,Spanish,Filipino": 0.37333333333333335, + "English,Malay,Indonesian,Chinese,Filipino": 0.20666666666666667, + "English,Malay,Spanish,Chinese,Filipino": 0.19333333333333333, + "English,Indonesian,Spanish,Chinese,Filipino": 0.20666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.22666666666666666, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.35333333333333333, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.19333333333333333, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.18666666666666668, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.19333333333333333, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.24666666666666667 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.18, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.26, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.15333333333333332, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.15333333333333332, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.16666666666666666, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.19333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.18 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.15333333333333332 + } + }, + "AC3_2": 0.40519268660051827, + "AC3_3": 0.3509588865859282, + "AC3_4": 0.30511351651352137, + "AC3_5": 0.2654249617727118, + "AC3_6": 0.2322173754532861, + "AC3_7": 0.2063143630996009 + }, + "prompt_5": { + "overall_acc": 0.3180952380952381, + "language_acc": { + "English": 0.44666666666666666, + "Vietnamese": 0.3, + "Malay": 0.26666666666666666, + "Indonesian": 0.2733333333333333, + "Spanish": 0.3, + "Chinese": 0.3333333333333333, + "Filipino": 0.30666666666666664 + }, + "consistency_score_2": 0.5619047619047619, + "consistency_score_3": 0.3887619047619048, + "consistency_score_4": 0.2864761904761905, + "consistency_score_5": 0.2174603174603175, + "consistency_score_6": 0.16857142857142857, + "consistency_score_7": 0.13333333333333333, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.4533333333333333, + "English,Malay": 0.6, + "English,Indonesian": 0.5733333333333334, + "English,Spanish": 0.5533333333333333, + "English,Chinese": 0.42, + "English,Filipino": 0.5933333333333334, + "Vietnamese,Malay": 0.5266666666666666, + "Vietnamese,Indonesian": 0.5, + "Vietnamese,Spanish": 0.49333333333333335, + "Vietnamese,Chinese": 0.4533333333333333, + "Vietnamese,Filipino": 0.5066666666666667, + "Malay,Indonesian": 0.8933333333333333, + "Malay,Spanish": 0.7266666666666667, + "Malay,Chinese": 0.4, + "Malay,Filipino": 0.74, + "Indonesian,Spanish": 0.7266666666666667, + "Indonesian,Chinese": 0.4266666666666667, + "Indonesian,Filipino": 0.7266666666666667, + "Spanish,Chinese": 0.46, + "Spanish,Filipino": 0.6066666666666667, + "Chinese,Filipino": 0.42 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.34, + "English,Vietnamese,Indonesian": 0.32666666666666666, + "English,Vietnamese,Spanish": 0.3, + "English,Vietnamese,Chinese": 0.24, + "English,Vietnamese,Filipino": 0.3333333333333333, + "English,Malay,Indonesian": 0.5466666666666666, + "English,Malay,Spanish": 0.47333333333333333, + "English,Malay,Chinese": 0.26666666666666666, + "English,Malay,Filipino": 0.49333333333333335, + "English,Indonesian,Spanish": 0.4666666666666667, + "English,Indonesian,Chinese": 0.28, + "English,Indonesian,Filipino": 0.47333333333333333, + "English,Spanish,Chinese": 0.28, + "English,Spanish,Filipino": 0.4066666666666667, + "English,Chinese,Filipino": 0.2733333333333333, + "Vietnamese,Malay,Indonesian": 0.47333333333333333, + "Vietnamese,Malay,Spanish": 0.42, + "Vietnamese,Malay,Chinese": 0.2733333333333333, + "Vietnamese,Malay,Filipino": 0.4066666666666667, + "Vietnamese,Indonesian,Spanish": 0.41333333333333333, + "Vietnamese,Indonesian,Chinese": 0.2733333333333333, + "Vietnamese,Indonesian,Filipino": 0.3933333333333333, + "Vietnamese,Spanish,Chinese": 0.2866666666666667, + "Vietnamese,Spanish,Filipino": 0.3466666666666667, + "Vietnamese,Chinese,Filipino": 0.2733333333333333, + "Malay,Indonesian,Spanish": 0.6866666666666666, + "Malay,Indonesian,Chinese": 0.38, + "Malay,Indonesian,Filipino": 0.68, + "Malay,Spanish,Chinese": 0.35333333333333333, + "Malay,Spanish,Filipino": 0.56, + "Malay,Chinese,Filipino": 0.32, + "Indonesian,Spanish,Chinese": 0.36666666666666664, + "Indonesian,Spanish,Filipino": 0.56, + "Indonesian,Chinese,Filipino": 0.3333333333333333, + "Spanish,Chinese,Filipino": 0.30666666666666664 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.31333333333333335, + "English,Vietnamese,Malay,Spanish": 0.28, + "English,Vietnamese,Malay,Chinese": 0.18, + "English,Vietnamese,Malay,Filipino": 0.29333333333333333, + "English,Vietnamese,Indonesian,Spanish": 0.28, + "English,Vietnamese,Indonesian,Chinese": 0.18, + "English,Vietnamese,Indonesian,Filipino": 0.28, + "English,Vietnamese,Spanish,Chinese": 0.18, + "English,Vietnamese,Spanish,Filipino": 0.24666666666666667, + "English,Vietnamese,Chinese,Filipino": 0.18, + "English,Malay,Indonesian,Spanish": 0.44666666666666666, + "English,Malay,Indonesian,Chinese": 0.26, + "English,Malay,Indonesian,Filipino": 0.4533333333333333, + "English,Malay,Spanish,Chinese": 0.23333333333333334, + "English,Malay,Spanish,Filipino": 0.38, + "English,Malay,Chinese,Filipino": 0.22666666666666666, + "English,Indonesian,Spanish,Chinese": 0.24, + "English,Indonesian,Spanish,Filipino": 0.38, + "English,Indonesian,Chinese,Filipino": 0.23333333333333334, + "English,Spanish,Chinese,Filipino": 0.20666666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.3933333333333333, + "Vietnamese,Malay,Indonesian,Chinese": 0.25333333333333335, + "Vietnamese,Malay,Indonesian,Filipino": 0.36666666666666664, + "Vietnamese,Malay,Spanish,Chinese": 0.25333333333333335, + "Vietnamese,Malay,Spanish,Filipino": 0.32666666666666666, + "Vietnamese,Malay,Chinese,Filipino": 0.21333333333333335, + "Vietnamese,Indonesian,Spanish,Chinese": 0.25333333333333335, + "Vietnamese,Indonesian,Spanish,Filipino": 0.32666666666666666, + "Vietnamese,Indonesian,Chinese,Filipino": 0.22, + "Vietnamese,Spanish,Chinese,Filipino": 0.20666666666666667, + "Malay,Indonesian,Spanish,Chinese": 0.34, + "Malay,Indonesian,Spanish,Filipino": 0.5333333333333333, + "Malay,Indonesian,Chinese,Filipino": 0.30666666666666664, + "Malay,Spanish,Chinese,Filipino": 0.2733333333333333, + "Indonesian,Spanish,Chinese,Filipino": 0.2866666666666667 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.26666666666666666, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.17333333333333334, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.26666666666666666, + "English,Vietnamese,Malay,Spanish,Chinese": 0.16666666666666666, + "English,Vietnamese,Malay,Spanish,Filipino": 0.23333333333333334, + "English,Vietnamese,Malay,Chinese,Filipino": 0.15333333333333332, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.16666666666666666, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.23333333333333334, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.15333333333333332, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.14666666666666667, + "English,Malay,Indonesian,Spanish,Chinese": 0.22666666666666666, + "English,Malay,Indonesian,Spanish,Filipino": 0.36, + "English,Malay,Indonesian,Chinese,Filipino": 0.22, + "English,Malay,Spanish,Chinese,Filipino": 0.19333333333333333, + "English,Indonesian,Spanish,Chinese,Filipino": 0.2, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.24, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.30666666666666664, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.2, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.19333333333333333, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.2, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.26666666666666666 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.16, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.22, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.14666666666666667, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.14, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.14, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.18666666666666668, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.18666666666666668 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.13333333333333333 + } + }, + "AC3_2": 0.4062255204650728, + "AC3_3": 0.34989619017005297, + "AC3_4": 0.30145887713680225, + "AC3_5": 0.25832274813817696, + "AC3_6": 0.22036343299917047, + "AC3_7": 0.18790436001463443 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.27435064935064934, + "language_acc": { + "Vietnamese": 0.3125, + "Indonesian": 0.2556818181818182, + "Malay": 0.26704545454545453, + "English": 0.26704545454545453, + "Spanish": 0.2556818181818182, + "Filipino": 0.29545454545454547, + "Chinese": 0.26704545454545453 + }, + "consistency_score_2": 0.5367965367965367, + "consistency_score_3": 0.3603896103896104, + "consistency_score_4": 0.2576298701298702, + "consistency_score_5": 0.1899350649350649, + "consistency_score_6": 0.14285714285714285, + "consistency_score_7": 0.10795454545454546, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.3806818181818182, + "Vietnamese,Malay": 0.5, + "Vietnamese,English": 0.5, + "Vietnamese,Spanish": 0.5340909090909091, + "Vietnamese,Filipino": 0.4772727272727273, + "Vietnamese,Chinese": 0.4147727272727273, + "Indonesian,Malay": 0.5454545454545454, + "Indonesian,English": 0.5795454545454546, + "Indonesian,Spanish": 0.5454545454545454, + "Indonesian,Filipino": 0.5, + "Indonesian,Chinese": 0.48295454545454547, + "Malay,English": 0.6306818181818182, + "Malay,Spanish": 0.5738636363636364, + "Malay,Filipino": 0.5170454545454546, + "Malay,Chinese": 0.5170454545454546, + "English,Spanish": 0.6590909090909091, + "English,Filipino": 0.625, + "English,Chinese": 0.625, + "Spanish,Filipino": 0.6306818181818182, + "Spanish,Chinese": 0.4943181818181818, + "Filipino,Chinese": 0.5397727272727273 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.29545454545454547, + "Vietnamese,Indonesian,English": 0.2840909090909091, + "Vietnamese,Indonesian,Spanish": 0.2897727272727273, + "Vietnamese,Indonesian,Filipino": 0.2556818181818182, + "Vietnamese,Indonesian,Chinese": 0.2215909090909091, + "Vietnamese,Malay,English": 0.3806818181818182, + "Vietnamese,Malay,Spanish": 0.36363636363636365, + "Vietnamese,Malay,Filipino": 0.30113636363636365, + "Vietnamese,Malay,Chinese": 0.2727272727272727, + "Vietnamese,English,Spanish": 0.3977272727272727, + "Vietnamese,English,Filipino": 0.3522727272727273, + "Vietnamese,English,Chinese": 0.32954545454545453, + "Vietnamese,Spanish,Filipino": 0.3693181818181818, + "Vietnamese,Spanish,Chinese": 0.29545454545454547, + "Vietnamese,Filipino,Chinese": 0.2840909090909091, + "Indonesian,Malay,English": 0.4318181818181818, + "Indonesian,Malay,Spanish": 0.3977272727272727, + "Indonesian,Malay,Filipino": 0.3522727272727273, + "Indonesian,Malay,Chinese": 0.3352272727272727, + "Indonesian,English,Spanish": 0.4318181818181818, + "Indonesian,English,Filipino": 0.39204545454545453, + "Indonesian,English,Chinese": 0.3977272727272727, + "Indonesian,Spanish,Filipino": 0.3806818181818182, + "Indonesian,Spanish,Chinese": 0.32386363636363635, + "Indonesian,Filipino,Chinese": 0.3181818181818182, + "Malay,English,Spanish": 0.4602272727272727, + "Malay,English,Filipino": 0.42613636363636365, + "Malay,English,Chinese": 0.42613636363636365, + "Malay,Spanish,Filipino": 0.4147727272727273, + "Malay,Spanish,Chinese": 0.3409090909090909, + "Malay,Filipino,Chinese": 0.3409090909090909, + "English,Spanish,Filipino": 0.4943181818181818, + "English,Spanish,Chinese": 0.4375, + "English,Filipino,Chinese": 0.4375, + "Spanish,Filipino,Chinese": 0.3806818181818182 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.25, + "Vietnamese,Indonesian,Malay,Spanish": 0.25, + "Vietnamese,Indonesian,Malay,Filipino": 0.20454545454545456, + "Vietnamese,Indonesian,Malay,Chinese": 0.18181818181818182, + "Vietnamese,Indonesian,English,Spanish": 0.25, + "Vietnamese,Indonesian,English,Filipino": 0.21022727272727273, + "Vietnamese,Indonesian,English,Chinese": 0.19886363636363635, + "Vietnamese,Indonesian,Spanish,Filipino": 0.2215909090909091, + "Vietnamese,Indonesian,Spanish,Chinese": 0.19318181818181818, + "Vietnamese,Indonesian,Filipino,Chinese": 0.16477272727272727, + "Vietnamese,Malay,English,Spanish": 0.3068181818181818, + "Vietnamese,Malay,English,Filipino": 0.2556818181818182, + "Vietnamese,Malay,English,Chinese": 0.2556818181818182, + "Vietnamese,Malay,Spanish,Filipino": 0.26136363636363635, + "Vietnamese,Malay,Spanish,Chinese": 0.2159090909090909, + "Vietnamese,Malay,Filipino,Chinese": 0.1875, + "Vietnamese,English,Spanish,Filipino": 0.30113636363636365, + "Vietnamese,English,Spanish,Chinese": 0.26136363636363635, + "Vietnamese,English,Filipino,Chinese": 0.23863636363636365, + "Vietnamese,Spanish,Filipino,Chinese": 0.22727272727272727, + "Indonesian,Malay,English,Spanish": 0.3352272727272727, + "Indonesian,Malay,English,Filipino": 0.29545454545454547, + "Indonesian,Malay,English,Chinese": 0.2897727272727273, + "Indonesian,Malay,Spanish,Filipino": 0.2840909090909091, + "Indonesian,Malay,Spanish,Chinese": 0.25, + "Indonesian,Malay,Filipino,Chinese": 0.23295454545454544, + "Indonesian,English,Spanish,Filipino": 0.32386363636363635, + "Indonesian,English,Spanish,Chinese": 0.29545454545454547, + "Indonesian,English,Filipino,Chinese": 0.26704545454545453, + "Indonesian,Spanish,Filipino,Chinese": 0.24431818181818182, + "Malay,English,Spanish,Filipino": 0.3465909090909091, + "Malay,English,Spanish,Chinese": 0.3125, + "Malay,English,Filipino,Chinese": 0.30113636363636365, + "Malay,Spanish,Filipino,Chinese": 0.26704545454545453, + "English,Spanish,Filipino,Chinese": 0.3352272727272727 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.2215909090909091, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.17613636363636365, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.18181818181818182, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.16477272727272727, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.1875, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.17613636363636365, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Malay,English,Spanish,Filipino": 0.22727272727272727, + "Vietnamese,Malay,English,Spanish,Chinese": 0.20454545454545456, + "Vietnamese,Malay,English,Filipino,Chinese": 0.17613636363636365, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.16477272727272727, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.19886363636363635, + "Indonesian,Malay,English,Spanish,Filipino": 0.24431818181818182, + "Indonesian,Malay,English,Spanish,Chinese": 0.22727272727272727, + "Indonesian,Malay,English,Filipino,Chinese": 0.19886363636363635, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.1875, + "Indonesian,English,Spanish,Filipino,Chinese": 0.2159090909090909, + "Malay,English,Spanish,Filipino,Chinese": 0.23863636363636365 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.1590909090909091, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.1534090909090909, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.11931818181818182, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.11931818181818182, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.13068181818181818, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.1534090909090909, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.16477272727272727 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.10795454545454546 + } + }, + "AC3_2": 0.36311653652073667, + "AC3_3": 0.31153884473784527, + "AC3_4": 0.26572748272055935, + "AC3_5": 0.2244687130567296, + "AC3_6": 0.18788215670871883, + "AC3_7": 0.1549411310152526 + }, + "prompt_2": { + "overall_acc": 0.26866883116883117, + "language_acc": { + "Vietnamese": 0.30113636363636365, + "Indonesian": 0.2727272727272727, + "Malay": 0.23863636363636365, + "English": 0.26704545454545453, + "Spanish": 0.26704545454545453, + "Filipino": 0.2727272727272727, + "Chinese": 0.26136363636363635 + }, + "consistency_score_2": 0.5411255411255411, + "consistency_score_3": 0.3728896103896104, + "consistency_score_4": 0.2717532467532468, + "consistency_score_5": 0.2021103896103896, + "consistency_score_6": 0.15178571428571427, + "consistency_score_7": 0.11363636363636363, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.4602272727272727, + "Vietnamese,Malay": 0.5397727272727273, + "Vietnamese,English": 0.5795454545454546, + "Vietnamese,Spanish": 0.5738636363636364, + "Vietnamese,Filipino": 0.5284090909090909, + "Vietnamese,Chinese": 0.5170454545454546, + "Indonesian,Malay": 0.5340909090909091, + "Indonesian,English": 0.4659090909090909, + "Indonesian,Spanish": 0.4659090909090909, + "Indonesian,Filipino": 0.42045454545454547, + "Indonesian,Chinese": 0.4431818181818182, + "Malay,English": 0.5795454545454546, + "Malay,Spanish": 0.5568181818181818, + "Malay,Filipino": 0.5625, + "Malay,Chinese": 0.5397727272727273, + "English,Spanish": 0.6647727272727273, + "English,Filipino": 0.6079545454545454, + "English,Chinese": 0.5965909090909091, + "Spanish,Filipino": 0.6193181818181818, + "Spanish,Chinese": 0.5340909090909091, + "Filipino,Chinese": 0.5738636363636364 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.3522727272727273, + "Vietnamese,Indonesian,English": 0.3181818181818182, + "Vietnamese,Indonesian,Spanish": 0.32954545454545453, + "Vietnamese,Indonesian,Filipino": 0.2784090909090909, + "Vietnamese,Indonesian,Chinese": 0.29545454545454547, + "Vietnamese,Malay,English": 0.42045454545454547, + "Vietnamese,Malay,Spanish": 0.38636363636363635, + "Vietnamese,Malay,Filipino": 0.35795454545454547, + "Vietnamese,Malay,Chinese": 0.36363636363636365, + "Vietnamese,English,Spanish": 0.4659090909090909, + "Vietnamese,English,Filipino": 0.4147727272727273, + "Vietnamese,English,Chinese": 0.4034090909090909, + "Vietnamese,Spanish,Filipino": 0.4090909090909091, + "Vietnamese,Spanish,Chinese": 0.375, + "Vietnamese,Filipino,Chinese": 0.3522727272727273, + "Indonesian,Malay,English": 0.35795454545454547, + "Indonesian,Malay,Spanish": 0.3522727272727273, + "Indonesian,Malay,Filipino": 0.3352272727272727, + "Indonesian,Malay,Chinese": 0.3465909090909091, + "Indonesian,English,Spanish": 0.35795454545454547, + "Indonesian,English,Filipino": 0.3125, + "Indonesian,English,Chinese": 0.32954545454545453, + "Indonesian,Spanish,Filipino": 0.3068181818181818, + "Indonesian,Spanish,Chinese": 0.3068181818181818, + "Indonesian,Filipino,Chinese": 0.30113636363636365, + "Malay,English,Spanish": 0.45454545454545453, + "Malay,English,Filipino": 0.4147727272727273, + "Malay,English,Chinese": 0.3977272727272727, + "Malay,Spanish,Filipino": 0.4090909090909091, + "Malay,Spanish,Chinese": 0.375, + "Malay,Filipino,Chinese": 0.3806818181818182, + "English,Spanish,Filipino": 0.4943181818181818, + "English,Spanish,Chinese": 0.45454545454545453, + "English,Filipino,Chinese": 0.42613636363636365, + "Spanish,Filipino,Chinese": 0.4147727272727273 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.26704545454545453, + "Vietnamese,Indonesian,Malay,Spanish": 0.2727272727272727, + "Vietnamese,Indonesian,Malay,Filipino": 0.22727272727272727, + "Vietnamese,Indonesian,Malay,Chinese": 0.23863636363636365, + "Vietnamese,Indonesian,English,Spanish": 0.2727272727272727, + "Vietnamese,Indonesian,English,Filipino": 0.2215909090909091, + "Vietnamese,Indonesian,English,Chinese": 0.23863636363636365, + "Vietnamese,Indonesian,Spanish,Filipino": 0.23863636363636365, + "Vietnamese,Indonesian,Spanish,Chinese": 0.24431818181818182, + "Vietnamese,Indonesian,Filipino,Chinese": 0.20454545454545456, + "Vietnamese,Malay,English,Spanish": 0.3352272727272727, + "Vietnamese,Malay,English,Filipino": 0.29545454545454547, + "Vietnamese,Malay,English,Chinese": 0.2897727272727273, + "Vietnamese,Malay,Spanish,Filipino": 0.2840909090909091, + "Vietnamese,Malay,Spanish,Chinese": 0.26704545454545453, + "Vietnamese,Malay,Filipino,Chinese": 0.24431818181818182, + "Vietnamese,English,Spanish,Filipino": 0.3465909090909091, + "Vietnamese,English,Spanish,Chinese": 0.32386363636363635, + "Vietnamese,English,Filipino,Chinese": 0.29545454545454547, + "Vietnamese,Spanish,Filipino,Chinese": 0.2840909090909091, + "Indonesian,Malay,English,Spanish": 0.29545454545454547, + "Indonesian,Malay,English,Filipino": 0.25, + "Indonesian,Malay,English,Chinese": 0.26136363636363635, + "Indonesian,Malay,Spanish,Filipino": 0.24431818181818182, + "Indonesian,Malay,Spanish,Chinese": 0.2556818181818182, + "Indonesian,Malay,Filipino,Chinese": 0.25, + "Indonesian,English,Spanish,Filipino": 0.26136363636363635, + "Indonesian,English,Spanish,Chinese": 0.26704545454545453, + "Indonesian,English,Filipino,Chinese": 0.23863636363636365, + "Indonesian,Spanish,Filipino,Chinese": 0.22727272727272727, + "Malay,English,Spanish,Filipino": 0.3352272727272727, + "Malay,English,Spanish,Chinese": 0.3181818181818182, + "Malay,English,Filipino,Chinese": 0.2840909090909091, + "Malay,Spanish,Filipino,Chinese": 0.2784090909090909, + "English,Spanish,Filipino,Chinese": 0.3522727272727273 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.22727272727272727, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.17613636363636365, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.19318181818181818, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.1875, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.19886363636363635, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.16477272727272727, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.19886363636363635, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.21022727272727273, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.17613636363636365, + "Vietnamese,Malay,English,Spanish,Filipino": 0.24431818181818182, + "Vietnamese,Malay,English,Spanish,Chinese": 0.23295454545454544, + "Vietnamese,Malay,English,Filipino,Chinese": 0.19886363636363635, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.19318181818181818, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.25, + "Indonesian,Malay,English,Spanish,Filipino": 0.20454545454545456, + "Indonesian,Malay,English,Spanish,Chinese": 0.2215909090909091, + "Indonesian,Malay,English,Filipino,Chinese": 0.1875, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.18181818181818182, + "Indonesian,English,Spanish,Filipino,Chinese": 0.19886363636363635, + "Malay,English,Spanish,Filipino,Chinese": 0.22727272727272727 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.1534090909090909, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.13636363636363635, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.1534090909090909, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.16477272727272727, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.1534090909090909 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.11363636363636363 + } + }, + "AC3_2": 0.3590629216647561, + "AC3_3": 0.3123139195843181, + "AC3_4": 0.27020223688398987, + "AC3_5": 0.23068461705803028, + "AC3_6": 0.19398097072054904, + "AC3_7": 0.15971820107769732 + }, + "prompt_3": { + "overall_acc": 0.27435064935064934, + "language_acc": { + "Vietnamese": 0.3181818181818182, + "Indonesian": 0.26136363636363635, + "Malay": 0.2556818181818182, + "English": 0.2840909090909091, + "Spanish": 0.2840909090909091, + "Filipino": 0.2556818181818182, + "Chinese": 0.26136363636363635 + }, + "consistency_score_2": 0.5413961038961039, + "consistency_score_3": 0.37646103896103894, + "consistency_score_4": 0.2816558441558441, + "consistency_score_5": 0.21753246753246755, + "consistency_score_6": 0.1712662337662338, + "consistency_score_7": 0.13636363636363635, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.42613636363636365, + "Vietnamese,Malay": 0.5227272727272727, + "Vietnamese,English": 0.5170454545454546, + "Vietnamese,Spanish": 0.5170454545454546, + "Vietnamese,Filipino": 0.4659090909090909, + "Vietnamese,Chinese": 0.5, + "Indonesian,Malay": 0.5625, + "Indonesian,English": 0.5284090909090909, + "Indonesian,Spanish": 0.5738636363636364, + "Indonesian,Filipino": 0.5113636363636364, + "Indonesian,Chinese": 0.4602272727272727, + "Malay,English": 0.5738636363636364, + "Malay,Spanish": 0.5625, + "Malay,Filipino": 0.5397727272727273, + "Malay,Chinese": 0.48863636363636365, + "English,Spanish": 0.6477272727272727, + "English,Filipino": 0.625, + "English,Chinese": 0.6136363636363636, + "Spanish,Filipino": 0.6647727272727273, + "Spanish,Chinese": 0.5170454545454546, + "Filipino,Chinese": 0.5511363636363636 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.32954545454545453, + "Vietnamese,Indonesian,English": 0.30113636363636365, + "Vietnamese,Indonesian,Spanish": 0.32954545454545453, + "Vietnamese,Indonesian,Filipino": 0.2840909090909091, + "Vietnamese,Indonesian,Chinese": 0.26704545454545453, + "Vietnamese,Malay,English": 0.3693181818181818, + "Vietnamese,Malay,Spanish": 0.36363636363636365, + "Vietnamese,Malay,Filipino": 0.32386363636363635, + "Vietnamese,Malay,Chinese": 0.3125, + "Vietnamese,English,Spanish": 0.4034090909090909, + "Vietnamese,English,Filipino": 0.3693181818181818, + "Vietnamese,English,Chinese": 0.375, + "Vietnamese,Spanish,Filipino": 0.3693181818181818, + "Vietnamese,Spanish,Chinese": 0.3409090909090909, + "Vietnamese,Filipino,Chinese": 0.3409090909090909, + "Indonesian,Malay,English": 0.3977272727272727, + "Indonesian,Malay,Spanish": 0.4147727272727273, + "Indonesian,Malay,Filipino": 0.3806818181818182, + "Indonesian,Malay,Chinese": 0.32954545454545453, + "Indonesian,English,Spanish": 0.42613636363636365, + "Indonesian,English,Filipino": 0.4034090909090909, + "Indonesian,English,Chinese": 0.3693181818181818, + "Indonesian,Spanish,Filipino": 0.4375, + "Indonesian,Spanish,Chinese": 0.3409090909090909, + "Indonesian,Filipino,Chinese": 0.35795454545454547, + "Malay,English,Spanish": 0.4375, + "Malay,English,Filipino": 0.4090909090909091, + "Malay,English,Chinese": 0.4034090909090909, + "Malay,Spanish,Filipino": 0.4318181818181818, + "Malay,Spanish,Chinese": 0.35795454545454547, + "Malay,Filipino,Chinese": 0.3693181818181818, + "English,Spanish,Filipino": 0.5170454545454546, + "English,Spanish,Chinese": 0.4375, + "English,Filipino,Chinese": 0.44886363636363635, + "Spanish,Filipino,Chinese": 0.42613636363636365 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.25, + "Vietnamese,Indonesian,Malay,Spanish": 0.2784090909090909, + "Vietnamese,Indonesian,Malay,Filipino": 0.22727272727272727, + "Vietnamese,Indonesian,Malay,Chinese": 0.19886363636363635, + "Vietnamese,Indonesian,English,Spanish": 0.26136363636363635, + "Vietnamese,Indonesian,English,Filipino": 0.22727272727272727, + "Vietnamese,Indonesian,English,Chinese": 0.22727272727272727, + "Vietnamese,Indonesian,Spanish,Filipino": 0.2556818181818182, + "Vietnamese,Indonesian,Spanish,Chinese": 0.2215909090909091, + "Vietnamese,Indonesian,Filipino,Chinese": 0.2159090909090909, + "Vietnamese,Malay,English,Spanish": 0.3125, + "Vietnamese,Malay,English,Filipino": 0.26136363636363635, + "Vietnamese,Malay,English,Chinese": 0.2727272727272727, + "Vietnamese,Malay,Spanish,Filipino": 0.2727272727272727, + "Vietnamese,Malay,Spanish,Chinese": 0.2556818181818182, + "Vietnamese,Malay,Filipino,Chinese": 0.25, + "Vietnamese,English,Spanish,Filipino": 0.3068181818181818, + "Vietnamese,English,Spanish,Chinese": 0.29545454545454547, + "Vietnamese,English,Filipino,Chinese": 0.2897727272727273, + "Vietnamese,Spanish,Filipino,Chinese": 0.2840909090909091, + "Indonesian,Malay,English,Spanish": 0.3352272727272727, + "Indonesian,Malay,English,Filipino": 0.3068181818181818, + "Indonesian,Malay,English,Chinese": 0.2840909090909091, + "Indonesian,Malay,Spanish,Filipino": 0.32954545454545453, + "Indonesian,Malay,Spanish,Chinese": 0.26704545454545453, + "Indonesian,Malay,Filipino,Chinese": 0.26704545454545453, + "Indonesian,English,Spanish,Filipino": 0.35795454545454547, + "Indonesian,English,Spanish,Chinese": 0.2840909090909091, + "Indonesian,English,Filipino,Chinese": 0.30113636363636365, + "Indonesian,Spanish,Filipino,Chinese": 0.30113636363636365, + "Malay,English,Spanish,Filipino": 0.3465909090909091, + "Malay,English,Spanish,Chinese": 0.3181818181818182, + "Malay,English,Filipino,Chinese": 0.32386363636363635, + "Malay,Spanish,Filipino,Chinese": 0.30113636363636365, + "English,Spanish,Filipino,Chinese": 0.3693181818181818 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.23295454545454544, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.1875, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.18181818181818182, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.21022727272727273, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.18181818181818182, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.16477272727272727, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.20454545454545456, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.1875, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.18181818181818182, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.19318181818181818, + "Vietnamese,Malay,English,Spanish,Filipino": 0.23295454545454544, + "Vietnamese,Malay,English,Spanish,Chinese": 0.23295454545454544, + "Vietnamese,Malay,English,Filipino,Chinese": 0.2159090909090909, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.2159090909090909, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.24431818181818182, + "Indonesian,Malay,English,Spanish,Filipino": 0.2727272727272727, + "Indonesian,Malay,English,Spanish,Chinese": 0.23295454545454544, + "Indonesian,Malay,English,Filipino,Chinese": 0.23863636363636365, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.22727272727272727, + "Indonesian,English,Spanish,Filipino,Chinese": 0.2556818181818182, + "Malay,English,Spanish,Filipino,Chinese": 0.2727272727272727 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.17613636363636365, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.16477272727272727, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.1534090909090909, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.1590909090909091, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.19318181818181818, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.20454545454545456 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.13636363636363635 + } + }, + "AC3_2": 0.36416295143174265, + "AC3_3": 0.31739543813738985, + "AC3_4": 0.2779552563724183, + "AC3_5": 0.2426599802344098, + "AC3_6": 0.21088519854279228, + "AC3_7": 0.18217750624382195 + }, + "prompt_4": { + "overall_acc": 0.2784090909090909, + "language_acc": { + "Vietnamese": 0.2840909090909091, + "Indonesian": 0.2556818181818182, + "Malay": 0.26704545454545453, + "English": 0.3068181818181818, + "Spanish": 0.29545454545454547, + "Filipino": 0.2784090909090909, + "Chinese": 0.26136363636363635 + }, + "consistency_score_2": 0.5232683982683982, + "consistency_score_3": 0.3470779220779221, + "consistency_score_4": 0.2475649350649351, + "consistency_score_5": 0.1818181818181818, + "consistency_score_6": 0.13392857142857142, + "consistency_score_7": 0.09659090909090909, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.375, + "Vietnamese,Malay": 0.44886363636363635, + "Vietnamese,English": 0.5, + "Vietnamese,Spanish": 0.4943181818181818, + "Vietnamese,Filipino": 0.4659090909090909, + "Vietnamese,Chinese": 0.44886363636363635, + "Indonesian,Malay": 0.5852272727272727, + "Indonesian,English": 0.5681818181818182, + "Indonesian,Spanish": 0.5, + "Indonesian,Filipino": 0.4602272727272727, + "Indonesian,Chinese": 0.5170454545454546, + "Malay,English": 0.6193181818181818, + "Malay,Spanish": 0.5340909090909091, + "Malay,Filipino": 0.5340909090909091, + "Malay,Chinese": 0.4772727272727273, + "English,Spanish": 0.6306818181818182, + "English,Filipino": 0.625, + "English,Chinese": 0.625, + "Spanish,Filipino": 0.5965909090909091, + "Spanish,Chinese": 0.4943181818181818, + "Filipino,Chinese": 0.48863636363636365 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.26704545454545453, + "Vietnamese,Indonesian,English": 0.2840909090909091, + "Vietnamese,Indonesian,Spanish": 0.25, + "Vietnamese,Indonesian,Filipino": 0.24431818181818182, + "Vietnamese,Indonesian,Chinese": 0.25, + "Vietnamese,Malay,English": 0.3409090909090909, + "Vietnamese,Malay,Spanish": 0.29545454545454547, + "Vietnamese,Malay,Filipino": 0.2897727272727273, + "Vietnamese,Malay,Chinese": 0.2556818181818182, + "Vietnamese,English,Spanish": 0.3522727272727273, + "Vietnamese,English,Filipino": 0.3522727272727273, + "Vietnamese,English,Chinese": 0.3409090909090909, + "Vietnamese,Spanish,Filipino": 0.3352272727272727, + "Vietnamese,Spanish,Chinese": 0.2897727272727273, + "Vietnamese,Filipino,Chinese": 0.2784090909090909, + "Indonesian,Malay,English": 0.44886363636363635, + "Indonesian,Malay,Spanish": 0.375, + "Indonesian,Malay,Filipino": 0.3693181818181818, + "Indonesian,Malay,Chinese": 0.3522727272727273, + "Indonesian,English,Spanish": 0.39204545454545453, + "Indonesian,English,Filipino": 0.375, + "Indonesian,English,Chinese": 0.4090909090909091, + "Indonesian,Spanish,Filipino": 0.3409090909090909, + "Indonesian,Spanish,Chinese": 0.3181818181818182, + "Indonesian,Filipino,Chinese": 0.30113636363636365, + "Malay,English,Spanish": 0.4318181818181818, + "Malay,English,Filipino": 0.4318181818181818, + "Malay,English,Chinese": 0.42045454545454547, + "Malay,Spanish,Filipino": 0.39204545454545453, + "Malay,Spanish,Chinese": 0.3409090909090909, + "Malay,Filipino,Chinese": 0.3352272727272727, + "English,Spanish,Filipino": 0.4943181818181818, + "English,Spanish,Chinese": 0.4090909090909091, + "English,Filipino,Chinese": 0.42045454545454547, + "Spanish,Filipino,Chinese": 0.36363636363636365 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.22727272727272727, + "Vietnamese,Indonesian,Malay,Spanish": 0.18181818181818182, + "Vietnamese,Indonesian,Malay,Filipino": 0.18181818181818182, + "Vietnamese,Indonesian,Malay,Chinese": 0.17613636363636365, + "Vietnamese,Indonesian,English,Spanish": 0.2159090909090909, + "Vietnamese,Indonesian,English,Filipino": 0.19886363636363635, + "Vietnamese,Indonesian,English,Chinese": 0.2159090909090909, + "Vietnamese,Indonesian,Spanish,Filipino": 0.1875, + "Vietnamese,Indonesian,Spanish,Chinese": 0.18181818181818182, + "Vietnamese,Indonesian,Filipino,Chinese": 0.1590909090909091, + "Vietnamese,Malay,English,Spanish": 0.25, + "Vietnamese,Malay,English,Filipino": 0.23295454545454544, + "Vietnamese,Malay,English,Chinese": 0.23863636363636365, + "Vietnamese,Malay,Spanish,Filipino": 0.2159090909090909, + "Vietnamese,Malay,Spanish,Chinese": 0.19886363636363635, + "Vietnamese,Malay,Filipino,Chinese": 0.1875, + "Vietnamese,English,Spanish,Filipino": 0.2840909090909091, + "Vietnamese,English,Spanish,Chinese": 0.25, + "Vietnamese,English,Filipino,Chinese": 0.25, + "Vietnamese,Spanish,Filipino,Chinese": 0.21022727272727273, + "Indonesian,Malay,English,Spanish": 0.32954545454545453, + "Indonesian,Malay,English,Filipino": 0.3181818181818182, + "Indonesian,Malay,English,Chinese": 0.3181818181818182, + "Indonesian,Malay,Spanish,Filipino": 0.2784090909090909, + "Indonesian,Malay,Spanish,Chinese": 0.26136363636363635, + "Indonesian,Malay,Filipino,Chinese": 0.24431818181818182, + "Indonesian,English,Spanish,Filipino": 0.30113636363636365, + "Indonesian,English,Spanish,Chinese": 0.2840909090909091, + "Indonesian,English,Filipino,Chinese": 0.26704545454545453, + "Indonesian,Spanish,Filipino,Chinese": 0.23295454545454544, + "Malay,English,Spanish,Filipino": 0.3465909090909091, + "Malay,English,Spanish,Chinese": 0.3181818181818182, + "Malay,English,Filipino,Chinese": 0.3068181818181818, + "Malay,Spanish,Filipino,Chinese": 0.2784090909090909, + "English,Spanish,Filipino,Chinese": 0.3352272727272727 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.17045454545454544, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.1534090909090909, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.13636363636363635, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.13636363636363635, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.11931818181818182, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.16477272727272727, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.17045454545454544, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.14772727272727273, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.125, + "Vietnamese,Malay,English,Spanish,Filipino": 0.19318181818181818, + "Vietnamese,Malay,English,Spanish,Chinese": 0.19318181818181818, + "Vietnamese,Malay,English,Filipino,Chinese": 0.17045454545454544, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.1534090909090909, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.19886363636363635, + "Indonesian,Malay,English,Spanish,Filipino": 0.2556818181818182, + "Indonesian,Malay,English,Spanish,Chinese": 0.24431818181818182, + "Indonesian,Malay,English,Filipino,Chinese": 0.22727272727272727, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.20454545454545456, + "Indonesian,English,Spanish,Filipino,Chinese": 0.2215909090909091, + "Malay,English,Spanish,Filipino,Chinese": 0.26136363636363635 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.125, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.13636363636363635, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.11363636363636363, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.09659090909090909, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.125, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.14772727272727273, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.19318181818181818 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.09659090909090909 + } + }, + "AC3_2": 0.3634446046243788, + "AC3_3": 0.3089741169314676, + "AC3_4": 0.2620826318244705, + "AC3_5": 0.21997755326308902, + "AC3_6": 0.1808562991687372, + "AC3_7": 0.14342286497552797 + }, + "prompt_5": { + "overall_acc": 0.28977272727272724, + "language_acc": { + "Vietnamese": 0.30113636363636365, + "Indonesian": 0.3068181818181818, + "Malay": 0.2840909090909091, + "English": 0.3125, + "Spanish": 0.2897727272727273, + "Filipino": 0.26704545454545453, + "Chinese": 0.26704545454545453 + }, + "consistency_score_2": 0.5338203463203464, + "consistency_score_3": 0.35876623376623373, + "consistency_score_4": 0.2577922077922078, + "consistency_score_5": 0.19101731601731603, + "consistency_score_6": 0.1436688311688312, + "consistency_score_7": 0.10795454545454546, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Indonesian": 0.3977272727272727, + "Vietnamese,Malay": 0.48863636363636365, + "Vietnamese,English": 0.48863636363636365, + "Vietnamese,Spanish": 0.5, + "Vietnamese,Filipino": 0.4375, + "Vietnamese,Chinese": 0.42613636363636365, + "Indonesian,Malay": 0.5852272727272727, + "Indonesian,English": 0.6022727272727273, + "Indonesian,Spanish": 0.5397727272727273, + "Indonesian,Filipino": 0.5454545454545454, + "Indonesian,Chinese": 0.5340909090909091, + "Malay,English": 0.6193181818181818, + "Malay,Spanish": 0.5284090909090909, + "Malay,Filipino": 0.5397727272727273, + "Malay,Chinese": 0.4772727272727273, + "English,Spanish": 0.6477272727272727, + "English,Filipino": 0.6477272727272727, + "English,Chinese": 0.5568181818181818, + "Spanish,Filipino": 0.625, + "Spanish,Chinese": 0.48295454545454547, + "Filipino,Chinese": 0.5397727272727273 + }, + "3_combine": { + "Vietnamese,Indonesian,Malay": 0.2897727272727273, + "Vietnamese,Indonesian,English": 0.30113636363636365, + "Vietnamese,Indonesian,Spanish": 0.2897727272727273, + "Vietnamese,Indonesian,Filipino": 0.2556818181818182, + "Vietnamese,Indonesian,Chinese": 0.26136363636363635, + "Vietnamese,Malay,English": 0.3409090909090909, + "Vietnamese,Malay,Spanish": 0.3181818181818182, + "Vietnamese,Malay,Filipino": 0.2897727272727273, + "Vietnamese,Malay,Chinese": 0.2784090909090909, + "Vietnamese,English,Spanish": 0.3693181818181818, + "Vietnamese,English,Filipino": 0.32954545454545453, + "Vietnamese,English,Chinese": 0.29545454545454547, + "Vietnamese,Spanish,Filipino": 0.3409090909090909, + "Vietnamese,Spanish,Chinese": 0.2784090909090909, + "Vietnamese,Filipino,Chinese": 0.2784090909090909, + "Indonesian,Malay,English": 0.4602272727272727, + "Indonesian,Malay,Spanish": 0.38636363636363635, + "Indonesian,Malay,Filipino": 0.3977272727272727, + "Indonesian,Malay,Chinese": 0.3465909090909091, + "Indonesian,English,Spanish": 0.4431818181818182, + "Indonesian,English,Filipino": 0.4375, + "Indonesian,English,Chinese": 0.4034090909090909, + "Indonesian,Spanish,Filipino": 0.4090909090909091, + "Indonesian,Spanish,Chinese": 0.3465909090909091, + "Indonesian,Filipino,Chinese": 0.36363636363636365, + "Malay,English,Spanish": 0.4375, + "Malay,English,Filipino": 0.4431818181818182, + "Malay,English,Chinese": 0.375, + "Malay,Spanish,Filipino": 0.4090909090909091, + "Malay,Spanish,Chinese": 0.32954545454545453, + "Malay,Filipino,Chinese": 0.35795454545454547, + "English,Spanish,Filipino": 0.5113636363636364, + "English,Spanish,Chinese": 0.38636363636363635, + "English,Filipino,Chinese": 0.4090909090909091, + "Spanish,Filipino,Chinese": 0.38636363636363635 + }, + "4_combine": { + "Vietnamese,Indonesian,Malay,English": 0.24431818181818182, + "Vietnamese,Indonesian,Malay,Spanish": 0.23295454545454544, + "Vietnamese,Indonesian,Malay,Filipino": 0.2159090909090909, + "Vietnamese,Indonesian,Malay,Chinese": 0.19318181818181818, + "Vietnamese,Indonesian,English,Spanish": 0.25, + "Vietnamese,Indonesian,English,Filipino": 0.2159090909090909, + "Vietnamese,Indonesian,English,Chinese": 0.20454545454545456, + "Vietnamese,Indonesian,Spanish,Filipino": 0.23295454545454544, + "Vietnamese,Indonesian,Spanish,Chinese": 0.19318181818181818, + "Vietnamese,Indonesian,Filipino,Chinese": 0.17613636363636365, + "Vietnamese,Malay,English,Spanish": 0.26136363636363635, + "Vietnamese,Malay,English,Filipino": 0.23295454545454544, + "Vietnamese,Malay,English,Chinese": 0.2215909090909091, + "Vietnamese,Malay,Spanish,Filipino": 0.24431818181818182, + "Vietnamese,Malay,Spanish,Chinese": 0.21022727272727273, + "Vietnamese,Malay,Filipino,Chinese": 0.21022727272727273, + "Vietnamese,English,Spanish,Filipino": 0.2897727272727273, + "Vietnamese,English,Spanish,Chinese": 0.2215909090909091, + "Vietnamese,English,Filipino,Chinese": 0.20454545454545456, + "Vietnamese,Spanish,Filipino,Chinese": 0.2159090909090909, + "Indonesian,Malay,English,Spanish": 0.3465909090909091, + "Indonesian,Malay,English,Filipino": 0.3409090909090909, + "Indonesian,Malay,English,Chinese": 0.2840909090909091, + "Indonesian,Malay,Spanish,Filipino": 0.3181818181818182, + "Indonesian,Malay,Spanish,Chinese": 0.26136363636363635, + "Indonesian,Malay,Filipino,Chinese": 0.2727272727272727, + "Indonesian,English,Spanish,Filipino": 0.3522727272727273, + "Indonesian,English,Spanish,Chinese": 0.30113636363636365, + "Indonesian,English,Filipino,Chinese": 0.29545454545454547, + "Indonesian,Spanish,Filipino,Chinese": 0.2784090909090909, + "Malay,English,Spanish,Filipino": 0.3522727272727273, + "Malay,English,Spanish,Chinese": 0.2727272727272727, + "Malay,English,Filipino,Chinese": 0.2840909090909091, + "Malay,Spanish,Filipino,Chinese": 0.2784090909090909, + "English,Spanish,Filipino,Chinese": 0.3125 + }, + "5_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish": 0.20454545454545456, + "Vietnamese,Indonesian,Malay,English,Filipino": 0.18181818181818182, + "Vietnamese,Indonesian,Malay,English,Chinese": 0.1590909090909091, + "Vietnamese,Indonesian,Malay,Spanish,Filipino": 0.19886363636363635, + "Vietnamese,Indonesian,Malay,Spanish,Chinese": 0.1590909090909091, + "Vietnamese,Indonesian,Malay,Filipino,Chinese": 0.1534090909090909, + "Vietnamese,Indonesian,English,Spanish,Filipino": 0.19886363636363635, + "Vietnamese,Indonesian,English,Spanish,Chinese": 0.16477272727272727, + "Vietnamese,Indonesian,English,Filipino,Chinese": 0.14204545454545456, + "Vietnamese,Indonesian,Spanish,Filipino,Chinese": 0.1534090909090909, + "Vietnamese,Malay,English,Spanish,Filipino": 0.21022727272727273, + "Vietnamese,Malay,English,Spanish,Chinese": 0.17045454545454544, + "Vietnamese,Malay,English,Filipino,Chinese": 0.1590909090909091, + "Vietnamese,Malay,Spanish,Filipino,Chinese": 0.17613636363636365, + "Vietnamese,English,Spanish,Filipino,Chinese": 0.17045454545454544, + "Indonesian,Malay,English,Spanish,Filipino": 0.2784090909090909, + "Indonesian,Malay,English,Spanish,Chinese": 0.2215909090909091, + "Indonesian,Malay,English,Filipino,Chinese": 0.2215909090909091, + "Indonesian,Malay,Spanish,Filipino,Chinese": 0.2215909090909091, + "Indonesian,English,Spanish,Filipino,Chinese": 0.23863636363636365, + "Malay,English,Spanish,Filipino,Chinese": 0.22727272727272727 + }, + "6_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino": 0.17045454545454544, + "Vietnamese,Indonesian,Malay,English,Spanish,Chinese": 0.13068181818181818, + "Vietnamese,Indonesian,Malay,English,Filipino,Chinese": 0.11931818181818182, + "Vietnamese,Indonesian,Malay,Spanish,Filipino,Chinese": 0.13636363636363635, + "Vietnamese,Indonesian,English,Spanish,Filipino,Chinese": 0.125, + "Vietnamese,Malay,English,Spanish,Filipino,Chinese": 0.14204545454545456, + "Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.18181818181818182 + }, + "7_combine": { + "Vietnamese,Indonesian,Malay,English,Spanish,Filipino,Chinese": 0.10795454545454546 + } + }, + "AC3_2": 0.37563836454643423, + "AC3_3": 0.3205996131033705, + "AC3_4": 0.2728485566706514, + "AC3_5": 0.23025272415531203, + "AC3_6": 0.1920965270241183, + "AC3_7": 0.15730519476564378 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.39805825242718446 + }, + "prompt_2": { + "accuracy": 0.49514563106796117 + }, + "prompt_3": { + "accuracy": 0.44660194174757284 + }, + "prompt_4": { + "accuracy": 0.3592233009708738 + }, + "prompt_5": { + "accuracy": 0.4563106796116505 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.21904761904761905 + }, + "prompt_2": { + "accuracy": 0.21904761904761905 + }, + "prompt_3": { + "accuracy": 0.19047619047619047 + }, + "prompt_4": { + "accuracy": 0.21904761904761905 + }, + "prompt_5": { + "accuracy": 0.20952380952380953 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5233644859813084 + }, + "prompt_2": { + "accuracy": 0.45794392523364486 + }, + "prompt_3": { + "accuracy": 0.42990654205607476 + }, + "prompt_4": { + "accuracy": 0.4672897196261682 + }, + "prompt_5": { + "accuracy": 0.48598130841121495 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.41, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.6, + "history": 0.26666666666666666, + "literature": 0.4, + "politics": 0.5, + "culture": 0.3, + "film": 0.7, + "law": 0.2, + "geography": 0.6 + } + }, + "prompt_2": { + "accuracy": 0.44, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.6, + "history": 0.3333333333333333, + "literature": 0.4, + "politics": 0.5, + "culture": 0.4, + "film": 0.5, + "law": 0.5, + "geography": 0.7 + } + }, + "prompt_3": { + "accuracy": 0.41, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.6, + "history": 0.3333333333333333, + "literature": 0.3, + "politics": 0.5, + "culture": 0.2, + "film": 0.7, + "law": 0.4, + "geography": 0.6 + } + }, + "prompt_4": { + "accuracy": 0.39, + "category_acc": { + "brand": 0.2, + "demographics": 0.0, + "biology": 0.7, + "history": 0.3333333333333333, + "literature": 0.3, + "politics": 0.5, + "culture": 0.0, + "film": 0.6, + "law": 0.4, + "geography": 0.7 + } + }, + "prompt_5": { + "accuracy": 0.39, + "category_acc": { + "brand": 0.2, + "demographics": 0.0, + "biology": 0.6, + "history": 0.3333333333333333, + "literature": 0.3, + "politics": 0.6, + "culture": 0.3, + "film": 0.5, + "law": 0.4, + "geography": 0.5 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.19325979062685159 + }, + "prompt_2": { + "bleu_score": 0.19719902017779267 + }, + "prompt_3": { + "bleu_score": 0.20314300474815059 + }, + "prompt_4": { + "bleu_score": 0.19526345749622018 + }, + "prompt_5": { + "bleu_score": 0.09868754065607462 + } }, "indommlu": { "prompt_1": -1, @@ -12566,244 +111078,1819 @@ "prompt_5": -1 }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.036679748589490795 + }, + "prompt_2": { + "bleu_score": 0.04730824109026968 + }, + "prompt_3": { + "bleu_score": 0.03716223465101119 + }, + "prompt_4": { + "bleu_score": 0.03995567721020701 + }, + "prompt_5": { + "bleu_score": 0.03901277937421067 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.013686452890985893 + }, + "prompt_2": { + "bleu_score": 0.01536493026677872 + }, + "prompt_3": { + "bleu_score": 0.01063369404270174 + }, + "prompt_4": { + "bleu_score": 0.01510255811769833 + }, + "prompt_5": { + "bleu_score": 0.013364368736344938 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.04080414274998884 + }, + "prompt_2": { + "bleu_score": 0.059894275340086615 + }, + "prompt_3": { + "bleu_score": 0.04243299674633565 + }, + "prompt_4": { + "bleu_score": 0.04490739901907554 + }, + "prompt_5": { + "bleu_score": 0.04342511011220152 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.03291458077267792 + }, + "prompt_2": { + "bleu_score": 0.04186085104111973 + }, + "prompt_3": { + "bleu_score": 0.03437371102970113 + }, + "prompt_4": { + "bleu_score": 0.036268455238204725 + }, + "prompt_5": { + "bleu_score": 0.03607138242086422 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4807467911318553 + }, + "prompt_2": { + "accuracy": 0.47607934655775963 + }, + "prompt_3": { + "accuracy": 0.47724620770128356 + }, + "prompt_4": { + "accuracy": 0.5075845974329055 + }, + "prompt_5": { + "accuracy": 0.4982497082847141 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.455917053986414, + "category_acc": { + "high_school_european_history": 0.4451219512195122, + "business_ethics": 0.5757575757575758, + "clinical_knowledge": 0.5378787878787878, + "medical_genetics": 0.5555555555555556, + "high_school_us_history": 0.4729064039408867, + "high_school_physics": 0.26, + "high_school_world_history": 0.5042372881355932, + "virology": 0.47878787878787876, + "high_school_microeconomics": 0.4936708860759494, + "econometrics": 0.26548672566371684, + "college_computer_science": 0.40404040404040403, + "high_school_biology": 0.5728155339805825, + "abstract_algebra": 0.25252525252525254, + "professional_accounting": 0.3665480427046263, + "philosophy": 0.45806451612903226, + "professional_medicine": 0.36162361623616235, + "nutrition": 0.5540983606557377, + "global_facts": 0.24242424242424243, + "machine_learning": 0.43243243243243246, + "security_studies": 0.3524590163934426, + "public_relations": 0.5596330275229358, + "professional_psychology": 0.4877250409165303, + "prehistory": 0.4551083591331269, + "anatomy": 0.44029850746268656, + "human_sexuality": 0.5846153846153846, + "college_medicine": 0.43023255813953487, + "high_school_government_and_politics": 0.640625, + "college_chemistry": 0.3333333333333333, + "logical_fallacies": 0.5679012345679012, + "high_school_geography": 0.6091370558375635, + "elementary_mathematics": 0.363395225464191, + "human_aging": 0.581081081081081, + "college_mathematics": 0.40404040404040403, + "high_school_psychology": 0.6415441176470589, + "formal_logic": 0.344, + "high_school_statistics": 0.29767441860465116, + "international_law": 0.6916666666666667, + "high_school_mathematics": 0.29739776951672864, + "high_school_computer_science": 0.47474747474747475, + "conceptual_physics": 0.45726495726495725, + "miscellaneous": 0.6112531969309463, + "high_school_chemistry": 0.31683168316831684, + "marketing": 0.7639484978540773, + "professional_law": 0.3039791258969341, + "management": 0.6568627450980392, + "college_physics": 0.22772277227722773, + "jurisprudence": 0.5981308411214953, + "world_religions": 0.6588235294117647, + "sociology": 0.695, + "us_foreign_policy": 0.6767676767676768, + "high_school_macroeconomics": 0.442159383033419, + "computer_security": 0.5151515151515151, + "moral_scenarios": 0.24161073825503357, + "moral_disputes": 0.5217391304347826, + "electrical_engineering": 0.5069444444444444, + "astronomy": 0.4900662251655629, + "college_biology": 0.4965034965034965 + } + }, + "prompt_2": { + "accuracy": 0.4519127636753665, + "category_acc": { + "high_school_european_history": 0.3780487804878049, + "business_ethics": 0.6262626262626263, + "clinical_knowledge": 0.553030303030303, + "medical_genetics": 0.5959595959595959, + "high_school_us_history": 0.42857142857142855, + "high_school_physics": 0.26, + "high_school_world_history": 0.4322033898305085, + "virology": 0.46060606060606063, + "high_school_microeconomics": 0.45569620253164556, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.3333333333333333, + "high_school_biology": 0.5598705501618123, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.3202846975088968, + "philosophy": 0.46774193548387094, + "professional_medicine": 0.35424354243542433, + "nutrition": 0.521311475409836, + "global_facts": 0.30303030303030304, + "machine_learning": 0.42342342342342343, + "security_studies": 0.3524590163934426, + "public_relations": 0.5045871559633027, + "professional_psychology": 0.4795417348608838, + "prehistory": 0.4984520123839009, + "anatomy": 0.4925373134328358, + "human_sexuality": 0.5384615384615384, + "college_medicine": 0.436046511627907, + "high_school_government_and_politics": 0.6510416666666666, + "college_chemistry": 0.31313131313131315, + "logical_fallacies": 0.5740740740740741, + "high_school_geography": 0.6243654822335025, + "elementary_mathematics": 0.35809018567639256, + "human_aging": 0.5945945945945946, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.6433823529411765, + "formal_logic": 0.344, + "high_school_statistics": 0.2744186046511628, + "international_law": 0.6666666666666666, + "high_school_mathematics": 0.2862453531598513, + "high_school_computer_science": 0.4444444444444444, + "conceptual_physics": 0.45726495726495725, + "miscellaneous": 0.6278772378516624, + "high_school_chemistry": 0.3316831683168317, + "marketing": 0.7639484978540773, + "professional_law": 0.29810828440965426, + "management": 0.6470588235294118, + "college_physics": 0.27722772277227725, + "jurisprudence": 0.5794392523364486, + "world_religions": 0.6529411764705882, + "sociology": 0.68, + "us_foreign_policy": 0.7070707070707071, + "high_school_macroeconomics": 0.46786632390745503, + "computer_security": 0.5252525252525253, + "moral_scenarios": 0.2371364653243848, + "moral_disputes": 0.5246376811594203, + "electrical_engineering": 0.4791666666666667, + "astronomy": 0.5364238410596026, + "college_biology": 0.46153846153846156 + } + }, + "prompt_3": { + "accuracy": 0.454486950303897, + "category_acc": { + "high_school_european_history": 0.39634146341463417, + "business_ethics": 0.6161616161616161, + "clinical_knowledge": 0.5681818181818182, + "medical_genetics": 0.5656565656565656, + "high_school_us_history": 0.42857142857142855, + "high_school_physics": 0.25333333333333335, + "high_school_world_history": 0.423728813559322, + "virology": 0.49696969696969695, + "high_school_microeconomics": 0.47257383966244726, + "econometrics": 0.3274336283185841, + "college_computer_science": 0.3333333333333333, + "high_school_biology": 0.5631067961165048, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.35231316725978645, + "philosophy": 0.49032258064516127, + "professional_medicine": 0.35424354243542433, + "nutrition": 0.521311475409836, + "global_facts": 0.36363636363636365, + "machine_learning": 0.42342342342342343, + "security_studies": 0.36885245901639346, + "public_relations": 0.5045871559633027, + "professional_psychology": 0.4844517184942717, + "prehistory": 0.4984520123839009, + "anatomy": 0.47761194029850745, + "human_sexuality": 0.5153846153846153, + "college_medicine": 0.4186046511627907, + "high_school_government_and_politics": 0.640625, + "college_chemistry": 0.3333333333333333, + "logical_fallacies": 0.5679012345679012, + "high_school_geography": 0.6243654822335025, + "elementary_mathematics": 0.3448275862068966, + "human_aging": 0.5855855855855856, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.6544117647058824, + "formal_logic": 0.32, + "high_school_statistics": 0.29767441860465116, + "international_law": 0.6666666666666666, + "high_school_mathematics": 0.2862453531598513, + "high_school_computer_science": 0.4444444444444444, + "conceptual_physics": 0.44871794871794873, + "miscellaneous": 0.6278772378516624, + "high_school_chemistry": 0.3415841584158416, + "marketing": 0.7553648068669528, + "professional_law": 0.3039791258969341, + "management": 0.6372549019607843, + "college_physics": 0.3069306930693069, + "jurisprudence": 0.5514018691588785, + "world_religions": 0.6294117647058823, + "sociology": 0.72, + "us_foreign_policy": 0.7070707070707071, + "high_school_macroeconomics": 0.45758354755784064, + "computer_security": 0.5252525252525253, + "moral_scenarios": 0.2348993288590604, + "moral_disputes": 0.5014492753623189, + "electrical_engineering": 0.5, + "astronomy": 0.5165562913907285, + "college_biology": 0.4755244755244755 + } + }, + "prompt_4": { + "accuracy": 0.4616374687164819, + "category_acc": { + "high_school_european_history": 0.4451219512195122, + "business_ethics": 0.5959595959595959, + "clinical_knowledge": 0.5454545454545454, + "medical_genetics": 0.5555555555555556, + "high_school_us_history": 0.4876847290640394, + "high_school_physics": 0.28, + "high_school_world_history": 0.5084745762711864, + "virology": 0.4727272727272727, + "high_school_microeconomics": 0.47257383966244726, + "econometrics": 0.2920353982300885, + "college_computer_science": 0.3333333333333333, + "high_school_biology": 0.5728155339805825, + "abstract_algebra": 0.31313131313131315, + "professional_accounting": 0.38434163701067614, + "philosophy": 0.4838709677419355, + "professional_medicine": 0.3763837638376384, + "nutrition": 0.5540983606557377, + "global_facts": 0.3333333333333333, + "machine_learning": 0.40540540540540543, + "security_studies": 0.39344262295081966, + "public_relations": 0.47706422018348627, + "professional_psychology": 0.4877250409165303, + "prehistory": 0.5170278637770898, + "anatomy": 0.4626865671641791, + "human_sexuality": 0.5923076923076923, + "college_medicine": 0.4127906976744186, + "high_school_government_and_politics": 0.6510416666666666, + "college_chemistry": 0.36363636363636365, + "logical_fallacies": 0.5802469135802469, + "high_school_geography": 0.6446700507614214, + "elementary_mathematics": 0.4005305039787798, + "human_aging": 0.5720720720720721, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.6415441176470589, + "formal_logic": 0.328, + "high_school_statistics": 0.30697674418604654, + "international_law": 0.7, + "high_school_mathematics": 0.275092936802974, + "high_school_computer_science": 0.45454545454545453, + "conceptual_physics": 0.43162393162393164, + "miscellaneous": 0.6150895140664961, + "high_school_chemistry": 0.36633663366336633, + "marketing": 0.7553648068669528, + "professional_law": 0.3020221787345075, + "management": 0.5980392156862745, + "college_physics": 0.22772277227722773, + "jurisprudence": 0.5607476635514018, + "world_religions": 0.6470588235294118, + "sociology": 0.7, + "us_foreign_policy": 0.7272727272727273, + "high_school_macroeconomics": 0.455012853470437, + "computer_security": 0.5656565656565656, + "moral_scenarios": 0.24272930648769575, + "moral_disputes": 0.5304347826086957, + "electrical_engineering": 0.4861111111111111, + "astronomy": 0.5231788079470199, + "college_biology": 0.5104895104895105 + } + }, + "prompt_5": { + "accuracy": 0.46485520200214514, + "category_acc": { + "high_school_european_history": 0.4573170731707317, + "business_ethics": 0.5757575757575758, + "clinical_knowledge": 0.5378787878787878, + "medical_genetics": 0.5757575757575758, + "high_school_us_history": 0.47783251231527096, + "high_school_physics": 0.26, + "high_school_world_history": 0.5084745762711864, + "virology": 0.4484848484848485, + "high_school_microeconomics": 0.48945147679324896, + "econometrics": 0.2831858407079646, + "college_computer_science": 0.3838383838383838, + "high_school_biology": 0.5857605177993528, + "abstract_algebra": 0.29292929292929293, + "professional_accounting": 0.3736654804270463, + "philosophy": 0.4967741935483871, + "professional_medicine": 0.3800738007380074, + "nutrition": 0.5639344262295082, + "global_facts": 0.29292929292929293, + "machine_learning": 0.4144144144144144, + "security_studies": 0.4016393442622951, + "public_relations": 0.46788990825688076, + "professional_psychology": 0.49754500818330605, + "prehistory": 0.5108359133126935, + "anatomy": 0.4626865671641791, + "human_sexuality": 0.6, + "college_medicine": 0.4011627906976744, + "high_school_government_and_politics": 0.6770833333333334, + "college_chemistry": 0.36363636363636365, + "logical_fallacies": 0.6049382716049383, + "high_school_geography": 0.6395939086294417, + "elementary_mathematics": 0.3819628647214854, + "human_aging": 0.581081081081081, + "college_mathematics": 0.3838383838383838, + "high_school_psychology": 0.6617647058823529, + "formal_logic": 0.304, + "high_school_statistics": 0.2930232558139535, + "international_law": 0.6666666666666666, + "high_school_mathematics": 0.2527881040892193, + "high_school_computer_science": 0.46464646464646464, + "conceptual_physics": 0.4444444444444444, + "miscellaneous": 0.6368286445012787, + "high_school_chemistry": 0.32673267326732675, + "marketing": 0.7467811158798283, + "professional_law": 0.30919765166340507, + "management": 0.6274509803921569, + "college_physics": 0.25742574257425743, + "jurisprudence": 0.616822429906542, + "world_religions": 0.6647058823529411, + "sociology": 0.705, + "us_foreign_policy": 0.7070707070707071, + "high_school_macroeconomics": 0.46786632390745503, + "computer_security": 0.494949494949495, + "moral_scenarios": 0.23825503355704697, + "moral_disputes": 0.5362318840579711, + "electrical_engineering": 0.5069444444444444, + "astronomy": 0.5231788079470199, + "college_biology": 0.5244755244755245 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2689450222882615 + }, + "prompt_2": { + "accuracy": 0.25928677563150077 + }, + "prompt_3": { + "accuracy": 0.2578008915304606 + }, + "prompt_4": { + "accuracy": 0.2674591381872214 + }, + "prompt_5": { + "accuracy": 0.2637444279346211 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.26089663760896636, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.25, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.42857142857142855, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.1724137931034483, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.10344827586206896, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.25, + "high_school_chemistry": 0.125, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.15384615384615385, + "middle_school_physics": 0.16666666666666666, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.25, + "college_economics": 0.36666666666666664, + "business_administration": 0.18421052631578946, + "marxism": 0.20833333333333334, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.35294117647058826, + "teacher_qualification": 0.2653061224489796, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.11764705882352941, + "modern_chinese_history": 0.17857142857142858, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.25925925925925924, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.32, + "middle_school_history": 0.2222222222222222, + "civil_servant": 0.25, + "sports_science": 0.25, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.20833333333333334, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.21568627450980393, + "accountant": 0.2222222222222222, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.19444444444444445, + "tax_accountant": 0.12962962962962962, + "physician": 0.2037037037037037 + } + }, + "prompt_2": { + "accuracy": 0.24906600249066002, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.25, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.30952380952380953, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.38095238095238093, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.1724137931034483, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.125, + "high_school_biology": 0.20833333333333334, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.19230769230769232, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.25, + "college_economics": 0.35, + "business_administration": 0.15789473684210525, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.24489795918367346, + "high_school_politics": 0.25, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.11764705882352941, + "modern_chinese_history": 0.10714285714285714, + "ideological_and_moral_cultivation": 0.25, + "logic": 0.2962962962962963, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.25, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.32, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.3076923076923077, + "sports_science": 0.20833333333333334, + "plant_protection": 0.2962962962962963, + "basic_medicine": 0.16666666666666666, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.21568627450980393, + "accountant": 0.2222222222222222, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.16666666666666666, + "tax_accountant": 0.16666666666666666, + "physician": 0.2222222222222222 + } + }, + "prompt_3": { + "accuracy": 0.2459526774595268, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.25, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.2857142857142857, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.125, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.13793103448275862, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.08333333333333333, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.19230769230769232, + "middle_school_physics": 0.2916666666666667, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.25, + "college_economics": 0.35, + "business_administration": 0.15789473684210525, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.2653061224489796, + "high_school_politics": 0.20833333333333334, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.17857142857142858, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.25925925925925924, + "law": 0.13793103448275862, + "chinese_language_and_literature": 0.25, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.32, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.28846153846153844, + "sports_science": 0.20833333333333334, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.20833333333333334, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.19607843137254902, + "accountant": 0.2222222222222222, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.16666666666666666, + "tax_accountant": 0.16666666666666666, + "physician": 0.24074074074074073 + } + }, + "prompt_4": { + "accuracy": 0.24346201743462018, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.20833333333333334, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.35714285714285715, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.1724137931034483, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.16666666666666666, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.15384615384615385, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.14285714285714285, + "college_economics": 0.35, + "business_administration": 0.13157894736842105, + "marxism": 0.25, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.29411764705882354, + "teacher_qualification": 0.2857142857142857, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.25, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.11764705882352941, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.25, + "logic": 0.2222222222222222, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.14285714285714285, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.25, + "high_school_history": 0.24, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.23076923076923078, + "sports_science": 0.16666666666666666, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.125, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.21568627450980393, + "accountant": 0.18518518518518517, + "fire_engineer": 0.19444444444444445, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.14814814814814814, + "physician": 0.2222222222222222 + } + }, + "prompt_5": { + "accuracy": 0.25404732254047324, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.25, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.38095238095238093, + "college_physics": 0.25, + "college_chemistry": 0.13793103448275862, + "advanced_mathematics": 0.20833333333333334, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.38095238095238093, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.13793103448275862, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.25, + "high_school_chemistry": 0.125, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.15384615384615385, + "middle_school_physics": 0.125, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.25, + "college_economics": 0.36666666666666664, + "business_administration": 0.18421052631578946, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.2413793103448276, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.30612244897959184, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.3333333333333333, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.11764705882352941, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.375, + "logic": 0.25925925925925924, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.25, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.25, + "high_school_history": 0.36, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.21153846153846154, + "sports_science": 0.20833333333333334, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.125, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.23529411764705882, + "accountant": 0.2037037037037037, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.12962962962962962, + "physician": 0.2222222222222222 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.25806451612903225 + }, + "prompt_2": { + "accuracy": 0.24731182795698925 + }, + "prompt_3": { + "accuracy": 0.2724014336917563 + }, + "prompt_4": { + "accuracy": 0.27956989247311825 + }, + "prompt_5": { + "accuracy": 0.2616487455197133 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2672250043170437, + "category_acc": { + "agronomy": 0.24260355029585798, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.2682926829268293, + "arts": 0.26875, + "astronomy": 0.2727272727272727, + "business_ethics": 0.27751196172248804, + "chinese_civil_service_exam": 0.23125, + "chinese_driving_rule": 0.31297709923664124, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.19626168224299065, + "chinese_history": 0.23839009287925697, + "chinese_literature": 0.24019607843137256, + "chinese_teacher_qualification": 0.25139664804469275, + "clinical_knowledge": 0.2489451476793249, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.3364485981308411, + "college_engineering_hydrology": 0.3018867924528302, + "college_law": 0.2037037037037037, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.330188679245283, + "college_medicine": 0.2893772893772894, + "computer_science": 0.29901960784313725, + "computer_security": 0.23976608187134502, + "conceptual_physics": 0.272108843537415, + "construction_project_management": 0.28776978417266186, + "economics": 0.27672955974842767, + "education": 0.26993865030674846, + "electrical_engineering": 0.27906976744186046, + "elementary_chinese": 0.32936507936507936, + "elementary_commonsense": 0.2777777777777778, + "elementary_information_and_technology": 0.2647058823529412, + "elementary_mathematics": 0.26521739130434785, + "ethnology": 0.3111111111111111, + "food_science": 0.25874125874125875, + "genetics": 0.25, + "global_facts": 0.2080536912751678, + "high_school_biology": 0.25443786982248523, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.2627118644067797, + "high_school_mathematics": 0.18292682926829268, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.27972027972027974, + "human_sexuality": 0.23809523809523808, + "international_law": 0.2756756756756757, + "journalism": 0.27325581395348836, + "jurisprudence": 0.2798053527980535, + "legal_and_moral_basis": 0.308411214953271, + "logical": 0.2764227642276423, + "machine_learning": 0.22131147540983606, + "management": 0.2619047619047619, + "marketing": 0.31666666666666665, + "marxist_theory": 0.2857142857142857, + "modern_chinese": 0.25862068965517243, + "nutrition": 0.3448275862068966, + "philosophy": 0.34285714285714286, + "professional_accounting": 0.28, + "professional_law": 0.25118483412322273, + "professional_medicine": 0.24468085106382978, + "professional_psychology": 0.2629310344827586, + "public_relations": 0.25287356321839083, + "security_study": 0.23703703703703705, + "sociology": 0.26548672566371684, + "sports_science": 0.296969696969697, + "traditional_chinese_medicine": 0.22702702702702704, + "virology": 0.2603550295857988, + "world_history": 0.2857142857142857, + "world_religions": 0.2625 + } + }, + "prompt_2": { + "accuracy": 0.25884993956138835, + "category_acc": { + "agronomy": 0.23076923076923078, + "anatomy": 0.22972972972972974, + "ancient_chinese": 0.25609756097560976, + "arts": 0.2125, + "astronomy": 0.24848484848484848, + "business_ethics": 0.23923444976076555, + "chinese_civil_service_exam": 0.2, + "chinese_driving_rule": 0.29770992366412213, + "chinese_food_culture": 0.19117647058823528, + "chinese_foreign_policy": 0.27102803738317754, + "chinese_history": 0.25386996904024767, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.25139664804469275, + "clinical_knowledge": 0.25316455696202533, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.35514018691588783, + "college_engineering_hydrology": 0.32075471698113206, + "college_law": 0.16666666666666666, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.3018867924528302, + "college_medicine": 0.2600732600732601, + "computer_science": 0.3088235294117647, + "computer_security": 0.26900584795321636, + "conceptual_physics": 0.2857142857142857, + "construction_project_management": 0.30935251798561153, + "economics": 0.27044025157232704, + "education": 0.24539877300613497, + "electrical_engineering": 0.27325581395348836, + "elementary_chinese": 0.2857142857142857, + "elementary_commonsense": 0.2474747474747475, + "elementary_information_and_technology": 0.28991596638655465, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.24444444444444444, + "food_science": 0.23776223776223776, + "genetics": 0.24431818181818182, + "global_facts": 0.2080536912751678, + "high_school_biology": 0.2603550295857988, + "high_school_chemistry": 0.25, + "high_school_geography": 0.2627118644067797, + "high_school_mathematics": 0.20121951219512196, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.26573426573426573, + "human_sexuality": 0.2777777777777778, + "international_law": 0.24864864864864866, + "journalism": 0.29069767441860467, + "jurisprudence": 0.25790754257907544, + "legal_and_moral_basis": 0.2897196261682243, + "logical": 0.2682926829268293, + "machine_learning": 0.27049180327868855, + "management": 0.2619047619047619, + "marketing": 0.31666666666666665, + "marxist_theory": 0.2857142857142857, + "modern_chinese": 0.23275862068965517, + "nutrition": 0.2896551724137931, + "philosophy": 0.29523809523809524, + "professional_accounting": 0.28, + "professional_law": 0.22274881516587677, + "professional_medicine": 0.24202127659574468, + "professional_psychology": 0.22844827586206898, + "public_relations": 0.25862068965517243, + "security_study": 0.26666666666666666, + "sociology": 0.252212389380531, + "sports_science": 0.26666666666666666, + "traditional_chinese_medicine": 0.23243243243243245, + "virology": 0.28402366863905326, + "world_history": 0.2857142857142857, + "world_religions": 0.2125 + } + }, + "prompt_3": { + "accuracy": 0.2623899153859437, + "category_acc": { + "agronomy": 0.25443786982248523, + "anatomy": 0.28378378378378377, + "ancient_chinese": 0.24390243902439024, + "arts": 0.24375, + "astronomy": 0.24848484848484848, + "business_ethics": 0.23923444976076555, + "chinese_civil_service_exam": 0.23125, + "chinese_driving_rule": 0.31297709923664124, + "chinese_food_culture": 0.19117647058823528, + "chinese_foreign_policy": 0.27102803738317754, + "chinese_history": 0.26006191950464397, + "chinese_literature": 0.2647058823529412, + "chinese_teacher_qualification": 0.2681564245810056, + "clinical_knowledge": 0.24472573839662448, + "college_actuarial_science": 0.22641509433962265, + "college_education": 0.3177570093457944, + "college_engineering_hydrology": 0.27358490566037735, + "college_law": 0.2037037037037037, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.29245283018867924, + "college_medicine": 0.26373626373626374, + "computer_science": 0.30392156862745096, + "computer_security": 0.26900584795321636, + "conceptual_physics": 0.2585034013605442, + "construction_project_management": 0.2733812949640288, + "economics": 0.27672955974842767, + "education": 0.25153374233128833, + "electrical_engineering": 0.3081395348837209, + "elementary_chinese": 0.3055555555555556, + "elementary_commonsense": 0.2222222222222222, + "elementary_information_and_technology": 0.2857142857142857, + "elementary_mathematics": 0.25217391304347825, + "ethnology": 0.2740740740740741, + "food_science": 0.2867132867132867, + "genetics": 0.23863636363636365, + "global_facts": 0.2483221476510067, + "high_school_biology": 0.24260355029585798, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.2542372881355932, + "high_school_mathematics": 0.18902439024390244, + "high_school_physics": 0.2818181818181818, + "high_school_politics": 0.21678321678321677, + "human_sexuality": 0.29365079365079366, + "international_law": 0.2864864864864865, + "journalism": 0.3023255813953488, + "jurisprudence": 0.2773722627737226, + "legal_and_moral_basis": 0.2803738317757009, + "logical": 0.2682926829268293, + "machine_learning": 0.2786885245901639, + "management": 0.2619047619047619, + "marketing": 0.2722222222222222, + "marxist_theory": 0.2698412698412698, + "modern_chinese": 0.2672413793103448, + "nutrition": 0.2896551724137931, + "philosophy": 0.3238095238095238, + "professional_accounting": 0.25142857142857145, + "professional_law": 0.22274881516587677, + "professional_medicine": 0.26595744680851063, + "professional_psychology": 0.24568965517241378, + "public_relations": 0.2413793103448276, + "security_study": 0.26666666666666666, + "sociology": 0.2610619469026549, + "sports_science": 0.28484848484848485, + "traditional_chinese_medicine": 0.21621621621621623, + "virology": 0.28402366863905326, + "world_history": 0.2732919254658385, + "world_religions": 0.2375 + } + }, + "prompt_4": { + "accuracy": 0.26221723363840443, + "category_acc": { + "agronomy": 0.3136094674556213, + "anatomy": 0.2702702702702703, + "ancient_chinese": 0.2621951219512195, + "arts": 0.2625, + "astronomy": 0.24848484848484848, + "business_ethics": 0.2583732057416268, + "chinese_civil_service_exam": 0.25, + "chinese_driving_rule": 0.31297709923664124, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.2336448598130841, + "chinese_history": 0.23219814241486067, + "chinese_literature": 0.24019607843137256, + "chinese_teacher_qualification": 0.26256983240223464, + "clinical_knowledge": 0.26582278481012656, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.308411214953271, + "college_engineering_hydrology": 0.2641509433962264, + "college_law": 0.23148148148148148, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.3018867924528302, + "college_medicine": 0.304029304029304, + "computer_science": 0.25, + "computer_security": 0.23976608187134502, + "conceptual_physics": 0.272108843537415, + "construction_project_management": 0.302158273381295, + "economics": 0.27672955974842767, + "education": 0.25766871165644173, + "electrical_engineering": 0.27906976744186046, + "elementary_chinese": 0.2896825396825397, + "elementary_commonsense": 0.23232323232323232, + "elementary_information_and_technology": 0.27310924369747897, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.26666666666666666, + "food_science": 0.27972027972027974, + "genetics": 0.25, + "global_facts": 0.21476510067114093, + "high_school_biology": 0.25443786982248523, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.2457627118644068, + "high_school_mathematics": 0.1951219512195122, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.25874125874125875, + "human_sexuality": 0.25396825396825395, + "international_law": 0.2810810810810811, + "journalism": 0.25, + "jurisprudence": 0.2846715328467153, + "legal_and_moral_basis": 0.2897196261682243, + "logical": 0.3089430894308943, + "machine_learning": 0.22950819672131148, + "management": 0.24761904761904763, + "marketing": 0.3055555555555556, + "marxist_theory": 0.2962962962962963, + "modern_chinese": 0.23275862068965517, + "nutrition": 0.2827586206896552, + "philosophy": 0.3238095238095238, + "professional_accounting": 0.2742857142857143, + "professional_law": 0.24170616113744076, + "professional_medicine": 0.23404255319148937, + "professional_psychology": 0.24568965517241378, + "public_relations": 0.25862068965517243, + "security_study": 0.2518518518518518, + "sociology": 0.28761061946902655, + "sports_science": 0.2545454545454545, + "traditional_chinese_medicine": 0.23243243243243245, + "virology": 0.28402366863905326, + "world_history": 0.2236024844720497, + "world_religions": 0.2375 + } + }, + "prompt_5": { + "accuracy": 0.2657572094629598, + "category_acc": { + "agronomy": 0.24260355029585798, + "anatomy": 0.2972972972972973, + "ancient_chinese": 0.2865853658536585, + "arts": 0.25625, + "astronomy": 0.2787878787878788, + "business_ethics": 0.23923444976076555, + "chinese_civil_service_exam": 0.21875, + "chinese_driving_rule": 0.29770992366412213, + "chinese_food_culture": 0.22794117647058823, + "chinese_foreign_policy": 0.2616822429906542, + "chinese_history": 0.23219814241486067, + "chinese_literature": 0.25980392156862747, + "chinese_teacher_qualification": 0.25139664804469275, + "clinical_knowledge": 0.2616033755274262, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.3177570093457944, + "college_engineering_hydrology": 0.2641509433962264, + "college_law": 0.2037037037037037, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.3113207547169811, + "college_medicine": 0.28205128205128205, + "computer_science": 0.30392156862745096, + "computer_security": 0.24561403508771928, + "conceptual_physics": 0.272108843537415, + "construction_project_management": 0.33093525179856115, + "economics": 0.3081761006289308, + "education": 0.26380368098159507, + "electrical_engineering": 0.2616279069767442, + "elementary_chinese": 0.3055555555555556, + "elementary_commonsense": 0.2676767676767677, + "elementary_information_and_technology": 0.2689075630252101, + "elementary_mathematics": 0.25217391304347825, + "ethnology": 0.25925925925925924, + "food_science": 0.3146853146853147, + "genetics": 0.2215909090909091, + "global_facts": 0.21476510067114093, + "high_school_biology": 0.27218934911242604, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.2457627118644068, + "high_school_mathematics": 0.2073170731707317, + "high_school_physics": 0.2636363636363636, + "high_school_politics": 0.2937062937062937, + "human_sexuality": 0.2222222222222222, + "international_law": 0.3027027027027027, + "journalism": 0.3023255813953488, + "jurisprudence": 0.2846715328467153, + "legal_and_moral_basis": 0.308411214953271, + "logical": 0.2682926829268293, + "machine_learning": 0.21311475409836064, + "management": 0.24761904761904763, + "marketing": 0.29444444444444445, + "marxist_theory": 0.2698412698412698, + "modern_chinese": 0.22413793103448276, + "nutrition": 0.32413793103448274, + "philosophy": 0.3047619047619048, + "professional_accounting": 0.2914285714285714, + "professional_law": 0.26066350710900477, + "professional_medicine": 0.26063829787234044, + "professional_psychology": 0.28448275862068967, + "public_relations": 0.2413793103448276, + "security_study": 0.23703703703703705, + "sociology": 0.24336283185840707, + "sports_science": 0.2545454545454545, + "traditional_chinese_medicine": 0.24864864864864866, + "virology": 0.2603550295857988, + "world_history": 0.2795031055900621, + "world_religions": 0.25 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.15151515151515152 + }, + "prompt_2": { + "accuracy": 0.09090909090909091 + }, + "prompt_3": { + "accuracy": 0.09090909090909091 + }, + "prompt_4": { + "accuracy": 0.12121212121212122 + }, + "prompt_5": { + "accuracy": 0.12121212121212122 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.125 + }, + "prompt_2": { + "accuracy": 0.12272727272727273 + }, + "prompt_3": { + "accuracy": 0.1409090909090909 + }, + "prompt_4": { + "accuracy": 0.2818181818181818 + }, + "prompt_5": { + "accuracy": 0.29772727272727273 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3376271186440678 + }, + "prompt_2": { + "accuracy": 0.33220338983050846 + }, + "prompt_3": { + "accuracy": 0.33661016949152545 + }, + "prompt_4": { + "accuracy": 0.3494915254237288 + }, + "prompt_5": { + "accuracy": 0.3403389830508475 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.34741959611069556 + }, + "prompt_2": { + "accuracy": 0.3257292445774121 + }, + "prompt_3": { + "accuracy": 0.3182498130142109 + }, + "prompt_4": { + "accuracy": 0.35789080029917725 + }, + "prompt_5": { + "accuracy": 0.3365744203440538 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5428711415972562 + }, + "prompt_2": { + "accuracy": 0.5433610975012249 + }, + "prompt_3": { + "accuracy": 0.5276825085742283 + }, + "prompt_4": { + "accuracy": 0.5360117589416953 + }, + "prompt_5": { + "accuracy": 0.540421362077413 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.1834738916315244, + "rouge2": 0.05665372281442321, + "rougeL": 0.14171060377368988, + "avg_rouge": 0.1272794060732125 + }, + "prompt_2": { + "rouge1": 0.2032867878554549, + "rouge2": 0.06188850672720403, + "rougeL": 0.1558551917670449, + "avg_rouge": 0.1403434954499013 + }, + "prompt_3": { + "rouge1": 0.17331038811668037, + "rouge2": 0.05045380981171919, + "rougeL": 0.1330873797403758, + "avg_rouge": 0.11895052588959178 + }, + "prompt_4": { + "rouge1": 0.18607934494101233, + "rouge2": 0.056130347817802406, + "rougeL": 0.14212340633137646, + "avg_rouge": 0.12811103303006374 + }, + "prompt_5": { + "rouge1": 0.15816617371583883, + "rouge2": 0.04816674239806521, + "rougeL": 0.12182373964697867, + "avg_rouge": 0.10938555192029424 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.1617876971461384, + "rouge2": 0.03844699667915495, + "rougeL": 0.12295372467530369, + "avg_rouge": 0.10772947283353235 + }, + "prompt_2": { + "rouge1": 0.09796881476593486, + "rouge2": 0.022814276789812054, + "rougeL": 0.07132365775852015, + "avg_rouge": 0.0640355831047557 + }, + "prompt_3": { + "rouge1": 0.10265966264632512, + "rouge2": 0.025227346610254563, + "rougeL": 0.07437479368269946, + "avg_rouge": 0.06742060097975971 + }, + "prompt_4": { + "rouge1": 0.16375650195715208, + "rouge2": 0.04081621800175042, + "rougeL": 0.12385082995690865, + "avg_rouge": 0.1094745166386037 + }, + "prompt_5": { + "rouge1": 0.14604445803222393, + "rouge2": 0.03870558262534011, + "rougeL": 0.1124706671846648, + "avg_rouge": 0.09907356928074294 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.856651376146789 + }, + "prompt_2": { + "accuracy": 0.8715596330275229 + }, + "prompt_3": { + "accuracy": 0.8532110091743119 + }, + "prompt_4": { + "accuracy": 0.8669724770642202 + }, + "prompt_5": { + "accuracy": 0.8474770642201835 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5206136145733461 + }, + "prompt_2": { + "accuracy": 0.5417066155321189 + }, + "prompt_3": { + "accuracy": 0.5714285714285714 + }, + "prompt_4": { + "accuracy": 0.5848513902205177 + }, + "prompt_5": { + "accuracy": 0.5263662511984659 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5815 + }, + "prompt_2": { + "accuracy": 0.5745 + }, + "prompt_3": { + "accuracy": 0.5585 + }, + "prompt_4": { + "accuracy": 0.5395 + }, + "prompt_5": { + "accuracy": 0.549 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4275 + }, + "prompt_2": { + "accuracy": 0.421 + }, + "prompt_3": { + "accuracy": 0.382 + }, + "prompt_4": { + "accuracy": 0.4205 + }, + "prompt_5": { + "accuracy": 0.418 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.532 + }, + "prompt_2": { + "accuracy": 0.5165 + }, + "prompt_3": { + "accuracy": 0.5175 + }, + "prompt_4": { + "accuracy": 0.521 + }, + "prompt_5": { + "accuracy": 0.518 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49295774647887325 + }, + "prompt_2": { + "accuracy": 0.43661971830985913 + }, + "prompt_3": { + "accuracy": 0.4225352112676056 + }, + "prompt_4": { + "accuracy": 0.4225352112676056 + }, + "prompt_5": { + "accuracy": 0.43661971830985913 + } }, - "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "rte": { + "prompt_1": { + "accuracy": 0.5379061371841155 + }, + "prompt_2": { + "accuracy": 0.5451263537906137 + }, + "prompt_3": { + "accuracy": 0.5631768953068592 + }, + "prompt_4": { + "accuracy": 0.5342960288808665 + }, + "prompt_5": { + "accuracy": 0.555956678700361 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5857843137254902 + }, + "prompt_2": { + "accuracy": 0.5661764705882353 + }, + "prompt_3": { + "accuracy": 0.6225490196078431 + }, + "prompt_4": { + "accuracy": 0.553921568627451 + }, + "prompt_5": { + "accuracy": 0.5049019607843137 + } } }, "five_shot": { "cross_mmlu": { - "prompt_1": -1 + "prompt_1": { + "overall_acc": 0.26571428571428574, + "language_acc": { + "English": 0.2866666666666667, + "Vietnamese": 0.29333333333333333, + "Malay": 0.24, + "Indonesian": 0.26, + "Spanish": 0.2733333333333333, + "Chinese": 0.26666666666666666, + "Filipino": 0.24 + }, + "consistency_score_2": 0.5047619047619046, + "consistency_score_3": 0.32190476190476186, + "consistency_score_4": 0.21771428571428572, + "consistency_score_5": 0.14888888888888888, + "consistency_score_6": 0.10095238095238095, + "consistency_score_7": 0.06666666666666667, + "detailed_consistency_score": { + "2_combine": { + "English,Vietnamese": 0.42, + "English,Malay": 0.5333333333333333, + "English,Indonesian": 0.5733333333333334, + "English,Spanish": 0.58, + "English,Chinese": 0.5266666666666666, + "English,Filipino": 0.5133333333333333, + "Vietnamese,Malay": 0.4866666666666667, + "Vietnamese,Indonesian": 0.44666666666666666, + "Vietnamese,Spanish": 0.49333333333333335, + "Vietnamese,Chinese": 0.3933333333333333, + "Vietnamese,Filipino": 0.47333333333333333, + "Malay,Indonesian": 0.5266666666666666, + "Malay,Spanish": 0.49333333333333335, + "Malay,Chinese": 0.41333333333333333, + "Malay,Filipino": 0.4866666666666667, + "Indonesian,Spanish": 0.6133333333333333, + "Indonesian,Chinese": 0.54, + "Indonesian,Filipino": 0.4866666666666667, + "Spanish,Chinese": 0.5466666666666666, + "Spanish,Filipino": 0.5533333333333333, + "Chinese,Filipino": 0.5 + }, + "3_combine": { + "English,Vietnamese,Malay": 0.28, + "English,Vietnamese,Indonesian": 0.29333333333333333, + "English,Vietnamese,Spanish": 0.31333333333333335, + "English,Vietnamese,Chinese": 0.25333333333333335, + "English,Vietnamese,Filipino": 0.2733333333333333, + "English,Malay,Indonesian": 0.37333333333333335, + "English,Malay,Spanish": 0.35333333333333333, + "English,Malay,Chinese": 0.32666666666666666, + "English,Malay,Filipino": 0.31333333333333335, + "English,Indonesian,Spanish": 0.4266666666666667, + "English,Indonesian,Chinese": 0.36666666666666664, + "English,Indonesian,Filipino": 0.3466666666666667, + "English,Spanish,Chinese": 0.36666666666666664, + "English,Spanish,Filipino": 0.36666666666666664, + "English,Chinese,Filipino": 0.3466666666666667, + "Vietnamese,Malay,Indonesian": 0.29333333333333333, + "Vietnamese,Malay,Spanish": 0.29333333333333333, + "Vietnamese,Malay,Chinese": 0.22, + "Vietnamese,Malay,Filipino": 0.28, + "Vietnamese,Indonesian,Spanish": 0.35333333333333333, + "Vietnamese,Indonesian,Chinese": 0.28, + "Vietnamese,Indonesian,Filipino": 0.2866666666666667, + "Vietnamese,Spanish,Chinese": 0.29333333333333333, + "Vietnamese,Spanish,Filipino": 0.3333333333333333, + "Vietnamese,Chinese,Filipino": 0.26, + "Malay,Indonesian,Spanish": 0.36666666666666664, + "Malay,Indonesian,Chinese": 0.31333333333333335, + "Malay,Indonesian,Filipino": 0.32, + "Malay,Spanish,Chinese": 0.29333333333333333, + "Malay,Spanish,Filipino": 0.32, + "Malay,Chinese,Filipino": 0.29333333333333333, + "Indonesian,Spanish,Chinese": 0.41333333333333333, + "Indonesian,Spanish,Filipino": 0.38666666666666666, + "Indonesian,Chinese,Filipino": 0.32, + "Spanish,Chinese,Filipino": 0.3466666666666667 + }, + "4_combine": { + "English,Vietnamese,Malay,Indonesian": 0.20666666666666667, + "English,Vietnamese,Malay,Spanish": 0.19333333333333333, + "English,Vietnamese,Malay,Chinese": 0.16, + "English,Vietnamese,Malay,Filipino": 0.17333333333333334, + "English,Vietnamese,Indonesian,Spanish": 0.24666666666666667, + "English,Vietnamese,Indonesian,Chinese": 0.18666666666666668, + "English,Vietnamese,Indonesian,Filipino": 0.2, + "English,Vietnamese,Spanish,Chinese": 0.19333333333333333, + "English,Vietnamese,Spanish,Filipino": 0.23333333333333334, + "English,Vietnamese,Chinese,Filipino": 0.18, + "English,Malay,Indonesian,Spanish": 0.28, + "English,Malay,Indonesian,Chinese": 0.24, + "English,Malay,Indonesian,Filipino": 0.22, + "English,Malay,Spanish,Chinese": 0.23333333333333334, + "English,Malay,Spanish,Filipino": 0.22, + "English,Malay,Chinese,Filipino": 0.22666666666666666, + "English,Indonesian,Spanish,Chinese": 0.3, + "English,Indonesian,Spanish,Filipino": 0.2866666666666667, + "English,Indonesian,Chinese,Filipino": 0.24, + "English,Spanish,Chinese,Filipino": 0.26, + "Vietnamese,Malay,Indonesian,Spanish": 0.22, + "Vietnamese,Malay,Indonesian,Chinese": 0.16666666666666666, + "Vietnamese,Malay,Indonesian,Filipino": 0.19333333333333333, + "Vietnamese,Malay,Spanish,Chinese": 0.16, + "Vietnamese,Malay,Spanish,Filipino": 0.21333333333333335, + "Vietnamese,Malay,Chinese,Filipino": 0.16, + "Vietnamese,Indonesian,Spanish,Chinese": 0.24, + "Vietnamese,Indonesian,Spanish,Filipino": 0.25333333333333335, + "Vietnamese,Indonesian,Chinese,Filipino": 0.18, + "Vietnamese,Spanish,Chinese,Filipino": 0.20666666666666667, + "Malay,Indonesian,Spanish,Chinese": 0.23333333333333334, + "Malay,Indonesian,Spanish,Filipino": 0.24666666666666667, + "Malay,Indonesian,Chinese,Filipino": 0.20666666666666667, + "Malay,Spanish,Chinese,Filipino": 0.20666666666666667, + "Indonesian,Spanish,Chinese,Filipino": 0.25333333333333335 + }, + "5_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish": 0.16, + "English,Vietnamese,Malay,Indonesian,Chinese": 0.12, + "English,Vietnamese,Malay,Indonesian,Filipino": 0.12666666666666668, + "English,Vietnamese,Malay,Spanish,Chinese": 0.11333333333333333, + "English,Vietnamese,Malay,Spanish,Filipino": 0.14, + "English,Vietnamese,Malay,Chinese,Filipino": 0.11333333333333333, + "English,Vietnamese,Indonesian,Spanish,Chinese": 0.16, + "English,Vietnamese,Indonesian,Spanish,Filipino": 0.18666666666666668, + "English,Vietnamese,Indonesian,Chinese,Filipino": 0.13333333333333333, + "English,Vietnamese,Spanish,Chinese,Filipino": 0.15333333333333332, + "English,Malay,Indonesian,Spanish,Chinese": 0.18666666666666668, + "English,Malay,Indonesian,Spanish,Filipino": 0.17333333333333334, + "English,Malay,Indonesian,Chinese,Filipino": 0.15333333333333332, + "English,Malay,Spanish,Chinese,Filipino": 0.16, + "English,Indonesian,Spanish,Chinese,Filipino": 0.2, + "Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.12666666666666668, + "Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.16666666666666666, + "Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.11333333333333333, + "Vietnamese,Malay,Spanish,Chinese,Filipino": 0.12666666666666668, + "Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.16, + "Malay,Indonesian,Spanish,Chinese,Filipino": 0.15333333333333332 + }, + "6_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese": 0.09333333333333334, + "English,Vietnamese,Malay,Indonesian,Spanish,Filipino": 0.11333333333333333, + "English,Vietnamese,Malay,Indonesian,Chinese,Filipino": 0.08, + "English,Vietnamese,Malay,Spanish,Chinese,Filipino": 0.08666666666666667, + "English,Vietnamese,Indonesian,Spanish,Chinese,Filipino": 0.12, + "English,Malay,Indonesian,Spanish,Chinese,Filipino": 0.12, + "Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.09333333333333334 + }, + "7_combine": { + "English,Vietnamese,Malay,Indonesian,Spanish,Chinese,Filipino": 0.06666666666666667 + } + }, + "AC3_2": 0.3481546882825545, + "AC3_3": 0.29112294507664377, + "AC3_4": 0.23933130694137442, + "AC3_5": 0.19084226641645075, + "AC3_6": 0.14631539884692327, + "AC3_7": 0.10659025784758747 + } }, "cross_logiqa": { "prompt_1": -1 }, "sg_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.20388349514563106 + } }, "cn_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.23809523809523808 + } }, "us_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.3364485981308411 + } }, "ph_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.3, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.2, + "history": 0.2, + "literature": 0.3, + "politics": 0.5, + "culture": 0.2, + "film": 0.4, + "law": 0.3, + "geography": 0.3 + } + } }, "sing2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.05570283317972737 + } }, "indommlu": { "prompt_1": -1 }, "flores_ind2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.01389996930639142 + } }, "flores_vie2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.0068011457212340184 + } }, "flores_zho2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.02542052199358213 + } }, "flores_zsm2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.011344690364864867 + } }, "mmlu": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.3208868144690782 + } }, "mmlu_full": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.2900965319985699, + "category_acc": { + "high_school_european_history": 0.3048780487804878, + "business_ethics": 0.3434343434343434, + "clinical_knowledge": 0.25757575757575757, + "medical_genetics": 0.3333333333333333, + "high_school_us_history": 0.32019704433497537, + "high_school_physics": 0.24, + "high_school_world_history": 0.3474576271186441, + "virology": 0.2909090909090909, + "high_school_microeconomics": 0.26582278481012656, + "econometrics": 0.2831858407079646, + "college_computer_science": 0.35353535353535354, + "high_school_biology": 0.2524271844660194, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.30604982206405695, + "philosophy": 0.23870967741935484, + "professional_medicine": 0.26199261992619927, + "nutrition": 0.32786885245901637, + "global_facts": 0.21212121212121213, + "machine_learning": 0.22522522522522523, + "security_studies": 0.27049180327868855, + "public_relations": 0.29357798165137616, + "professional_psychology": 0.32733224222585927, + "prehistory": 0.2724458204334365, + "anatomy": 0.19402985074626866, + "human_sexuality": 0.2923076923076923, + "college_medicine": 0.29651162790697677, + "high_school_government_and_politics": 0.3541666666666667, + "college_chemistry": 0.24242424242424243, + "logical_fallacies": 0.35802469135802467, + "high_school_geography": 0.3248730964467005, + "elementary_mathematics": 0.23872679045092837, + "human_aging": 0.34684684684684686, + "college_mathematics": 0.25252525252525254, + "high_school_psychology": 0.3069852941176471, + "formal_logic": 0.304, + "high_school_statistics": 0.20930232558139536, + "international_law": 0.36666666666666664, + "high_school_mathematics": 0.2825278810408922, + "high_school_computer_science": 0.29292929292929293, + "conceptual_physics": 0.28205128205128205, + "miscellaneous": 0.3145780051150895, + "high_school_chemistry": 0.2079207920792079, + "marketing": 0.38197424892703863, + "professional_law": 0.28897586431833006, + "management": 0.29411764705882354, + "college_physics": 0.19801980198019803, + "jurisprudence": 0.35514018691588783, + "world_religions": 0.34705882352941175, + "sociology": 0.345, + "us_foreign_policy": 0.3333333333333333, + "high_school_macroeconomics": 0.29048843187660667, + "computer_security": 0.2828282828282828, + "moral_scenarios": 0.22706935123042504, + "moral_disputes": 0.3333333333333333, + "electrical_engineering": 0.3055555555555556, + "astronomy": 0.2582781456953642, + "college_biology": 0.34265734265734266 + } + } }, "c_eval": { "prompt_1": -1 }, "c_eval_full": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.2559153175591532, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.20833333333333334, + "computer_architecture": 0.15384615384615385, + "college_programming": 0.2619047619047619, + "college_physics": 0.25, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.08333333333333333, + "high_school_chemistry": 0.25, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.2692307692307692, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.12, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.26666666666666666, + "business_administration": 0.2894736842105263, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.3793103448275862, + "education_science": 0.23529411764705882, + "teacher_qualification": 0.3673469387755102, + "high_school_politics": 0.375, + "high_school_geography": 0.25, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.3333333333333333, + "law": 0.3793103448275862, + "chinese_language_and_literature": 0.14285714285714285, + "art_studies": 0.2631578947368421, + "professional_tour_guide": 0.20588235294117646, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.16666666666666666, + "high_school_history": 0.24, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.3076923076923077, + "sports_science": 0.08333333333333333, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.16666666666666666, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.23529411764705882, + "accountant": 0.2037037037037037, + "fire_engineer": 0.1388888888888889, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.16666666666666666, + "physician": 0.18518518518518517 + } + } }, "cmmlu": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.26523297491039427 + } }, "cmmlu_full": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.2635986876187187, + "category_acc": { + "agronomy": 0.26627218934911245, + "anatomy": 0.22972972972972974, + "ancient_chinese": 0.23780487804878048, + "arts": 0.2375, + "astronomy": 0.23636363636363636, + "business_ethics": 0.2966507177033493, + "chinese_civil_service_exam": 0.29375, + "chinese_driving_rule": 0.2900763358778626, + "chinese_food_culture": 0.16176470588235295, + "chinese_foreign_policy": 0.29906542056074764, + "chinese_history": 0.24148606811145512, + "chinese_literature": 0.30392156862745096, + "chinese_teacher_qualification": 0.24022346368715083, + "clinical_knowledge": 0.20675105485232068, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.27102803738317754, + "college_engineering_hydrology": 0.2641509433962264, + "college_law": 0.2962962962962963, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.22641509433962265, + "college_medicine": 0.2600732600732601, + "computer_science": 0.3235294117647059, + "computer_security": 0.34502923976608185, + "conceptual_physics": 0.2585034013605442, + "construction_project_management": 0.22302158273381295, + "economics": 0.18238993710691823, + "education": 0.2822085889570552, + "electrical_engineering": 0.3081395348837209, + "elementary_chinese": 0.2777777777777778, + "elementary_commonsense": 0.26262626262626265, + "elementary_information_and_technology": 0.28991596638655465, + "elementary_mathematics": 0.26956521739130435, + "ethnology": 0.21481481481481482, + "food_science": 0.2727272727272727, + "genetics": 0.32386363636363635, + "global_facts": 0.2483221476510067, + "high_school_biology": 0.26627218934911245, + "high_school_chemistry": 0.29545454545454547, + "high_school_geography": 0.2288135593220339, + "high_school_mathematics": 0.21951219512195122, + "high_school_physics": 0.2636363636363636, + "high_school_politics": 0.2937062937062937, + "human_sexuality": 0.2857142857142857, + "international_law": 0.24864864864864866, + "journalism": 0.3023255813953488, + "jurisprudence": 0.2725060827250608, + "legal_and_moral_basis": 0.2803738317757009, + "logical": 0.24390243902439024, + "machine_learning": 0.27049180327868855, + "management": 0.2904761904761905, + "marketing": 0.28888888888888886, + "marxist_theory": 0.2804232804232804, + "modern_chinese": 0.1896551724137931, + "nutrition": 0.2, + "philosophy": 0.2761904761904762, + "professional_accounting": 0.28, + "professional_law": 0.23222748815165878, + "professional_medicine": 0.2393617021276596, + "professional_psychology": 0.23706896551724138, + "public_relations": 0.3218390804597701, + "security_study": 0.25925925925925924, + "sociology": 0.29646017699115046, + "sports_science": 0.2727272727272727, + "traditional_chinese_medicine": 0.2648648648648649, + "virology": 0.3076923076923077, + "world_history": 0.22981366459627328, + "world_religions": 0.2 + } + } }, "zbench": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.2727272727272727 + } }, "ind_emotion": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.19772727272727272 + } }, "ocnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.32372881355932204 + } }, "c3": { "prompt_1": -1 @@ -12812,34 +112899,60 @@ "prompt_1": -1 }, "samsum": { - "prompt_1": -1 + "prompt_1": { + "rouge1": 0.08369630228209772, + "rouge2": 0.020197250670869096, + "rougeL": 0.06846966598171371, + "avg_rouge": 0.05745440631156018 + } }, "dialogsum": { - "prompt_1": -1 + "prompt_1": { + "rouge1": 0.11491958059343398, + "rouge2": 0.026661245006470855, + "rougeL": 0.09373053538401332, + "avg_rouge": 0.07843712032797272 + } }, "sst2": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.5034403669724771 + } }, "cola": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.37871524448705657 + } }, "qqp": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.597 + } }, "mnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.3865 + } }, "qnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.492 + } }, "wnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.5211267605633803 + } }, "rte": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.51985559566787 + } }, "mrpc": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.44607843137254904 + } } } }, @@ -12848,235 +112961,3194 @@ "model_link": "https://huggingface.co/google/gemma-2b", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.35714285714285715, + "language_acc": { + "Vietnamese": 0.36, + "English": 0.36, + "Malay": 0.35333333333333333, + "Chinese": 0.34, + "Filipino": 0.38666666666666666, + "Indonesian": 0.36, + "Spanish": 0.34 + }, + "consistency_score_2": 0.5339682539682539, + "consistency_score_3": 0.3533333333333334, + "consistency_score_4": 0.2579047619047619, + "consistency_score_5": 0.20063492063492067, + "consistency_score_6": 0.1638095238095238, + "consistency_score_7": 0.14, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.42, + "Vietnamese,Malay": 0.48, + "Vietnamese,Chinese": 0.4266666666666667, + "Vietnamese,Filipino": 0.5333333333333333, + "Vietnamese,Indonesian": 0.5466666666666666, + "Vietnamese,Spanish": 0.5066666666666667, + "English,Malay": 0.5333333333333333, + "English,Chinese": 0.52, + "English,Filipino": 0.54, + "English,Indonesian": 0.5066666666666667, + "English,Spanish": 0.6066666666666667, + "Malay,Chinese": 0.5333333333333333, + "Malay,Filipino": 0.64, + "Malay,Indonesian": 0.6133333333333333, + "Malay,Spanish": 0.5533333333333333, + "Chinese,Filipino": 0.4533333333333333, + "Chinese,Indonesian": 0.5066666666666667, + "Chinese,Spanish": 0.54, + "Filipino,Indonesian": 0.6066666666666667, + "Filipino,Spanish": 0.64, + "Indonesian,Spanish": 0.5066666666666667 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.28, + "Vietnamese,English,Chinese": 0.26, + "Vietnamese,English,Filipino": 0.29333333333333333, + "Vietnamese,English,Indonesian": 0.29333333333333333, + "Vietnamese,English,Spanish": 0.31333333333333335, + "Vietnamese,Malay,Chinese": 0.2866666666666667, + "Vietnamese,Malay,Filipino": 0.37333333333333335, + "Vietnamese,Malay,Indonesian": 0.37333333333333335, + "Vietnamese,Malay,Spanish": 0.3333333333333333, + "Vietnamese,Chinese,Filipino": 0.2866666666666667, + "Vietnamese,Chinese,Indonesian": 0.32, + "Vietnamese,Chinese,Spanish": 0.30666666666666664, + "Vietnamese,Filipino,Indonesian": 0.38, + "Vietnamese,Filipino,Spanish": 0.38666666666666666, + "Vietnamese,Indonesian,Spanish": 0.34, + "English,Malay,Chinese": 0.37333333333333335, + "English,Malay,Filipino": 0.38666666666666666, + "English,Malay,Indonesian": 0.37333333333333335, + "English,Malay,Spanish": 0.3933333333333333, + "English,Chinese,Filipino": 0.31333333333333335, + "English,Chinese,Indonesian": 0.3, + "English,Chinese,Spanish": 0.38666666666666666, + "English,Filipino,Indonesian": 0.36666666666666664, + "English,Filipino,Spanish": 0.4266666666666667, + "English,Indonesian,Spanish": 0.34, + "Malay,Chinese,Filipino": 0.36666666666666664, + "Malay,Chinese,Indonesian": 0.36666666666666664, + "Malay,Chinese,Spanish": 0.37333333333333335, + "Malay,Filipino,Indonesian": 0.4666666666666667, + "Malay,Filipino,Spanish": 0.44666666666666666, + "Malay,Indonesian,Spanish": 0.3933333333333333, + "Chinese,Filipino,Indonesian": 0.3333333333333333, + "Chinese,Filipino,Spanish": 0.36666666666666664, + "Chinese,Indonesian,Spanish": 0.3466666666666667, + "Filipino,Indonesian,Spanish": 0.42 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.19333333333333333, + "Vietnamese,English,Malay,Filipino": 0.22, + "Vietnamese,English,Malay,Indonesian": 0.23333333333333334, + "Vietnamese,English,Malay,Spanish": 0.22666666666666666, + "Vietnamese,English,Chinese,Filipino": 0.19333333333333333, + "Vietnamese,English,Chinese,Indonesian": 0.2, + "Vietnamese,English,Chinese,Spanish": 0.20666666666666667, + "Vietnamese,English,Filipino,Indonesian": 0.22666666666666666, + "Vietnamese,English,Filipino,Spanish": 0.24666666666666667, + "Vietnamese,English,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Malay,Chinese,Filipino": 0.23333333333333334, + "Vietnamese,Malay,Chinese,Indonesian": 0.25333333333333335, + "Vietnamese,Malay,Chinese,Spanish": 0.22, + "Vietnamese,Malay,Filipino,Indonesian": 0.29333333333333333, + "Vietnamese,Malay,Filipino,Spanish": 0.2866666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.26666666666666666, + "Vietnamese,Chinese,Filipino,Indonesian": 0.24, + "Vietnamese,Chinese,Filipino,Spanish": 0.24666666666666667, + "Vietnamese,Chinese,Indonesian,Spanish": 0.24, + "Vietnamese,Filipino,Indonesian,Spanish": 0.3, + "English,Malay,Chinese,Filipino": 0.26, + "English,Malay,Chinese,Indonesian": 0.26, + "English,Malay,Chinese,Spanish": 0.29333333333333333, + "English,Malay,Filipino,Indonesian": 0.31333333333333335, + "English,Malay,Filipino,Spanish": 0.31333333333333335, + "English,Malay,Indonesian,Spanish": 0.2866666666666667, + "English,Chinese,Filipino,Indonesian": 0.22666666666666666, + "English,Chinese,Filipino,Spanish": 0.28, + "English,Chinese,Indonesian,Spanish": 0.24666666666666667, + "English,Filipino,Indonesian,Spanish": 0.29333333333333333, + "Malay,Chinese,Filipino,Indonesian": 0.3, + "Malay,Chinese,Filipino,Spanish": 0.30666666666666664, + "Malay,Chinese,Indonesian,Spanish": 0.2866666666666667, + "Malay,Filipino,Indonesian,Spanish": 0.34, + "Chinese,Filipino,Indonesian,Spanish": 0.28 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.16, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.17333333333333334, + "Vietnamese,English,Malay,Chinese,Spanish": 0.16, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.19333333333333333, + "Vietnamese,English,Malay,Filipino,Spanish": 0.19333333333333333, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.16, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.18, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.16, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.21333333333333335, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.2, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.2, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.24, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.21333333333333335, + "English,Malay,Chinese,Filipino,Indonesian": 0.21333333333333335, + "English,Malay,Chinese,Filipino,Spanish": 0.23333333333333334, + "English,Malay,Chinese,Indonesian,Spanish": 0.22, + "English,Malay,Filipino,Indonesian,Spanish": 0.25333333333333335, + "English,Chinese,Filipino,Indonesian,Spanish": 0.20666666666666667, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.25333333333333335 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.14666666666666667, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.14666666666666667, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.14666666666666667, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.17333333333333334, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.15333333333333332, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.18666666666666668, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14 + } + }, + "AC3_2": 0.4280116035959201, + "AC3_3": 0.355227881987535, + "AC3_4": 0.2995177630800899, + "AC3_5": 0.2569314577960376, + "AC3_6": 0.22460172364453324, + "AC3_7": 0.20114942524689522 + }, + "prompt_2": { + "overall_acc": 0.3695238095238095, + "language_acc": { + "Vietnamese": 0.37333333333333335, + "English": 0.4666666666666667, + "Malay": 0.3333333333333333, + "Chinese": 0.3466666666666667, + "Filipino": 0.32666666666666666, + "Indonesian": 0.34, + "Spanish": 0.4 + }, + "consistency_score_2": 0.48920634920634914, + "consistency_score_3": 0.30133333333333334, + "consistency_score_4": 0.20552380952380955, + "consistency_score_5": 0.14761904761904762, + "consistency_score_6": 0.10857142857142858, + "consistency_score_7": 0.08, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.41333333333333333, + "Vietnamese,Malay": 0.5066666666666667, + "Vietnamese,Chinese": 0.4, + "Vietnamese,Filipino": 0.4866666666666667, + "Vietnamese,Indonesian": 0.4266666666666667, + "Vietnamese,Spanish": 0.4533333333333333, + "English,Malay": 0.52, + "English,Chinese": 0.4533333333333333, + "English,Filipino": 0.5133333333333333, + "English,Indonesian": 0.4533333333333333, + "English,Spanish": 0.5866666666666667, + "Malay,Chinese": 0.4866666666666667, + "Malay,Filipino": 0.5866666666666667, + "Malay,Indonesian": 0.6066666666666667, + "Malay,Spanish": 0.5133333333333333, + "Chinese,Filipino": 0.4533333333333333, + "Chinese,Indonesian": 0.4266666666666667, + "Chinese,Spanish": 0.5066666666666667, + "Filipino,Indonesian": 0.49333333333333335, + "Filipino,Spanish": 0.5066666666666667, + "Indonesian,Spanish": 0.48 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.2866666666666667, + "Vietnamese,English,Chinese": 0.21333333333333335, + "Vietnamese,English,Filipino": 0.2866666666666667, + "Vietnamese,English,Indonesian": 0.26666666666666666, + "Vietnamese,English,Spanish": 0.29333333333333333, + "Vietnamese,Malay,Chinese": 0.26666666666666666, + "Vietnamese,Malay,Filipino": 0.3466666666666667, + "Vietnamese,Malay,Indonesian": 0.32, + "Vietnamese,Malay,Spanish": 0.31333333333333335, + "Vietnamese,Chinese,Filipino": 0.26, + "Vietnamese,Chinese,Indonesian": 0.23333333333333334, + "Vietnamese,Chinese,Spanish": 0.25333333333333335, + "Vietnamese,Filipino,Indonesian": 0.28, + "Vietnamese,Filipino,Spanish": 0.3, + "Vietnamese,Indonesian,Spanish": 0.2733333333333333, + "English,Malay,Chinese": 0.2866666666666667, + "English,Malay,Filipino": 0.35333333333333333, + "English,Malay,Indonesian": 0.34, + "English,Malay,Spanish": 0.3466666666666667, + "English,Chinese,Filipino": 0.26666666666666666, + "English,Chinese,Indonesian": 0.26, + "English,Chinese,Spanish": 0.30666666666666664, + "English,Filipino,Indonesian": 0.2866666666666667, + "English,Filipino,Spanish": 0.35333333333333333, + "English,Indonesian,Spanish": 0.32666666666666666, + "Malay,Chinese,Filipino": 0.32, + "Malay,Chinese,Indonesian": 0.32, + "Malay,Chinese,Spanish": 0.3333333333333333, + "Malay,Filipino,Indonesian": 0.37333333333333335, + "Malay,Filipino,Spanish": 0.35333333333333333, + "Malay,Indonesian,Spanish": 0.3466666666666667, + "Chinese,Filipino,Indonesian": 0.2733333333333333, + "Chinese,Filipino,Spanish": 0.29333333333333333, + "Chinese,Indonesian,Spanish": 0.3, + "Filipino,Indonesian,Spanish": 0.31333333333333335 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.16666666666666666, + "Vietnamese,English,Malay,Filipino": 0.21333333333333335, + "Vietnamese,English,Malay,Indonesian": 0.22, + "Vietnamese,English,Malay,Spanish": 0.21333333333333335, + "Vietnamese,English,Chinese,Filipino": 0.16, + "Vietnamese,English,Chinese,Indonesian": 0.15333333333333332, + "Vietnamese,English,Chinese,Spanish": 0.16666666666666666, + "Vietnamese,English,Filipino,Indonesian": 0.19333333333333333, + "Vietnamese,English,Filipino,Spanish": 0.20666666666666667, + "Vietnamese,English,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Malay,Chinese,Filipino": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Indonesian": 0.18666666666666668, + "Vietnamese,Malay,Chinese,Spanish": 0.19333333333333333, + "Vietnamese,Malay,Filipino,Indonesian": 0.22, + "Vietnamese,Malay,Filipino,Spanish": 0.23333333333333334, + "Vietnamese,Malay,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Chinese,Filipino,Indonesian": 0.16666666666666666, + "Vietnamese,Chinese,Filipino,Spanish": 0.18, + "Vietnamese,Chinese,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,Filipino,Indonesian,Spanish": 0.2, + "English,Malay,Chinese,Filipino": 0.20666666666666667, + "English,Malay,Chinese,Indonesian": 0.20666666666666667, + "English,Malay,Chinese,Spanish": 0.22, + "English,Malay,Filipino,Indonesian": 0.23333333333333334, + "English,Malay,Filipino,Spanish": 0.25333333333333335, + "English,Malay,Indonesian,Spanish": 0.24666666666666667, + "English,Chinese,Filipino,Indonesian": 0.16666666666666666, + "English,Chinese,Filipino,Spanish": 0.19333333333333333, + "English,Chinese,Indonesian,Spanish": 0.20666666666666667, + "English,Filipino,Indonesian,Spanish": 0.22666666666666666, + "Malay,Chinese,Filipino,Indonesian": 0.22666666666666666, + "Malay,Chinese,Filipino,Spanish": 0.23333333333333334, + "Malay,Chinese,Indonesian,Spanish": 0.24666666666666667, + "Malay,Filipino,Indonesian,Spanish": 0.24, + "Chinese,Filipino,Indonesian,Spanish": 0.21333333333333335 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.12666666666666668, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.13333333333333333, + "Vietnamese,English,Malay,Chinese,Spanish": 0.13333333333333333, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.16, + "Vietnamese,English,Malay,Filipino,Spanish": 0.16, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.16666666666666666, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.10666666666666667, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.12, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.13333333333333333, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.15333333333333332, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.13333333333333333, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.14666666666666667, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.14666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.16, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.13333333333333333, + "English,Malay,Chinese,Filipino,Indonesian": 0.14666666666666667, + "English,Malay,Chinese,Filipino,Spanish": 0.16, + "English,Malay,Chinese,Indonesian,Spanish": 0.17333333333333334, + "English,Malay,Filipino,Indonesian,Spanish": 0.18, + "English,Chinese,Filipino,Indonesian,Spanish": 0.14666666666666667, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.18 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.09333333333333334, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.1, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.11333333333333333, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.12666666666666668, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.09333333333333334, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.10666666666666667, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.12666666666666668 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.08 + } + }, + "AC3_2": 0.42102490973004, + "AC3_3": 0.33196289981801613, + "AC3_4": 0.2641379201105361, + "AC3_5": 0.21096202749581938, + "AC3_6": 0.1678315309836778, + "AC3_7": 0.13152542369955472 + }, + "prompt_3": { + "overall_acc": 0.36095238095238097, + "language_acc": { + "Vietnamese": 0.36, + "English": 0.46, + "Malay": 0.32666666666666666, + "Chinese": 0.34, + "Filipino": 0.32, + "Indonesian": 0.36, + "Spanish": 0.36 + }, + "consistency_score_2": 0.4714285714285714, + "consistency_score_3": 0.2857142857142857, + "consistency_score_4": 0.1952380952380952, + "consistency_score_5": 0.14285714285714285, + "consistency_score_6": 0.10952380952380951, + "consistency_score_7": 0.08666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.41333333333333333, + "Vietnamese,Malay": 0.54, + "Vietnamese,Chinese": 0.36, + "Vietnamese,Filipino": 0.5133333333333333, + "Vietnamese,Indonesian": 0.48, + "Vietnamese,Spanish": 0.4, + "English,Malay": 0.3933333333333333, + "English,Chinese": 0.4533333333333333, + "English,Filipino": 0.4666666666666667, + "English,Indonesian": 0.47333333333333333, + "English,Spanish": 0.5866666666666667, + "Malay,Chinese": 0.46, + "Malay,Filipino": 0.56, + "Malay,Indonesian": 0.66, + "Malay,Spanish": 0.38, + "Chinese,Filipino": 0.4266666666666667, + "Chinese,Indonesian": 0.4266666666666667, + "Chinese,Spanish": 0.47333333333333333, + "Filipino,Indonesian": 0.54, + "Filipino,Spanish": 0.4666666666666667, + "Indonesian,Spanish": 0.4266666666666667 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.25333333333333335, + "Vietnamese,English,Chinese": 0.22, + "Vietnamese,English,Filipino": 0.2733333333333333, + "Vietnamese,English,Indonesian": 0.26, + "Vietnamese,English,Spanish": 0.29333333333333333, + "Vietnamese,Malay,Chinese": 0.25333333333333335, + "Vietnamese,Malay,Filipino": 0.3466666666666667, + "Vietnamese,Malay,Indonesian": 0.3933333333333333, + "Vietnamese,Malay,Spanish": 0.24666666666666667, + "Vietnamese,Chinese,Filipino": 0.22666666666666666, + "Vietnamese,Chinese,Indonesian": 0.24, + "Vietnamese,Chinese,Spanish": 0.23333333333333334, + "Vietnamese,Filipino,Indonesian": 0.34, + "Vietnamese,Filipino,Spanish": 0.2733333333333333, + "Vietnamese,Indonesian,Spanish": 0.26, + "English,Malay,Chinese": 0.24, + "English,Malay,Filipino": 0.26666666666666666, + "English,Malay,Indonesian": 0.30666666666666664, + "English,Malay,Spanish": 0.25333333333333335, + "English,Chinese,Filipino": 0.24666666666666667, + "English,Chinese,Indonesian": 0.25333333333333335, + "English,Chinese,Spanish": 0.32666666666666666, + "English,Filipino,Indonesian": 0.31333333333333335, + "English,Filipino,Spanish": 0.34, + "English,Indonesian,Spanish": 0.3, + "Malay,Chinese,Filipino": 0.30666666666666664, + "Malay,Chinese,Indonesian": 0.35333333333333333, + "Malay,Chinese,Spanish": 0.26, + "Malay,Filipino,Indonesian": 0.4266666666666667, + "Malay,Filipino,Spanish": 0.26666666666666666, + "Malay,Indonesian,Spanish": 0.30666666666666664, + "Chinese,Filipino,Indonesian": 0.29333333333333333, + "Chinese,Filipino,Spanish": 0.26666666666666666, + "Chinese,Indonesian,Spanish": 0.26, + "Filipino,Indonesian,Spanish": 0.3 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.14666666666666667, + "Vietnamese,English,Malay,Filipino": 0.19333333333333333, + "Vietnamese,English,Malay,Indonesian": 0.21333333333333335, + "Vietnamese,English,Malay,Spanish": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino": 0.15333333333333332, + "Vietnamese,English,Chinese,Indonesian": 0.14666666666666667, + "Vietnamese,English,Chinese,Spanish": 0.18, + "Vietnamese,English,Filipino,Indonesian": 0.20666666666666667, + "Vietnamese,English,Filipino,Spanish": 0.20666666666666667, + "Vietnamese,English,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Filipino": 0.17333333333333334, + "Vietnamese,Malay,Chinese,Indonesian": 0.20666666666666667, + "Vietnamese,Malay,Chinese,Spanish": 0.18, + "Vietnamese,Malay,Filipino,Indonesian": 0.29333333333333333, + "Vietnamese,Malay,Filipino,Spanish": 0.18666666666666668, + "Vietnamese,Malay,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,Chinese,Filipino,Indonesian": 0.18, + "Vietnamese,Chinese,Filipino,Spanish": 0.16666666666666666, + "Vietnamese,Chinese,Indonesian,Spanish": 0.17333333333333334, + "Vietnamese,Filipino,Indonesian,Spanish": 0.2, + "English,Malay,Chinese,Filipino": 0.16666666666666666, + "English,Malay,Chinese,Indonesian": 0.18666666666666668, + "English,Malay,Chinese,Spanish": 0.18, + "English,Malay,Filipino,Indonesian": 0.22666666666666666, + "English,Malay,Filipino,Spanish": 0.19333333333333333, + "English,Malay,Indonesian,Spanish": 0.20666666666666667, + "English,Chinese,Filipino,Indonesian": 0.18666666666666668, + "English,Chinese,Filipino,Spanish": 0.2, + "English,Chinese,Indonesian,Spanish": 0.18666666666666668, + "English,Filipino,Indonesian,Spanish": 0.22666666666666666, + "Malay,Chinese,Filipino,Indonesian": 0.25333333333333335, + "Malay,Chinese,Filipino,Spanish": 0.18, + "Malay,Chinese,Indonesian,Spanish": 0.22, + "Malay,Filipino,Indonesian,Spanish": 0.22666666666666666, + "Chinese,Filipino,Indonesian,Spanish": 0.18 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.10666666666666667, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.12, + "Vietnamese,English,Malay,Chinese,Spanish": 0.13333333333333333, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.17333333333333334, + "Vietnamese,English,Malay,Filipino,Spanish": 0.14666666666666667, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.16666666666666666, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.12, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.13333333333333333, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.12666666666666668, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.15333333333333332, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.15333333333333332, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.12666666666666668, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.16, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.17333333333333334, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.12666666666666668, + "English,Malay,Chinese,Filipino,Indonesian": 0.14666666666666667, + "English,Malay,Chinese,Filipino,Spanish": 0.13333333333333333, + "English,Malay,Chinese,Indonesian,Spanish": 0.14666666666666667, + "English,Malay,Filipino,Indonesian,Spanish": 0.16, + "English,Chinese,Filipino,Indonesian,Spanish": 0.14, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.15333333333333332 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.09333333333333334, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.1, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.11333333333333333, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.13333333333333333, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.1, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.11333333333333333, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.11333333333333333 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.08666666666666667 + } + }, + "AC3_2": 0.40885910423332594, + "AC3_3": 0.31895644850948174, + "AC3_4": 0.25340834959566494, + "AC3_5": 0.2046988927489998, + "AC3_6": 0.16805475222955843, + "AC3_7": 0.13977304961416417 + }, + "prompt_4": { + "overall_acc": 0.3761904761904762, + "language_acc": { + "Vietnamese": 0.41333333333333333, + "English": 0.41333333333333333, + "Malay": 0.3466666666666667, + "Chinese": 0.36666666666666664, + "Filipino": 0.3333333333333333, + "Indonesian": 0.38666666666666666, + "Spanish": 0.37333333333333335 + }, + "consistency_score_2": 0.5361904761904762, + "consistency_score_3": 0.35790476190476195, + "consistency_score_4": 0.26761904761904765, + "consistency_score_5": 0.21523809523809526, + "consistency_score_6": 0.18190476190476187, + "consistency_score_7": 0.16, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.4866666666666667, + "Vietnamese,Malay": 0.5933333333333334, + "Vietnamese,Chinese": 0.5133333333333333, + "Vietnamese,Filipino": 0.48, + "Vietnamese,Indonesian": 0.62, + "Vietnamese,Spanish": 0.5466666666666666, + "English,Malay": 0.4866666666666667, + "English,Chinese": 0.56, + "English,Filipino": 0.5466666666666666, + "English,Indonesian": 0.46, + "English,Spanish": 0.6266666666666667, + "Malay,Chinese": 0.5133333333333333, + "Malay,Filipino": 0.56, + "Malay,Indonesian": 0.68, + "Malay,Spanish": 0.5266666666666666, + "Chinese,Filipino": 0.4666666666666667, + "Chinese,Indonesian": 0.48, + "Chinese,Spanish": 0.58, + "Filipino,Indonesian": 0.5533333333333333, + "Filipino,Spanish": 0.5333333333333333, + "Indonesian,Spanish": 0.44666666666666666 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.34, + "Vietnamese,English,Chinese": 0.3466666666666667, + "Vietnamese,English,Filipino": 0.32, + "Vietnamese,English,Indonesian": 0.32666666666666666, + "Vietnamese,English,Spanish": 0.38, + "Vietnamese,Malay,Chinese": 0.36666666666666664, + "Vietnamese,Malay,Filipino": 0.36666666666666664, + "Vietnamese,Malay,Indonesian": 0.48, + "Vietnamese,Malay,Spanish": 0.4, + "Vietnamese,Chinese,Filipino": 0.31333333333333335, + "Vietnamese,Chinese,Indonesian": 0.36666666666666664, + "Vietnamese,Chinese,Spanish": 0.35333333333333333, + "Vietnamese,Filipino,Indonesian": 0.36, + "Vietnamese,Filipino,Spanish": 0.35333333333333333, + "Vietnamese,Indonesian,Spanish": 0.36, + "English,Malay,Chinese": 0.34, + "English,Malay,Filipino": 0.36, + "English,Malay,Indonesian": 0.35333333333333333, + "English,Malay,Spanish": 0.37333333333333335, + "English,Chinese,Filipino": 0.3466666666666667, + "English,Chinese,Indonesian": 0.30666666666666664, + "English,Chinese,Spanish": 0.4266666666666667, + "English,Filipino,Indonesian": 0.32666666666666666, + "English,Filipino,Spanish": 0.38666666666666666, + "English,Indonesian,Spanish": 0.32666666666666666, + "Malay,Chinese,Filipino": 0.32, + "Malay,Chinese,Indonesian": 0.38666666666666666, + "Malay,Chinese,Spanish": 0.36, + "Malay,Filipino,Indonesian": 0.43333333333333335, + "Malay,Filipino,Spanish": 0.38666666666666666, + "Malay,Indonesian,Spanish": 0.38, + "Chinese,Filipino,Indonesian": 0.3, + "Chinese,Filipino,Spanish": 0.3466666666666667, + "Chinese,Indonesian,Spanish": 0.3, + "Filipino,Indonesian,Spanish": 0.3333333333333333 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.26, + "Vietnamese,English,Malay,Filipino": 0.25333333333333335, + "Vietnamese,English,Malay,Indonesian": 0.2733333333333333, + "Vietnamese,English,Malay,Spanish": 0.28, + "Vietnamese,English,Chinese,Filipino": 0.25333333333333335, + "Vietnamese,English,Chinese,Indonesian": 0.24, + "Vietnamese,English,Chinese,Spanish": 0.2866666666666667, + "Vietnamese,English,Filipino,Indonesian": 0.23333333333333334, + "Vietnamese,English,Filipino,Spanish": 0.2733333333333333, + "Vietnamese,English,Indonesian,Spanish": 0.25333333333333335, + "Vietnamese,Malay,Chinese,Filipino": 0.25333333333333335, + "Vietnamese,Malay,Chinese,Indonesian": 0.3, + "Vietnamese,Malay,Chinese,Spanish": 0.2866666666666667, + "Vietnamese,Malay,Filipino,Indonesian": 0.31333333333333335, + "Vietnamese,Malay,Filipino,Spanish": 0.3, + "Vietnamese,Malay,Indonesian,Spanish": 0.32, + "Vietnamese,Chinese,Filipino,Indonesian": 0.24, + "Vietnamese,Chinese,Filipino,Spanish": 0.26, + "Vietnamese,Chinese,Indonesian,Spanish": 0.25333333333333335, + "Vietnamese,Filipino,Indonesian,Spanish": 0.26, + "English,Malay,Chinese,Filipino": 0.25333333333333335, + "English,Malay,Chinese,Indonesian": 0.24666666666666667, + "English,Malay,Chinese,Spanish": 0.29333333333333333, + "English,Malay,Filipino,Indonesian": 0.2733333333333333, + "English,Malay,Filipino,Spanish": 0.3, + "English,Malay,Indonesian,Spanish": 0.28, + "English,Chinese,Filipino,Indonesian": 0.23333333333333334, + "English,Chinese,Filipino,Spanish": 0.28, + "English,Chinese,Indonesian,Spanish": 0.24, + "English,Filipino,Indonesian,Spanish": 0.26, + "Malay,Chinese,Filipino,Indonesian": 0.24666666666666667, + "Malay,Chinese,Filipino,Spanish": 0.2733333333333333, + "Malay,Chinese,Indonesian,Spanish": 0.2733333333333333, + "Malay,Filipino,Indonesian,Spanish": 0.29333333333333333, + "Chinese,Filipino,Indonesian,Spanish": 0.22666666666666666 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.20666666666666667, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.20666666666666667, + "Vietnamese,English,Malay,Chinese,Spanish": 0.23333333333333334, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.21333333333333335, + "Vietnamese,English,Malay,Filipino,Spanish": 0.23333333333333334, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.22666666666666666, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.2, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.20666666666666667, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.24666666666666667, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333, + "English,Malay,Chinese,Filipino,Indonesian": 0.19333333333333333, + "English,Malay,Chinese,Filipino,Spanish": 0.23333333333333334, + "English,Malay,Chinese,Indonesian,Spanish": 0.22, + "English,Malay,Filipino,Indonesian,Spanish": 0.23333333333333334, + "English,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.20666666666666667 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.16666666666666666, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.2, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.16666666666666666, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.18, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.18 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.16 + } + }, + "AC3_2": 0.4421612485846026, + "AC3_3": 0.36681987787525405, + "AC3_4": 0.3127500703938001, + "AC3_5": 0.27381335782503197, + "AC3_6": 0.24522996907681355, + "AC3_7": 0.2245115452512012 + }, + "prompt_5": { + "overall_acc": 0.36, + "language_acc": { + "Vietnamese": 0.37333333333333335, + "English": 0.3933333333333333, + "Malay": 0.37333333333333335, + "Chinese": 0.34, + "Filipino": 0.31333333333333335, + "Indonesian": 0.36, + "Spanish": 0.36666666666666664 + }, + "consistency_score_2": 0.5231746031746033, + "consistency_score_3": 0.34476190476190477, + "consistency_score_4": 0.25390476190476197, + "consistency_score_5": 0.19936507936507936, + "consistency_score_6": 0.1638095238095238, + "consistency_score_7": 0.14, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.41333333333333333, + "Vietnamese,Malay": 0.5933333333333334, + "Vietnamese,Chinese": 0.38666666666666666, + "Vietnamese,Filipino": 0.5, + "Vietnamese,Indonesian": 0.5466666666666666, + "Vietnamese,Spanish": 0.4866666666666667, + "English,Malay": 0.5333333333333333, + "English,Chinese": 0.62, + "English,Filipino": 0.49333333333333335, + "English,Indonesian": 0.4, + "English,Spanish": 0.5933333333333334, + "Malay,Chinese": 0.5, + "Malay,Filipino": 0.6133333333333333, + "Malay,Indonesian": 0.6666666666666666, + "Malay,Spanish": 0.56, + "Chinese,Filipino": 0.46, + "Chinese,Indonesian": 0.44666666666666666, + "Chinese,Spanish": 0.5733333333333334, + "Filipino,Indonesian": 0.6133333333333333, + "Filipino,Spanish": 0.5133333333333333, + "Indonesian,Spanish": 0.47333333333333333 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.32666666666666666, + "Vietnamese,English,Chinese": 0.2866666666666667, + "Vietnamese,English,Filipino": 0.2733333333333333, + "Vietnamese,English,Indonesian": 0.24666666666666667, + "Vietnamese,English,Spanish": 0.32, + "Vietnamese,Malay,Chinese": 0.31333333333333335, + "Vietnamese,Malay,Filipino": 0.4, + "Vietnamese,Malay,Indonesian": 0.43333333333333335, + "Vietnamese,Malay,Spanish": 0.38666666666666666, + "Vietnamese,Chinese,Filipino": 0.24666666666666667, + "Vietnamese,Chinese,Indonesian": 0.2866666666666667, + "Vietnamese,Chinese,Spanish": 0.29333333333333333, + "Vietnamese,Filipino,Indonesian": 0.36, + "Vietnamese,Filipino,Spanish": 0.31333333333333335, + "Vietnamese,Indonesian,Spanish": 0.32666666666666666, + "English,Malay,Chinese": 0.38, + "English,Malay,Filipino": 0.37333333333333335, + "English,Malay,Indonesian": 0.35333333333333333, + "English,Malay,Spanish": 0.4, + "English,Chinese,Filipino": 0.3466666666666667, + "English,Chinese,Indonesian": 0.32, + "English,Chinese,Spanish": 0.43333333333333335, + "English,Filipino,Indonesian": 0.30666666666666664, + "English,Filipino,Spanish": 0.34, + "English,Indonesian,Spanish": 0.3, + "Malay,Chinese,Filipino": 0.3333333333333333, + "Malay,Chinese,Indonesian": 0.36666666666666664, + "Malay,Chinese,Spanish": 0.37333333333333335, + "Malay,Filipino,Indonesian": 0.47333333333333333, + "Malay,Filipino,Spanish": 0.42, + "Malay,Indonesian,Spanish": 0.42, + "Chinese,Filipino,Indonesian": 0.32, + "Chinese,Filipino,Spanish": 0.32666666666666666, + "Chinese,Indonesian,Spanish": 0.31333333333333335, + "Filipino,Indonesian,Spanish": 0.35333333333333333 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.24, + "Vietnamese,English,Malay,Filipino": 0.23333333333333334, + "Vietnamese,English,Malay,Indonesian": 0.22666666666666666, + "Vietnamese,English,Malay,Spanish": 0.26666666666666666, + "Vietnamese,English,Chinese,Filipino": 0.18666666666666668, + "Vietnamese,English,Chinese,Indonesian": 0.2, + "Vietnamese,English,Chinese,Spanish": 0.24666666666666667, + "Vietnamese,English,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,English,Filipino,Spanish": 0.22, + "Vietnamese,English,Indonesian,Spanish": 0.2, + "Vietnamese,Malay,Chinese,Filipino": 0.21333333333333335, + "Vietnamese,Malay,Chinese,Indonesian": 0.25333333333333335, + "Vietnamese,Malay,Chinese,Spanish": 0.25333333333333335, + "Vietnamese,Malay,Filipino,Indonesian": 0.32, + "Vietnamese,Malay,Filipino,Spanish": 0.2866666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.29333333333333333, + "Vietnamese,Chinese,Filipino,Indonesian": 0.21333333333333335, + "Vietnamese,Chinese,Filipino,Spanish": 0.2, + "Vietnamese,Chinese,Indonesian,Spanish": 0.22, + "Vietnamese,Filipino,Indonesian,Spanish": 0.24, + "English,Malay,Chinese,Filipino": 0.2733333333333333, + "English,Malay,Chinese,Indonesian": 0.28, + "English,Malay,Chinese,Spanish": 0.32, + "English,Malay,Filipino,Indonesian": 0.26666666666666666, + "English,Malay,Filipino,Spanish": 0.3, + "English,Malay,Indonesian,Spanish": 0.28, + "English,Chinese,Filipino,Indonesian": 0.26, + "English,Chinese,Filipino,Spanish": 0.26, + "English,Chinese,Indonesian,Spanish": 0.26666666666666666, + "English,Filipino,Indonesian,Spanish": 0.24, + "Malay,Chinese,Filipino,Indonesian": 0.28, + "Malay,Chinese,Filipino,Spanish": 0.28, + "Malay,Chinese,Indonesian,Spanish": 0.29333333333333333, + "Malay,Filipino,Indonesian,Spanish": 0.3333333333333333, + "Chinese,Filipino,Indonesian,Spanish": 0.25333333333333335 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.16666666666666666, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.18666666666666668, + "Vietnamese,English,Malay,Chinese,Spanish": 0.21333333333333335, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.17333333333333334, + "Vietnamese,English,Malay,Filipino,Spanish": 0.2, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.16, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.16666666666666666, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.18, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.16, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.2, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.18, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334, + "English,Malay,Chinese,Filipino,Indonesian": 0.22666666666666666, + "English,Malay,Chinese,Filipino,Spanish": 0.24, + "English,Malay,Chinese,Indonesian,Spanish": 0.24666666666666667, + "English,Malay,Filipino,Indonesian,Spanish": 0.22666666666666666, + "English,Chinese,Filipino,Indonesian,Spanish": 0.22, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.24 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.15333333333333332, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.15333333333333332, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.16666666666666666, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.15333333333333332, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.14666666666666667, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.16666666666666666, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.20666666666666667 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14 + } + }, + "AC3_2": 0.42651329973603463, + "AC3_3": 0.3522162161662396, + "AC3_4": 0.2977846726167121, + "AC3_5": 0.2566174800903323, + "AC3_6": 0.2251636363206506, + "AC3_7": 0.20159999995968 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.2987012987012987, + "language_acc": { + "Spanish": 0.26136363636363635, + "Chinese": 0.26136363636363635, + "Vietnamese": 0.3465909090909091, + "Indonesian": 0.2784090909090909, + "Malay": 0.3409090909090909, + "Filipino": 0.2784090909090909, + "English": 0.32386363636363635 + }, + "consistency_score_2": 0.4545454545454544, + "consistency_score_3": 0.25227272727272726, + "consistency_score_4": 0.15097402597402598, + "consistency_score_5": 0.09442640692640693, + "consistency_score_6": 0.06087662337662337, + "consistency_score_7": 0.03977272727272727, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.3806818181818182, + "Spanish,Vietnamese": 0.2897727272727273, + "Spanish,Indonesian": 0.32954545454545453, + "Spanish,Malay": 0.3465909090909091, + "Spanish,Filipino": 0.4147727272727273, + "Spanish,English": 0.5397727272727273, + "Chinese,Vietnamese": 0.3806818181818182, + "Chinese,Indonesian": 0.4375, + "Chinese,Malay": 0.4772727272727273, + "Chinese,Filipino": 0.4715909090909091, + "Chinese,English": 0.4772727272727273, + "Vietnamese,Indonesian": 0.5056818181818182, + "Vietnamese,Malay": 0.4602272727272727, + "Vietnamese,Filipino": 0.5738636363636364, + "Vietnamese,English": 0.3465909090909091, + "Indonesian,Malay": 0.6136363636363636, + "Indonesian,Filipino": 0.5852272727272727, + "Indonesian,English": 0.4147727272727273, + "Malay,Filipino": 0.6079545454545454, + "Malay,English": 0.4772727272727273, + "Filipino,English": 0.4147727272727273 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.16477272727272727, + "Spanish,Chinese,Indonesian": 0.16477272727272727, + "Spanish,Chinese,Malay": 0.19886363636363635, + "Spanish,Chinese,Filipino": 0.21022727272727273, + "Spanish,Chinese,English": 0.26704545454545453, + "Spanish,Vietnamese,Indonesian": 0.1534090909090909, + "Spanish,Vietnamese,Malay": 0.14204545454545456, + "Spanish,Vietnamese,Filipino": 0.19886363636363635, + "Spanish,Vietnamese,English": 0.18181818181818182, + "Spanish,Indonesian,Malay": 0.2159090909090909, + "Spanish,Indonesian,Filipino": 0.22727272727272727, + "Spanish,Indonesian,English": 0.21022727272727273, + "Spanish,Malay,Filipino": 0.2556818181818182, + "Spanish,Malay,English": 0.23863636363636365, + "Spanish,Filipino,English": 0.25, + "Chinese,Vietnamese,Indonesian": 0.23295454545454544, + "Chinese,Vietnamese,Malay": 0.23295454545454544, + "Chinese,Vietnamese,Filipino": 0.29545454545454547, + "Chinese,Vietnamese,English": 0.21022727272727273, + "Chinese,Indonesian,Malay": 0.3352272727272727, + "Chinese,Indonesian,Filipino": 0.3181818181818182, + "Chinese,Indonesian,English": 0.24431818181818182, + "Chinese,Malay,Filipino": 0.3409090909090909, + "Chinese,Malay,English": 0.2840909090909091, + "Chinese,Filipino,English": 0.26136363636363635, + "Vietnamese,Indonesian,Malay": 0.3409090909090909, + "Vietnamese,Indonesian,Filipino": 0.3693181818181818, + "Vietnamese,Indonesian,English": 0.20454545454545456, + "Vietnamese,Malay,Filipino": 0.3522727272727273, + "Vietnamese,Malay,English": 0.20454545454545456, + "Vietnamese,Filipino,English": 0.22727272727272727, + "Indonesian,Malay,Filipino": 0.4375, + "Indonesian,Malay,English": 0.3068181818181818, + "Indonesian,Filipino,English": 0.26136363636363635, + "Malay,Filipino,English": 0.2897727272727273 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.09659090909090909, + "Spanish,Chinese,Vietnamese,Malay": 0.09090909090909091, + "Spanish,Chinese,Vietnamese,Filipino": 0.125, + "Spanish,Chinese,Vietnamese,English": 0.11363636363636363, + "Spanish,Chinese,Indonesian,Malay": 0.125, + "Spanish,Chinese,Indonesian,Filipino": 0.125, + "Spanish,Chinese,Indonesian,English": 0.11931818181818182, + "Spanish,Chinese,Malay,Filipino": 0.1534090909090909, + "Spanish,Chinese,Malay,English": 0.14204545454545456, + "Spanish,Chinese,Filipino,English": 0.1534090909090909, + "Spanish,Vietnamese,Indonesian,Malay": 0.10795454545454546, + "Spanish,Vietnamese,Indonesian,Filipino": 0.13068181818181818, + "Spanish,Vietnamese,Indonesian,English": 0.09090909090909091, + "Spanish,Vietnamese,Malay,Filipino": 0.13068181818181818, + "Spanish,Vietnamese,Malay,English": 0.07954545454545454, + "Spanish,Vietnamese,Filipino,English": 0.125, + "Spanish,Indonesian,Malay,Filipino": 0.18181818181818182, + "Spanish,Indonesian,Malay,English": 0.14772727272727273, + "Spanish,Indonesian,Filipino,English": 0.13068181818181818, + "Spanish,Malay,Filipino,English": 0.1590909090909091, + "Chinese,Vietnamese,Indonesian,Malay": 0.17613636363636365, + "Chinese,Vietnamese,Indonesian,Filipino": 0.19886363636363635, + "Chinese,Vietnamese,Indonesian,English": 0.13068181818181818, + "Chinese,Vietnamese,Malay,Filipino": 0.19318181818181818, + "Chinese,Vietnamese,Malay,English": 0.14204545454545456, + "Chinese,Vietnamese,Filipino,English": 0.1534090909090909, + "Chinese,Indonesian,Malay,Filipino": 0.2556818181818182, + "Chinese,Indonesian,Malay,English": 0.19318181818181818, + "Chinese,Indonesian,Filipino,English": 0.17045454545454544, + "Chinese,Malay,Filipino,English": 0.1875, + "Vietnamese,Indonesian,Malay,Filipino": 0.26704545454545453, + "Vietnamese,Indonesian,Malay,English": 0.1590909090909091, + "Vietnamese,Indonesian,Filipino,English": 0.1534090909090909, + "Vietnamese,Malay,Filipino,English": 0.1590909090909091, + "Indonesian,Malay,Filipino,English": 0.2159090909090909 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.07386363636363637, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.08522727272727272, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.0625, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.07954545454545454, + "Spanish,Chinese,Vietnamese,Malay,English": 0.056818181818181816, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.08522727272727272, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.10795454545454546, + "Spanish,Chinese,Indonesian,Malay,English": 0.09090909090909091, + "Spanish,Chinese,Indonesian,Filipino,English": 0.08522727272727272, + "Spanish,Chinese,Malay,Filipino,English": 0.10227272727272728, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.10227272727272728, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.0625, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.07386363636363637, + "Spanish,Vietnamese,Malay,Filipino,English": 0.06818181818181818, + "Spanish,Indonesian,Malay,Filipino,English": 0.11363636363636363, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.14772727272727273, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.10795454545454546, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.10227272727272728, + "Chinese,Vietnamese,Malay,Filipino,English": 0.10795454545454546, + "Chinese,Indonesian,Malay,Filipino,English": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.125 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.06818181818181818, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.045454545454545456, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.05113636363636364, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.045454545454545456, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.07386363636363637, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.056818181818181816, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.08522727272727272 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.03977272727272727 + } + }, + "AC3_2": 0.36050156735025934, + "AC3_3": 0.27353082975696563, + "AC3_4": 0.20057199114110585, + "AC3_5": 0.14349174565758566, + "AC3_6": 0.10114039456599165, + "AC3_7": 0.07019838672440966 + }, + "prompt_2": { + "overall_acc": 0.3051948051948052, + "language_acc": { + "Spanish": 0.3181818181818182, + "Chinese": 0.29545454545454547, + "Vietnamese": 0.2784090909090909, + "Indonesian": 0.29545454545454547, + "Malay": 0.3125, + "Filipino": 0.3125, + "English": 0.32386363636363635 + }, + "consistency_score_2": 0.3977272727272727, + "consistency_score_3": 0.19431818181818178, + "consistency_score_4": 0.10292207792207793, + "consistency_score_5": 0.055194805194805185, + "consistency_score_6": 0.027597402597402596, + "consistency_score_7": 0.011363636363636364, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.3806818181818182, + "Spanish,Vietnamese": 0.25, + "Spanish,Indonesian": 0.4090909090909091, + "Spanish,Malay": 0.5284090909090909, + "Spanish,Filipino": 0.4090909090909091, + "Spanish,English": 0.5227272727272727, + "Chinese,Vietnamese": 0.22727272727272727, + "Chinese,Indonesian": 0.4090909090909091, + "Chinese,Malay": 0.39204545454545453, + "Chinese,Filipino": 0.3977272727272727, + "Chinese,English": 0.39204545454545453, + "Vietnamese,Indonesian": 0.32386363636363635, + "Vietnamese,Malay": 0.29545454545454547, + "Vietnamese,Filipino": 0.2727272727272727, + "Vietnamese,English": 0.26704545454545453, + "Indonesian,Malay": 0.5227272727272727, + "Indonesian,Filipino": 0.4943181818181818, + "Indonesian,English": 0.45454545454545453, + "Malay,Filipino": 0.48295454545454547, + "Malay,English": 0.4715909090909091, + "Filipino,English": 0.44886363636363635 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.07954545454545454, + "Spanish,Chinese,Indonesian": 0.18181818181818182, + "Spanish,Chinese,Malay": 0.24431818181818182, + "Spanish,Chinese,Filipino": 0.19886363636363635, + "Spanish,Chinese,English": 0.23863636363636365, + "Spanish,Vietnamese,Indonesian": 0.10795454545454546, + "Spanish,Vietnamese,Malay": 0.1590909090909091, + "Spanish,Vietnamese,Filipino": 0.10795454545454546, + "Spanish,Vietnamese,English": 0.13068181818181818, + "Spanish,Indonesian,Malay": 0.2840909090909091, + "Spanish,Indonesian,Filipino": 0.22727272727272727, + "Spanish,Indonesian,English": 0.24431818181818182, + "Spanish,Malay,Filipino": 0.2727272727272727, + "Spanish,Malay,English": 0.32386363636363635, + "Spanish,Filipino,English": 0.25, + "Chinese,Vietnamese,Indonesian": 0.125, + "Chinese,Vietnamese,Malay": 0.10795454545454546, + "Chinese,Vietnamese,Filipino": 0.10227272727272728, + "Chinese,Vietnamese,English": 0.07386363636363637, + "Chinese,Indonesian,Malay": 0.23295454545454544, + "Chinese,Indonesian,Filipino": 0.23295454545454544, + "Chinese,Indonesian,English": 0.22727272727272727, + "Chinese,Malay,Filipino": 0.23863636363636365, + "Chinese,Malay,English": 0.22727272727272727, + "Chinese,Filipino,English": 0.20454545454545456, + "Vietnamese,Indonesian,Malay": 0.16477272727272727, + "Vietnamese,Indonesian,Filipino": 0.17045454545454544, + "Vietnamese,Indonesian,English": 0.14204545454545456, + "Vietnamese,Malay,Filipino": 0.14204545454545456, + "Vietnamese,Malay,English": 0.14204545454545456, + "Vietnamese,Filipino,English": 0.11363636363636363, + "Indonesian,Malay,Filipino": 0.29545454545454547, + "Indonesian,Malay,English": 0.30113636363636365, + "Indonesian,Filipino,English": 0.2556818181818182, + "Malay,Filipino,English": 0.25 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.045454545454545456, + "Spanish,Chinese,Vietnamese,Malay": 0.0625, + "Spanish,Chinese,Vietnamese,Filipino": 0.045454545454545456, + "Spanish,Chinese,Vietnamese,English": 0.03977272727272727, + "Spanish,Chinese,Indonesian,Malay": 0.14204545454545456, + "Spanish,Chinese,Indonesian,Filipino": 0.11931818181818182, + "Spanish,Chinese,Indonesian,English": 0.13068181818181818, + "Spanish,Chinese,Malay,Filipino": 0.14772727272727273, + "Spanish,Chinese,Malay,English": 0.17045454545454544, + "Spanish,Chinese,Filipino,English": 0.13068181818181818, + "Spanish,Vietnamese,Indonesian,Malay": 0.08522727272727272, + "Spanish,Vietnamese,Indonesian,Filipino": 0.0625, + "Spanish,Vietnamese,Indonesian,English": 0.056818181818181816, + "Spanish,Vietnamese,Malay,Filipino": 0.07386363636363637, + "Spanish,Vietnamese,Malay,English": 0.09659090909090909, + "Spanish,Vietnamese,Filipino,English": 0.05113636363636364, + "Spanish,Indonesian,Malay,Filipino": 0.17613636363636365, + "Spanish,Indonesian,Malay,English": 0.1875, + "Spanish,Indonesian,Filipino,English": 0.14204545454545456, + "Spanish,Malay,Filipino,English": 0.16477272727272727, + "Chinese,Vietnamese,Indonesian,Malay": 0.07954545454545454, + "Chinese,Vietnamese,Indonesian,Filipino": 0.08522727272727272, + "Chinese,Vietnamese,Indonesian,English": 0.05113636363636364, + "Chinese,Vietnamese,Malay,Filipino": 0.06818181818181818, + "Chinese,Vietnamese,Malay,English": 0.05113636363636364, + "Chinese,Vietnamese,Filipino,English": 0.03977272727272727, + "Chinese,Indonesian,Malay,Filipino": 0.1534090909090909, + "Chinese,Indonesian,Malay,English": 0.1590909090909091, + "Chinese,Indonesian,Filipino,English": 0.13636363636363635, + "Chinese,Malay,Filipino,English": 0.13068181818181818, + "Vietnamese,Indonesian,Malay,Filipino": 0.10795454545454546, + "Vietnamese,Indonesian,Malay,English": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino,English": 0.07386363636363637, + "Vietnamese,Malay,Filipino,English": 0.0625, + "Indonesian,Malay,Filipino,English": 0.1875 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.03977272727272727, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.03409090909090909, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.022727272727272728, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.03409090909090909, + "Spanish,Chinese,Vietnamese,Malay,English": 0.03409090909090909, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.017045454545454544, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.09659090909090909, + "Spanish,Chinese,Indonesian,Malay,English": 0.10795454545454546, + "Spanish,Chinese,Indonesian,Filipino,English": 0.08522727272727272, + "Spanish,Chinese,Malay,Filipino,English": 0.09659090909090909, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.056818181818181816, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.05113636363636364, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.022727272727272728, + "Spanish,Vietnamese,Malay,Filipino,English": 0.028409090909090908, + "Spanish,Indonesian,Malay,Filipino,English": 0.11363636363636363, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.0625, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.03977272727272727, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.03409090909090909, + "Chinese,Vietnamese,Malay,Filipino,English": 0.028409090909090908, + "Chinese,Indonesian,Malay,Filipino,English": 0.10227272727272728, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.05113636363636364 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.028409090909090908, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.022727272727272728, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.011363636363636364, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.011363636363636364, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.06818181818181818, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.022727272727272728, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.028409090909090908 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.011363636363636364 + } + }, + "AC3_2": 0.3453705647209693, + "AC3_3": 0.23745088186223393, + "AC3_4": 0.15393278158002802, + "AC3_5": 0.09348309345715401, + "AC3_6": 0.0506176749927089, + "AC3_7": 0.02191142190450015 + }, + "prompt_3": { + "overall_acc": 0.3003246753246754, + "language_acc": { + "Spanish": 0.3068181818181818, + "Chinese": 0.2840909090909091, + "Vietnamese": 0.2897727272727273, + "Indonesian": 0.2840909090909091, + "Malay": 0.3068181818181818, + "Filipino": 0.30113636363636365, + "English": 0.32954545454545453 + }, + "consistency_score_2": 0.35064935064935066, + "consistency_score_3": 0.14870129870129878, + "consistency_score_4": 0.06866883116883118, + "consistency_score_5": 0.032467532467532464, + "consistency_score_6": 0.01461038961038961, + "consistency_score_7": 0.005681818181818182, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.32386363636363635, + "Spanish,Vietnamese": 0.24431818181818182, + "Spanish,Indonesian": 0.29545454545454547, + "Spanish,Malay": 0.3693181818181818, + "Spanish,Filipino": 0.3181818181818182, + "Spanish,English": 0.4943181818181818, + "Chinese,Vietnamese": 0.26704545454545453, + "Chinese,Indonesian": 0.32954545454545453, + "Chinese,Malay": 0.3125, + "Chinese,Filipino": 0.3352272727272727, + "Chinese,English": 0.375, + "Vietnamese,Indonesian": 0.45454545454545453, + "Vietnamese,Malay": 0.375, + "Vietnamese,Filipino": 0.375, + "Vietnamese,English": 0.26704545454545453, + "Indonesian,Malay": 0.4375, + "Indonesian,Filipino": 0.3693181818181818, + "Indonesian,English": 0.29545454545454547, + "Malay,Filipino": 0.35795454545454547, + "Malay,English": 0.39204545454545453, + "Filipino,English": 0.375 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.056818181818181816, + "Spanish,Chinese,Indonesian": 0.13068181818181818, + "Spanish,Chinese,Malay": 0.13636363636363635, + "Spanish,Chinese,Filipino": 0.13636363636363635, + "Spanish,Chinese,English": 0.20454545454545456, + "Spanish,Vietnamese,Indonesian": 0.11931818181818182, + "Spanish,Vietnamese,Malay": 0.10227272727272728, + "Spanish,Vietnamese,Filipino": 0.11931818181818182, + "Spanish,Vietnamese,English": 0.125, + "Spanish,Indonesian,Malay": 0.16477272727272727, + "Spanish,Indonesian,Filipino": 0.13068181818181818, + "Spanish,Indonesian,English": 0.13636363636363635, + "Spanish,Malay,Filipino": 0.13068181818181818, + "Spanish,Malay,English": 0.2215909090909091, + "Spanish,Filipino,English": 0.19318181818181818, + "Chinese,Vietnamese,Indonesian": 0.14772727272727273, + "Chinese,Vietnamese,Malay": 0.11363636363636363, + "Chinese,Vietnamese,Filipino": 0.13068181818181818, + "Chinese,Vietnamese,English": 0.08522727272727272, + "Chinese,Indonesian,Malay": 0.16477272727272727, + "Chinese,Indonesian,Filipino": 0.13068181818181818, + "Chinese,Indonesian,English": 0.13636363636363635, + "Chinese,Malay,Filipino": 0.13068181818181818, + "Chinese,Malay,English": 0.17613636363636365, + "Chinese,Filipino,English": 0.14204545454545456, + "Vietnamese,Indonesian,Malay": 0.24431818181818182, + "Vietnamese,Indonesian,Filipino": 0.2215909090909091, + "Vietnamese,Indonesian,English": 0.13636363636363635, + "Vietnamese,Malay,Filipino": 0.19318181818181818, + "Vietnamese,Malay,English": 0.125, + "Vietnamese,Filipino,English": 0.13636363636363635, + "Indonesian,Malay,Filipino": 0.19886363636363635, + "Indonesian,Malay,English": 0.16477272727272727, + "Indonesian,Filipino,English": 0.1534090909090909, + "Malay,Filipino,English": 0.16477272727272727 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.03409090909090909, + "Spanish,Chinese,Vietnamese,Malay": 0.017045454545454544, + "Spanish,Chinese,Vietnamese,Filipino": 0.028409090909090908, + "Spanish,Chinese,Vietnamese,English": 0.017045454545454544, + "Spanish,Chinese,Indonesian,Malay": 0.07386363636363637, + "Spanish,Chinese,Indonesian,Filipino": 0.0625, + "Spanish,Chinese,Indonesian,English": 0.07954545454545454, + "Spanish,Chinese,Malay,Filipino": 0.05113636363636364, + "Spanish,Chinese,Malay,English": 0.09659090909090909, + "Spanish,Chinese,Filipino,English": 0.09090909090909091, + "Spanish,Vietnamese,Indonesian,Malay": 0.07386363636363637, + "Spanish,Vietnamese,Indonesian,Filipino": 0.07954545454545454, + "Spanish,Vietnamese,Indonesian,English": 0.03977272727272727, + "Spanish,Vietnamese,Malay,Filipino": 0.06818181818181818, + "Spanish,Vietnamese,Malay,English": 0.056818181818181816, + "Spanish,Vietnamese,Filipino,English": 0.06818181818181818, + "Spanish,Indonesian,Malay,Filipino": 0.07386363636363637, + "Spanish,Indonesian,Malay,English": 0.08522727272727272, + "Spanish,Indonesian,Filipino,English": 0.07954545454545454, + "Spanish,Malay,Filipino,English": 0.08522727272727272, + "Chinese,Vietnamese,Indonesian,Malay": 0.08522727272727272, + "Chinese,Vietnamese,Indonesian,Filipino": 0.07386363636363637, + "Chinese,Vietnamese,Indonesian,English": 0.05113636363636364, + "Chinese,Vietnamese,Malay,Filipino": 0.0625, + "Chinese,Vietnamese,Malay,English": 0.05113636363636364, + "Chinese,Vietnamese,Filipino,English": 0.045454545454545456, + "Chinese,Indonesian,Malay,Filipino": 0.07386363636363637, + "Chinese,Indonesian,Malay,English": 0.07954545454545454, + "Chinese,Indonesian,Filipino,English": 0.06818181818181818, + "Chinese,Malay,Filipino,English": 0.06818181818181818, + "Vietnamese,Indonesian,Malay,Filipino": 0.13636363636363635, + "Vietnamese,Indonesian,Malay,English": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino,English": 0.08522727272727272, + "Vietnamese,Malay,Filipino,English": 0.08522727272727272, + "Indonesian,Malay,Filipino,English": 0.09090909090909091 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.017045454545454544, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.022727272727272728, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.005681818181818182, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.011363636363636364, + "Spanish,Chinese,Vietnamese,Malay,English": 0.005681818181818182, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.011363636363636364, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.028409090909090908, + "Spanish,Chinese,Indonesian,Malay,English": 0.045454545454545456, + "Spanish,Chinese,Indonesian,Filipino,English": 0.045454545454545456, + "Spanish,Chinese,Malay,Filipino,English": 0.03977272727272727, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.05113636363636364, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.03409090909090909, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.03409090909090909, + "Spanish,Vietnamese,Malay,Filipino,English": 0.03977272727272727, + "Spanish,Indonesian,Malay,Filipino,English": 0.045454545454545456, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.045454545454545456, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.03409090909090909, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.028409090909090908, + "Chinese,Vietnamese,Malay,Filipino,English": 0.03409090909090909, + "Chinese,Indonesian,Malay,Filipino,English": 0.03977272727272727, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.0625 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.011363636363636364, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.005681818181818182, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.005681818181818182, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.005681818181818182, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.022727272727272728, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.028409090909090908, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.022727272727272728 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.005681818181818182 + } + }, + "AC3_2": 0.3235417948137895, + "AC3_3": 0.19891352316423525, + "AC3_4": 0.11177944357933094, + "AC3_5": 0.05859993663110856, + "AC3_6": 0.027865176052204437, + "AC3_7": 0.011152640459340715 + }, + "prompt_4": { + "overall_acc": 0.3319805194805195, + "language_acc": { + "Spanish": 0.32386363636363635, + "Chinese": 0.3352272727272727, + "Vietnamese": 0.3693181818181818, + "Indonesian": 0.32386363636363635, + "Malay": 0.3181818181818182, + "Filipino": 0.32954545454545453, + "English": 0.32386363636363635 + }, + "consistency_score_2": 0.50487012987013, + "consistency_score_3": 0.31233766233766236, + "consistency_score_4": 0.21298701298701297, + "consistency_score_5": 0.15503246753246755, + "consistency_score_6": 0.1176948051948052, + "consistency_score_7": 0.09090909090909091, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.5170454545454546, + "Spanish,Vietnamese": 0.36363636363636365, + "Spanish,Indonesian": 0.39204545454545453, + "Spanish,Malay": 0.38636363636363635, + "Spanish,Filipino": 0.44886363636363635, + "Spanish,English": 0.5625, + "Chinese,Vietnamese": 0.42613636363636365, + "Chinese,Indonesian": 0.4602272727272727, + "Chinese,Malay": 0.5056818181818182, + "Chinese,Filipino": 0.4659090909090909, + "Chinese,English": 0.5454545454545454, + "Vietnamese,Indonesian": 0.6363636363636364, + "Vietnamese,Malay": 0.5909090909090909, + "Vietnamese,Filipino": 0.6306818181818182, + "Vietnamese,English": 0.3977272727272727, + "Indonesian,Malay": 0.6534090909090909, + "Indonesian,Filipino": 0.6193181818181818, + "Indonesian,English": 0.4602272727272727, + "Malay,Filipino": 0.6306818181818182, + "Malay,English": 0.4431818181818182, + "Filipino,English": 0.4659090909090909 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.2556818181818182, + "Spanish,Chinese,Indonesian": 0.26704545454545453, + "Spanish,Chinese,Malay": 0.29545454545454547, + "Spanish,Chinese,Filipino": 0.2784090909090909, + "Spanish,Chinese,English": 0.3522727272727273, + "Spanish,Vietnamese,Indonesian": 0.26136363636363635, + "Spanish,Vietnamese,Malay": 0.2215909090909091, + "Spanish,Vietnamese,Filipino": 0.2784090909090909, + "Spanish,Vietnamese,English": 0.23863636363636365, + "Spanish,Indonesian,Malay": 0.2727272727272727, + "Spanish,Indonesian,Filipino": 0.29545454545454547, + "Spanish,Indonesian,English": 0.2784090909090909, + "Spanish,Malay,Filipino": 0.2784090909090909, + "Spanish,Malay,English": 0.26704545454545453, + "Spanish,Filipino,English": 0.29545454545454547, + "Chinese,Vietnamese,Indonesian": 0.32386363636363635, + "Chinese,Vietnamese,Malay": 0.3125, + "Chinese,Vietnamese,Filipino": 0.3125, + "Chinese,Vietnamese,English": 0.2556818181818182, + "Chinese,Indonesian,Malay": 0.35795454545454547, + "Chinese,Indonesian,Filipino": 0.32386363636363635, + "Chinese,Indonesian,English": 0.2897727272727273, + "Chinese,Malay,Filipino": 0.3409090909090909, + "Chinese,Malay,English": 0.3068181818181818, + "Chinese,Filipino,English": 0.30113636363636365, + "Vietnamese,Indonesian,Malay": 0.4659090909090909, + "Vietnamese,Indonesian,Filipino": 0.4659090909090909, + "Vietnamese,Indonesian,English": 0.3068181818181818, + "Vietnamese,Malay,Filipino": 0.4602272727272727, + "Vietnamese,Malay,English": 0.2556818181818182, + "Vietnamese,Filipino,English": 0.29545454545454547, + "Indonesian,Malay,Filipino": 0.4772727272727273, + "Indonesian,Malay,English": 0.3181818181818182, + "Indonesian,Filipino,English": 0.3181818181818182, + "Malay,Filipino,English": 0.3068181818181818 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.19318181818181818, + "Spanish,Chinese,Vietnamese,Malay": 0.17045454545454544, + "Spanish,Chinese,Vietnamese,Filipino": 0.1875, + "Spanish,Chinese,Vietnamese,English": 0.18181818181818182, + "Spanish,Chinese,Indonesian,Malay": 0.20454545454545456, + "Spanish,Chinese,Indonesian,Filipino": 0.19886363636363635, + "Spanish,Chinese,Indonesian,English": 0.19886363636363635, + "Spanish,Chinese,Malay,Filipino": 0.19318181818181818, + "Spanish,Chinese,Malay,English": 0.21022727272727273, + "Spanish,Chinese,Filipino,English": 0.19886363636363635, + "Spanish,Vietnamese,Indonesian,Malay": 0.1875, + "Spanish,Vietnamese,Indonesian,Filipino": 0.2159090909090909, + "Spanish,Vietnamese,Indonesian,English": 0.18181818181818182, + "Spanish,Vietnamese,Malay,Filipino": 0.18181818181818182, + "Spanish,Vietnamese,Malay,English": 0.1590909090909091, + "Spanish,Vietnamese,Filipino,English": 0.19318181818181818, + "Spanish,Indonesian,Malay,Filipino": 0.2159090909090909, + "Spanish,Indonesian,Malay,English": 0.19318181818181818, + "Spanish,Indonesian,Filipino,English": 0.19318181818181818, + "Spanish,Malay,Filipino,English": 0.18181818181818182, + "Chinese,Vietnamese,Indonesian,Malay": 0.2784090909090909, + "Chinese,Vietnamese,Indonesian,Filipino": 0.26136363636363635, + "Chinese,Vietnamese,Indonesian,English": 0.20454545454545456, + "Chinese,Vietnamese,Malay,Filipino": 0.24431818181818182, + "Chinese,Vietnamese,Malay,English": 0.19318181818181818, + "Chinese,Vietnamese,Filipino,English": 0.21022727272727273, + "Chinese,Indonesian,Malay,Filipino": 0.26136363636363635, + "Chinese,Indonesian,Malay,English": 0.22727272727272727, + "Chinese,Indonesian,Filipino,English": 0.21022727272727273, + "Chinese,Malay,Filipino,English": 0.21022727272727273, + "Vietnamese,Indonesian,Malay,Filipino": 0.3693181818181818, + "Vietnamese,Indonesian,Malay,English": 0.2215909090909091, + "Vietnamese,Indonesian,Filipino,English": 0.23863636363636365, + "Vietnamese,Malay,Filipino,English": 0.2215909090909091, + "Indonesian,Malay,Filipino,English": 0.26136363636363635 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.1534090909090909, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.14772727272727273, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.13068181818181818, + "Spanish,Chinese,Vietnamese,Malay,English": 0.13068181818181818, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.14772727272727273, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.14772727272727273, + "Spanish,Chinese,Indonesian,Malay,English": 0.1534090909090909, + "Spanish,Chinese,Indonesian,Filipino,English": 0.14204545454545456, + "Spanish,Chinese,Malay,Filipino,English": 0.13068181818181818, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.1534090909090909, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.13636363636363635, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.14204545454545456, + "Spanish,Vietnamese,Malay,Filipino,English": 0.13068181818181818, + "Spanish,Indonesian,Malay,Filipino,English": 0.14772727272727273, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.2159090909090909, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.17613636363636365, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.17613636363636365, + "Chinese,Vietnamese,Malay,Filipino,English": 0.16477272727272727, + "Chinese,Indonesian,Malay,Filipino,English": 0.17613636363636365, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.19318181818181818 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.11931818181818182, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.11931818181818182, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.11931818181818182, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.10227272727272728, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.10795454545454546, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.10795454545454546, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.14772727272727273 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.09090909090909091 + } + }, + "AC3_2": 0.40056621356393457, + "AC3_3": 0.3218596721498547, + "AC3_4": 0.2594926669517532, + "AC3_5": 0.2113609306925313, + "AC3_6": 0.17378041629592222, + "AC3_7": 0.14273250738205706 + }, + "prompt_5": { + "overall_acc": 0.29626623376623373, + "language_acc": { + "Spanish": 0.30113636363636365, + "Chinese": 0.30113636363636365, + "Vietnamese": 0.30113636363636365, + "Indonesian": 0.26136363636363635, + "Malay": 0.2840909090909091, + "Filipino": 0.29545454545454547, + "English": 0.32954545454545453 + }, + "consistency_score_2": 0.49648268398268397, + "consistency_score_3": 0.30000000000000004, + "consistency_score_4": 0.20227272727272722, + "consistency_score_5": 0.14853896103896105, + "consistency_score_6": 0.11850649350649352, + "consistency_score_7": 0.10227272727272728, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.48863636363636365, + "Spanish,Vietnamese": 0.4659090909090909, + "Spanish,Indonesian": 0.42613636363636365, + "Spanish,Malay": 0.4431818181818182, + "Spanish,Filipino": 0.5113636363636364, + "Spanish,English": 0.5511363636363636, + "Chinese,Vietnamese": 0.375, + "Chinese,Indonesian": 0.45454545454545453, + "Chinese,Malay": 0.4318181818181818, + "Chinese,Filipino": 0.4659090909090909, + "Chinese,English": 0.5397727272727273, + "Vietnamese,Indonesian": 0.5681818181818182, + "Vietnamese,Malay": 0.6590909090909091, + "Vietnamese,Filipino": 0.6590909090909091, + "Vietnamese,English": 0.3068181818181818, + "Indonesian,Malay": 0.6704545454545454, + "Indonesian,Filipino": 0.5965909090909091, + "Indonesian,English": 0.4147727272727273, + "Malay,Filipino": 0.6818181818181818, + "Malay,English": 0.3409090909090909, + "Filipino,English": 0.375 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.25, + "Spanish,Chinese,Indonesian": 0.2727272727272727, + "Spanish,Chinese,Malay": 0.25, + "Spanish,Chinese,Filipino": 0.2840909090909091, + "Spanish,Chinese,English": 0.32386363636363635, + "Spanish,Vietnamese,Indonesian": 0.2784090909090909, + "Spanish,Vietnamese,Malay": 0.3125, + "Spanish,Vietnamese,Filipino": 0.35795454545454547, + "Spanish,Vietnamese,English": 0.2215909090909091, + "Spanish,Indonesian,Malay": 0.32954545454545453, + "Spanish,Indonesian,Filipino": 0.3181818181818182, + "Spanish,Indonesian,English": 0.26136363636363635, + "Spanish,Malay,Filipino": 0.36363636363636365, + "Spanish,Malay,English": 0.23295454545454544, + "Spanish,Filipino,English": 0.26136363636363635, + "Chinese,Vietnamese,Indonesian": 0.2897727272727273, + "Chinese,Vietnamese,Malay": 0.2784090909090909, + "Chinese,Vietnamese,Filipino": 0.30113636363636365, + "Chinese,Vietnamese,English": 0.20454545454545456, + "Chinese,Indonesian,Malay": 0.3409090909090909, + "Chinese,Indonesian,Filipino": 0.32386363636363635, + "Chinese,Indonesian,English": 0.29545454545454547, + "Chinese,Malay,Filipino": 0.3352272727272727, + "Chinese,Malay,English": 0.25, + "Chinese,Filipino,English": 0.2556818181818182, + "Vietnamese,Indonesian,Malay": 0.4772727272727273, + "Vietnamese,Indonesian,Filipino": 0.4318181818181818, + "Vietnamese,Indonesian,English": 0.2159090909090909, + "Vietnamese,Malay,Filipino": 0.5170454545454546, + "Vietnamese,Malay,English": 0.19886363636363635, + "Vietnamese,Filipino,English": 0.2215909090909091, + "Indonesian,Malay,Filipino": 0.48863636363636365, + "Indonesian,Malay,English": 0.2556818181818182, + "Indonesian,Filipino,English": 0.25, + "Malay,Filipino,English": 0.25 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.1875, + "Spanish,Chinese,Vietnamese,Malay": 0.17613636363636365, + "Spanish,Chinese,Vietnamese,Filipino": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,English": 0.1590909090909091, + "Spanish,Chinese,Indonesian,Malay": 0.20454545454545456, + "Spanish,Chinese,Indonesian,Filipino": 0.19886363636363635, + "Spanish,Chinese,Indonesian,English": 0.20454545454545456, + "Spanish,Chinese,Malay,Filipino": 0.19886363636363635, + "Spanish,Chinese,Malay,English": 0.18181818181818182, + "Spanish,Chinese,Filipino,English": 0.18181818181818182, + "Spanish,Vietnamese,Indonesian,Malay": 0.24431818181818182, + "Spanish,Vietnamese,Indonesian,Filipino": 0.25, + "Spanish,Vietnamese,Indonesian,English": 0.1534090909090909, + "Spanish,Vietnamese,Malay,Filipino": 0.2784090909090909, + "Spanish,Vietnamese,Malay,English": 0.14204545454545456, + "Spanish,Vietnamese,Filipino,English": 0.16477272727272727, + "Spanish,Indonesian,Malay,Filipino": 0.2727272727272727, + "Spanish,Indonesian,Malay,English": 0.19318181818181818, + "Spanish,Indonesian,Filipino,English": 0.17613636363636365, + "Spanish,Malay,Filipino,English": 0.17045454545454544, + "Chinese,Vietnamese,Indonesian,Malay": 0.24431818181818182, + "Chinese,Vietnamese,Indonesian,Filipino": 0.23863636363636365, + "Chinese,Vietnamese,Indonesian,English": 0.17613636363636365, + "Chinese,Vietnamese,Malay,Filipino": 0.24431818181818182, + "Chinese,Vietnamese,Malay,English": 0.14772727272727273, + "Chinese,Vietnamese,Filipino,English": 0.1534090909090909, + "Chinese,Indonesian,Malay,Filipino": 0.2727272727272727, + "Chinese,Indonesian,Malay,English": 0.20454545454545456, + "Chinese,Indonesian,Filipino,English": 0.19318181818181818, + "Chinese,Malay,Filipino,English": 0.17613636363636365, + "Vietnamese,Indonesian,Malay,Filipino": 0.3806818181818182, + "Vietnamese,Indonesian,Malay,English": 0.16477272727272727, + "Vietnamese,Indonesian,Filipino,English": 0.16477272727272727, + "Vietnamese,Malay,Filipino,English": 0.17613636363636365, + "Indonesian,Malay,Filipino,English": 0.19318181818181818 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.16477272727272727, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.13636363636363635, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Malay,English": 0.11931818181818182, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.125, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.16477272727272727, + "Spanish,Chinese,Indonesian,Malay,English": 0.1590909090909091, + "Spanish,Chinese,Indonesian,Filipino,English": 0.14204545454545456, + "Spanish,Chinese,Malay,Filipino,English": 0.13068181818181818, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.2215909090909091, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.13068181818181818, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.13068181818181818, + "Spanish,Vietnamese,Malay,Filipino,English": 0.125, + "Spanish,Indonesian,Malay,Filipino,English": 0.14204545454545456, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.21022727272727273, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.14204545454545456, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.13636363636363635, + "Chinese,Vietnamese,Malay,Filipino,English": 0.125, + "Chinese,Indonesian,Malay,Filipino,English": 0.1534090909090909, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.14204545454545456 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.14204545454545456, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.11931818181818182, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.11363636363636363, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.10227272727272728, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.11931818181818182, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.11363636363636363, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.11931818181818182 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.10227272727272728 + } + }, + "AC3_2": 0.3710911528276186, + "AC3_3": 0.29812142657673757, + "AC3_4": 0.2404088096376954, + "AC3_5": 0.19787124367529896, + "AC3_6": 0.16929499068274584, + "AC3_7": 0.15205517492944545 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4368932038834951 + }, + "prompt_2": { + "accuracy": 0.3300970873786408 + }, + "prompt_3": { + "accuracy": 0.3106796116504854 + }, + "prompt_4": { + "accuracy": 0.30097087378640774 + }, + "prompt_5": { + "accuracy": 0.4368932038834951 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2 + }, + "prompt_2": { + "accuracy": 0.23809523809523808 + }, + "prompt_3": { + "accuracy": 0.2857142857142857 + }, + "prompt_4": { + "accuracy": 0.24761904761904763 + }, + "prompt_5": { + "accuracy": 0.22857142857142856 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3644859813084112 + }, + "prompt_2": { + "accuracy": 0.308411214953271 + }, + "prompt_3": { + "accuracy": 0.29906542056074764 + }, + "prompt_4": { + "accuracy": 0.2897196261682243 + }, + "prompt_5": { + "accuracy": 0.3177570093457944 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.21, + "category_acc": { + "brand": 0.1, + "demographics": 0.0, + "biology": 0.3, + "history": 0.2, + "literature": 0.2, + "politics": 0.3, + "culture": 0.2, + "film": 0.3, + "law": 0.2, + "geography": 0.2 + } + }, + "prompt_2": { + "accuracy": 0.27, + "category_acc": { + "brand": 0.2, + "demographics": 0.0, + "biology": 0.3, + "history": 0.26666666666666666, + "literature": 0.1, + "politics": 0.4, + "culture": 0.4, + "film": 0.4, + "law": 0.2, + "geography": 0.3 + } + }, + "prompt_3": { + "accuracy": 0.3, + "category_acc": { + "brand": 0.0, + "demographics": 0.2, + "biology": 0.4, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.5, + "culture": 0.5, + "film": 0.3, + "law": 0.2, + "geography": 0.4 + } + }, + "prompt_4": { + "accuracy": 0.25, + "category_acc": { + "brand": 0.1, + "demographics": 0.0, + "biology": 0.1, + "history": 0.26666666666666666, + "literature": 0.0, + "politics": 0.6, + "culture": 0.4, + "film": 0.5, + "law": 0.2, + "geography": 0.2 + } + }, + "prompt_5": { + "accuracy": 0.27, + "category_acc": { + "brand": 0.1, + "demographics": 0.0, + "biology": 0.2, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.3, + "culture": 0.5, + "film": 0.4, + "law": 0.3, + "geography": 0.3 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.13444666259992166 + }, + "prompt_2": { + "bleu_score": 0.10881707423553691 + }, + "prompt_3": { + "bleu_score": 0.07602585561725173 + }, + "prompt_4": { + "bleu_score": 0.10747774441127018 + }, + "prompt_5": { + "bleu_score": 0.04960135353094971 + } }, "indommlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.29574737966486414, + "category_acc": { + "History": 0.2791164658634538, + "Geography": 0.23265306122448978, + "Lampungic": 0.29931972789115646, + "Social science": 0.34557595993322204, + "Balinese": 0.3078556263269639, + "Makassarese": 0.3010752688172043, + "Banjarese": 0.3402777777777778, + "Chemistry": 0.2364963503649635, + "Biology": 0.27218934911242604, + "Science": 0.3065015479876161, + "Christian religion": 0.3034825870646766, + "Art": 0.30282861896838603, + "Islam religion": 0.31436699857752487, + "Hindu religion": 0.30666666666666664, + "Madurese": 0.3220338983050847, + "Sport": 0.33783783783783783, + "Indonesian language": 0.3103985056039851, + "Physics": 0.25252525252525254, + "Minangkabau culture": 0.2914572864321608, + "Dayak language": 0.30275229357798167, + "Sociology": 0.2842741935483871, + "Economy": 0.23770491803278687, + "Sundanese": 0.30337078651685395, + "Javanese": 0.2862903225806452, + "Civic education": 0.32474964234620884 + } + }, + "prompt_2": { + "accuracy": 0.27705454302690435, + "category_acc": { + "History": 0.25100401606425704, + "Geography": 0.21836734693877552, + "Lampungic": 0.30612244897959184, + "Social science": 0.34056761268781305, + "Balinese": 0.29723991507430997, + "Makassarese": 0.3225806451612903, + "Banjarese": 0.2916666666666667, + "Chemistry": 0.2291970802919708, + "Biology": 0.24378698224852072, + "Science": 0.27450980392156865, + "Christian religion": 0.2835820895522388, + "Art": 0.2778702163061564, + "Islam religion": 0.28733997155049784, + "Hindu religion": 0.28, + "Madurese": 0.2847457627118644, + "Sport": 0.3783783783783784, + "Indonesian language": 0.29950186799501866, + "Physics": 0.21616161616161617, + "Minangkabau culture": 0.25125628140703515, + "Dayak language": 0.22935779816513763, + "Sociology": 0.2318548387096774, + "Economy": 0.22131147540983606, + "Sundanese": 0.29472774416594644, + "Javanese": 0.2772177419354839, + "Civic education": 0.296137339055794 + } + }, + "prompt_3": { + "accuracy": 0.2833967554576407, + "category_acc": { + "History": 0.28714859437751006, + "Geography": 0.24897959183673468, + "Lampungic": 0.30612244897959184, + "Social science": 0.335559265442404, + "Balinese": 0.28874734607218683, + "Makassarese": 0.3225806451612903, + "Banjarese": 0.3402777777777778, + "Chemistry": 0.22335766423357664, + "Biology": 0.26153846153846155, + "Science": 0.2714138286893705, + "Christian religion": 0.24378109452736318, + "Art": 0.26788685524126454, + "Islam religion": 0.28733997155049784, + "Hindu religion": 0.24666666666666667, + "Madurese": 0.2847457627118644, + "Sport": 0.32432432432432434, + "Indonesian language": 0.3107098381070984, + "Physics": 0.22828282828282828, + "Minangkabau culture": 0.2562814070351759, + "Dayak language": 0.22935779816513763, + "Sociology": 0.24798387096774194, + "Economy": 0.21721311475409835, + "Sundanese": 0.30423509075194466, + "Javanese": 0.2782258064516129, + "Civic education": 0.32474964234620884 + } + }, + "prompt_4": { + "accuracy": 0.2908071299819748, + "category_acc": { + "History": 0.30120481927710846, + "Geography": 0.25918367346938775, + "Lampungic": 0.29931972789115646, + "Social science": 0.35225375626043404, + "Balinese": 0.3227176220806794, + "Makassarese": 0.3064516129032258, + "Banjarese": 0.3263888888888889, + "Chemistry": 0.23065693430656933, + "Biology": 0.26153846153846155, + "Science": 0.27450980392156865, + "Christian religion": 0.34328358208955223, + "Art": 0.2961730449251248, + "Islam religion": 0.3129445234708393, + "Hindu religion": 0.31333333333333335, + "Madurese": 0.28135593220338984, + "Sport": 0.3310810810810811, + "Indonesian language": 0.3010585305105853, + "Physics": 0.24646464646464647, + "Minangkabau culture": 0.2864321608040201, + "Dayak language": 0.25688073394495414, + "Sociology": 0.28225806451612906, + "Economy": 0.24180327868852458, + "Sundanese": 0.29213483146067415, + "Javanese": 0.28125, + "Civic education": 0.3261802575107296 + } + }, + "prompt_5": { + "accuracy": 0.2861339208224848, + "category_acc": { + "History": 0.24899598393574296, + "Geography": 0.23469387755102042, + "Lampungic": 0.3197278911564626, + "Social science": 0.35392320534223703, + "Balinese": 0.3057324840764331, + "Makassarese": 0.3064516129032258, + "Banjarese": 0.2708333333333333, + "Chemistry": 0.24087591240875914, + "Biology": 0.26745562130177514, + "Science": 0.2961816305469556, + "Christian religion": 0.3383084577114428, + "Art": 0.2911813643926789, + "Islam religion": 0.28733997155049784, + "Hindu religion": 0.3333333333333333, + "Madurese": 0.2847457627118644, + "Sport": 0.33783783783783783, + "Indonesian language": 0.3116438356164384, + "Physics": 0.24848484848484848, + "Minangkabau culture": 0.27638190954773867, + "Dayak language": 0.27522935779816515, + "Sociology": 0.2620967741935484, + "Economy": 0.20491803278688525, + "Sundanese": 0.2929991356957649, + "Javanese": 0.26814516129032256, + "Civic education": 0.28183118741058655 + } + } }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.24466587100169057 + }, + "prompt_2": { + "bleu_score": 0.08501608363069053 + }, + "prompt_3": { + "bleu_score": 0.12303144338815364 + }, + "prompt_4": { + "bleu_score": 0.06867174863637551 + }, + "prompt_5": { + "bleu_score": 0.08939676338672865 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.1667920055400222 + }, + "prompt_2": { + "bleu_score": 0.10244283792447458 + }, + "prompt_3": { + "bleu_score": 0.18346059882564225 + }, + "prompt_4": { + "bleu_score": 0.053284045320169914 + }, + "prompt_5": { + "bleu_score": 0.08388357106350783 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.07362845589698015 + }, + "prompt_2": { + "bleu_score": 0.06935691300850896 + }, + "prompt_3": { + "bleu_score": 0.07149693910765932 + }, + "prompt_4": { + "bleu_score": 0.05131044525837725 + }, + "prompt_5": { + "bleu_score": 0.056164865879979924 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.11021772578152905 + }, + "prompt_2": { + "bleu_score": 0.09813002522565224 + }, + "prompt_3": { + "bleu_score": 0.1178949163145004 + }, + "prompt_4": { + "bleu_score": 0.06753740480974892 + }, + "prompt_5": { + "bleu_score": 0.08004736830282513 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.367561260210035 + }, + "prompt_2": { + "accuracy": 0.3652275379229872 + }, + "prompt_3": { + "accuracy": 0.35122520420070014 + }, + "prompt_4": { + "accuracy": 0.3372228704784131 + }, + "prompt_5": { + "accuracy": 0.35822637106184363 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.35645334286735786, + "category_acc": { + "high_school_european_history": 0.3780487804878049, + "business_ethics": 0.37373737373737376, + "clinical_knowledge": 0.35984848484848486, + "medical_genetics": 0.40404040404040403, + "high_school_us_history": 0.3842364532019704, + "high_school_physics": 0.24, + "high_school_world_history": 0.3983050847457627, + "virology": 0.40606060606060607, + "high_school_microeconomics": 0.2869198312236287, + "econometrics": 0.2743362831858407, + "college_computer_science": 0.20202020202020202, + "high_school_biology": 0.37216828478964403, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.2918149466192171, + "philosophy": 0.38387096774193546, + "professional_medicine": 0.22878228782287824, + "nutrition": 0.43278688524590164, + "global_facts": 0.32323232323232326, + "machine_learning": 0.36936936936936937, + "security_studies": 0.44672131147540983, + "public_relations": 0.3486238532110092, + "professional_psychology": 0.36661211129296234, + "prehistory": 0.4117647058823529, + "anatomy": 0.39552238805970147, + "human_sexuality": 0.3923076923076923, + "college_medicine": 0.28488372093023256, + "high_school_government_and_politics": 0.4322916666666667, + "college_chemistry": 0.24242424242424243, + "logical_fallacies": 0.37037037037037035, + "high_school_geography": 0.3604060913705584, + "elementary_mathematics": 0.2864721485411141, + "human_aging": 0.3918918918918919, + "college_mathematics": 0.29292929292929293, + "high_school_psychology": 0.39338235294117646, + "formal_logic": 0.2, + "high_school_statistics": 0.22790697674418606, + "international_law": 0.5083333333333333, + "high_school_mathematics": 0.26765799256505574, + "high_school_computer_science": 0.41414141414141414, + "conceptual_physics": 0.3717948717948718, + "miscellaneous": 0.45012787723785164, + "high_school_chemistry": 0.3465346534653465, + "marketing": 0.5622317596566524, + "professional_law": 0.3333333333333333, + "management": 0.3333333333333333, + "college_physics": 0.21782178217821782, + "jurisprudence": 0.42990654205607476, + "world_religions": 0.5058823529411764, + "sociology": 0.385, + "us_foreign_policy": 0.5454545454545454, + "high_school_macroeconomics": 0.35218508997429304, + "computer_security": 0.5050505050505051, + "moral_scenarios": 0.2371364653243848, + "moral_disputes": 0.3681159420289855, + "electrical_engineering": 0.4166666666666667, + "astronomy": 0.3443708609271523, + "college_biology": 0.3706293706293706 + } + }, + "prompt_2": { + "accuracy": 0.36539149088308903, + "category_acc": { + "high_school_european_history": 0.4024390243902439, + "business_ethics": 0.40404040404040403, + "clinical_knowledge": 0.3712121212121212, + "medical_genetics": 0.41414141414141414, + "high_school_us_history": 0.4039408866995074, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.4110169491525424, + "virology": 0.3515151515151515, + "high_school_microeconomics": 0.31223628691983124, + "econometrics": 0.2920353982300885, + "college_computer_science": 0.20202020202020202, + "high_school_biology": 0.4110032362459547, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.2846975088967972, + "philosophy": 0.3903225806451613, + "professional_medicine": 0.23616236162361623, + "nutrition": 0.39344262295081966, + "global_facts": 0.31313131313131315, + "machine_learning": 0.2972972972972973, + "security_studies": 0.44672131147540983, + "public_relations": 0.3853211009174312, + "professional_psychology": 0.37479541734860883, + "prehistory": 0.4179566563467492, + "anatomy": 0.4253731343283582, + "human_sexuality": 0.36923076923076925, + "college_medicine": 0.3023255813953488, + "high_school_government_and_politics": 0.4114583333333333, + "college_chemistry": 0.20202020202020202, + "logical_fallacies": 0.38271604938271603, + "high_school_geography": 0.39086294416243655, + "elementary_mathematics": 0.28116710875331563, + "human_aging": 0.4369369369369369, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.4264705882352941, + "formal_logic": 0.24, + "high_school_statistics": 0.2558139534883721, + "international_law": 0.43333333333333335, + "high_school_mathematics": 0.26022304832713755, + "high_school_computer_science": 0.4444444444444444, + "conceptual_physics": 0.3547008547008547, + "miscellaneous": 0.49104859335038364, + "high_school_chemistry": 0.32673267326732675, + "marketing": 0.5836909871244635, + "professional_law": 0.32289628180039137, + "management": 0.38235294117647056, + "college_physics": 0.27722772277227725, + "jurisprudence": 0.38317757009345793, + "world_religions": 0.5058823529411764, + "sociology": 0.49, + "us_foreign_policy": 0.5959595959595959, + "high_school_macroeconomics": 0.3444730077120823, + "computer_security": 0.46464646464646464, + "moral_scenarios": 0.25279642058165547, + "moral_disputes": 0.4028985507246377, + "electrical_engineering": 0.4861111111111111, + "astronomy": 0.33774834437086093, + "college_biology": 0.34965034965034963 + } + }, + "prompt_3": { + "accuracy": 0.35774043618162316, + "category_acc": { + "high_school_european_history": 0.4451219512195122, + "business_ethics": 0.3333333333333333, + "clinical_knowledge": 0.3446969696969697, + "medical_genetics": 0.3434343434343434, + "high_school_us_history": 0.4088669950738916, + "high_school_physics": 0.26, + "high_school_world_history": 0.3728813559322034, + "virology": 0.3575757575757576, + "high_school_microeconomics": 0.3291139240506329, + "econometrics": 0.2743362831858407, + "college_computer_science": 0.23232323232323232, + "high_school_biology": 0.3851132686084142, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.3238434163701068, + "philosophy": 0.3935483870967742, + "professional_medicine": 0.2767527675276753, + "nutrition": 0.40327868852459015, + "global_facts": 0.3333333333333333, + "machine_learning": 0.36036036036036034, + "security_studies": 0.45491803278688525, + "public_relations": 0.3669724770642202, + "professional_psychology": 0.36824877250409166, + "prehistory": 0.4148606811145511, + "anatomy": 0.4253731343283582, + "human_sexuality": 0.36153846153846153, + "college_medicine": 0.3488372093023256, + "high_school_government_and_politics": 0.3802083333333333, + "college_chemistry": 0.25252525252525254, + "logical_fallacies": 0.4074074074074074, + "high_school_geography": 0.3096446700507614, + "elementary_mathematics": 0.30238726790450926, + "human_aging": 0.4099099099099099, + "college_mathematics": 0.24242424242424243, + "high_school_psychology": 0.39705882352941174, + "formal_logic": 0.2, + "high_school_statistics": 0.25116279069767444, + "international_law": 0.48333333333333334, + "high_school_mathematics": 0.2862453531598513, + "high_school_computer_science": 0.40404040404040403, + "conceptual_physics": 0.3974358974358974, + "miscellaneous": 0.45524296675191817, + "high_school_chemistry": 0.3613861386138614, + "marketing": 0.48497854077253216, + "professional_law": 0.31637312459230266, + "management": 0.3431372549019608, + "college_physics": 0.297029702970297, + "jurisprudence": 0.3925233644859813, + "world_religions": 0.49411764705882355, + "sociology": 0.455, + "us_foreign_policy": 0.45454545454545453, + "high_school_macroeconomics": 0.3444730077120823, + "computer_security": 0.494949494949495, + "moral_scenarios": 0.2371364653243848, + "moral_disputes": 0.37681159420289856, + "electrical_engineering": 0.4444444444444444, + "astronomy": 0.33112582781456956, + "college_biology": 0.36363636363636365 + } + }, + "prompt_4": { + "accuracy": 0.34944583482302466, + "category_acc": { + "high_school_european_history": 0.40853658536585363, + "business_ethics": 0.32323232323232326, + "clinical_knowledge": 0.375, + "medical_genetics": 0.3434343434343434, + "high_school_us_history": 0.4039408866995074, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.4152542372881356, + "virology": 0.3696969696969697, + "high_school_microeconomics": 0.31223628691983124, + "econometrics": 0.3274336283185841, + "college_computer_science": 0.18181818181818182, + "high_school_biology": 0.4045307443365696, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.28113879003558717, + "philosophy": 0.41935483870967744, + "professional_medicine": 0.23985239852398524, + "nutrition": 0.39344262295081966, + "global_facts": 0.30303030303030304, + "machine_learning": 0.3333333333333333, + "security_studies": 0.4672131147540984, + "public_relations": 0.3669724770642202, + "professional_psychology": 0.353518821603928, + "prehistory": 0.38699690402476783, + "anatomy": 0.373134328358209, + "human_sexuality": 0.3230769230769231, + "college_medicine": 0.3081395348837209, + "high_school_government_and_politics": 0.40625, + "college_chemistry": 0.23232323232323232, + "logical_fallacies": 0.3888888888888889, + "high_school_geography": 0.29441624365482233, + "elementary_mathematics": 0.27055702917771884, + "human_aging": 0.38738738738738737, + "college_mathematics": 0.25252525252525254, + "high_school_psychology": 0.38786764705882354, + "formal_logic": 0.24, + "high_school_statistics": 0.2744186046511628, + "international_law": 0.475, + "high_school_mathematics": 0.275092936802974, + "high_school_computer_science": 0.45454545454545453, + "conceptual_physics": 0.33760683760683763, + "miscellaneous": 0.4117647058823529, + "high_school_chemistry": 0.39603960396039606, + "marketing": 0.4892703862660944, + "professional_law": 0.3242009132420091, + "management": 0.3431372549019608, + "college_physics": 0.2079207920792079, + "jurisprudence": 0.40186915887850466, + "world_religions": 0.48823529411764705, + "sociology": 0.385, + "us_foreign_policy": 0.494949494949495, + "high_school_macroeconomics": 0.3213367609254499, + "computer_security": 0.48484848484848486, + "moral_scenarios": 0.24272930648769575, + "moral_disputes": 0.3072463768115942, + "electrical_engineering": 0.4652777777777778, + "astronomy": 0.3509933774834437, + "college_biology": 0.3986013986013986 + } + }, + "prompt_5": { + "accuracy": 0.36396138720057203, + "category_acc": { + "high_school_european_history": 0.43902439024390244, + "business_ethics": 0.32323232323232326, + "clinical_knowledge": 0.39015151515151514, + "medical_genetics": 0.36363636363636365, + "high_school_us_history": 0.3891625615763547, + "high_school_physics": 0.26666666666666666, + "high_school_world_history": 0.4491525423728814, + "virology": 0.3575757575757576, + "high_school_microeconomics": 0.3291139240506329, + "econometrics": 0.26548672566371684, + "college_computer_science": 0.20202020202020202, + "high_school_biology": 0.39158576051779936, + "abstract_algebra": 0.3333333333333333, + "professional_accounting": 0.2918149466192171, + "philosophy": 0.4129032258064516, + "professional_medicine": 0.25461254612546125, + "nutrition": 0.4131147540983607, + "global_facts": 0.29292929292929293, + "machine_learning": 0.32432432432432434, + "security_studies": 0.4713114754098361, + "public_relations": 0.41284403669724773, + "professional_psychology": 0.3567921440261866, + "prehistory": 0.4117647058823529, + "anatomy": 0.4626865671641791, + "human_sexuality": 0.38461538461538464, + "college_medicine": 0.3023255813953488, + "high_school_government_and_politics": 0.4270833333333333, + "college_chemistry": 0.25252525252525254, + "logical_fallacies": 0.37037037037037035, + "high_school_geography": 0.3553299492385787, + "elementary_mathematics": 0.29973474801061006, + "human_aging": 0.3918918918918919, + "college_mathematics": 0.29292929292929293, + "high_school_psychology": 0.4338235294117647, + "formal_logic": 0.24, + "high_school_statistics": 0.2744186046511628, + "international_law": 0.475, + "high_school_mathematics": 0.275092936802974, + "high_school_computer_science": 0.45454545454545453, + "conceptual_physics": 0.36324786324786323, + "miscellaneous": 0.4616368286445013, + "high_school_chemistry": 0.38613861386138615, + "marketing": 0.5407725321888412, + "professional_law": 0.319634703196347, + "management": 0.37254901960784315, + "college_physics": 0.27722772277227725, + "jurisprudence": 0.3644859813084112, + "world_religions": 0.5588235294117647, + "sociology": 0.43, + "us_foreign_policy": 0.5252525252525253, + "high_school_macroeconomics": 0.3264781491002571, + "computer_security": 0.494949494949495, + "moral_scenarios": 0.24272930648769575, + "moral_disputes": 0.3652173913043478, + "electrical_engineering": 0.4305555555555556, + "astronomy": 0.40397350993377484, + "college_biology": 0.34265734265734266 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.29420505200594355 + }, + "prompt_2": { + "accuracy": 0.2674591381872214 + }, + "prompt_3": { + "accuracy": 0.287518573551263 + }, + "prompt_4": { + "accuracy": 0.3031203566121842 + }, + "prompt_5": { + "accuracy": 0.2511144130757801 + } }, "c_eval_full": { "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_2": { + "accuracy": 0.2671232876712329, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.2619047619047619, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.13793103448275862, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.14285714285714285, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.25, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.16666666666666666, + "business_administration": 0.23684210526315788, + "marxism": 0.375, + "mao_zedong_thought": 0.2413793103448276, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.32653061224489793, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.19230769230769232, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.125, + "logic": 0.2222222222222222, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.39285714285714285, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.23529411764705882, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.375, + "high_school_history": 0.12, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.34615384615384615, + "sports_science": 0.2916666666666667, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.2549019607843137, + "accountant": 0.2777777777777778, + "fire_engineer": 0.19444444444444445, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.24074074074074073, + "physician": 0.3148148148148148 + } + }, + "prompt_3": { + "accuracy": 0.27895392278953923, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.2857142857142857, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.1724137931034483, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.08695652173913043, + "high_school_physics": 0.25, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.4230769230769231, + "middle_school_physics": 0.25, + "middle_school_chemistry": 0.12, + "veterinary_medicine": 0.25, + "college_economics": 0.31666666666666665, + "business_administration": 0.34210526315789475, + "marxism": 0.20833333333333334, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.3235294117647059, + "teacher_qualification": 0.3469387755102041, + "high_school_politics": 0.5416666666666666, + "high_school_geography": 0.16666666666666666, + "middle_school_politics": 0.11538461538461539, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.2916666666666667, + "logic": 0.25925925925925924, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.2894736842105263, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.32, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.28846153846153844, + "sports_science": 0.375, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.27450980392156865, + "accountant": 0.3333333333333333, + "fire_engineer": 0.16666666666666666, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.16666666666666666, + "physician": 0.2222222222222222 + } + }, + "prompt_4": { + "accuracy": 0.28019925280199254, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.5, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.23809523809523808, + "college_physics": 0.4166666666666667, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.4166666666666667, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.21428571428571427, + "metrology_engineer": 0.1724137931034483, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.041666666666666664, + "middle_school_biology": 0.19230769230769232, + "middle_school_physics": 0.25, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.3, + "business_administration": 0.23684210526315788, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.3793103448275862, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.32653061224489793, + "high_school_politics": 0.25, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.2692307692307692, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.10714285714285714, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.25925925925925924, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.25, + "high_school_history": 0.44, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.28846153846153844, + "sports_science": 0.20833333333333334, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.375, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.3137254901960784, + "accountant": 0.2777777777777778, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.14814814814814814, + "physician": 0.25925925925925924 + } + }, + "prompt_5": { + "accuracy": 0.25965130759651306, + "category_acc": { + "computer_network": 0.16666666666666666, + "operating_system": 0.25, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.21428571428571427, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.1724137931034483, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.3103448275862069, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.125, + "high_school_chemistry": 0.125, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.25, + "college_economics": 0.36666666666666664, + "business_administration": 0.34210526315789475, + "marxism": 0.375, + "mao_zedong_thought": 0.3793103448275862, + "education_science": 0.35294117647058826, + "teacher_qualification": 0.20408163265306123, + "high_school_politics": 0.375, + "high_school_geography": 0.125, + "middle_school_politics": 0.19230769230769232, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.125, + "logic": 0.25925925925925924, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.25, + "art_studies": 0.21052631578947367, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.4, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.2692307692307692, + "sports_science": 0.3333333333333333, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.20833333333333334, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.1568627450980392, + "accountant": 0.35185185185185186, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.1111111111111111, + "physician": 0.16666666666666666 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.23297491039426524 + }, + "prompt_2": { + "accuracy": 0.25448028673835127 + }, + "prompt_3": { + "accuracy": 0.25448028673835127 + }, + "prompt_4": { + "accuracy": 0.3118279569892473 + }, + "prompt_5": { + "accuracy": 0.2974910394265233 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2617855292695562, + "category_acc": { + "agronomy": 0.2485207100591716, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.21951219512195122, + "arts": 0.25, + "astronomy": 0.24242424242424243, + "business_ethics": 0.2535885167464115, + "chinese_civil_service_exam": 0.2125, + "chinese_driving_rule": 0.2595419847328244, + "chinese_food_culture": 0.23529411764705882, + "chinese_foreign_policy": 0.2897196261682243, + "chinese_history": 0.2631578947368421, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.2681564245810056, + "clinical_knowledge": 0.20675105485232068, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.29906542056074764, + "college_engineering_hydrology": 0.2358490566037736, + "college_law": 0.2037037037037037, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.2830188679245283, + "college_medicine": 0.2857142857142857, + "computer_science": 0.30392156862745096, + "computer_security": 0.21052631578947367, + "conceptual_physics": 0.2789115646258503, + "construction_project_management": 0.33093525179856115, + "economics": 0.2641509433962264, + "education": 0.2883435582822086, + "electrical_engineering": 0.29069767441860467, + "elementary_chinese": 0.21428571428571427, + "elementary_commonsense": 0.2676767676767677, + "elementary_information_and_technology": 0.23109243697478993, + "elementary_mathematics": 0.2565217391304348, + "ethnology": 0.26666666666666666, + "food_science": 0.2867132867132867, + "genetics": 0.2556818181818182, + "global_facts": 0.28859060402684567, + "high_school_biology": 0.26627218934911245, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.211864406779661, + "high_school_mathematics": 0.20121951219512196, + "high_school_physics": 0.3090909090909091, + "high_school_politics": 0.23076923076923078, + "human_sexuality": 0.2777777777777778, + "international_law": 0.2972972972972973, + "journalism": 0.31976744186046513, + "jurisprudence": 0.2360097323600973, + "legal_and_moral_basis": 0.3037383177570093, + "logical": 0.2682926829268293, + "machine_learning": 0.2786885245901639, + "management": 0.23333333333333334, + "marketing": 0.25, + "marxist_theory": 0.2751322751322751, + "modern_chinese": 0.19827586206896552, + "nutrition": 0.25517241379310346, + "philosophy": 0.3333333333333333, + "professional_accounting": 0.25142857142857145, + "professional_law": 0.23696682464454977, + "professional_medicine": 0.27925531914893614, + "professional_psychology": 0.27155172413793105, + "public_relations": 0.29310344827586204, + "security_study": 0.2814814814814815, + "sociology": 0.252212389380531, + "sports_science": 0.2787878787878788, + "traditional_chinese_medicine": 0.2648648648648649, + "virology": 0.28994082840236685, + "world_history": 0.2795031055900621, + "world_religions": 0.25625 + } + }, + "prompt_2": { + "accuracy": 0.2583318943187705, + "category_acc": { + "agronomy": 0.2603550295857988, + "anatomy": 0.30405405405405406, + "ancient_chinese": 0.21951219512195122, + "arts": 0.2375, + "astronomy": 0.24242424242424243, + "business_ethics": 0.3014354066985646, + "chinese_civil_service_exam": 0.2625, + "chinese_driving_rule": 0.31297709923664124, + "chinese_food_culture": 0.25, + "chinese_foreign_policy": 0.2336448598130841, + "chinese_history": 0.25386996904024767, + "chinese_literature": 0.23529411764705882, + "chinese_teacher_qualification": 0.24022346368715083, + "clinical_knowledge": 0.27848101265822783, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.1588785046728972, + "college_engineering_hydrology": 0.29245283018867924, + "college_law": 0.25925925925925924, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.25471698113207547, + "college_medicine": 0.2490842490842491, + "computer_science": 0.27941176470588236, + "computer_security": 0.2573099415204678, + "conceptual_physics": 0.2925170068027211, + "construction_project_management": 0.2446043165467626, + "economics": 0.3081761006289308, + "education": 0.22699386503067484, + "electrical_engineering": 0.27906976744186046, + "elementary_chinese": 0.23015873015873015, + "elementary_commonsense": 0.2878787878787879, + "elementary_information_and_technology": 0.23949579831932774, + "elementary_mathematics": 0.23043478260869565, + "ethnology": 0.24444444444444444, + "food_science": 0.3076923076923077, + "genetics": 0.26136363636363635, + "global_facts": 0.3087248322147651, + "high_school_biology": 0.2485207100591716, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.15254237288135594, + "high_school_mathematics": 0.25, + "high_school_physics": 0.22727272727272727, + "high_school_politics": 0.3076923076923077, + "human_sexuality": 0.2698412698412698, + "international_law": 0.2810810810810811, + "journalism": 0.25, + "jurisprudence": 0.22871046228710462, + "legal_and_moral_basis": 0.24766355140186916, + "logical": 0.2601626016260163, + "machine_learning": 0.319672131147541, + "management": 0.2571428571428571, + "marketing": 0.2722222222222222, + "marxist_theory": 0.2962962962962963, + "modern_chinese": 0.25, + "nutrition": 0.25517241379310346, + "philosophy": 0.2857142857142857, + "professional_accounting": 0.2857142857142857, + "professional_law": 0.2559241706161137, + "professional_medicine": 0.2712765957446808, + "professional_psychology": 0.2543103448275862, + "public_relations": 0.22988505747126436, + "security_study": 0.21481481481481482, + "sociology": 0.2168141592920354, + "sports_science": 0.30303030303030304, + "traditional_chinese_medicine": 0.21081081081081082, + "virology": 0.26627218934911245, + "world_history": 0.32298136645962733, + "world_religions": 0.25625 + } + }, + "prompt_3": { + "accuracy": 0.26817475392850976, + "category_acc": { + "agronomy": 0.27218934911242604, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.27439024390243905, + "arts": 0.25, + "astronomy": 0.22424242424242424, + "business_ethics": 0.2727272727272727, + "chinese_civil_service_exam": 0.24375, + "chinese_driving_rule": 0.3969465648854962, + "chinese_food_culture": 0.27941176470588236, + "chinese_foreign_policy": 0.2523364485981308, + "chinese_history": 0.26625386996904027, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.26256983240223464, + "clinical_knowledge": 0.20675105485232068, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.2523364485981308, + "college_engineering_hydrology": 0.2358490566037736, + "college_law": 0.2962962962962963, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.2641509433962264, + "college_medicine": 0.28205128205128205, + "computer_science": 0.29901960784313725, + "computer_security": 0.22807017543859648, + "conceptual_physics": 0.3129251700680272, + "construction_project_management": 0.2949640287769784, + "economics": 0.3270440251572327, + "education": 0.20245398773006135, + "electrical_engineering": 0.28488372093023256, + "elementary_chinese": 0.24603174603174602, + "elementary_commonsense": 0.2474747474747475, + "elementary_information_and_technology": 0.2773109243697479, + "elementary_mathematics": 0.24347826086956523, + "ethnology": 0.2740740740740741, + "food_science": 0.34265734265734266, + "genetics": 0.21022727272727273, + "global_facts": 0.2684563758389262, + "high_school_biology": 0.27218934911242604, + "high_school_chemistry": 0.2196969696969697, + "high_school_geography": 0.2627118644067797, + "high_school_mathematics": 0.24390243902439024, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.3146853146853147, + "human_sexuality": 0.30158730158730157, + "international_law": 0.2918918918918919, + "journalism": 0.2558139534883721, + "jurisprudence": 0.22384428223844283, + "legal_and_moral_basis": 0.2523364485981308, + "logical": 0.2682926829268293, + "machine_learning": 0.29508196721311475, + "management": 0.2714285714285714, + "marketing": 0.23333333333333334, + "marxist_theory": 0.328042328042328, + "modern_chinese": 0.23275862068965517, + "nutrition": 0.2896551724137931, + "philosophy": 0.3142857142857143, + "professional_accounting": 0.3142857142857143, + "professional_law": 0.25118483412322273, + "professional_medicine": 0.30851063829787234, + "professional_psychology": 0.28879310344827586, + "public_relations": 0.27586206896551724, + "security_study": 0.1925925925925926, + "sociology": 0.2743362831858407, + "sports_science": 0.26666666666666666, + "traditional_chinese_medicine": 0.2756756756756757, + "virology": 0.26627218934911245, + "world_history": 0.2546583850931677, + "world_religions": 0.2875 + } + }, + "prompt_4": { + "accuracy": 0.27689518217924364, + "category_acc": { + "agronomy": 0.22485207100591717, + "anatomy": 0.28378378378378377, + "ancient_chinese": 0.23780487804878048, + "arts": 0.23125, + "astronomy": 0.24242424242424243, + "business_ethics": 0.3062200956937799, + "chinese_civil_service_exam": 0.24375, + "chinese_driving_rule": 0.3053435114503817, + "chinese_food_culture": 0.22794117647058823, + "chinese_foreign_policy": 0.2523364485981308, + "chinese_history": 0.30030959752321984, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.25139664804469275, + "clinical_knowledge": 0.2320675105485232, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.308411214953271, + "college_engineering_hydrology": 0.2641509433962264, + "college_law": 0.2777777777777778, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.330188679245283, + "college_medicine": 0.28205128205128205, + "computer_science": 0.31862745098039214, + "computer_security": 0.23391812865497075, + "conceptual_physics": 0.2925170068027211, + "construction_project_management": 0.26618705035971224, + "economics": 0.3270440251572327, + "education": 0.3558282208588957, + "electrical_engineering": 0.32558139534883723, + "elementary_chinese": 0.2222222222222222, + "elementary_commonsense": 0.2676767676767677, + "elementary_information_and_technology": 0.3025210084033613, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.2518518518518518, + "food_science": 0.3006993006993007, + "genetics": 0.2727272727272727, + "global_facts": 0.31543624161073824, + "high_school_biology": 0.26627218934911245, + "high_school_chemistry": 0.21212121212121213, + "high_school_geography": 0.2542372881355932, + "high_school_mathematics": 0.21341463414634146, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.3006993006993007, + "human_sexuality": 0.25396825396825395, + "international_law": 0.2756756756756757, + "journalism": 0.29069767441860467, + "jurisprudence": 0.24330900243309003, + "legal_and_moral_basis": 0.37850467289719625, + "logical": 0.2764227642276423, + "machine_learning": 0.26229508196721313, + "management": 0.2857142857142857, + "marketing": 0.32222222222222224, + "marxist_theory": 0.291005291005291, + "modern_chinese": 0.28448275862068967, + "nutrition": 0.3103448275862069, + "philosophy": 0.34285714285714286, + "professional_accounting": 0.28, + "professional_law": 0.27488151658767773, + "professional_medicine": 0.2632978723404255, + "professional_psychology": 0.2974137931034483, + "public_relations": 0.3390804597701149, + "security_study": 0.2740740740740741, + "sociology": 0.252212389380531, + "sports_science": 0.2787878787878788, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.2958579881656805, + "world_history": 0.2670807453416149, + "world_religions": 0.23125 + } + }, + "prompt_5": { + "accuracy": 0.2729235019858401, + "category_acc": { + "agronomy": 0.26627218934911245, + "anatomy": 0.22972972972972974, + "ancient_chinese": 0.23170731707317074, + "arts": 0.24375, + "astronomy": 0.23636363636363636, + "business_ethics": 0.2822966507177033, + "chinese_civil_service_exam": 0.225, + "chinese_driving_rule": 0.3435114503816794, + "chinese_food_culture": 0.22058823529411764, + "chinese_foreign_policy": 0.24299065420560748, + "chinese_history": 0.29102167182662536, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.2681564245810056, + "clinical_knowledge": 0.23628691983122363, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.21495327102803738, + "college_engineering_hydrology": 0.2169811320754717, + "college_law": 0.21296296296296297, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.25471698113207547, + "college_medicine": 0.326007326007326, + "computer_science": 0.3284313725490196, + "computer_security": 0.22807017543859648, + "conceptual_physics": 0.2925170068027211, + "construction_project_management": 0.2949640287769784, + "economics": 0.2578616352201258, + "education": 0.24539877300613497, + "electrical_engineering": 0.2616279069767442, + "elementary_chinese": 0.24603174603174602, + "elementary_commonsense": 0.23232323232323232, + "elementary_information_and_technology": 0.22268907563025211, + "elementary_mathematics": 0.29130434782608694, + "ethnology": 0.23703703703703705, + "food_science": 0.27972027972027974, + "genetics": 0.2897727272727273, + "global_facts": 0.28187919463087246, + "high_school_biology": 0.3136094674556213, + "high_school_chemistry": 0.26515151515151514, + "high_school_geography": 0.2711864406779661, + "high_school_mathematics": 0.23780487804878048, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.2937062937062937, + "human_sexuality": 0.2619047619047619, + "international_law": 0.34054054054054056, + "journalism": 0.27906976744186046, + "jurisprudence": 0.26763990267639903, + "legal_and_moral_basis": 0.35514018691588783, + "logical": 0.3008130081300813, + "machine_learning": 0.28688524590163933, + "management": 0.21428571428571427, + "marketing": 0.28888888888888886, + "marxist_theory": 0.30687830687830686, + "modern_chinese": 0.22413793103448276, + "nutrition": 0.27586206896551724, + "philosophy": 0.3904761904761905, + "professional_accounting": 0.28, + "professional_law": 0.27014218009478674, + "professional_medicine": 0.28191489361702127, + "professional_psychology": 0.3017241379310345, + "public_relations": 0.26436781609195403, + "security_study": 0.3111111111111111, + "sociology": 0.26991150442477874, + "sports_science": 0.28484848484848485, + "traditional_chinese_medicine": 0.25405405405405407, + "virology": 0.3254437869822485, + "world_history": 0.2919254658385093, + "world_religions": 0.2625 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24242424242424243 + }, + "prompt_2": { + "accuracy": 0.18181818181818182 + }, + "prompt_3": { + "accuracy": 0.15151515151515152 + }, + "prompt_4": { + "accuracy": 0.24242424242424243 + }, + "prompt_5": { + "accuracy": 0.21212121212121213 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.23863636363636365 + }, + "prompt_2": { + "accuracy": 0.14545454545454545 + }, + "prompt_3": { + "accuracy": 0.17272727272727273 + }, + "prompt_4": { + "accuracy": 0.24772727272727274 + }, + "prompt_5": { + "accuracy": 0.2545454545454545 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3149152542372881 + }, + "prompt_2": { + "accuracy": 0.31864406779661014 + }, + "prompt_3": { + "accuracy": 0.30847457627118646 + }, + "prompt_4": { + "accuracy": 0.30135593220338985 + }, + "prompt_5": { + "accuracy": 0.31559322033898307 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3582647718773373 + }, + "prompt_2": { + "accuracy": 0.36200448765893795 + }, + "prompt_3": { + "accuracy": 0.3556469708302169 + }, + "prompt_4": { + "accuracy": 0.3582647718773373 + }, + "prompt_5": { + "accuracy": 0.3631264023934181 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49044585987261147 + }, + "prompt_2": { + "accuracy": 0.4679078882900539 + }, + "prompt_3": { + "accuracy": 0.48799608035276826 + }, + "prompt_4": { + "accuracy": 0.4977951984321411 + }, + "prompt_5": { + "accuracy": 0.5036746692797648 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.23810951058317034, + "rouge2": 0.0857002142135992, + "rougeL": 0.1885274416117672, + "avg_rouge": 0.17077905546951225 + }, + "prompt_2": { + "rouge1": 0.250523637973491, + "rouge2": 0.089277880693473, + "rougeL": 0.19747352513811894, + "avg_rouge": 0.17909168126836095 + }, + "prompt_3": { + "rouge1": 0.20650395584271117, + "rouge2": 0.06638490581079505, + "rougeL": 0.1627505483788398, + "avg_rouge": 0.14521313667744867 + }, + "prompt_4": { + "rouge1": 0.2732959822452368, + "rouge2": 0.10311655456011995, + "rougeL": 0.21324113219553317, + "avg_rouge": 0.19655122300029668 + }, + "prompt_5": { + "rouge1": 0.2701425342096417, + "rouge2": 0.10132507177481839, + "rougeL": 0.21413842412577347, + "avg_rouge": 0.1952020100367445 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.21296063981820385, + "rouge2": 0.059261153885824085, + "rougeL": 0.16162505046260914, + "avg_rouge": 0.14461561472221238 + }, + "prompt_2": { + "rouge1": 0.21047103992181337, + "rouge2": 0.057955935982241955, + "rougeL": 0.15820991924545352, + "avg_rouge": 0.14221229838316962 + }, + "prompt_3": { + "rouge1": 0.20411808050595426, + "rouge2": 0.056710375076034165, + "rougeL": 0.15459172140442987, + "avg_rouge": 0.1384733923288061 + }, + "prompt_4": { + "rouge1": 0.2228297744624097, + "rouge2": 0.06374052414669532, + "rougeL": 0.1659809274302705, + "avg_rouge": 0.15085040867979183 + }, + "prompt_5": { + "rouge1": 0.21136870425728, + "rouge2": 0.05689872790476053, + "rougeL": 0.15741589496925315, + "avg_rouge": 0.1418944423770979 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6743119266055045 + }, + "prompt_2": { + "accuracy": 0.5424311926605505 + }, + "prompt_3": { + "accuracy": 0.6100917431192661 + }, + "prompt_4": { + "accuracy": 0.5286697247706422 + }, + "prompt_5": { + "accuracy": 0.841743119266055 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6414189837008629 + }, + "prompt_2": { + "accuracy": 0.675934803451582 + }, + "prompt_3": { + "accuracy": 0.6596356663470757 + }, + "prompt_4": { + "accuracy": 0.5110258868648131 + }, + "prompt_5": { + "accuracy": 0.6222435282837967 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.414 + }, + "prompt_2": { + "accuracy": 0.471 + }, + "prompt_3": { + "accuracy": 0.5035 + }, + "prompt_4": { + "accuracy": 0.4315 + }, + "prompt_5": { + "accuracy": 0.4255 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3235 + }, + "prompt_2": { + "accuracy": 0.325 + }, + "prompt_3": { + "accuracy": 0.322 + }, + "prompt_4": { + "accuracy": 0.333 + }, + "prompt_5": { + "accuracy": 0.33 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.511 + }, + "prompt_2": { + "accuracy": 0.542 + }, + "prompt_3": { + "accuracy": 0.516 + }, + "prompt_4": { + "accuracy": 0.5275 + }, + "prompt_5": { + "accuracy": 0.5215 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4507042253521127 + }, + "prompt_2": { + "accuracy": 0.647887323943662 + }, + "prompt_3": { + "accuracy": 0.4647887323943662 + }, + "prompt_4": { + "accuracy": 0.43661971830985913 + }, + "prompt_5": { + "accuracy": 0.43661971830985913 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5090252707581228 + }, + "prompt_2": { + "accuracy": 0.5342960288808665 + }, + "prompt_3": { + "accuracy": 0.5306859205776173 + }, + "prompt_4": { + "accuracy": 0.5306859205776173 + }, + "prompt_5": { + "accuracy": 0.5090252707581228 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49754901960784315 + }, + "prompt_2": { + "accuracy": 0.4632352941176471 + }, + "prompt_3": { + "accuracy": 0.4485294117647059 + }, + "prompt_4": { + "accuracy": 0.4852941176470588 + }, + "prompt_5": { + "accuracy": 0.5490196078431373 + } } }, "five_shot": { @@ -13186,235 +116258,3250 @@ "model_link": "https://huggingface.co/google/gemma-2b-it", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.37142857142857144, + "language_acc": { + "Vietnamese": 0.37333333333333335, + "Malay": 0.36666666666666664, + "Filipino": 0.31333333333333335, + "Indonesian": 0.35333333333333333, + "Chinese": 0.38666666666666666, + "Spanish": 0.36, + "English": 0.44666666666666666 + }, + "consistency_score_2": 0.5914285714285714, + "consistency_score_3": 0.4316190476190477, + "consistency_score_4": 0.3413333333333333, + "consistency_score_5": 0.28095238095238095, + "consistency_score_6": 0.2361904761904762, + "consistency_score_7": 0.2, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.5666666666666667, + "Vietnamese,Filipino": 0.5333333333333333, + "Vietnamese,Indonesian": 0.5733333333333334, + "Vietnamese,Chinese": 0.6066666666666667, + "Vietnamese,Spanish": 0.5866666666666667, + "Vietnamese,English": 0.5733333333333334, + "Malay,Filipino": 0.46, + "Malay,Indonesian": 0.6533333333333333, + "Malay,Chinese": 0.5933333333333334, + "Malay,Spanish": 0.62, + "Malay,English": 0.56, + "Filipino,Indonesian": 0.5666666666666667, + "Filipino,Chinese": 0.52, + "Filipino,Spanish": 0.5333333333333333, + "Filipino,English": 0.52, + "Indonesian,Chinese": 0.64, + "Indonesian,Spanish": 0.68, + "Indonesian,English": 0.6333333333333333, + "Chinese,Spanish": 0.6333333333333333, + "Chinese,English": 0.6533333333333333, + "Spanish,English": 0.7133333333333334 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.3333333333333333, + "Vietnamese,Malay,Indonesian": 0.44, + "Vietnamese,Malay,Chinese": 0.44, + "Vietnamese,Malay,Spanish": 0.42, + "Vietnamese,Malay,English": 0.41333333333333333, + "Vietnamese,Filipino,Indonesian": 0.37333333333333335, + "Vietnamese,Filipino,Chinese": 0.36666666666666664, + "Vietnamese,Filipino,Spanish": 0.37333333333333335, + "Vietnamese,Filipino,English": 0.35333333333333333, + "Vietnamese,Indonesian,Chinese": 0.4533333333333333, + "Vietnamese,Indonesian,Spanish": 0.4533333333333333, + "Vietnamese,Indonesian,English": 0.44, + "Vietnamese,Chinese,Spanish": 0.46, + "Vietnamese,Chinese,English": 0.4666666666666667, + "Vietnamese,Spanish,English": 0.48, + "Malay,Filipino,Indonesian": 0.38666666666666666, + "Malay,Filipino,Chinese": 0.35333333333333333, + "Malay,Filipino,Spanish": 0.36, + "Malay,Filipino,English": 0.32, + "Malay,Indonesian,Chinese": 0.4666666666666667, + "Malay,Indonesian,Spanish": 0.52, + "Malay,Indonesian,English": 0.46, + "Malay,Chinese,Spanish": 0.48, + "Malay,Chinese,English": 0.44666666666666666, + "Malay,Spanish,English": 0.4866666666666667, + "Filipino,Indonesian,Chinese": 0.42, + "Filipino,Indonesian,Spanish": 0.44, + "Filipino,Indonesian,English": 0.4066666666666667, + "Filipino,Chinese,Spanish": 0.38666666666666666, + "Filipino,Chinese,English": 0.4, + "Filipino,Spanish,English": 0.42, + "Indonesian,Chinese,Spanish": 0.5133333333333333, + "Indonesian,Chinese,English": 0.49333333333333335, + "Indonesian,Spanish,English": 0.5533333333333333, + "Chinese,Spanish,English": 0.5266666666666666 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.29333333333333333, + "Vietnamese,Malay,Filipino,Chinese": 0.2733333333333333, + "Vietnamese,Malay,Filipino,Spanish": 0.28, + "Vietnamese,Malay,Filipino,English": 0.26, + "Vietnamese,Malay,Indonesian,Chinese": 0.36, + "Vietnamese,Malay,Indonesian,Spanish": 0.37333333333333335, + "Vietnamese,Malay,Indonesian,English": 0.35333333333333333, + "Vietnamese,Malay,Chinese,Spanish": 0.36, + "Vietnamese,Malay,Chinese,English": 0.36, + "Vietnamese,Malay,Spanish,English": 0.36, + "Vietnamese,Filipino,Indonesian,Chinese": 0.30666666666666664, + "Vietnamese,Filipino,Indonesian,Spanish": 0.32, + "Vietnamese,Filipino,Indonesian,English": 0.3, + "Vietnamese,Filipino,Chinese,Spanish": 0.3, + "Vietnamese,Filipino,Chinese,English": 0.30666666666666664, + "Vietnamese,Filipino,Spanish,English": 0.30666666666666664, + "Vietnamese,Indonesian,Chinese,Spanish": 0.38666666666666666, + "Vietnamese,Indonesian,Chinese,English": 0.38666666666666666, + "Vietnamese,Indonesian,Spanish,English": 0.4, + "Vietnamese,Chinese,Spanish,English": 0.4066666666666667, + "Malay,Filipino,Indonesian,Chinese": 0.31333333333333335, + "Malay,Filipino,Indonesian,Spanish": 0.34, + "Malay,Filipino,Indonesian,English": 0.29333333333333333, + "Malay,Filipino,Chinese,Spanish": 0.3, + "Malay,Filipino,Chinese,English": 0.28, + "Malay,Filipino,Spanish,English": 0.29333333333333333, + "Malay,Indonesian,Chinese,Spanish": 0.41333333333333333, + "Malay,Indonesian,Chinese,English": 0.37333333333333335, + "Malay,Indonesian,Spanish,English": 0.4266666666666667, + "Malay,Chinese,Spanish,English": 0.4, + "Filipino,Indonesian,Chinese,Spanish": 0.3466666666666667, + "Filipino,Indonesian,Chinese,English": 0.34, + "Filipino,Indonesian,Spanish,English": 0.36, + "Filipino,Chinese,Spanish,English": 0.3333333333333333, + "Indonesian,Chinese,Spanish,English": 0.44 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.24666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.26666666666666666, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.24, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.24, + "Vietnamese,Malay,Filipino,Chinese,English": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Spanish,English": 0.23333333333333334, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.32666666666666666, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.31333333333333335, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.3333333333333333, + "Vietnamese,Malay,Chinese,Spanish,English": 0.32, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.26666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.26666666666666666, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.2733333333333333, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.26666666666666666, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.35333333333333333, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.28, + "Malay,Filipino,Indonesian,Chinese,English": 0.25333333333333335, + "Malay,Filipino,Indonesian,Spanish,English": 0.28, + "Malay,Filipino,Chinese,Spanish,English": 0.25333333333333335, + "Malay,Indonesian,Chinese,Spanish,English": 0.35333333333333333, + "Filipino,Indonesian,Chinese,Spanish,English": 0.3 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.21333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.22666666666666666, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.20666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.3, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.24, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.24 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.2 + } + }, + "AC3_2": 0.4562950402239117, + "AC3_3": 0.3992680942847889, + "AC3_4": 0.3557455905433548, + "AC3_5": 0.319916579721556, + "AC3_6": 0.2887595162982002, + "AC3_7": 0.2599999999545 + }, + "prompt_2": { + "overall_acc": 0.3676190476190476, + "language_acc": { + "Vietnamese": 0.35333333333333333, + "Malay": 0.36666666666666664, + "Filipino": 0.3333333333333333, + "Indonesian": 0.34, + "Chinese": 0.38, + "Spanish": 0.36666666666666664, + "English": 0.43333333333333335 + }, + "consistency_score_2": 0.5628571428571428, + "consistency_score_3": 0.389142857142857, + "consistency_score_4": 0.29504761904761906, + "consistency_score_5": 0.2361904761904762, + "consistency_score_6": 0.1961904761904762, + "consistency_score_7": 0.16666666666666666, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.6066666666666667, + "Vietnamese,Filipino": 0.5066666666666667, + "Vietnamese,Indonesian": 0.5133333333333333, + "Vietnamese,Chinese": 0.5866666666666667, + "Vietnamese,Spanish": 0.48, + "Vietnamese,English": 0.6333333333333333, + "Malay,Filipino": 0.5066666666666667, + "Malay,Indonesian": 0.64, + "Malay,Chinese": 0.6066666666666667, + "Malay,Spanish": 0.5666666666666667, + "Malay,English": 0.5933333333333334, + "Filipino,Indonesian": 0.54, + "Filipino,Chinese": 0.5466666666666666, + "Filipino,Spanish": 0.48, + "Filipino,English": 0.5533333333333333, + "Indonesian,Chinese": 0.5733333333333334, + "Indonesian,Spanish": 0.5866666666666667, + "Indonesian,English": 0.52, + "Chinese,Spanish": 0.5533333333333333, + "Chinese,English": 0.6, + "Spanish,English": 0.6266666666666667 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.37333333333333335, + "Vietnamese,Malay,Indonesian": 0.41333333333333333, + "Vietnamese,Malay,Chinese": 0.44666666666666666, + "Vietnamese,Malay,Spanish": 0.37333333333333335, + "Vietnamese,Malay,English": 0.4533333333333333, + "Vietnamese,Filipino,Indonesian": 0.3333333333333333, + "Vietnamese,Filipino,Chinese": 0.36, + "Vietnamese,Filipino,Spanish": 0.3, + "Vietnamese,Filipino,English": 0.38666666666666666, + "Vietnamese,Indonesian,Chinese": 0.36666666666666664, + "Vietnamese,Indonesian,Spanish": 0.34, + "Vietnamese,Indonesian,English": 0.38, + "Vietnamese,Chinese,Spanish": 0.36666666666666664, + "Vietnamese,Chinese,English": 0.4533333333333333, + "Vietnamese,Spanish,English": 0.42, + "Malay,Filipino,Indonesian": 0.38, + "Malay,Filipino,Chinese": 0.3933333333333333, + "Malay,Filipino,Spanish": 0.32666666666666666, + "Malay,Filipino,English": 0.37333333333333335, + "Malay,Indonesian,Chinese": 0.44666666666666666, + "Malay,Indonesian,Spanish": 0.44666666666666666, + "Malay,Indonesian,English": 0.41333333333333333, + "Malay,Chinese,Spanish": 0.41333333333333333, + "Malay,Chinese,English": 0.43333333333333335, + "Malay,Spanish,English": 0.44666666666666666, + "Filipino,Indonesian,Chinese": 0.37333333333333335, + "Filipino,Indonesian,Spanish": 0.3466666666666667, + "Filipino,Indonesian,English": 0.3333333333333333, + "Filipino,Chinese,Spanish": 0.3466666666666667, + "Filipino,Chinese,English": 0.37333333333333335, + "Filipino,Spanish,English": 0.36, + "Indonesian,Chinese,Spanish": 0.3933333333333333, + "Indonesian,Chinese,English": 0.38666666666666666, + "Indonesian,Spanish,English": 0.4266666666666667, + "Chinese,Spanish,English": 0.44 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.2733333333333333, + "Vietnamese,Malay,Filipino,Chinese": 0.29333333333333333, + "Vietnamese,Malay,Filipino,Spanish": 0.26, + "Vietnamese,Malay,Filipino,English": 0.32, + "Vietnamese,Malay,Indonesian,Chinese": 0.32, + "Vietnamese,Malay,Indonesian,Spanish": 0.3, + "Vietnamese,Malay,Indonesian,English": 0.32, + "Vietnamese,Malay,Chinese,Spanish": 0.31333333333333335, + "Vietnamese,Malay,Chinese,English": 0.35333333333333333, + "Vietnamese,Malay,Spanish,English": 0.3466666666666667, + "Vietnamese,Filipino,Indonesian,Chinese": 0.25333333333333335, + "Vietnamese,Filipino,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Filipino,Indonesian,English": 0.26, + "Vietnamese,Filipino,Chinese,Spanish": 0.26, + "Vietnamese,Filipino,Chinese,English": 0.3, + "Vietnamese,Filipino,Spanish,English": 0.26666666666666666, + "Vietnamese,Indonesian,Chinese,Spanish": 0.2733333333333333, + "Vietnamese,Indonesian,Chinese,English": 0.3, + "Vietnamese,Indonesian,Spanish,English": 0.3, + "Vietnamese,Chinese,Spanish,English": 0.3333333333333333, + "Malay,Filipino,Indonesian,Chinese": 0.30666666666666664, + "Malay,Filipino,Indonesian,Spanish": 0.2733333333333333, + "Malay,Filipino,Indonesian,English": 0.28, + "Malay,Filipino,Chinese,Spanish": 0.2733333333333333, + "Malay,Filipino,Chinese,English": 0.29333333333333333, + "Malay,Filipino,Spanish,English": 0.2733333333333333, + "Malay,Indonesian,Chinese,Spanish": 0.3333333333333333, + "Malay,Indonesian,Chinese,English": 0.32, + "Malay,Indonesian,Spanish,English": 0.35333333333333333, + "Malay,Chinese,Spanish,English": 0.3466666666666667, + "Filipino,Indonesian,Chinese,Spanish": 0.26666666666666666, + "Filipino,Indonesian,Chinese,English": 0.26, + "Filipino,Indonesian,Spanish,English": 0.26666666666666666, + "Filipino,Chinese,Spanish,English": 0.28, + "Indonesian,Chinese,Spanish,English": 0.32 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.22666666666666666, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Filipino,Chinese,English": 0.25333333333333335, + "Vietnamese,Malay,Filipino,Spanish,English": 0.24, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.25333333333333335, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.26, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.2733333333333333, + "Vietnamese,Malay,Chinese,Spanish,English": 0.2866666666666667, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.2, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.21333333333333335, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.20666666666666667, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.23333333333333334, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.24666666666666667, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.22666666666666666, + "Malay,Filipino,Indonesian,Chinese,English": 0.22666666666666666, + "Malay,Filipino,Indonesian,Spanish,English": 0.22666666666666666, + "Malay,Filipino,Chinese,Spanish,English": 0.22666666666666666, + "Malay,Indonesian,Chinese,Spanish,English": 0.2733333333333333, + "Filipino,Indonesian,Chinese,Spanish,English": 0.21333333333333335 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.18666666666666668, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.19333333333333333, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.19333333333333333, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.20666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.22666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.18, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.18666666666666668 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.16666666666666666 + } + }, + "AC3_2": 0.4447550811044165, + "AC3_3": 0.37807486241453286, + "AC3_4": 0.32735953128828565, + "AC3_5": 0.28760102143351657, + "AC3_6": 0.25584298579760834, + "AC3_7": 0.2293523469564789 + }, + "prompt_3": { + "overall_acc": 0.3771428571428571, + "language_acc": { + "Vietnamese": 0.37333333333333335, + "Malay": 0.36666666666666664, + "Filipino": 0.30666666666666664, + "Indonesian": 0.37333333333333335, + "Chinese": 0.38, + "Spanish": 0.4066666666666667, + "English": 0.43333333333333335 + }, + "consistency_score_2": 0.5596825396825397, + "consistency_score_3": 0.38380952380952377, + "consistency_score_4": 0.2866666666666666, + "consistency_score_5": 0.2241269841269841, + "consistency_score_6": 0.18, + "consistency_score_7": 0.14666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.58, + "Vietnamese,Filipino": 0.4666666666666667, + "Vietnamese,Indonesian": 0.5133333333333333, + "Vietnamese,Chinese": 0.54, + "Vietnamese,Spanish": 0.5466666666666666, + "Vietnamese,English": 0.56, + "Malay,Filipino": 0.46, + "Malay,Indonesian": 0.6333333333333333, + "Malay,Chinese": 0.5466666666666666, + "Malay,Spanish": 0.5666666666666667, + "Malay,English": 0.58, + "Filipino,Indonesian": 0.5133333333333333, + "Filipino,Chinese": 0.5133333333333333, + "Filipino,Spanish": 0.48, + "Filipino,English": 0.54, + "Indonesian,Chinese": 0.62, + "Indonesian,Spanish": 0.62, + "Indonesian,English": 0.6066666666666667, + "Chinese,Spanish": 0.58, + "Chinese,English": 0.6, + "Spanish,English": 0.6866666666666666 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.30666666666666664, + "Vietnamese,Malay,Indonesian": 0.3933333333333333, + "Vietnamese,Malay,Chinese": 0.38, + "Vietnamese,Malay,Spanish": 0.38666666666666666, + "Vietnamese,Malay,English": 0.3933333333333333, + "Vietnamese,Filipino,Indonesian": 0.3, + "Vietnamese,Filipino,Chinese": 0.30666666666666664, + "Vietnamese,Filipino,Spanish": 0.32, + "Vietnamese,Filipino,English": 0.3333333333333333, + "Vietnamese,Indonesian,Chinese": 0.36, + "Vietnamese,Indonesian,Spanish": 0.38, + "Vietnamese,Indonesian,English": 0.38, + "Vietnamese,Chinese,Spanish": 0.37333333333333335, + "Vietnamese,Chinese,English": 0.38666666666666666, + "Vietnamese,Spanish,English": 0.44, + "Malay,Filipino,Indonesian": 0.36666666666666664, + "Malay,Filipino,Chinese": 0.32666666666666666, + "Malay,Filipino,Spanish": 0.32666666666666666, + "Malay,Filipino,English": 0.35333333333333333, + "Malay,Indonesian,Chinese": 0.4266666666666667, + "Malay,Indonesian,Spanish": 0.4533333333333333, + "Malay,Indonesian,English": 0.44666666666666666, + "Malay,Chinese,Spanish": 0.4066666666666667, + "Malay,Chinese,English": 0.3933333333333333, + "Malay,Spanish,English": 0.46, + "Filipino,Indonesian,Chinese": 0.36666666666666664, + "Filipino,Indonesian,Spanish": 0.36, + "Filipino,Indonesian,English": 0.37333333333333335, + "Filipino,Chinese,Spanish": 0.34, + "Filipino,Chinese,English": 0.36, + "Filipino,Spanish,English": 0.38, + "Indonesian,Chinese,Spanish": 0.43333333333333335, + "Indonesian,Chinese,English": 0.44, + "Indonesian,Spanish,English": 0.5, + "Chinese,Spanish,English": 0.48 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Chinese": 0.22, + "Vietnamese,Malay,Filipino,Spanish": 0.24666666666666667, + "Vietnamese,Malay,Filipino,English": 0.25333333333333335, + "Vietnamese,Malay,Indonesian,Chinese": 0.2866666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.30666666666666664, + "Vietnamese,Malay,Indonesian,English": 0.3, + "Vietnamese,Malay,Chinese,Spanish": 0.2866666666666667, + "Vietnamese,Malay,Chinese,English": 0.28, + "Vietnamese,Malay,Spanish,English": 0.3333333333333333, + "Vietnamese,Filipino,Indonesian,Chinese": 0.22666666666666666, + "Vietnamese,Filipino,Indonesian,Spanish": 0.24666666666666667, + "Vietnamese,Filipino,Indonesian,English": 0.25333333333333335, + "Vietnamese,Filipino,Chinese,Spanish": 0.24, + "Vietnamese,Filipino,Chinese,English": 0.24, + "Vietnamese,Filipino,Spanish,English": 0.26666666666666666, + "Vietnamese,Indonesian,Chinese,Spanish": 0.2866666666666667, + "Vietnamese,Indonesian,Chinese,English": 0.3, + "Vietnamese,Indonesian,Spanish,English": 0.3333333333333333, + "Vietnamese,Chinese,Spanish,English": 0.32, + "Malay,Filipino,Indonesian,Chinese": 0.26666666666666666, + "Malay,Filipino,Indonesian,Spanish": 0.2733333333333333, + "Malay,Filipino,Indonesian,English": 0.2866666666666667, + "Malay,Filipino,Chinese,Spanish": 0.25333333333333335, + "Malay,Filipino,Chinese,English": 0.26, + "Malay,Filipino,Spanish,English": 0.2866666666666667, + "Malay,Indonesian,Chinese,Spanish": 0.3466666666666667, + "Malay,Indonesian,Chinese,English": 0.32666666666666666, + "Malay,Indonesian,Spanish,English": 0.38666666666666666, + "Malay,Chinese,Spanish,English": 0.34, + "Filipino,Indonesian,Chinese,Spanish": 0.28, + "Filipino,Indonesian,Chinese,English": 0.29333333333333333, + "Filipino,Indonesian,Spanish,English": 0.30666666666666664, + "Filipino,Chinese,Spanish,English": 0.29333333333333333, + "Indonesian,Chinese,Spanish,English": 0.37333333333333335 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.18, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.2, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.18666666666666668, + "Vietnamese,Malay,Filipino,Chinese,English": 0.18666666666666668, + "Vietnamese,Malay,Filipino,Spanish,English": 0.22, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.24, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.23333333333333334, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.2733333333333333, + "Vietnamese,Malay,Chinese,Spanish,English": 0.24666666666666667, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.19333333333333333, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.20666666666666667, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.22, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.20666666666666667, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.26, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.22, + "Malay,Filipino,Indonesian,Chinese,English": 0.22, + "Malay,Filipino,Indonesian,Spanish,English": 0.24, + "Malay,Filipino,Chinese,Spanish,English": 0.22666666666666666, + "Malay,Indonesian,Chinese,Spanish,English": 0.29333333333333333, + "Filipino,Indonesian,Chinese,Spanish,English": 0.25333333333333335 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.16, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.16, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.18, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.16666666666666666, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.21333333333333335, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.18, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.2 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.14666666666666667 + } + }, + "AC3_2": 0.4506288424750139, + "AC3_3": 0.38044698725556436, + "AC3_4": 0.32573888086914976, + "AC3_5": 0.28116457982953924, + "AC3_6": 0.24369230764856797, + "AC3_7": 0.21119999995968003 + }, + "prompt_4": { + "overall_acc": 0.379047619047619, + "language_acc": { + "Vietnamese": 0.38666666666666666, + "Malay": 0.38, + "Filipino": 0.29333333333333333, + "Indonesian": 0.38, + "Chinese": 0.38666666666666666, + "Spanish": 0.38, + "English": 0.44666666666666666 + }, + "consistency_score_2": 0.5869841269841272, + "consistency_score_3": 0.41942857142857143, + "consistency_score_4": 0.32514285714285723, + "consistency_score_5": 0.2625396825396826, + "consistency_score_6": 0.21619047619047618, + "consistency_score_7": 0.18, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.58, + "Vietnamese,Filipino": 0.48, + "Vietnamese,Indonesian": 0.56, + "Vietnamese,Chinese": 0.5733333333333334, + "Vietnamese,Spanish": 0.5933333333333334, + "Vietnamese,English": 0.6266666666666667, + "Malay,Filipino": 0.5, + "Malay,Indonesian": 0.66, + "Malay,Chinese": 0.5733333333333334, + "Malay,Spanish": 0.5933333333333334, + "Malay,English": 0.58, + "Filipino,Indonesian": 0.5333333333333333, + "Filipino,Chinese": 0.5066666666666667, + "Filipino,Spanish": 0.52, + "Filipino,English": 0.54, + "Indonesian,Chinese": 0.64, + "Indonesian,Spanish": 0.64, + "Indonesian,English": 0.6266666666666667, + "Chinese,Spanish": 0.6133333333333333, + "Chinese,English": 0.64, + "Spanish,English": 0.7466666666666667 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.32666666666666666, + "Vietnamese,Malay,Indonesian": 0.43333333333333335, + "Vietnamese,Malay,Chinese": 0.4, + "Vietnamese,Malay,Spanish": 0.42, + "Vietnamese,Malay,English": 0.44, + "Vietnamese,Filipino,Indonesian": 0.34, + "Vietnamese,Filipino,Chinese": 0.32666666666666666, + "Vietnamese,Filipino,Spanish": 0.3333333333333333, + "Vietnamese,Filipino,English": 0.36, + "Vietnamese,Indonesian,Chinese": 0.41333333333333333, + "Vietnamese,Indonesian,Spanish": 0.43333333333333335, + "Vietnamese,Indonesian,English": 0.4533333333333333, + "Vietnamese,Chinese,Spanish": 0.4266666666666667, + "Vietnamese,Chinese,English": 0.4533333333333333, + "Vietnamese,Spanish,English": 0.5133333333333333, + "Malay,Filipino,Indonesian": 0.3933333333333333, + "Malay,Filipino,Chinese": 0.35333333333333333, + "Malay,Filipino,Spanish": 0.3466666666666667, + "Malay,Filipino,English": 0.35333333333333333, + "Malay,Indonesian,Chinese": 0.47333333333333333, + "Malay,Indonesian,Spanish": 0.4866666666666667, + "Malay,Indonesian,English": 0.4666666666666667, + "Malay,Chinese,Spanish": 0.44, + "Malay,Chinese,English": 0.44, + "Malay,Spanish,English": 0.48, + "Filipino,Indonesian,Chinese": 0.3933333333333333, + "Filipino,Indonesian,Spanish": 0.4, + "Filipino,Indonesian,English": 0.4, + "Filipino,Chinese,Spanish": 0.36, + "Filipino,Chinese,English": 0.38666666666666666, + "Filipino,Spanish,English": 0.42, + "Indonesian,Chinese,Spanish": 0.48, + "Indonesian,Chinese,English": 0.49333333333333335, + "Indonesian,Spanish,English": 0.5333333333333333, + "Chinese,Spanish,English": 0.5066666666666667 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.2733333333333333, + "Vietnamese,Malay,Filipino,Chinese": 0.24, + "Vietnamese,Malay,Filipino,Spanish": 0.26, + "Vietnamese,Malay,Filipino,English": 0.2733333333333333, + "Vietnamese,Malay,Indonesian,Chinese": 0.3333333333333333, + "Vietnamese,Malay,Indonesian,Spanish": 0.35333333333333333, + "Vietnamese,Malay,Indonesian,English": 0.37333333333333335, + "Vietnamese,Malay,Chinese,Spanish": 0.32, + "Vietnamese,Malay,Chinese,English": 0.34, + "Vietnamese,Malay,Spanish,English": 0.38, + "Vietnamese,Filipino,Indonesian,Chinese": 0.26, + "Vietnamese,Filipino,Indonesian,Spanish": 0.2866666666666667, + "Vietnamese,Filipino,Indonesian,English": 0.30666666666666664, + "Vietnamese,Filipino,Chinese,Spanish": 0.26, + "Vietnamese,Filipino,Chinese,English": 0.28, + "Vietnamese,Filipino,Spanish,English": 0.3, + "Vietnamese,Indonesian,Chinese,Spanish": 0.3333333333333333, + "Vietnamese,Indonesian,Chinese,English": 0.36666666666666664, + "Vietnamese,Indonesian,Spanish,English": 0.4, + "Vietnamese,Chinese,Spanish,English": 0.38, + "Malay,Filipino,Indonesian,Chinese": 0.30666666666666664, + "Malay,Filipino,Indonesian,Spanish": 0.31333333333333335, + "Malay,Filipino,Indonesian,English": 0.32, + "Malay,Filipino,Chinese,Spanish": 0.2733333333333333, + "Malay,Filipino,Chinese,English": 0.2866666666666667, + "Malay,Filipino,Spanish,English": 0.3, + "Malay,Indonesian,Chinese,Spanish": 0.38666666666666666, + "Malay,Indonesian,Chinese,English": 0.38, + "Malay,Indonesian,Spanish,English": 0.41333333333333333, + "Malay,Chinese,Spanish,English": 0.37333333333333335, + "Filipino,Indonesian,Chinese,Spanish": 0.32, + "Filipino,Indonesian,Chinese,English": 0.32666666666666666, + "Filipino,Indonesian,Spanish,English": 0.3466666666666667, + "Filipino,Chinese,Spanish,English": 0.3, + "Indonesian,Chinese,Spanish,English": 0.41333333333333333 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.21333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.24, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.25333333333333335, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.2, + "Vietnamese,Malay,Filipino,Chinese,English": 0.21333333333333335, + "Vietnamese,Malay,Filipino,Spanish,English": 0.24, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.28, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.3, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.3333333333333333, + "Vietnamese,Malay,Chinese,Spanish,English": 0.29333333333333333, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.22666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.24666666666666667, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.2733333333333333, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.23333333333333334, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.32, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.25333333333333335, + "Malay,Filipino,Indonesian,Chinese,English": 0.26, + "Malay,Filipino,Indonesian,Spanish,English": 0.28, + "Malay,Filipino,Chinese,Spanish,English": 0.24, + "Malay,Indonesian,Chinese,Spanish,English": 0.3333333333333333, + "Filipino,Indonesian,Chinese,Spanish,English": 0.28 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.18666666666666668, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.2, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.22666666666666666, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.18666666666666668, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.26666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.22, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.22666666666666666 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.18 + } + }, + "AC3_2": 0.46063690275813163, + "AC3_3": 0.3982170119457658, + "AC3_4": 0.3500320722869366, + "AC3_5": 0.31021512212124897, + "AC3_6": 0.2753401904299333, + "AC3_7": 0.24408858598700287 + }, + "prompt_5": { + "overall_acc": 0.37809523809523815, + "language_acc": { + "Vietnamese": 0.37333333333333335, + "Malay": 0.4, + "Filipino": 0.2866666666666667, + "Indonesian": 0.36666666666666664, + "Chinese": 0.38, + "Spanish": 0.37333333333333335, + "English": 0.4666666666666667 + }, + "consistency_score_2": 0.5488888888888889, + "consistency_score_3": 0.3712380952380952, + "consistency_score_4": 0.2773333333333333, + "consistency_score_5": 0.22031746031746033, + "consistency_score_6": 0.18190476190476187, + "consistency_score_7": 0.15333333333333332, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.54, + "Vietnamese,Filipino": 0.4533333333333333, + "Vietnamese,Indonesian": 0.56, + "Vietnamese,Chinese": 0.5533333333333333, + "Vietnamese,Spanish": 0.56, + "Vietnamese,English": 0.58, + "Malay,Filipino": 0.46, + "Malay,Indonesian": 0.6266666666666667, + "Malay,Chinese": 0.5133333333333333, + "Malay,Spanish": 0.5333333333333333, + "Malay,English": 0.5533333333333333, + "Filipino,Indonesian": 0.5, + "Filipino,Chinese": 0.46, + "Filipino,Spanish": 0.5, + "Filipino,English": 0.52, + "Indonesian,Chinese": 0.52, + "Indonesian,Spanish": 0.6133333333333333, + "Indonesian,English": 0.6, + "Chinese,Spanish": 0.5533333333333333, + "Chinese,English": 0.62, + "Spanish,English": 0.7066666666666667 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.29333333333333333, + "Vietnamese,Malay,Indonesian": 0.4, + "Vietnamese,Malay,Chinese": 0.36666666666666664, + "Vietnamese,Malay,Spanish": 0.37333333333333335, + "Vietnamese,Malay,English": 0.4, + "Vietnamese,Filipino,Indonesian": 0.3, + "Vietnamese,Filipino,Chinese": 0.29333333333333333, + "Vietnamese,Filipino,Spanish": 0.3, + "Vietnamese,Filipino,English": 0.32666666666666666, + "Vietnamese,Indonesian,Chinese": 0.38, + "Vietnamese,Indonesian,Spanish": 0.41333333333333333, + "Vietnamese,Indonesian,English": 0.4266666666666667, + "Vietnamese,Chinese,Spanish": 0.3933333333333333, + "Vietnamese,Chinese,English": 0.4266666666666667, + "Vietnamese,Spanish,English": 0.4533333333333333, + "Malay,Filipino,Indonesian": 0.34, + "Malay,Filipino,Chinese": 0.2733333333333333, + "Malay,Filipino,Spanish": 0.3, + "Malay,Filipino,English": 0.32666666666666666, + "Malay,Indonesian,Chinese": 0.36666666666666664, + "Malay,Indonesian,Spanish": 0.43333333333333335, + "Malay,Indonesian,English": 0.4266666666666667, + "Malay,Chinese,Spanish": 0.37333333333333335, + "Malay,Chinese,English": 0.3933333333333333, + "Malay,Spanish,English": 0.4266666666666667, + "Filipino,Indonesian,Chinese": 0.29333333333333333, + "Filipino,Indonesian,Spanish": 0.3466666666666667, + "Filipino,Indonesian,English": 0.36, + "Filipino,Chinese,Spanish": 0.3, + "Filipino,Chinese,English": 0.34, + "Filipino,Spanish,English": 0.38666666666666666, + "Indonesian,Chinese,Spanish": 0.38, + "Indonesian,Chinese,English": 0.41333333333333333, + "Indonesian,Spanish,English": 0.49333333333333335, + "Chinese,Spanish,English": 0.47333333333333333 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Chinese": 0.2, + "Vietnamese,Malay,Filipino,Spanish": 0.23333333333333334, + "Vietnamese,Malay,Filipino,English": 0.24666666666666667, + "Vietnamese,Malay,Indonesian,Chinese": 0.30666666666666664, + "Vietnamese,Malay,Indonesian,Spanish": 0.32666666666666666, + "Vietnamese,Malay,Indonesian,English": 0.34, + "Vietnamese,Malay,Chinese,Spanish": 0.3, + "Vietnamese,Malay,Chinese,English": 0.32, + "Vietnamese,Malay,Spanish,English": 0.3333333333333333, + "Vietnamese,Filipino,Indonesian,Chinese": 0.21333333333333335, + "Vietnamese,Filipino,Indonesian,Spanish": 0.24666666666666667, + "Vietnamese,Filipino,Indonesian,English": 0.25333333333333335, + "Vietnamese,Filipino,Chinese,Spanish": 0.21333333333333335, + "Vietnamese,Filipino,Chinese,English": 0.23333333333333334, + "Vietnamese,Filipino,Spanish,English": 0.25333333333333335, + "Vietnamese,Indonesian,Chinese,Spanish": 0.29333333333333333, + "Vietnamese,Indonesian,Chinese,English": 0.32, + "Vietnamese,Indonesian,Spanish,English": 0.36, + "Vietnamese,Chinese,Spanish,English": 0.34, + "Malay,Filipino,Indonesian,Chinese": 0.20666666666666667, + "Malay,Filipino,Indonesian,Spanish": 0.25333333333333335, + "Malay,Filipino,Indonesian,English": 0.26666666666666666, + "Malay,Filipino,Chinese,Spanish": 0.21333333333333335, + "Malay,Filipino,Chinese,English": 0.23333333333333334, + "Malay,Filipino,Spanish,English": 0.26, + "Malay,Indonesian,Chinese,Spanish": 0.30666666666666664, + "Malay,Indonesian,Chinese,English": 0.32, + "Malay,Indonesian,Spanish,English": 0.36, + "Malay,Chinese,Spanish,English": 0.32666666666666666, + "Filipino,Indonesian,Chinese,Spanish": 0.22666666666666666, + "Filipino,Indonesian,Chinese,English": 0.26, + "Filipino,Indonesian,Spanish,English": 0.3, + "Filipino,Chinese,Spanish,English": 0.26, + "Indonesian,Chinese,Spanish,English": 0.3466666666666667 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.17333333333333334, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.21333333333333335, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.18, + "Vietnamese,Malay,Filipino,Chinese,English": 0.18666666666666668, + "Vietnamese,Malay,Filipino,Spanish,English": 0.21333333333333335, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.26666666666666666, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.28, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.3, + "Vietnamese,Malay,Chinese,Spanish,English": 0.2733333333333333, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.17333333333333334, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.18666666666666668, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.22, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.18666666666666668, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.2733333333333333, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.18, + "Malay,Filipino,Indonesian,Chinese,English": 0.2, + "Malay,Filipino,Indonesian,Spanish,English": 0.22666666666666666, + "Malay,Filipino,Chinese,Spanish,English": 0.19333333333333333, + "Malay,Indonesian,Chinese,Spanish,English": 0.28, + "Filipino,Indonesian,Chinese,Spanish,English": 0.21333333333333335 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.16, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.16666666666666666, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.19333333333333333, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.16666666666666666, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.25333333333333335, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.16, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.17333333333333334 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.15333333333333332 + } + }, + "AC3_2": 0.44775799081927725, + "AC3_3": 0.3746352917891597, + "AC3_4": 0.3199690012105056, + "AC3_5": 0.2784064670495981, + "AC3_6": 0.24563330089556026, + "AC3_7": 0.2181839904009988 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.33360389610389607, + "language_acc": { + "English": 0.38636363636363635, + "Filipino": 0.26136363636363635, + "Vietnamese": 0.2840909090909091, + "Chinese": 0.3693181818181818, + "Indonesian": 0.32386363636363635, + "Malay": 0.35795454545454547, + "Spanish": 0.3522727272727273 + }, + "consistency_score_2": 0.5551948051948052, + "consistency_score_3": 0.37646103896103883, + "consistency_score_4": 0.27532467532467536, + "consistency_score_5": 0.20887445887445888, + "consistency_score_6": 0.16152597402597402, + "consistency_score_7": 0.125, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.48295454545454547, + "English,Vietnamese": 0.5738636363636364, + "English,Chinese": 0.6363636363636364, + "English,Indonesian": 0.6761363636363636, + "English,Malay": 0.5965909090909091, + "English,Spanish": 0.6420454545454546, + "Filipino,Vietnamese": 0.4147727272727273, + "Filipino,Chinese": 0.42613636363636365, + "Filipino,Indonesian": 0.48863636363636365, + "Filipino,Malay": 0.48295454545454547, + "Filipino,Spanish": 0.45454545454545453, + "Vietnamese,Chinese": 0.5568181818181818, + "Vietnamese,Indonesian": 0.5965909090909091, + "Vietnamese,Malay": 0.5454545454545454, + "Vietnamese,Spanish": 0.5397727272727273, + "Chinese,Indonesian": 0.6022727272727273, + "Chinese,Malay": 0.5284090909090909, + "Chinese,Spanish": 0.5568181818181818, + "Indonesian,Malay": 0.6477272727272727, + "Indonesian,Spanish": 0.6420454545454546, + "Malay,Spanish": 0.5681818181818182 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.29545454545454547, + "English,Filipino,Chinese": 0.32386363636363635, + "English,Filipino,Indonesian": 0.3522727272727273, + "English,Filipino,Malay": 0.3125, + "English,Filipino,Spanish": 0.3352272727272727, + "English,Vietnamese,Chinese": 0.42045454545454547, + "English,Vietnamese,Indonesian": 0.45454545454545453, + "English,Vietnamese,Malay": 0.3977272727272727, + "English,Vietnamese,Spanish": 0.4318181818181818, + "English,Chinese,Indonesian": 0.48863636363636365, + "English,Chinese,Malay": 0.42045454545454547, + "English,Chinese,Spanish": 0.4715909090909091, + "English,Indonesian,Malay": 0.4772727272727273, + "English,Indonesian,Spanish": 0.5227272727272727, + "English,Malay,Spanish": 0.45454545454545453, + "Filipino,Vietnamese,Chinese": 0.2727272727272727, + "Filipino,Vietnamese,Indonesian": 0.30113636363636365, + "Filipino,Vietnamese,Malay": 0.26136363636363635, + "Filipino,Vietnamese,Spanish": 0.2727272727272727, + "Filipino,Chinese,Indonesian": 0.3181818181818182, + "Filipino,Chinese,Malay": 0.26704545454545453, + "Filipino,Chinese,Spanish": 0.26704545454545453, + "Filipino,Indonesian,Malay": 0.3465909090909091, + "Filipino,Indonesian,Spanish": 0.32954545454545453, + "Filipino,Malay,Spanish": 0.3068181818181818, + "Vietnamese,Chinese,Indonesian": 0.4147727272727273, + "Vietnamese,Chinese,Malay": 0.35795454545454547, + "Vietnamese,Chinese,Spanish": 0.375, + "Vietnamese,Indonesian,Malay": 0.4147727272727273, + "Vietnamese,Indonesian,Spanish": 0.4318181818181818, + "Vietnamese,Malay,Spanish": 0.36363636363636365, + "Chinese,Indonesian,Malay": 0.4147727272727273, + "Chinese,Indonesian,Spanish": 0.4431818181818182, + "Chinese,Malay,Spanish": 0.38636363636363635, + "Indonesian,Malay,Spanish": 0.4715909090909091 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.2215909090909091, + "English,Filipino,Vietnamese,Indonesian": 0.25, + "English,Filipino,Vietnamese,Malay": 0.20454545454545456, + "English,Filipino,Vietnamese,Spanish": 0.23295454545454544, + "English,Filipino,Chinese,Indonesian": 0.26704545454545453, + "English,Filipino,Chinese,Malay": 0.2215909090909091, + "English,Filipino,Chinese,Spanish": 0.23863636363636365, + "English,Filipino,Indonesian,Malay": 0.26704545454545453, + "English,Filipino,Indonesian,Spanish": 0.2840909090909091, + "English,Filipino,Malay,Spanish": 0.2556818181818182, + "English,Vietnamese,Chinese,Indonesian": 0.3465909090909091, + "English,Vietnamese,Chinese,Malay": 0.29545454545454547, + "English,Vietnamese,Chinese,Spanish": 0.32954545454545453, + "English,Vietnamese,Indonesian,Malay": 0.3352272727272727, + "English,Vietnamese,Indonesian,Spanish": 0.3806818181818182, + "English,Vietnamese,Malay,Spanish": 0.3181818181818182, + "English,Chinese,Indonesian,Malay": 0.36363636363636365, + "English,Chinese,Indonesian,Spanish": 0.4034090909090909, + "English,Chinese,Malay,Spanish": 0.3409090909090909, + "English,Indonesian,Malay,Spanish": 0.38636363636363635, + "Filipino,Vietnamese,Chinese,Indonesian": 0.2215909090909091, + "Filipino,Vietnamese,Chinese,Malay": 0.1875, + "Filipino,Vietnamese,Chinese,Spanish": 0.18181818181818182, + "Filipino,Vietnamese,Indonesian,Malay": 0.2159090909090909, + "Filipino,Vietnamese,Indonesian,Spanish": 0.2159090909090909, + "Filipino,Vietnamese,Malay,Spanish": 0.19318181818181818, + "Filipino,Chinese,Indonesian,Malay": 0.23863636363636365, + "Filipino,Chinese,Indonesian,Spanish": 0.23295454545454544, + "Filipino,Chinese,Malay,Spanish": 0.20454545454545456, + "Filipino,Indonesian,Malay,Spanish": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian,Malay": 0.29545454545454547, + "Vietnamese,Chinese,Indonesian,Spanish": 0.32386363636363635, + "Vietnamese,Chinese,Malay,Spanish": 0.26704545454545453, + "Vietnamese,Indonesian,Malay,Spanish": 0.32386363636363635, + "Chinese,Indonesian,Malay,Spanish": 0.3352272727272727 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.19318181818181818, + "English,Filipino,Vietnamese,Chinese,Malay": 0.1534090909090909, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.16477272727272727, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.1875, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.20454545454545456, + "English,Filipino,Vietnamese,Malay,Spanish": 0.17613636363636365, + "English,Filipino,Chinese,Indonesian,Malay": 0.21022727272727273, + "English,Filipino,Chinese,Indonesian,Spanish": 0.2215909090909091, + "English,Filipino,Chinese,Malay,Spanish": 0.1875, + "English,Filipino,Indonesian,Malay,Spanish": 0.2215909090909091, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.26136363636363635, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.30113636363636365, + "English,Vietnamese,Chinese,Malay,Spanish": 0.24431818181818182, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.2897727272727273, + "English,Chinese,Indonesian,Malay,Spanish": 0.3125, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.17045454545454544, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.1534090909090909, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.13636363636363635, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.16477272727272727, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.1875, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.24431818181818182 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.1534090909090909, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.125, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.1590909090909091, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.18181818181818182, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.23295454545454544, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.125 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.125 + } + }, + "AC3_2": 0.41677637426372877, + "AC3_3": 0.35373911063373725, + "AC3_4": 0.3016753973336666, + "AC3_5": 0.25689995785010744, + "AC3_6": 0.21766286987300748, + "AC3_7": 0.18185840703999123 + }, + "prompt_2": { + "overall_acc": 0.30275974025974023, + "language_acc": { + "English": 0.3693181818181818, + "Filipino": 0.2556818181818182, + "Vietnamese": 0.2556818181818182, + "Chinese": 0.35795454545454547, + "Indonesian": 0.26136363636363635, + "Malay": 0.3068181818181818, + "Spanish": 0.3125 + }, + "consistency_score_2": 0.5037878787878787, + "consistency_score_3": 0.314448051948052, + "consistency_score_4": 0.21249999999999997, + "consistency_score_5": 0.14880952380952378, + "consistency_score_6": 0.10714285714285716, + "consistency_score_7": 0.07954545454545454, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.48863636363636365, + "English,Vietnamese": 0.5511363636363636, + "English,Chinese": 0.5511363636363636, + "English,Indonesian": 0.5738636363636364, + "English,Malay": 0.5397727272727273, + "English,Spanish": 0.6136363636363636, + "Filipino,Vietnamese": 0.42613636363636365, + "Filipino,Chinese": 0.39204545454545453, + "Filipino,Indonesian": 0.48863636363636365, + "Filipino,Malay": 0.48863636363636365, + "Filipino,Spanish": 0.4090909090909091, + "Vietnamese,Chinese": 0.5, + "Vietnamese,Indonesian": 0.5568181818181818, + "Vietnamese,Malay": 0.5511363636363636, + "Vietnamese,Spanish": 0.5056818181818182, + "Chinese,Indonesian": 0.4772727272727273, + "Chinese,Malay": 0.4772727272727273, + "Chinese,Spanish": 0.44886363636363635, + "Indonesian,Malay": 0.5568181818181818, + "Indonesian,Spanish": 0.5056818181818182, + "Malay,Spanish": 0.4772727272727273 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.3068181818181818, + "English,Filipino,Chinese": 0.2840909090909091, + "English,Filipino,Indonesian": 0.32954545454545453, + "English,Filipino,Malay": 0.3125, + "English,Filipino,Spanish": 0.3181818181818182, + "English,Vietnamese,Chinese": 0.3522727272727273, + "English,Vietnamese,Indonesian": 0.38636363636363635, + "English,Vietnamese,Malay": 0.3693181818181818, + "English,Vietnamese,Spanish": 0.38636363636363635, + "English,Chinese,Indonesian": 0.3465909090909091, + "English,Chinese,Malay": 0.3352272727272727, + "English,Chinese,Spanish": 0.32386363636363635, + "English,Indonesian,Malay": 0.38636363636363635, + "English,Indonesian,Spanish": 0.38636363636363635, + "English,Malay,Spanish": 0.36363636363636365, + "Filipino,Vietnamese,Chinese": 0.23295454545454544, + "Filipino,Vietnamese,Indonesian": 0.30113636363636365, + "Filipino,Vietnamese,Malay": 0.30113636363636365, + "Filipino,Vietnamese,Spanish": 0.26136363636363635, + "Filipino,Chinese,Indonesian": 0.24431818181818182, + "Filipino,Chinese,Malay": 0.2215909090909091, + "Filipino,Chinese,Spanish": 0.21022727272727273, + "Filipino,Indonesian,Malay": 0.3181818181818182, + "Filipino,Indonesian,Spanish": 0.2897727272727273, + "Filipino,Malay,Spanish": 0.25, + "Vietnamese,Chinese,Indonesian": 0.32954545454545453, + "Vietnamese,Chinese,Malay": 0.3125, + "Vietnamese,Chinese,Spanish": 0.29545454545454547, + "Vietnamese,Indonesian,Malay": 0.3806818181818182, + "Vietnamese,Indonesian,Spanish": 0.3465909090909091, + "Vietnamese,Malay,Spanish": 0.3409090909090909, + "Chinese,Indonesian,Malay": 0.3068181818181818, + "Chinese,Indonesian,Spanish": 0.2727272727272727, + "Chinese,Malay,Spanish": 0.26704545454545453, + "Indonesian,Malay,Spanish": 0.3352272727272727 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.1875, + "English,Filipino,Vietnamese,Indonesian": 0.23295454545454544, + "English,Filipino,Vietnamese,Malay": 0.23295454545454544, + "English,Filipino,Vietnamese,Spanish": 0.2215909090909091, + "English,Filipino,Chinese,Indonesian": 0.19318181818181818, + "English,Filipino,Chinese,Malay": 0.1875, + "English,Filipino,Chinese,Spanish": 0.16477272727272727, + "English,Filipino,Indonesian,Malay": 0.24431818181818182, + "English,Filipino,Indonesian,Spanish": 0.23295454545454544, + "English,Filipino,Malay,Spanish": 0.2159090909090909, + "English,Vietnamese,Chinese,Indonesian": 0.24431818181818182, + "English,Vietnamese,Chinese,Malay": 0.23295454545454544, + "English,Vietnamese,Chinese,Spanish": 0.23295454545454544, + "English,Vietnamese,Indonesian,Malay": 0.2840909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.2784090909090909, + "English,Vietnamese,Malay,Spanish": 0.26136363636363635, + "English,Chinese,Indonesian,Malay": 0.24431818181818182, + "English,Chinese,Indonesian,Spanish": 0.2159090909090909, + "English,Chinese,Malay,Spanish": 0.20454545454545456, + "English,Indonesian,Malay,Spanish": 0.26136363636363635, + "Filipino,Vietnamese,Chinese,Indonesian": 0.16477272727272727, + "Filipino,Vietnamese,Chinese,Malay": 0.17045454545454544, + "Filipino,Vietnamese,Chinese,Spanish": 0.14204545454545456, + "Filipino,Vietnamese,Indonesian,Malay": 0.23295454545454544, + "Filipino,Vietnamese,Indonesian,Spanish": 0.19886363636363635, + "Filipino,Vietnamese,Malay,Spanish": 0.19318181818181818, + "Filipino,Chinese,Indonesian,Malay": 0.18181818181818182, + "Filipino,Chinese,Indonesian,Spanish": 0.14772727272727273, + "Filipino,Chinese,Malay,Spanish": 0.13068181818181818, + "Filipino,Indonesian,Malay,Spanish": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.21022727272727273, + "Vietnamese,Chinese,Malay,Spanish": 0.20454545454545456, + "Vietnamese,Indonesian,Malay,Spanish": 0.26704545454545453, + "Chinese,Indonesian,Malay,Spanish": 0.19886363636363635 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.13636363636363635, + "English,Filipino,Vietnamese,Chinese,Malay": 0.14204545454545456, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.11931818181818182, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.19318181818181818, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.17045454545454544, + "English,Filipino,Vietnamese,Malay,Spanish": 0.16477272727272727, + "English,Filipino,Chinese,Indonesian,Malay": 0.1590909090909091, + "English,Filipino,Chinese,Indonesian,Spanish": 0.11931818181818182, + "English,Filipino,Chinese,Malay,Spanish": 0.10795454545454546, + "English,Filipino,Indonesian,Malay,Spanish": 0.17613636363636365, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.17045454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.17045454545454544, + "English,Vietnamese,Chinese,Malay,Spanish": 0.1534090909090909, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.20454545454545456, + "English,Chinese,Indonesian,Malay,Spanish": 0.1534090909090909, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.13636363636363635, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.10227272727272728, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.10227272727272728, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.16477272727272727, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.1590909090909091 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.11931818181818182, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.09090909090909091, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.08522727272727272, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.14204545454545456, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11931818181818182, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.09090909090909091 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.07954545454545454 + } + }, + "AC3_2": 0.37822115819774643, + "AC3_3": 0.30849322292903786, + "AC3_4": 0.2497243225728323, + "AC3_5": 0.1995420696296965, + "AC3_6": 0.1582743988298456, + "AC3_7": 0.12598919124286034 + }, + "prompt_3": { + "overall_acc": 0.31493506493506496, + "language_acc": { + "English": 0.3465909090909091, + "Filipino": 0.26136363636363635, + "Vietnamese": 0.26136363636363635, + "Chinese": 0.36363636363636365, + "Indonesian": 0.2840909090909091, + "Malay": 0.32386363636363635, + "Spanish": 0.36363636363636365 + }, + "consistency_score_2": 0.5219155844155845, + "consistency_score_3": 0.3392857142857143, + "consistency_score_4": 0.2412337662337663, + "consistency_score_5": 0.1801948051948052, + "consistency_score_6": 0.14042207792207792, + "consistency_score_7": 0.11363636363636363, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.45454545454545453, + "English,Vietnamese": 0.5511363636363636, + "English,Chinese": 0.5738636363636364, + "English,Indonesian": 0.6477272727272727, + "English,Malay": 0.5397727272727273, + "English,Spanish": 0.6590909090909091, + "Filipino,Vietnamese": 0.4318181818181818, + "Filipino,Chinese": 0.375, + "Filipino,Indonesian": 0.45454545454545453, + "Filipino,Malay": 0.4772727272727273, + "Filipino,Spanish": 0.4715909090909091, + "Vietnamese,Chinese": 0.4715909090909091, + "Vietnamese,Indonesian": 0.5170454545454546, + "Vietnamese,Malay": 0.5170454545454546, + "Vietnamese,Spanish": 0.5227272727272727, + "Chinese,Indonesian": 0.5511363636363636, + "Chinese,Malay": 0.45454545454545453, + "Chinese,Spanish": 0.5284090909090909, + "Indonesian,Malay": 0.6136363636363636, + "Indonesian,Spanish": 0.6022727272727273, + "Malay,Spanish": 0.5454545454545454 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.29545454545454547, + "English,Filipino,Chinese": 0.2784090909090909, + "English,Filipino,Indonesian": 0.3181818181818182, + "English,Filipino,Malay": 0.29545454545454547, + "English,Filipino,Spanish": 0.3352272727272727, + "English,Vietnamese,Chinese": 0.3522727272727273, + "English,Vietnamese,Indonesian": 0.38636363636363635, + "English,Vietnamese,Malay": 0.35795454545454547, + "English,Vietnamese,Spanish": 0.4147727272727273, + "English,Chinese,Indonesian": 0.4375, + "English,Chinese,Malay": 0.3465909090909091, + "English,Chinese,Spanish": 0.4090909090909091, + "English,Indonesian,Malay": 0.4375, + "English,Indonesian,Spanish": 0.4943181818181818, + "English,Malay,Spanish": 0.4318181818181818, + "Filipino,Vietnamese,Chinese": 0.2215909090909091, + "Filipino,Vietnamese,Indonesian": 0.2840909090909091, + "Filipino,Vietnamese,Malay": 0.29545454545454547, + "Filipino,Vietnamese,Spanish": 0.29545454545454547, + "Filipino,Chinese,Indonesian": 0.2556818181818182, + "Filipino,Chinese,Malay": 0.2215909090909091, + "Filipino,Chinese,Spanish": 0.2556818181818182, + "Filipino,Indonesian,Malay": 0.3181818181818182, + "Filipino,Indonesian,Spanish": 0.3181818181818182, + "Filipino,Malay,Spanish": 0.30113636363636365, + "Vietnamese,Chinese,Indonesian": 0.3181818181818182, + "Vietnamese,Chinese,Malay": 0.2840909090909091, + "Vietnamese,Chinese,Spanish": 0.3181818181818182, + "Vietnamese,Indonesian,Malay": 0.375, + "Vietnamese,Indonesian,Spanish": 0.3693181818181818, + "Vietnamese,Malay,Spanish": 0.35795454545454547, + "Chinese,Indonesian,Malay": 0.36363636363636365, + "Chinese,Indonesian,Spanish": 0.375, + "Chinese,Malay,Spanish": 0.32954545454545453, + "Indonesian,Malay,Spanish": 0.42613636363636365 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.19318181818181818, + "English,Filipino,Vietnamese,Indonesian": 0.23295454545454544, + "English,Filipino,Vietnamese,Malay": 0.22727272727272727, + "English,Filipino,Vietnamese,Spanish": 0.25, + "English,Filipino,Chinese,Indonesian": 0.21022727272727273, + "English,Filipino,Chinese,Malay": 0.19318181818181818, + "English,Filipino,Chinese,Spanish": 0.20454545454545456, + "English,Filipino,Indonesian,Malay": 0.24431818181818182, + "English,Filipino,Indonesian,Spanish": 0.26136363636363635, + "English,Filipino,Malay,Spanish": 0.25, + "English,Vietnamese,Chinese,Indonesian": 0.2727272727272727, + "English,Vietnamese,Chinese,Malay": 0.23863636363636365, + "English,Vietnamese,Chinese,Spanish": 0.26704545454545453, + "English,Vietnamese,Indonesian,Malay": 0.2840909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.3125, + "English,Vietnamese,Malay,Spanish": 0.29545454545454547, + "English,Chinese,Indonesian,Malay": 0.3068181818181818, + "English,Chinese,Indonesian,Spanish": 0.32954545454545453, + "English,Chinese,Malay,Spanish": 0.2727272727272727, + "English,Indonesian,Malay,Spanish": 0.3522727272727273, + "Filipino,Vietnamese,Chinese,Indonesian": 0.16477272727272727, + "Filipino,Vietnamese,Chinese,Malay": 0.1590909090909091, + "Filipino,Vietnamese,Chinese,Spanish": 0.17613636363636365, + "Filipino,Vietnamese,Indonesian,Malay": 0.23295454545454544, + "Filipino,Vietnamese,Indonesian,Spanish": 0.2215909090909091, + "Filipino,Vietnamese,Malay,Spanish": 0.2215909090909091, + "Filipino,Chinese,Indonesian,Malay": 0.1875, + "Filipino,Chinese,Indonesian,Spanish": 0.1875, + "Filipino,Chinese,Malay,Spanish": 0.18181818181818182, + "Filipino,Indonesian,Malay,Spanish": 0.24431818181818182, + "Vietnamese,Chinese,Indonesian,Malay": 0.23295454545454544, + "Vietnamese,Chinese,Indonesian,Spanish": 0.23863636363636365, + "Vietnamese,Chinese,Malay,Spanish": 0.22727272727272727, + "Vietnamese,Indonesian,Malay,Spanish": 0.2897727272727273, + "Chinese,Indonesian,Malay,Spanish": 0.2784090909090909 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.1534090909090909, + "English,Filipino,Vietnamese,Chinese,Malay": 0.14772727272727273, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.1534090909090909, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.19318181818181818, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.19886363636363635, + "English,Filipino,Vietnamese,Malay,Spanish": 0.19886363636363635, + "English,Filipino,Chinese,Indonesian,Malay": 0.17045454545454544, + "English,Filipino,Chinese,Indonesian,Spanish": 0.16477272727272727, + "English,Filipino,Chinese,Malay,Spanish": 0.1590909090909091, + "English,Filipino,Indonesian,Malay,Spanish": 0.21022727272727273, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.20454545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.21022727272727273, + "English,Vietnamese,Chinese,Malay,Spanish": 0.19318181818181818, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.23863636363636365, + "English,Chinese,Indonesian,Malay,Spanish": 0.24431818181818182, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.13636363636363635, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.13068181818181818, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.13636363636363635, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.1875, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.19318181818181818 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.13068181818181818, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.125, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.125, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.17045454545454544, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.14772727272727273, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.16477272727272727, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11931818181818182 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11363636363636363 + } + }, + "AC3_2": 0.39282880063830944, + "AC3_3": 0.32665721370405726, + "AC3_4": 0.2732011129068071, + "AC3_5": 0.22923142426709428, + "AC3_6": 0.19423802574617172, + "AC3_7": 0.16701101924477732 + }, + "prompt_4": { + "overall_acc": 0.32954545454545453, + "language_acc": { + "English": 0.39204545454545453, + "Filipino": 0.26136363636363635, + "Vietnamese": 0.2784090909090909, + "Chinese": 0.3352272727272727, + "Indonesian": 0.32386363636363635, + "Malay": 0.3522727272727273, + "Spanish": 0.36363636363636365 + }, + "consistency_score_2": 0.5465367965367965, + "consistency_score_3": 0.3652597402597403, + "consistency_score_4": 0.26298701298701294, + "consistency_score_5": 0.19534632034632035, + "consistency_score_6": 0.14772727272727273, + "consistency_score_7": 0.11363636363636363, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.4659090909090909, + "English,Vietnamese": 0.5795454545454546, + "English,Chinese": 0.6193181818181818, + "English,Indonesian": 0.6534090909090909, + "English,Malay": 0.5625, + "English,Spanish": 0.6477272727272727, + "Filipino,Vietnamese": 0.4318181818181818, + "Filipino,Chinese": 0.42045454545454547, + "Filipino,Indonesian": 0.48295454545454547, + "Filipino,Malay": 0.4772727272727273, + "Filipino,Spanish": 0.4943181818181818, + "Vietnamese,Chinese": 0.5113636363636364, + "Vietnamese,Indonesian": 0.5909090909090909, + "Vietnamese,Malay": 0.5340909090909091, + "Vietnamese,Spanish": 0.5795454545454546, + "Chinese,Indonesian": 0.6079545454545454, + "Chinese,Malay": 0.4659090909090909, + "Chinese,Spanish": 0.5397727272727273, + "Indonesian,Malay": 0.6193181818181818, + "Indonesian,Spanish": 0.6136363636363636, + "Malay,Spanish": 0.5795454545454546 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.29545454545454547, + "English,Filipino,Chinese": 0.3125, + "English,Filipino,Indonesian": 0.3409090909090909, + "English,Filipino,Malay": 0.30113636363636365, + "English,Filipino,Spanish": 0.3352272727272727, + "English,Vietnamese,Chinese": 0.39204545454545453, + "English,Vietnamese,Indonesian": 0.44886363636363635, + "English,Vietnamese,Malay": 0.3806818181818182, + "English,Vietnamese,Spanish": 0.44886363636363635, + "English,Chinese,Indonesian": 0.4772727272727273, + "English,Chinese,Malay": 0.3806818181818182, + "English,Chinese,Spanish": 0.4375, + "English,Indonesian,Malay": 0.4431818181818182, + "English,Indonesian,Spanish": 0.4943181818181818, + "English,Malay,Spanish": 0.4375, + "Filipino,Vietnamese,Chinese": 0.25, + "Filipino,Vietnamese,Indonesian": 0.3068181818181818, + "Filipino,Vietnamese,Malay": 0.29545454545454547, + "Filipino,Vietnamese,Spanish": 0.30113636363636365, + "Filipino,Chinese,Indonesian": 0.3181818181818182, + "Filipino,Chinese,Malay": 0.22727272727272727, + "Filipino,Chinese,Spanish": 0.2784090909090909, + "Filipino,Indonesian,Malay": 0.32954545454545453, + "Filipino,Indonesian,Spanish": 0.3409090909090909, + "Filipino,Malay,Spanish": 0.32386363636363635, + "Vietnamese,Chinese,Indonesian": 0.39204545454545453, + "Vietnamese,Chinese,Malay": 0.30113636363636365, + "Vietnamese,Chinese,Spanish": 0.35795454545454547, + "Vietnamese,Indonesian,Malay": 0.4147727272727273, + "Vietnamese,Indonesian,Spanish": 0.4318181818181818, + "Vietnamese,Malay,Spanish": 0.38636363636363635, + "Chinese,Indonesian,Malay": 0.38636363636363635, + "Chinese,Indonesian,Spanish": 0.42613636363636365, + "Chinese,Malay,Spanish": 0.3522727272727273, + "Indonesian,Malay,Spanish": 0.4375 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.21022727272727273, + "English,Filipino,Vietnamese,Indonesian": 0.23863636363636365, + "English,Filipino,Vietnamese,Malay": 0.2159090909090909, + "English,Filipino,Vietnamese,Spanish": 0.22727272727272727, + "English,Filipino,Chinese,Indonesian": 0.26136363636363635, + "English,Filipino,Chinese,Malay": 0.19886363636363635, + "English,Filipino,Chinese,Spanish": 0.23295454545454544, + "English,Filipino,Indonesian,Malay": 0.25, + "English,Filipino,Indonesian,Spanish": 0.2727272727272727, + "English,Filipino,Malay,Spanish": 0.25, + "English,Vietnamese,Chinese,Indonesian": 0.3352272727272727, + "English,Vietnamese,Chinese,Malay": 0.26136363636363635, + "English,Vietnamese,Chinese,Spanish": 0.3068181818181818, + "English,Vietnamese,Indonesian,Malay": 0.3181818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.375, + "English,Vietnamese,Malay,Spanish": 0.3181818181818182, + "English,Chinese,Indonesian,Malay": 0.32954545454545453, + "English,Chinese,Indonesian,Spanish": 0.3806818181818182, + "English,Chinese,Malay,Spanish": 0.30113636363636365, + "English,Indonesian,Malay,Spanish": 0.35795454545454547, + "Filipino,Vietnamese,Chinese,Indonesian": 0.20454545454545456, + "Filipino,Vietnamese,Chinese,Malay": 0.17045454545454544, + "Filipino,Vietnamese,Chinese,Spanish": 0.1875, + "Filipino,Vietnamese,Indonesian,Malay": 0.23295454545454544, + "Filipino,Vietnamese,Indonesian,Spanish": 0.22727272727272727, + "Filipino,Vietnamese,Malay,Spanish": 0.2215909090909091, + "Filipino,Chinese,Indonesian,Malay": 0.19886363636363635, + "Filipino,Chinese,Indonesian,Spanish": 0.24431818181818182, + "Filipino,Chinese,Malay,Spanish": 0.1875, + "Filipino,Indonesian,Malay,Spanish": 0.25, + "Vietnamese,Chinese,Indonesian,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Indonesian,Spanish": 0.30113636363636365, + "Vietnamese,Chinese,Malay,Spanish": 0.25, + "Vietnamese,Indonesian,Malay,Spanish": 0.3181818181818182, + "Chinese,Indonesian,Malay,Spanish": 0.3068181818181818 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.18181818181818182, + "English,Filipino,Vietnamese,Chinese,Malay": 0.14772727272727273, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.1590909090909091, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.18181818181818182, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.19318181818181818, + "English,Filipino,Vietnamese,Malay,Spanish": 0.17613636363636365, + "English,Filipino,Chinese,Indonesian,Malay": 0.18181818181818182, + "English,Filipino,Chinese,Indonesian,Spanish": 0.2159090909090909, + "English,Filipino,Chinese,Malay,Spanish": 0.16477272727272727, + "English,Filipino,Indonesian,Malay,Spanish": 0.21022727272727273, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.23295454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.2784090909090909, + "English,Vietnamese,Chinese,Malay,Spanish": 0.2215909090909091, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.2727272727272727, + "English,Chinese,Indonesian,Malay,Spanish": 0.2784090909090909, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.14204545454545456, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.1590909090909091, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.13636363636363635, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.17613636363636365, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.2215909090909091 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.13068181818181818, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.14772727272727273, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.11931818181818182, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.1534090909090909, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.1590909090909091, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.20454545454545456, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11931818181818182 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11363636363636363 + } + }, + "AC3_2": 0.41116851025684636, + "AC3_3": 0.3464847068320353, + "AC3_4": 0.29252801987591104, + "AC3_5": 0.24529053416132834, + "AC3_6": 0.20400432896158524, + "AC3_7": 0.1689976689595362 + }, + "prompt_5": { + "overall_acc": 0.3141233766233766, + "language_acc": { + "English": 0.3693181818181818, + "Filipino": 0.24431818181818182, + "Vietnamese": 0.25, + "Chinese": 0.35795454545454547, + "Indonesian": 0.2727272727272727, + "Malay": 0.36363636363636365, + "Spanish": 0.3409090909090909 + }, + "consistency_score_2": 0.4897186147186148, + "consistency_score_3": 0.2982142857142857, + "consistency_score_4": 0.1961038961038961, + "consistency_score_5": 0.13284632034632035, + "consistency_score_6": 0.09090909090909091, + "consistency_score_7": 0.0625, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.4659090909090909, + "English,Vietnamese": 0.5227272727272727, + "English,Chinese": 0.6079545454545454, + "English,Indonesian": 0.5965909090909091, + "English,Malay": 0.5681818181818182, + "English,Spanish": 0.5738636363636364, + "Filipino,Vietnamese": 0.4034090909090909, + "Filipino,Chinese": 0.3977272727272727, + "Filipino,Indonesian": 0.4659090909090909, + "Filipino,Malay": 0.4375, + "Filipino,Spanish": 0.4034090909090909, + "Vietnamese,Chinese": 0.42045454545454547, + "Vietnamese,Indonesian": 0.5056818181818182, + "Vietnamese,Malay": 0.4772727272727273, + "Vietnamese,Spanish": 0.4659090909090909, + "Chinese,Indonesian": 0.5113636363636364, + "Chinese,Malay": 0.4602272727272727, + "Chinese,Spanish": 0.44886363636363635, + "Indonesian,Malay": 0.5681818181818182, + "Indonesian,Spanish": 0.5, + "Malay,Spanish": 0.48295454545454547 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.2727272727272727, + "English,Filipino,Chinese": 0.29545454545454547, + "English,Filipino,Indonesian": 0.3181818181818182, + "English,Filipino,Malay": 0.2784090909090909, + "English,Filipino,Spanish": 0.2840909090909091, + "English,Vietnamese,Chinese": 0.3181818181818182, + "English,Vietnamese,Indonesian": 0.375, + "English,Vietnamese,Malay": 0.3409090909090909, + "English,Vietnamese,Spanish": 0.3522727272727273, + "English,Chinese,Indonesian": 0.4090909090909091, + "English,Chinese,Malay": 0.375, + "English,Chinese,Spanish": 0.36363636363636365, + "English,Indonesian,Malay": 0.39204545454545453, + "English,Indonesian,Spanish": 0.39204545454545453, + "English,Malay,Spanish": 0.36363636363636365, + "Filipino,Vietnamese,Chinese": 0.20454545454545456, + "Filipino,Vietnamese,Indonesian": 0.2556818181818182, + "Filipino,Vietnamese,Malay": 0.2159090909090909, + "Filipino,Vietnamese,Spanish": 0.2159090909090909, + "Filipino,Chinese,Indonesian": 0.26704545454545453, + "Filipino,Chinese,Malay": 0.21022727272727273, + "Filipino,Chinese,Spanish": 0.21022727272727273, + "Filipino,Indonesian,Malay": 0.2784090909090909, + "Filipino,Indonesian,Spanish": 0.2556818181818182, + "Filipino,Malay,Spanish": 0.22727272727272727, + "Vietnamese,Chinese,Indonesian": 0.2784090909090909, + "Vietnamese,Chinese,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Spanish": 0.25, + "Vietnamese,Indonesian,Malay": 0.32386363636363635, + "Vietnamese,Indonesian,Spanish": 0.3181818181818182, + "Vietnamese,Malay,Spanish": 0.29545454545454547, + "Chinese,Indonesian,Malay": 0.3181818181818182, + "Chinese,Indonesian,Spanish": 0.3181818181818182, + "Chinese,Malay,Spanish": 0.29545454545454547, + "Indonesian,Malay,Spanish": 0.3409090909090909 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.17613636363636365, + "English,Filipino,Vietnamese,Indonesian": 0.21022727272727273, + "English,Filipino,Vietnamese,Malay": 0.1590909090909091, + "English,Filipino,Vietnamese,Spanish": 0.18181818181818182, + "English,Filipino,Chinese,Indonesian": 0.23295454545454544, + "English,Filipino,Chinese,Malay": 0.18181818181818182, + "English,Filipino,Chinese,Spanish": 0.17613636363636365, + "English,Filipino,Indonesian,Malay": 0.21022727272727273, + "English,Filipino,Indonesian,Spanish": 0.2159090909090909, + "English,Filipino,Malay,Spanish": 0.1875, + "English,Vietnamese,Chinese,Indonesian": 0.25, + "English,Vietnamese,Chinese,Malay": 0.19318181818181818, + "English,Vietnamese,Chinese,Spanish": 0.2159090909090909, + "English,Vietnamese,Indonesian,Malay": 0.2556818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.2784090909090909, + "English,Vietnamese,Malay,Spanish": 0.24431818181818182, + "English,Chinese,Indonesian,Malay": 0.26704545454545453, + "English,Chinese,Indonesian,Spanish": 0.2727272727272727, + "English,Chinese,Malay,Spanish": 0.24431818181818182, + "English,Indonesian,Malay,Spanish": 0.2727272727272727, + "Filipino,Vietnamese,Chinese,Indonesian": 0.14772727272727273, + "Filipino,Vietnamese,Chinese,Malay": 0.10795454545454546, + "Filipino,Vietnamese,Chinese,Spanish": 0.11931818181818182, + "Filipino,Vietnamese,Indonesian,Malay": 0.1534090909090909, + "Filipino,Vietnamese,Indonesian,Spanish": 0.1534090909090909, + "Filipino,Vietnamese,Malay,Spanish": 0.13068181818181818, + "Filipino,Chinese,Indonesian,Malay": 0.1590909090909091, + "Filipino,Chinese,Indonesian,Spanish": 0.17613636363636365, + "Filipino,Chinese,Malay,Spanish": 0.14772727272727273, + "Filipino,Indonesian,Malay,Spanish": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Spanish": 0.19886363636363635, + "Vietnamese,Chinese,Malay,Spanish": 0.17045454545454544, + "Vietnamese,Indonesian,Malay,Spanish": 0.2215909090909091, + "Chinese,Indonesian,Malay,Spanish": 0.22727272727272727 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.14772727272727273, + "English,Filipino,Vietnamese,Chinese,Malay": 0.09659090909090909, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.10795454545454546, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.13068181818181818, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.14772727272727273, + "English,Filipino,Vietnamese,Malay,Spanish": 0.11363636363636363, + "English,Filipino,Chinese,Indonesian,Malay": 0.14772727272727273, + "English,Filipino,Chinese,Indonesian,Spanish": 0.1590909090909091, + "English,Filipino,Chinese,Malay,Spanish": 0.125, + "English,Filipino,Indonesian,Malay,Spanish": 0.14204545454545456, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.18181818181818182, + "English,Vietnamese,Chinese,Malay,Spanish": 0.14204545454545456, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.19318181818181818, + "English,Chinese,Indonesian,Malay,Spanish": 0.19318181818181818, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.08522727272727272, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.10227272727272728, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.07954545454545454, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.09090909090909091, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.13636363636363635 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.08522727272727272, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.10227272727272728, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.06818181818181818, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.09090909090909091, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.10795454545454546, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11931818181818182, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.0625 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.0625 + } + }, + "AC3_2": 0.38274204753792046, + "AC3_3": 0.3059621647735453, + "AC3_4": 0.24146423091738334, + "AC3_5": 0.18672467056606684, + "AC3_6": 0.1410092912750782, + "AC3_7": 0.10425646548955948 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3786407766990291 + }, + "prompt_2": { + "accuracy": 0.4077669902912621 + }, + "prompt_3": { + "accuracy": 0.36893203883495146 + }, + "prompt_4": { + "accuracy": 0.3592233009708738 + }, + "prompt_5": { + "accuracy": 0.39805825242718446 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3047619047619048 + }, + "prompt_2": { + "accuracy": 0.3047619047619048 + }, + "prompt_3": { + "accuracy": 0.3142857142857143 + }, + "prompt_4": { + "accuracy": 0.26666666666666666 + }, + "prompt_5": { + "accuracy": 0.2857142857142857 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.37383177570093457 + }, + "prompt_2": { + "accuracy": 0.35514018691588783 + }, + "prompt_3": { + "accuracy": 0.34579439252336447 + }, + "prompt_4": { + "accuracy": 0.37383177570093457 + }, + "prompt_5": { + "accuracy": 0.3644859813084112 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.31, + "category_acc": { + "brand": 0.4, + "demographics": 0.2, + "biology": 0.5, + "history": 0.06666666666666667, + "literature": 0.2, + "politics": 0.3, + "culture": 0.3, + "film": 0.3, + "law": 0.3, + "geography": 0.6 + } + }, + "prompt_2": { + "accuracy": 0.29, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.4, + "history": 0.06666666666666667, + "literature": 0.2, + "politics": 0.4, + "culture": 0.2, + "film": 0.3, + "law": 0.3, + "geography": 0.7 + } + }, + "prompt_3": { + "accuracy": 0.3, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.6, + "history": 0.06666666666666667, + "literature": 0.2, + "politics": 0.3, + "culture": 0.2, + "film": 0.3, + "law": 0.3, + "geography": 0.6 + } + }, + "prompt_4": { + "accuracy": 0.31, + "category_acc": { + "brand": 0.4, + "demographics": 0.0, + "biology": 0.5, + "history": 0.06666666666666667, + "literature": 0.3, + "politics": 0.3, + "culture": 0.3, + "film": 0.3, + "law": 0.3, + "geography": 0.6 + } + }, + "prompt_5": { + "accuracy": 0.3, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.5, + "history": 0.06666666666666667, + "literature": 0.3, + "politics": 0.2, + "culture": 0.3, + "film": 0.3, + "law": 0.3, + "geography": 0.6 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.08018261727594969 + }, + "prompt_2": { + "bleu_score": 0.12382019242173196 + }, + "prompt_3": { + "bleu_score": 0.1199767749511639 + }, + "prompt_4": { + "bleu_score": 0.03195403005978134 + }, + "prompt_5": { + "bleu_score": 0.061824747337313385 + } }, "indommlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.32672408037919753, + "category_acc": { + "History": 0.2891566265060241, + "Geography": 0.27346938775510204, + "Lampungic": 0.3673469387755102, + "Social science": 0.42070116861435725, + "Balinese": 0.32696390658174096, + "Makassarese": 0.27419354838709675, + "Banjarese": 0.3541666666666667, + "Chemistry": 0.23065693430656933, + "Biology": 0.29704142011834317, + "Science": 0.32714138286893707, + "Christian religion": 0.3383084577114428, + "Art": 0.3410981697171381, + "Islam religion": 0.34281650071123754, + "Hindu religion": 0.36, + "Madurese": 0.24745762711864408, + "Sport": 0.3108108108108108, + "Indonesian language": 0.37141967621419675, + "Physics": 0.2585858585858586, + "Minangkabau culture": 0.25125628140703515, + "Dayak language": 0.26605504587155965, + "Sociology": 0.3185483870967742, + "Economy": 0.2766393442622951, + "Sundanese": 0.33362143474503025, + "Javanese": 0.3014112903225806, + "Civic education": 0.37625178826895567 + } + }, + "prompt_2": { + "accuracy": 0.3370719006609253, + "category_acc": { + "History": 0.2891566265060241, + "Geography": 0.2612244897959184, + "Lampungic": 0.3877551020408163, + "Social science": 0.48080133555926546, + "Balinese": 0.2951167728237792, + "Makassarese": 0.26881720430107525, + "Banjarese": 0.3611111111111111, + "Chemistry": 0.24379562043795622, + "Biology": 0.28875739644970416, + "Science": 0.35810113519091846, + "Christian religion": 0.38308457711442784, + "Art": 0.33943427620632277, + "Islam religion": 0.34992887624466573, + "Hindu religion": 0.4066666666666667, + "Madurese": 0.27796610169491526, + "Sport": 0.3581081081081081, + "Indonesian language": 0.38418430884184307, + "Physics": 0.2727272727272727, + "Minangkabau culture": 0.3165829145728643, + "Dayak language": 0.3119266055045872, + "Sociology": 0.30443548387096775, + "Economy": 0.29713114754098363, + "Sundanese": 0.3362143474503025, + "Javanese": 0.2913306451612903, + "Civic education": 0.38626609442060084 + } + }, + "prompt_3": { + "accuracy": 0.3315308098003872, + "category_acc": { + "History": 0.28714859437751006, + "Geography": 0.2693877551020408, + "Lampungic": 0.3673469387755102, + "Social science": 0.4540901502504174, + "Balinese": 0.28874734607218683, + "Makassarese": 0.25268817204301075, + "Banjarese": 0.3611111111111111, + "Chemistry": 0.24379562043795622, + "Biology": 0.28994082840236685, + "Science": 0.3446852425180599, + "Christian religion": 0.38308457711442784, + "Art": 0.33610648918469216, + "Islam religion": 0.3456614509246088, + "Hindu religion": 0.38666666666666666, + "Madurese": 0.288135593220339, + "Sport": 0.36486486486486486, + "Indonesian language": 0.37733499377334995, + "Physics": 0.26262626262626265, + "Minangkabau culture": 0.2964824120603015, + "Dayak language": 0.3119266055045872, + "Sociology": 0.29838709677419356, + "Economy": 0.2930327868852459, + "Sundanese": 0.337942955920484, + "Javanese": 0.2913306451612903, + "Civic education": 0.37052932761087265 + } + }, + "prompt_4": { + "accuracy": 0.3195807463782629, + "category_acc": { + "History": 0.2751004016064257, + "Geography": 0.26326530612244897, + "Lampungic": 0.35374149659863946, + "Social science": 0.4257095158597663, + "Balinese": 0.3057324840764331, + "Makassarese": 0.27419354838709675, + "Banjarese": 0.3402777777777778, + "Chemistry": 0.24233576642335766, + "Biology": 0.3088757396449704, + "Science": 0.32610939112487103, + "Christian religion": 0.3482587064676617, + "Art": 0.3178036605657238, + "Islam religion": 0.3314366998577525, + "Hindu religion": 0.36666666666666664, + "Madurese": 0.2677966101694915, + "Sport": 0.3310810810810811, + "Indonesian language": 0.35678704856787047, + "Physics": 0.24848484848484848, + "Minangkabau culture": 0.24623115577889448, + "Dayak language": 0.27522935779816515, + "Sociology": 0.2842741935483871, + "Economy": 0.2766393442622951, + "Sundanese": 0.331028522039758, + "Javanese": 0.29536290322580644, + "Civic education": 0.35765379113018597 + } + }, + "prompt_5": { + "accuracy": 0.3167100607517191, + "category_acc": { + "History": 0.26907630522088355, + "Geography": 0.27755102040816326, + "Lampungic": 0.38095238095238093, + "Social science": 0.4524207011686144, + "Balinese": 0.2951167728237792, + "Makassarese": 0.27419354838709675, + "Banjarese": 0.3611111111111111, + "Chemistry": 0.24379562043795622, + "Biology": 0.30532544378698223, + "Science": 0.31785345717234265, + "Christian religion": 0.38308457711442784, + "Art": 0.3427620632279534, + "Islam religion": 0.3058321479374111, + "Hindu religion": 0.32666666666666666, + "Madurese": 0.2576271186440678, + "Sport": 0.3108108108108108, + "Indonesian language": 0.35024906600249067, + "Physics": 0.25252525252525254, + "Minangkabau culture": 0.24623115577889448, + "Dayak language": 0.3211009174311927, + "Sociology": 0.28830645161290325, + "Economy": 0.2827868852459016, + "Sundanese": 0.32497839239412274, + "Javanese": 0.2661290322580645, + "Civic education": 0.3547925608011445 + } + } }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 - }, - "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.1343785060078945 + }, + "prompt_2": { + "bleu_score": 0.1496093405264642 + }, + "prompt_3": { + "bleu_score": 0.16003855519520016 + }, + "prompt_4": { + "bleu_score": 0.10462834210636253 + }, + "prompt_5": { + "bleu_score": 0.06385242696530409 + } }, - "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "flores_vie2eng": { + "prompt_1": { + "bleu_score": 0.1087606777198467 + }, + "prompt_2": { + "bleu_score": 0.11902537481654184 + }, + "prompt_3": { + "bleu_score": 0.11904787991589674 + }, + "prompt_4": { + "bleu_score": 0.08972204187399575 + }, + "prompt_5": { + "bleu_score": 0.055067576910017214 + } + }, + "flores_zho2eng": { + "prompt_1": { + "bleu_score": 0.09334331817883475 + }, + "prompt_2": { + "bleu_score": 0.10315026424681181 + }, + "prompt_3": { + "bleu_score": 0.10991932612344779 + }, + "prompt_4": { + "bleu_score": 0.09121174219069825 + }, + "prompt_5": { + "bleu_score": 0.056671234844644376 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.11139158189885928 + }, + "prompt_2": { + "bleu_score": 0.12086218447554221 + }, + "prompt_3": { + "bleu_score": 0.12973348185998357 + }, + "prompt_4": { + "bleu_score": 0.08166315239199984 + }, + "prompt_5": { + "bleu_score": 0.037636119241057975 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4130688448074679 + }, + "prompt_2": { + "accuracy": 0.42473745624270715 + }, + "prompt_3": { + "accuracy": 0.4282380396732789 + }, + "prompt_4": { + "accuracy": 0.4049008168028005 + }, + "prompt_5": { + "accuracy": 0.411901983663944 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.38248122988916694, + "category_acc": { + "high_school_european_history": 0.49390243902439024, + "business_ethics": 0.42424242424242425, + "clinical_knowledge": 0.39015151515151514, + "medical_genetics": 0.42424242424242425, + "high_school_us_history": 0.3891625615763547, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.5042372881355932, + "virology": 0.37575757575757573, + "high_school_microeconomics": 0.3037974683544304, + "econometrics": 0.2743362831858407, + "college_computer_science": 0.31313131313131315, + "high_school_biology": 0.43042071197411, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.31316725978647686, + "philosophy": 0.3774193548387097, + "professional_medicine": 0.3062730627306273, + "nutrition": 0.4426229508196721, + "global_facts": 0.31313131313131315, + "machine_learning": 0.3783783783783784, + "security_studies": 0.36885245901639346, + "public_relations": 0.42201834862385323, + "professional_psychology": 0.397708674304419, + "prehistory": 0.3993808049535604, + "anatomy": 0.44776119402985076, + "human_sexuality": 0.43846153846153846, + "college_medicine": 0.37790697674418605, + "high_school_government_and_politics": 0.4375, + "college_chemistry": 0.20202020202020202, + "logical_fallacies": 0.41358024691358025, + "high_school_geography": 0.4619289340101523, + "elementary_mathematics": 0.27586206896551724, + "human_aging": 0.481981981981982, + "college_mathematics": 0.30303030303030304, + "high_school_psychology": 0.5018382352941176, + "formal_logic": 0.384, + "high_school_statistics": 0.2186046511627907, + "international_law": 0.49166666666666664, + "high_school_mathematics": 0.27137546468401486, + "high_school_computer_science": 0.36363636363636365, + "conceptual_physics": 0.36324786324786323, + "miscellaneous": 0.5089514066496164, + "high_school_chemistry": 0.27722772277227725, + "marketing": 0.6137339055793991, + "professional_law": 0.31441617742987604, + "management": 0.4411764705882353, + "college_physics": 0.22772277227722773, + "jurisprudence": 0.42990654205607476, + "world_religions": 0.5529411764705883, + "sociology": 0.485, + "us_foreign_policy": 0.5252525252525253, + "high_school_macroeconomics": 0.3676092544987147, + "computer_security": 0.5252525252525253, + "moral_scenarios": 0.24161073825503357, + "moral_disputes": 0.4579710144927536, + "electrical_engineering": 0.4444444444444444, + "astronomy": 0.31125827814569534, + "college_biology": 0.42657342657342656 + } + }, + "prompt_2": { + "accuracy": 0.39656775116195925, + "category_acc": { + "high_school_european_history": 0.5060975609756098, + "business_ethics": 0.46464646464646464, + "clinical_knowledge": 0.4090909090909091, + "medical_genetics": 0.47474747474747475, + "high_school_us_history": 0.3694581280788177, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.5550847457627118, + "virology": 0.40606060606060607, + "high_school_microeconomics": 0.33755274261603374, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.29292929292929293, + "high_school_biology": 0.44983818770226536, + "abstract_algebra": 0.1919191919191919, + "professional_accounting": 0.29537366548042704, + "philosophy": 0.38387096774193546, + "professional_medicine": 0.2952029520295203, + "nutrition": 0.4721311475409836, + "global_facts": 0.2727272727272727, + "machine_learning": 0.38738738738738737, + "security_studies": 0.3770491803278688, + "public_relations": 0.44036697247706424, + "professional_psychology": 0.4026186579378069, + "prehistory": 0.4179566563467492, + "anatomy": 0.43283582089552236, + "human_sexuality": 0.45384615384615384, + "college_medicine": 0.4127906976744186, + "high_school_government_and_politics": 0.4895833333333333, + "college_chemistry": 0.26262626262626265, + "logical_fallacies": 0.4074074074074074, + "high_school_geography": 0.49238578680203043, + "elementary_mathematics": 0.27055702917771884, + "human_aging": 0.46846846846846846, + "college_mathematics": 0.36363636363636365, + "high_school_psychology": 0.5680147058823529, + "formal_logic": 0.376, + "high_school_statistics": 0.25116279069767444, + "international_law": 0.525, + "high_school_mathematics": 0.241635687732342, + "high_school_computer_science": 0.41414141414141414, + "conceptual_physics": 0.3717948717948718, + "miscellaneous": 0.5332480818414322, + "high_school_chemistry": 0.30198019801980197, + "marketing": 0.5879828326180258, + "professional_law": 0.3268101761252446, + "management": 0.46078431372549017, + "college_physics": 0.1782178217821782, + "jurisprudence": 0.4485981308411215, + "world_religions": 0.5352941176470588, + "sociology": 0.53, + "us_foreign_policy": 0.5757575757575758, + "high_school_macroeconomics": 0.4138817480719794, + "computer_security": 0.5454545454545454, + "moral_scenarios": 0.24161073825503357, + "moral_disputes": 0.4753623188405797, + "electrical_engineering": 0.4652777777777778, + "astronomy": 0.33774834437086093, + "college_biology": 0.4195804195804196 + } + }, + "prompt_3": { + "accuracy": 0.39756882373972113, + "category_acc": { + "high_school_european_history": 0.5, + "business_ethics": 0.46464646464646464, + "clinical_knowledge": 0.4128787878787879, + "medical_genetics": 0.45454545454545453, + "high_school_us_history": 0.4039408866995074, + "high_school_physics": 0.28, + "high_school_world_history": 0.5550847457627118, + "virology": 0.3939393939393939, + "high_school_microeconomics": 0.3291139240506329, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.2828282828282828, + "high_school_biology": 0.44660194174757284, + "abstract_algebra": 0.29292929292929293, + "professional_accounting": 0.30604982206405695, + "philosophy": 0.3709677419354839, + "professional_medicine": 0.2988929889298893, + "nutrition": 0.4721311475409836, + "global_facts": 0.2828282828282828, + "machine_learning": 0.36936936936936937, + "security_studies": 0.38934426229508196, + "public_relations": 0.44036697247706424, + "professional_psychology": 0.3993453355155483, + "prehistory": 0.4458204334365325, + "anatomy": 0.41044776119402987, + "human_sexuality": 0.45384615384615384, + "college_medicine": 0.4011627906976744, + "high_school_government_and_politics": 0.4895833333333333, + "college_chemistry": 0.23232323232323232, + "logical_fallacies": 0.38271604938271603, + "high_school_geography": 0.48223350253807107, + "elementary_mathematics": 0.27055702917771884, + "human_aging": 0.481981981981982, + "college_mathematics": 0.36363636363636365, + "high_school_psychology": 0.5606617647058824, + "formal_logic": 0.376, + "high_school_statistics": 0.24186046511627907, + "international_law": 0.5083333333333333, + "high_school_mathematics": 0.2527881040892193, + "high_school_computer_science": 0.3939393939393939, + "conceptual_physics": 0.3717948717948718, + "miscellaneous": 0.5383631713554987, + "high_school_chemistry": 0.3069306930693069, + "marketing": 0.6223175965665236, + "professional_law": 0.32746249184605347, + "management": 0.45098039215686275, + "college_physics": 0.19801980198019803, + "jurisprudence": 0.45794392523364486, + "world_religions": 0.5352941176470588, + "sociology": 0.535, + "us_foreign_policy": 0.5757575757575758, + "high_school_macroeconomics": 0.41131105398457585, + "computer_security": 0.5252525252525253, + "moral_scenarios": 0.24384787472035793, + "moral_disputes": 0.4492753623188406, + "electrical_engineering": 0.4583333333333333, + "astronomy": 0.3576158940397351, + "college_biology": 0.44755244755244755 + } + }, + "prompt_4": { + "accuracy": 0.3877011083303539, + "category_acc": { + "high_school_european_history": 0.49390243902439024, + "business_ethics": 0.41414141414141414, + "clinical_knowledge": 0.3977272727272727, + "medical_genetics": 0.42424242424242425, + "high_school_us_history": 0.4088669950738916, + "high_school_physics": 0.29333333333333333, + "high_school_world_history": 0.5338983050847458, + "virology": 0.3939393939393939, + "high_school_microeconomics": 0.3459915611814346, + "econometrics": 0.23008849557522124, + "college_computer_science": 0.29292929292929293, + "high_school_biology": 0.42071197411003236, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.3096085409252669, + "philosophy": 0.3709677419354839, + "professional_medicine": 0.3025830258302583, + "nutrition": 0.46557377049180326, + "global_facts": 0.3333333333333333, + "machine_learning": 0.3783783783783784, + "security_studies": 0.38114754098360654, + "public_relations": 0.41284403669724773, + "professional_psychology": 0.39279869067103107, + "prehistory": 0.42105263157894735, + "anatomy": 0.44029850746268656, + "human_sexuality": 0.4307692307692308, + "college_medicine": 0.3953488372093023, + "high_school_government_and_politics": 0.4322916666666667, + "college_chemistry": 0.18181818181818182, + "logical_fallacies": 0.3888888888888889, + "high_school_geography": 0.47715736040609136, + "elementary_mathematics": 0.27320954907161804, + "human_aging": 0.46846846846846846, + "college_mathematics": 0.32323232323232326, + "high_school_psychology": 0.5128676470588235, + "formal_logic": 0.336, + "high_school_statistics": 0.24186046511627907, + "international_law": 0.48333333333333334, + "high_school_mathematics": 0.275092936802974, + "high_school_computer_science": 0.3838383838383838, + "conceptual_physics": 0.3333333333333333, + "miscellaneous": 0.5179028132992327, + "high_school_chemistry": 0.28217821782178215, + "marketing": 0.6137339055793991, + "professional_law": 0.33072407045009783, + "management": 0.4411764705882353, + "college_physics": 0.22772277227722773, + "jurisprudence": 0.48598130841121495, + "world_religions": 0.5470588235294118, + "sociology": 0.485, + "us_foreign_policy": 0.5353535353535354, + "high_school_macroeconomics": 0.39845758354755784, + "computer_security": 0.5252525252525253, + "moral_scenarios": 0.23937360178970918, + "moral_disputes": 0.48405797101449277, + "electrical_engineering": 0.4375, + "astronomy": 0.304635761589404, + "college_biology": 0.40559440559440557 + } + }, + "prompt_5": { + "accuracy": 0.3866285305684662, + "category_acc": { + "high_school_european_history": 0.5, + "business_ethics": 0.43434343434343436, + "clinical_knowledge": 0.38257575757575757, + "medical_genetics": 0.41414141414141414, + "high_school_us_history": 0.3891625615763547, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.5211864406779662, + "virology": 0.3939393939393939, + "high_school_microeconomics": 0.3459915611814346, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.29292929292929293, + "high_school_biology": 0.4174757281553398, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.30604982206405695, + "philosophy": 0.3774193548387097, + "professional_medicine": 0.3062730627306273, + "nutrition": 0.4524590163934426, + "global_facts": 0.3333333333333333, + "machine_learning": 0.36936936936936937, + "security_studies": 0.4180327868852459, + "public_relations": 0.4036697247706422, + "professional_psychology": 0.39279869067103107, + "prehistory": 0.42105263157894735, + "anatomy": 0.417910447761194, + "human_sexuality": 0.4230769230769231, + "college_medicine": 0.37790697674418605, + "high_school_government_and_politics": 0.4427083333333333, + "college_chemistry": 0.1919191919191919, + "logical_fallacies": 0.41358024691358025, + "high_school_geography": 0.48223350253807107, + "elementary_mathematics": 0.26790450928381965, + "human_aging": 0.49099099099099097, + "college_mathematics": 0.32323232323232326, + "high_school_psychology": 0.5183823529411765, + "formal_logic": 0.336, + "high_school_statistics": 0.24186046511627907, + "international_law": 0.475, + "high_school_mathematics": 0.26394052044609667, + "high_school_computer_science": 0.3838383838383838, + "conceptual_physics": 0.34615384615384615, + "miscellaneous": 0.5191815856777494, + "high_school_chemistry": 0.297029702970297, + "marketing": 0.5793991416309013, + "professional_law": 0.3268101761252446, + "management": 0.4411764705882353, + "college_physics": 0.22772277227722773, + "jurisprudence": 0.4766355140186916, + "world_religions": 0.5411764705882353, + "sociology": 0.5, + "us_foreign_policy": 0.5757575757575758, + "high_school_macroeconomics": 0.3676092544987147, + "computer_security": 0.5555555555555556, + "moral_scenarios": 0.24161073825503357, + "moral_disputes": 0.4608695652173913, + "electrical_engineering": 0.4444444444444444, + "astronomy": 0.31125827814569534, + "college_biology": 0.40559440559440557 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3395245170876672 + }, + "prompt_2": { + "accuracy": 0.35438335809806837 + }, + "prompt_3": { + "accuracy": 0.3536404160475483 + }, + "prompt_4": { + "accuracy": 0.3261515601783061 + }, + "prompt_5": { + "accuracy": 0.3395245170876672 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.30199252801992527, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.40476190476190477, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.3448275862068966, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.08333333333333333, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.34615384615384615, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.35714285714285715, + "college_economics": 0.2833333333333333, + "business_administration": 0.2631578947368421, + "marxism": 0.5, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.47058823529411764, + "teacher_qualification": 0.40816326530612246, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.375, + "logic": 0.2962962962962963, + "law": 0.1724137931034483, + "chinese_language_and_literature": 0.25, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.25, + "high_school_history": 0.44, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.28846153846153844, + "sports_science": 0.20833333333333334, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.2962962962962963, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.2777777777777778, + "physician": 0.3888888888888889 + } + }, + "prompt_2": { + "accuracy": 0.3343711083437111, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.25, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.35714285714285715, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.3448275862068966, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.125, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.125, + "middle_school_biology": 0.38461538461538464, + "middle_school_physics": 0.5416666666666666, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.3333333333333333, + "business_administration": 0.34210526315789475, + "marxism": 0.5416666666666666, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.5588235294117647, + "teacher_qualification": 0.42857142857142855, + "high_school_politics": 0.25, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.25, + "logic": 0.3333333333333333, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.52, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.2692307692307692, + "sports_science": 0.20833333333333334, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.3333333333333333, + "accountant": 0.2962962962962963, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.35185185185185186, + "physician": 0.42592592592592593 + } + }, + "prompt_3": { + "accuracy": 0.32503113325031135, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.38095238095238093, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.3103448275862069, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.125, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.125, + "middle_school_biology": 0.38461538461538464, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.35, + "business_administration": 0.34210526315789475, + "marxism": 0.5, + "mao_zedong_thought": 0.4482758620689655, + "education_science": 0.5294117647058824, + "teacher_qualification": 0.40816326530612246, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.16666666666666666, + "logic": 0.37037037037037035, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.25, + "high_school_history": 0.52, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.25, + "sports_science": 0.25, + "plant_protection": 0.4074074074074074, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.3137254901960784, + "accountant": 0.2962962962962963, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.2962962962962963, + "physician": 0.3888888888888889 + } + }, + "prompt_4": { + "accuracy": 0.30759651307596514, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.35714285714285715, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.125, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.125, + "middle_school_biology": 0.38461538461538464, + "middle_school_physics": 0.4583333333333333, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.26666666666666666, + "business_administration": 0.3157894736842105, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.4482758620689655, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.3877551020408163, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.3333333333333333, + "law": 0.2413793103448276, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.2894736842105263, + "professional_tour_guide": 0.23529411764705882, + "legal_professional": 0.14285714285714285, + "high_school_chinese": 0.25, + "high_school_history": 0.48, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.28846153846153844, + "sports_science": 0.3333333333333333, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.3333333333333333, + "accountant": 0.24074074074074073, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.2222222222222222, + "tax_accountant": 0.3333333333333333, + "physician": 0.4074074074074074 + } + }, + "prompt_5": { + "accuracy": 0.3138231631382316, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.35714285714285715, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.125, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.125, + "middle_school_biology": 0.38461538461538464, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.3, + "business_administration": 0.2894736842105263, + "marxism": 0.5416666666666666, + "mao_zedong_thought": 0.3448275862068966, + "education_science": 0.47058823529411764, + "teacher_qualification": 0.40816326530612246, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.375, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.375, + "logic": 0.2962962962962963, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.2894736842105263, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.25, + "high_school_history": 0.44, + "middle_school_history": 0.4074074074074074, + "civil_servant": 0.2692307692307692, + "sports_science": 0.2916666666666667, + "plant_protection": 0.37037037037037035, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.3137254901960784, + "accountant": 0.2962962962962963, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.25, + "tax_accountant": 0.3333333333333333, + "physician": 0.4074074074074074 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2939068100358423 + }, + "prompt_2": { + "accuracy": 0.33691756272401435 + }, + "prompt_3": { + "accuracy": 0.34408602150537637 + }, + "prompt_4": { + "accuracy": 0.2867383512544803 + }, + "prompt_5": { + "accuracy": 0.3010752688172043 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3240372992574685, + "category_acc": { + "agronomy": 0.3254437869822485, + "anatomy": 0.28378378378378377, + "ancient_chinese": 0.27439024390243905, + "arts": 0.31875, + "astronomy": 0.3090909090909091, + "business_ethics": 0.3923444976076555, + "chinese_civil_service_exam": 0.30625, + "chinese_driving_rule": 0.4580152671755725, + "chinese_food_culture": 0.27205882352941174, + "chinese_foreign_policy": 0.308411214953271, + "chinese_history": 0.29721362229102166, + "chinese_literature": 0.28431372549019607, + "chinese_teacher_qualification": 0.3240223463687151, + "clinical_knowledge": 0.2869198312236287, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.411214953271028, + "college_engineering_hydrology": 0.3584905660377358, + "college_law": 0.25, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.3584905660377358, + "college_medicine": 0.31135531135531136, + "computer_science": 0.35294117647058826, + "computer_security": 0.3391812865497076, + "conceptual_physics": 0.2585034013605442, + "construction_project_management": 0.3237410071942446, + "economics": 0.33962264150943394, + "education": 0.3558282208588957, + "electrical_engineering": 0.36046511627906974, + "elementary_chinese": 0.28174603174603174, + "elementary_commonsense": 0.3181818181818182, + "elementary_information_and_technology": 0.4495798319327731, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.31851851851851853, + "food_science": 0.34965034965034963, + "genetics": 0.29545454545454547, + "global_facts": 0.2550335570469799, + "high_school_biology": 0.2485207100591716, + "high_school_chemistry": 0.25, + "high_school_geography": 0.288135593220339, + "high_school_mathematics": 0.25, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.36363636363636365, + "human_sexuality": 0.3492063492063492, + "international_law": 0.2810810810810811, + "journalism": 0.31976744186046513, + "jurisprudence": 0.35766423357664234, + "legal_and_moral_basis": 0.4766355140186916, + "logical": 0.3008130081300813, + "machine_learning": 0.21311475409836064, + "management": 0.36666666666666664, + "marketing": 0.4222222222222222, + "marxist_theory": 0.32275132275132273, + "modern_chinese": 0.2413793103448276, + "nutrition": 0.36551724137931035, + "philosophy": 0.37142857142857144, + "professional_accounting": 0.3142857142857143, + "professional_law": 0.3222748815165877, + "professional_medicine": 0.2579787234042553, + "professional_psychology": 0.3448275862068966, + "public_relations": 0.3620689655172414, + "security_study": 0.34814814814814815, + "sociology": 0.37610619469026546, + "sports_science": 0.3939393939393939, + "traditional_chinese_medicine": 0.2918918918918919, + "virology": 0.33727810650887574, + "world_history": 0.35403726708074534, + "world_religions": 0.3375 + } + }, + "prompt_2": { + "accuracy": 0.33906061129338627, + "category_acc": { + "agronomy": 0.3136094674556213, + "anatomy": 0.2972972972972973, + "ancient_chinese": 0.2865853658536585, + "arts": 0.325, + "astronomy": 0.3212121212121212, + "business_ethics": 0.39712918660287083, + "chinese_civil_service_exam": 0.34375, + "chinese_driving_rule": 0.45038167938931295, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.37383177570093457, + "chinese_history": 0.33436532507739936, + "chinese_literature": 0.3137254901960784, + "chinese_teacher_qualification": 0.3854748603351955, + "clinical_knowledge": 0.270042194092827, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.4205607476635514, + "college_engineering_hydrology": 0.4056603773584906, + "college_law": 0.2777777777777778, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.3490566037735849, + "college_medicine": 0.32967032967032966, + "computer_science": 0.35294117647058826, + "computer_security": 0.3684210526315789, + "conceptual_physics": 0.29931972789115646, + "construction_project_management": 0.3597122302158273, + "economics": 0.34591194968553457, + "education": 0.4171779141104294, + "electrical_engineering": 0.38372093023255816, + "elementary_chinese": 0.2857142857142857, + "elementary_commonsense": 0.3333333333333333, + "elementary_information_and_technology": 0.48739495798319327, + "elementary_mathematics": 0.26521739130434785, + "ethnology": 0.2814814814814815, + "food_science": 0.3706293706293706, + "genetics": 0.3125, + "global_facts": 0.26174496644295303, + "high_school_biology": 0.28402366863905326, + "high_school_chemistry": 0.3181818181818182, + "high_school_geography": 0.3220338983050847, + "high_school_mathematics": 0.2621951219512195, + "high_school_physics": 0.2818181818181818, + "high_school_politics": 0.34265734265734266, + "human_sexuality": 0.3888888888888889, + "international_law": 0.2594594594594595, + "journalism": 0.3372093023255814, + "jurisprudence": 0.3844282238442822, + "legal_and_moral_basis": 0.5373831775700935, + "logical": 0.3252032520325203, + "machine_learning": 0.21311475409836064, + "management": 0.37142857142857144, + "marketing": 0.40555555555555556, + "marxist_theory": 0.3544973544973545, + "modern_chinese": 0.1896551724137931, + "nutrition": 0.36551724137931035, + "philosophy": 0.38095238095238093, + "professional_accounting": 0.35428571428571426, + "professional_law": 0.3222748815165877, + "professional_medicine": 0.26595744680851063, + "professional_psychology": 0.34913793103448276, + "public_relations": 0.3793103448275862, + "security_study": 0.35555555555555557, + "sociology": 0.3584070796460177, + "sports_science": 0.4121212121212121, + "traditional_chinese_medicine": 0.31891891891891894, + "virology": 0.378698224852071, + "world_history": 0.3167701863354037, + "world_religions": 0.35625 + } + }, + "prompt_3": { + "accuracy": 0.34001036090485237, + "category_acc": { + "agronomy": 0.3136094674556213, + "anatomy": 0.3108108108108108, + "ancient_chinese": 0.29878048780487804, + "arts": 0.3375, + "astronomy": 0.3212121212121212, + "business_ethics": 0.39712918660287083, + "chinese_civil_service_exam": 0.3125, + "chinese_driving_rule": 0.4732824427480916, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.3644859813084112, + "chinese_history": 0.33436532507739936, + "chinese_literature": 0.3235294117647059, + "chinese_teacher_qualification": 0.3854748603351955, + "clinical_knowledge": 0.26582278481012656, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.411214953271028, + "college_engineering_hydrology": 0.4056603773584906, + "college_law": 0.25925925925925924, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.3113207547169811, + "college_medicine": 0.3333333333333333, + "computer_science": 0.3382352941176471, + "computer_security": 0.36257309941520466, + "conceptual_physics": 0.3129251700680272, + "construction_project_management": 0.3669064748201439, + "economics": 0.34591194968553457, + "education": 0.37423312883435583, + "electrical_engineering": 0.38372093023255816, + "elementary_chinese": 0.26587301587301587, + "elementary_commonsense": 0.3282828282828283, + "elementary_information_and_technology": 0.48739495798319327, + "elementary_mathematics": 0.26956521739130435, + "ethnology": 0.2962962962962963, + "food_science": 0.34265734265734266, + "genetics": 0.32386363636363635, + "global_facts": 0.2684563758389262, + "high_school_biology": 0.28402366863905326, + "high_school_chemistry": 0.26515151515151514, + "high_school_geography": 0.3474576271186441, + "high_school_mathematics": 0.29878048780487804, + "high_school_physics": 0.2818181818181818, + "high_school_politics": 0.3776223776223776, + "human_sexuality": 0.3968253968253968, + "international_law": 0.2648648648648649, + "journalism": 0.3488372093023256, + "jurisprudence": 0.38686131386861317, + "legal_and_moral_basis": 0.5467289719626168, + "logical": 0.3089430894308943, + "machine_learning": 0.19672131147540983, + "management": 0.38571428571428573, + "marketing": 0.4111111111111111, + "marxist_theory": 0.37037037037037035, + "modern_chinese": 0.22413793103448276, + "nutrition": 0.4, + "philosophy": 0.38095238095238093, + "professional_accounting": 0.33714285714285713, + "professional_law": 0.33175355450236965, + "professional_medicine": 0.27925531914893614, + "professional_psychology": 0.3448275862068966, + "public_relations": 0.3850574712643678, + "security_study": 0.3111111111111111, + "sociology": 0.37610619469026546, + "sports_science": 0.41818181818181815, + "traditional_chinese_medicine": 0.2972972972972973, + "virology": 0.40236686390532544, + "world_history": 0.35403726708074534, + "world_religions": 0.325 + } + }, + "prompt_4": { + "accuracy": 0.3205836643066828, + "category_acc": { + "agronomy": 0.33136094674556216, + "anatomy": 0.30405405405405406, + "ancient_chinese": 0.2865853658536585, + "arts": 0.3625, + "astronomy": 0.30303030303030304, + "business_ethics": 0.36363636363636365, + "chinese_civil_service_exam": 0.325, + "chinese_driving_rule": 0.37404580152671757, + "chinese_food_culture": 0.2426470588235294, + "chinese_foreign_policy": 0.29906542056074764, + "chinese_history": 0.3157894736842105, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.3463687150837989, + "clinical_knowledge": 0.29957805907172996, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.411214953271028, + "college_engineering_hydrology": 0.36792452830188677, + "college_law": 0.24074074074074073, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.330188679245283, + "college_medicine": 0.29304029304029305, + "computer_science": 0.3480392156862745, + "computer_security": 0.34502923976608185, + "conceptual_physics": 0.2653061224489796, + "construction_project_management": 0.3381294964028777, + "economics": 0.3333333333333333, + "education": 0.36809815950920244, + "electrical_engineering": 0.3313953488372093, + "elementary_chinese": 0.26587301587301587, + "elementary_commonsense": 0.3181818181818182, + "elementary_information_and_technology": 0.42857142857142855, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.3037037037037037, + "food_science": 0.3356643356643357, + "genetics": 0.2784090909090909, + "global_facts": 0.2953020134228188, + "high_school_biology": 0.2603550295857988, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.3050847457627119, + "high_school_mathematics": 0.31097560975609756, + "high_school_physics": 0.3090909090909091, + "high_school_politics": 0.32167832167832167, + "human_sexuality": 0.30952380952380953, + "international_law": 0.32972972972972975, + "journalism": 0.3546511627906977, + "jurisprudence": 0.3746958637469586, + "legal_and_moral_basis": 0.4719626168224299, + "logical": 0.3170731707317073, + "machine_learning": 0.16393442622950818, + "management": 0.35714285714285715, + "marketing": 0.3888888888888889, + "marxist_theory": 0.328042328042328, + "modern_chinese": 0.25862068965517243, + "nutrition": 0.3310344827586207, + "philosophy": 0.3523809523809524, + "professional_accounting": 0.3142857142857143, + "professional_law": 0.3175355450236967, + "professional_medicine": 0.2553191489361702, + "professional_psychology": 0.3275862068965517, + "public_relations": 0.3448275862068966, + "security_study": 0.34074074074074073, + "sociology": 0.33185840707964603, + "sports_science": 0.3393939393939394, + "traditional_chinese_medicine": 0.2810810810810811, + "virology": 0.30177514792899407, + "world_history": 0.32919254658385094, + "world_religions": 0.31875 + } + }, + "prompt_5": { + "accuracy": 0.3287860473147988, + "category_acc": { + "agronomy": 0.3076923076923077, + "anatomy": 0.2905405405405405, + "ancient_chinese": 0.2621951219512195, + "arts": 0.31875, + "astronomy": 0.34545454545454546, + "business_ethics": 0.3875598086124402, + "chinese_civil_service_exam": 0.33125, + "chinese_driving_rule": 0.40458015267175573, + "chinese_food_culture": 0.27205882352941174, + "chinese_foreign_policy": 0.32710280373831774, + "chinese_history": 0.3281733746130031, + "chinese_literature": 0.31862745098039214, + "chinese_teacher_qualification": 0.3463687150837989, + "clinical_knowledge": 0.28270042194092826, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.40186915887850466, + "college_engineering_hydrology": 0.3584905660377358, + "college_law": 0.24074074074074073, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.37735849056603776, + "college_medicine": 0.304029304029304, + "computer_science": 0.35784313725490197, + "computer_security": 0.36257309941520466, + "conceptual_physics": 0.272108843537415, + "construction_project_management": 0.381294964028777, + "economics": 0.3522012578616352, + "education": 0.3987730061349693, + "electrical_engineering": 0.3430232558139535, + "elementary_chinese": 0.2896825396825397, + "elementary_commonsense": 0.3181818181818182, + "elementary_information_and_technology": 0.4495798319327731, + "elementary_mathematics": 0.2826086956521739, + "ethnology": 0.3111111111111111, + "food_science": 0.3146853146853147, + "genetics": 0.3068181818181818, + "global_facts": 0.2483221476510067, + "high_school_biology": 0.3076923076923077, + "high_school_chemistry": 0.26515151515151514, + "high_school_geography": 0.2711864406779661, + "high_school_mathematics": 0.2865853658536585, + "high_school_physics": 0.3, + "high_school_politics": 0.32167832167832167, + "human_sexuality": 0.3492063492063492, + "international_law": 0.2918918918918919, + "journalism": 0.3313953488372093, + "jurisprudence": 0.36253041362530414, + "legal_and_moral_basis": 0.4672897196261682, + "logical": 0.3252032520325203, + "machine_learning": 0.2459016393442623, + "management": 0.38095238095238093, + "marketing": 0.4111111111111111, + "marxist_theory": 0.32275132275132273, + "modern_chinese": 0.21551724137931033, + "nutrition": 0.36551724137931035, + "philosophy": 0.3333333333333333, + "professional_accounting": 0.3314285714285714, + "professional_law": 0.3222748815165877, + "professional_medicine": 0.26861702127659576, + "professional_psychology": 0.3448275862068966, + "public_relations": 0.39655172413793105, + "security_study": 0.32592592592592595, + "sociology": 0.3495575221238938, + "sports_science": 0.37575757575757573, + "traditional_chinese_medicine": 0.32972972972972975, + "virology": 0.34911242603550297, + "world_history": 0.3105590062111801, + "world_religions": 0.3375 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24242424242424243 + }, + "prompt_2": { + "accuracy": 0.24242424242424243 + }, + "prompt_3": { + "accuracy": 0.2727272727272727 + }, + "prompt_4": { + "accuracy": 0.15151515151515152 + }, + "prompt_5": { + "accuracy": 0.18181818181818182 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.19318181818181818 + }, + "prompt_2": { + "accuracy": 0.32954545454545453 + }, + "prompt_3": { + "accuracy": 0.3159090909090909 + }, + "prompt_4": { + "accuracy": 0.3 + }, + "prompt_5": { + "accuracy": 0.2863636363636364 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.31389830508474575 + }, + "prompt_2": { + "accuracy": 0.3298305084745763 + }, + "prompt_3": { + "accuracy": 0.3338983050847458 + }, + "prompt_4": { + "accuracy": 0.33661016949152545 + }, + "prompt_5": { + "accuracy": 0.3257627118644068 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5295437546746448 + }, + "prompt_2": { + "accuracy": 0.5706806282722513 + }, + "prompt_3": { + "accuracy": 0.5770381451009723 + }, + "prompt_4": { + "accuracy": 0.537771129394166 + }, + "prompt_5": { + "accuracy": 0.5340314136125655 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6501714845663891 + }, + "prompt_2": { + "accuracy": 0.6369426751592356 + }, + "prompt_3": { + "accuracy": 0.6384125428711416 + }, + "prompt_4": { + "accuracy": 0.6168544830965214 + }, + "prompt_5": { + "accuracy": 0.636452719255267 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.31344115422400337, + "rouge2": 0.1036221896994188, + "rougeL": 0.22854998680082683, + "avg_rouge": 0.21520444357474966 + }, + "prompt_2": { + "rouge1": 0.3188031282758853, + "rouge2": 0.10774315781944968, + "rougeL": 0.23474785108290097, + "avg_rouge": 0.22043137905941199 + }, + "prompt_3": { + "rouge1": 0.3157189009484272, + "rouge2": 0.10289887003712299, + "rougeL": 0.22914599553732648, + "avg_rouge": 0.21592125550762556 + }, + "prompt_4": { + "rouge1": 0.31920791659169884, + "rouge2": 0.10953901151356257, + "rougeL": 0.23476812607836148, + "avg_rouge": 0.2211716847278743 + }, + "prompt_5": { + "rouge1": 0.3246893874303912, + "rouge2": 0.1089580319565158, + "rougeL": 0.23948957715069147, + "avg_rouge": 0.22437899884586612 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.23227916647496788, + "rouge2": 0.05624286819781799, + "rougeL": 0.17358563999298626, + "avg_rouge": 0.15403589155525738 + }, + "prompt_2": { + "rouge1": 0.2452239443600146, + "rouge2": 0.05725498405449709, + "rougeL": 0.185319717122106, + "avg_rouge": 0.1625995485122059 + }, + "prompt_3": { + "rouge1": 0.2388732104452333, + "rouge2": 0.05771735383440788, + "rougeL": 0.18053672581818622, + "avg_rouge": 0.15904243003260912 + }, + "prompt_4": { + "rouge1": 0.16908728895992295, + "rouge2": 0.037116848538535197, + "rougeL": 0.13666684201124235, + "avg_rouge": 0.1142903265032335 + }, + "prompt_5": { + "rouge1": 0.23423702359070647, + "rouge2": 0.05247876339931337, + "rougeL": 0.18282893135054412, + "avg_rouge": 0.15651490611352134 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8153669724770642 + }, + "prompt_2": { + "accuracy": 0.8245412844036697 + }, + "prompt_3": { + "accuracy": 0.801605504587156 + }, + "prompt_4": { + "accuracy": 0.8073394495412844 + }, + "prompt_5": { + "accuracy": 0.8532110091743119 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.48609779482262705 + }, + "prompt_2": { + "accuracy": 0.5819750719079578 + }, + "prompt_3": { + "accuracy": 0.6462128475551294 + }, + "prompt_4": { + "accuracy": 0.6462128475551294 + }, + "prompt_5": { + "accuracy": 0.5704697986577181 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.377 + }, + "prompt_2": { + "accuracy": 0.437 + }, + "prompt_3": { + "accuracy": 0.4155 + }, + "prompt_4": { + "accuracy": 0.385 + }, + "prompt_5": { + "accuracy": 0.408 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.362 + }, + "prompt_2": { + "accuracy": 0.347 + }, + "prompt_3": { + "accuracy": 0.358 + }, + "prompt_4": { + "accuracy": 0.3685 + }, + "prompt_5": { + "accuracy": 0.3455 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5895 + }, + "prompt_2": { + "accuracy": 0.5815 + }, + "prompt_3": { + "accuracy": 0.5395 + }, + "prompt_4": { + "accuracy": 0.504 + }, + "prompt_5": { + "accuracy": 0.513 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5211267605633803 + }, + "prompt_2": { + "accuracy": 0.4647887323943662 + }, + "prompt_3": { + "accuracy": 0.4788732394366197 + }, + "prompt_4": { + "accuracy": 0.43661971830985913 + }, + "prompt_5": { + "accuracy": 0.49295774647887325 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5126353790613718 + }, + "prompt_2": { + "accuracy": 0.5126353790613718 + }, + "prompt_3": { + "accuracy": 0.5306859205776173 + }, + "prompt_4": { + "accuracy": 0.5270758122743683 + }, + "prompt_5": { + "accuracy": 0.5342960288808665 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4632352941176471 + }, + "prompt_2": { + "accuracy": 0.46078431372549017 + }, + "prompt_3": { + "accuracy": 0.46568627450980393 + }, + "prompt_4": { + "accuracy": 0.46078431372549017 + }, + "prompt_5": { + "accuracy": 0.46568627450980393 + } } }, "five_shot": { @@ -13524,235 +119611,3250 @@ "model_link": "https://huggingface.co/google/gemma-7b", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.5180952380952382, + "language_acc": { + "Vietnamese": 0.4533333333333333, + "English": 0.62, + "Malay": 0.4066666666666667, + "Chinese": 0.49333333333333335, + "Filipino": 0.5933333333333334, + "Indonesian": 0.5, + "Spanish": 0.56 + }, + "consistency_score_2": 0.5625396825396825, + "consistency_score_3": 0.38247619047619047, + "consistency_score_4": 0.2801904761904762, + "consistency_score_5": 0.21396825396825397, + "consistency_score_6": 0.16761904761904764, + "consistency_score_7": 0.13333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.4666666666666667, + "Vietnamese,Malay": 0.5333333333333333, + "Vietnamese,Chinese": 0.44666666666666666, + "Vietnamese,Filipino": 0.5, + "Vietnamese,Indonesian": 0.49333333333333335, + "Vietnamese,Spanish": 0.49333333333333335, + "English,Malay": 0.52, + "English,Chinese": 0.6066666666666667, + "English,Filipino": 0.6466666666666666, + "English,Indonesian": 0.6333333333333333, + "English,Spanish": 0.6466666666666666, + "Malay,Chinese": 0.4866666666666667, + "Malay,Filipino": 0.5533333333333333, + "Malay,Indonesian": 0.54, + "Malay,Spanish": 0.5266666666666666, + "Chinese,Filipino": 0.5866666666666667, + "Chinese,Indonesian": 0.56, + "Chinese,Spanish": 0.6066666666666667, + "Filipino,Indonesian": 0.62, + "Filipino,Spanish": 0.6933333333333334, + "Indonesian,Spanish": 0.6533333333333333 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.3333333333333333, + "Vietnamese,English,Chinese": 0.3, + "Vietnamese,English,Filipino": 0.3466666666666667, + "Vietnamese,English,Indonesian": 0.34, + "Vietnamese,English,Spanish": 0.3466666666666667, + "Vietnamese,Malay,Chinese": 0.3, + "Vietnamese,Malay,Filipino": 0.3466666666666667, + "Vietnamese,Malay,Indonesian": 0.3466666666666667, + "Vietnamese,Malay,Spanish": 0.3333333333333333, + "Vietnamese,Chinese,Filipino": 0.32, + "Vietnamese,Chinese,Indonesian": 0.29333333333333333, + "Vietnamese,Chinese,Spanish": 0.30666666666666664, + "Vietnamese,Filipino,Indonesian": 0.3466666666666667, + "Vietnamese,Filipino,Spanish": 0.38666666666666666, + "Vietnamese,Indonesian,Spanish": 0.36, + "English,Malay,Chinese": 0.3466666666666667, + "English,Malay,Filipino": 0.38666666666666666, + "English,Malay,Indonesian": 0.38666666666666666, + "English,Malay,Spanish": 0.38666666666666666, + "English,Chinese,Filipino": 0.44, + "English,Chinese,Indonesian": 0.44666666666666666, + "English,Chinese,Spanish": 0.4533333333333333, + "English,Filipino,Indonesian": 0.47333333333333333, + "English,Filipino,Spanish": 0.5133333333333333, + "English,Indonesian,Spanish": 0.49333333333333335, + "Malay,Chinese,Filipino": 0.35333333333333333, + "Malay,Chinese,Indonesian": 0.34, + "Malay,Chinese,Spanish": 0.34, + "Malay,Filipino,Indonesian": 0.38666666666666666, + "Malay,Filipino,Spanish": 0.41333333333333333, + "Malay,Indonesian,Spanish": 0.38666666666666666, + "Chinese,Filipino,Indonesian": 0.42, + "Chinese,Filipino,Spanish": 0.47333333333333333, + "Chinese,Indonesian,Spanish": 0.43333333333333335, + "Filipino,Indonesian,Spanish": 0.5066666666666667 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.21333333333333335, + "Vietnamese,English,Malay,Filipino": 0.26666666666666666, + "Vietnamese,English,Malay,Indonesian": 0.26666666666666666, + "Vietnamese,English,Malay,Spanish": 0.26, + "Vietnamese,English,Chinese,Filipino": 0.24666666666666667, + "Vietnamese,English,Chinese,Indonesian": 0.23333333333333334, + "Vietnamese,English,Chinese,Spanish": 0.24, + "Vietnamese,English,Filipino,Indonesian": 0.26666666666666666, + "Vietnamese,English,Filipino,Spanish": 0.29333333333333333, + "Vietnamese,English,Indonesian,Spanish": 0.28, + "Vietnamese,Malay,Chinese,Filipino": 0.24, + "Vietnamese,Malay,Chinese,Indonesian": 0.22, + "Vietnamese,Malay,Chinese,Spanish": 0.21333333333333335, + "Vietnamese,Malay,Filipino,Indonesian": 0.28, + "Vietnamese,Malay,Filipino,Spanish": 0.28, + "Vietnamese,Malay,Indonesian,Spanish": 0.2733333333333333, + "Vietnamese,Chinese,Filipino,Indonesian": 0.22666666666666666, + "Vietnamese,Chinese,Filipino,Spanish": 0.26666666666666666, + "Vietnamese,Chinese,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Filipino,Indonesian,Spanish": 0.3, + "English,Malay,Chinese,Filipino": 0.2733333333333333, + "English,Malay,Chinese,Indonesian": 0.2866666666666667, + "English,Malay,Chinese,Spanish": 0.2733333333333333, + "English,Malay,Filipino,Indonesian": 0.30666666666666664, + "English,Malay,Filipino,Spanish": 0.32, + "English,Malay,Indonesian,Spanish": 0.32, + "English,Chinese,Filipino,Indonesian": 0.34, + "English,Chinese,Filipino,Spanish": 0.36666666666666664, + "English,Chinese,Indonesian,Spanish": 0.34, + "English,Filipino,Indonesian,Spanish": 0.3933333333333333, + "Malay,Chinese,Filipino,Indonesian": 0.26, + "Malay,Chinese,Filipino,Spanish": 0.28, + "Malay,Chinese,Indonesian,Spanish": 0.26666666666666666, + "Malay,Filipino,Indonesian,Spanish": 0.32, + "Chinese,Filipino,Indonesian,Spanish": 0.36 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.18666666666666668, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.18666666666666668, + "Vietnamese,English,Malay,Chinese,Spanish": 0.17333333333333334, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.22666666666666666, + "Vietnamese,English,Malay,Filipino,Spanish": 0.22666666666666666, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.21333333333333335, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.18, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.24, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.2, + "English,Malay,Chinese,Filipino,Indonesian": 0.22666666666666666, + "English,Malay,Chinese,Filipino,Spanish": 0.22666666666666666, + "English,Malay,Chinese,Indonesian,Spanish": 0.22666666666666666, + "English,Malay,Filipino,Indonesian,Spanish": 0.26666666666666666, + "English,Chinese,Filipino,Indonesian,Spanish": 0.28, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.22 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.16, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.15333333333333332, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.15333333333333332, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.16, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.16, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.18666666666666668 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.13333333333333333 + } + }, + "AC3_2": 0.5394035028318483, + "AC3_3": 0.44007412774099774, + "AC3_4": 0.3636927201825941, + "AC3_5": 0.3028587948208935, + "AC3_6": 0.25329100525406706, + "AC3_7": 0.2120857699479498 + }, + "prompt_2": { + "overall_acc": 0.5285714285714286, + "language_acc": { + "Vietnamese": 0.54, + "English": 0.6733333333333333, + "Malay": 0.38666666666666666, + "Chinese": 0.48, + "Filipino": 0.5933333333333334, + "Indonesian": 0.48, + "Spanish": 0.5466666666666666 + }, + "consistency_score_2": 0.5320634920634921, + "consistency_score_3": 0.3546666666666668, + "consistency_score_4": 0.259047619047619, + "consistency_score_5": 0.19873015873015876, + "consistency_score_6": 0.15714285714285717, + "consistency_score_7": 0.12666666666666668, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.56, + "Vietnamese,Malay": 0.43333333333333335, + "Vietnamese,Chinese": 0.5066666666666667, + "Vietnamese,Filipino": 0.6, + "Vietnamese,Indonesian": 0.5333333333333333, + "Vietnamese,Spanish": 0.54, + "English,Malay": 0.4533333333333333, + "English,Chinese": 0.49333333333333335, + "English,Filipino": 0.62, + "English,Indonesian": 0.58, + "English,Spanish": 0.62, + "Malay,Chinese": 0.5266666666666666, + "Malay,Filipino": 0.49333333333333335, + "Malay,Indonesian": 0.5266666666666666, + "Malay,Spanish": 0.47333333333333333, + "Chinese,Filipino": 0.49333333333333335, + "Chinese,Indonesian": 0.47333333333333333, + "Chinese,Spanish": 0.43333333333333335, + "Filipino,Indonesian": 0.6333333333333333, + "Filipino,Spanish": 0.6066666666666667, + "Indonesian,Spanish": 0.5733333333333334 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.29333333333333333, + "Vietnamese,English,Chinese": 0.34, + "Vietnamese,English,Filipino": 0.43333333333333335, + "Vietnamese,English,Indonesian": 0.4, + "Vietnamese,English,Spanish": 0.42, + "Vietnamese,Malay,Chinese": 0.3, + "Vietnamese,Malay,Filipino": 0.3333333333333333, + "Vietnamese,Malay,Indonesian": 0.3, + "Vietnamese,Malay,Spanish": 0.31333333333333335, + "Vietnamese,Chinese,Filipino": 0.4, + "Vietnamese,Chinese,Indonesian": 0.32666666666666666, + "Vietnamese,Chinese,Spanish": 0.31333333333333335, + "Vietnamese,Filipino,Indonesian": 0.43333333333333335, + "Vietnamese,Filipino,Spanish": 0.4066666666666667, + "Vietnamese,Indonesian,Spanish": 0.38, + "English,Malay,Chinese": 0.3, + "English,Malay,Filipino": 0.32666666666666666, + "English,Malay,Indonesian": 0.32, + "English,Malay,Spanish": 0.32666666666666666, + "English,Chinese,Filipino": 0.36666666666666664, + "English,Chinese,Indonesian": 0.32, + "English,Chinese,Spanish": 0.32666666666666666, + "English,Filipino,Indonesian": 0.46, + "English,Filipino,Spanish": 0.46, + "English,Indonesian,Spanish": 0.43333333333333335, + "Malay,Chinese,Filipino": 0.31333333333333335, + "Malay,Chinese,Indonesian": 0.30666666666666664, + "Malay,Chinese,Spanish": 0.2866666666666667, + "Malay,Filipino,Indonesian": 0.3466666666666667, + "Malay,Filipino,Spanish": 0.35333333333333333, + "Malay,Indonesian,Spanish": 0.3333333333333333, + "Chinese,Filipino,Indonesian": 0.36666666666666664, + "Chinese,Filipino,Spanish": 0.3466666666666667, + "Chinese,Indonesian,Spanish": 0.29333333333333333, + "Filipino,Indonesian,Spanish": 0.43333333333333335 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.20666666666666667, + "Vietnamese,English,Malay,Filipino": 0.25333333333333335, + "Vietnamese,English,Malay,Indonesian": 0.23333333333333334, + "Vietnamese,English,Malay,Spanish": 0.24, + "Vietnamese,English,Chinese,Filipino": 0.30666666666666664, + "Vietnamese,English,Chinese,Indonesian": 0.25333333333333335, + "Vietnamese,English,Chinese,Spanish": 0.26, + "Vietnamese,English,Filipino,Indonesian": 0.3466666666666667, + "Vietnamese,English,Filipino,Spanish": 0.3333333333333333, + "Vietnamese,English,Indonesian,Spanish": 0.32, + "Vietnamese,Malay,Chinese,Filipino": 0.25333333333333335, + "Vietnamese,Malay,Chinese,Indonesian": 0.2, + "Vietnamese,Malay,Chinese,Spanish": 0.20666666666666667, + "Vietnamese,Malay,Filipino,Indonesian": 0.24, + "Vietnamese,Malay,Filipino,Spanish": 0.26666666666666666, + "Vietnamese,Malay,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Chinese,Filipino,Indonesian": 0.30666666666666664, + "Vietnamese,Chinese,Filipino,Spanish": 0.2733333333333333, + "Vietnamese,Chinese,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,Filipino,Indonesian,Spanish": 0.32, + "English,Malay,Chinese,Filipino": 0.24, + "English,Malay,Chinese,Indonesian": 0.2, + "English,Malay,Chinese,Spanish": 0.22, + "English,Malay,Filipino,Indonesian": 0.25333333333333335, + "English,Malay,Filipino,Spanish": 0.28, + "English,Malay,Indonesian,Spanish": 0.25333333333333335, + "English,Chinese,Filipino,Indonesian": 0.2866666666666667, + "English,Chinese,Filipino,Spanish": 0.28, + "English,Chinese,Indonesian,Spanish": 0.24, + "English,Filipino,Indonesian,Spanish": 0.36, + "Malay,Chinese,Filipino,Indonesian": 0.23333333333333334, + "Malay,Chinese,Filipino,Spanish": 0.24, + "Malay,Chinese,Indonesian,Spanish": 0.2, + "Malay,Filipino,Indonesian,Spanish": 0.24666666666666667, + "Chinese,Filipino,Indonesian,Spanish": 0.25333333333333335 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.2, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.15333333333333332, + "Vietnamese,English,Malay,Chinese,Spanish": 0.16666666666666666, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.2, + "Vietnamese,English,Malay,Filipino,Spanish": 0.22, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.24666666666666667, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.23333333333333334, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.2, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.28, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.14666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.21333333333333335, + "English,Malay,Chinese,Filipino,Indonesian": 0.18, + "English,Malay,Chinese,Filipino,Spanish": 0.2, + "English,Malay,Chinese,Indonesian,Spanish": 0.16, + "English,Malay,Filipino,Indonesian,Spanish": 0.21333333333333335, + "English,Chinese,Filipino,Indonesian,Spanish": 0.22, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.15333333333333332, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.16666666666666666, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.12666666666666668, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.17333333333333334, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14666666666666667 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.12666666666666668 + } + }, + "AC3_2": 0.5303117115846695, + "AC3_3": 0.4244985981835537, + "AC3_4": 0.3476939021842192, + "AC3_5": 0.2888570181058222, + "AC3_6": 0.24226190472657494, + "AC3_7": 0.20436046508509034 + }, + "prompt_3": { + "overall_acc": 0.5114285714285715, + "language_acc": { + "Vietnamese": 0.4866666666666667, + "English": 0.6, + "Malay": 0.4066666666666667, + "Chinese": 0.44, + "Filipino": 0.58, + "Indonesian": 0.48, + "Spanish": 0.5866666666666667 + }, + "consistency_score_2": 0.5193650793650794, + "consistency_score_3": 0.33352380952380956, + "consistency_score_4": 0.24076190476190473, + "consistency_score_5": 0.18666666666666668, + "consistency_score_6": 0.15142857142857144, + "consistency_score_7": 0.12666666666666668, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.44, + "Vietnamese,Malay": 0.5866666666666667, + "Vietnamese,Chinese": 0.52, + "Vietnamese,Filipino": 0.5466666666666666, + "Vietnamese,Indonesian": 0.5533333333333333, + "Vietnamese,Spanish": 0.5, + "English,Malay": 0.47333333333333333, + "English,Chinese": 0.4666666666666667, + "English,Filipino": 0.6133333333333333, + "English,Indonesian": 0.4866666666666667, + "English,Spanish": 0.6133333333333333, + "Malay,Chinese": 0.47333333333333333, + "Malay,Filipino": 0.48, + "Malay,Indonesian": 0.5466666666666666, + "Malay,Spanish": 0.48, + "Chinese,Filipino": 0.44666666666666666, + "Chinese,Indonesian": 0.4533333333333333, + "Chinese,Spanish": 0.47333333333333333, + "Filipino,Indonesian": 0.58, + "Filipino,Spanish": 0.6333333333333333, + "Indonesian,Spanish": 0.54 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.30666666666666664, + "Vietnamese,English,Chinese": 0.3, + "Vietnamese,English,Filipino": 0.35333333333333333, + "Vietnamese,English,Indonesian": 0.29333333333333333, + "Vietnamese,English,Spanish": 0.32, + "Vietnamese,Malay,Chinese": 0.3466666666666667, + "Vietnamese,Malay,Filipino": 0.3466666666666667, + "Vietnamese,Malay,Indonesian": 0.38, + "Vietnamese,Malay,Spanish": 0.3333333333333333, + "Vietnamese,Chinese,Filipino": 0.32666666666666666, + "Vietnamese,Chinese,Indonesian": 0.32, + "Vietnamese,Chinese,Spanish": 0.30666666666666664, + "Vietnamese,Filipino,Indonesian": 0.38, + "Vietnamese,Filipino,Spanish": 0.3933333333333333, + "Vietnamese,Indonesian,Spanish": 0.3333333333333333, + "English,Malay,Chinese": 0.2733333333333333, + "English,Malay,Filipino": 0.34, + "English,Malay,Indonesian": 0.30666666666666664, + "English,Malay,Spanish": 0.3333333333333333, + "English,Chinese,Filipino": 0.32, + "English,Chinese,Indonesian": 0.26666666666666666, + "English,Chinese,Spanish": 0.3333333333333333, + "English,Filipino,Indonesian": 0.38666666666666666, + "English,Filipino,Spanish": 0.4666666666666667, + "English,Indonesian,Spanish": 0.38, + "Malay,Chinese,Filipino": 0.28, + "Malay,Chinese,Indonesian": 0.3, + "Malay,Chinese,Spanish": 0.28, + "Malay,Filipino,Indonesian": 0.3333333333333333, + "Malay,Filipino,Spanish": 0.36, + "Malay,Indonesian,Spanish": 0.32666666666666666, + "Chinese,Filipino,Indonesian": 0.32, + "Chinese,Filipino,Spanish": 0.32666666666666666, + "Chinese,Indonesian,Spanish": 0.3, + "Filipino,Indonesian,Spanish": 0.4 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.22, + "Vietnamese,English,Malay,Filipino": 0.25333333333333335, + "Vietnamese,English,Malay,Indonesian": 0.24, + "Vietnamese,English,Malay,Spanish": 0.23333333333333334, + "Vietnamese,English,Chinese,Filipino": 0.24666666666666667, + "Vietnamese,English,Chinese,Indonesian": 0.2, + "Vietnamese,English,Chinese,Spanish": 0.24, + "Vietnamese,English,Filipino,Indonesian": 0.25333333333333335, + "Vietnamese,English,Filipino,Spanish": 0.28, + "Vietnamese,English,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Chinese,Filipino": 0.23333333333333334, + "Vietnamese,Malay,Chinese,Indonesian": 0.23333333333333334, + "Vietnamese,Malay,Chinese,Spanish": 0.22, + "Vietnamese,Malay,Filipino,Indonesian": 0.26666666666666666, + "Vietnamese,Malay,Filipino,Spanish": 0.28, + "Vietnamese,Malay,Indonesian,Spanish": 0.26, + "Vietnamese,Chinese,Filipino,Indonesian": 0.24, + "Vietnamese,Chinese,Filipino,Spanish": 0.24, + "Vietnamese,Chinese,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Filipino,Indonesian,Spanish": 0.26, + "English,Malay,Chinese,Filipino": 0.22666666666666666, + "English,Malay,Chinese,Indonesian": 0.2, + "English,Malay,Chinese,Spanish": 0.22, + "English,Malay,Filipino,Indonesian": 0.26, + "English,Malay,Filipino,Spanish": 0.28, + "English,Malay,Indonesian,Spanish": 0.24666666666666667, + "English,Chinese,Filipino,Indonesian": 0.22, + "English,Chinese,Filipino,Spanish": 0.26666666666666666, + "English,Chinese,Indonesian,Spanish": 0.22, + "English,Filipino,Indonesian,Spanish": 0.31333333333333335, + "Malay,Chinese,Filipino,Indonesian": 0.21333333333333335, + "Malay,Chinese,Filipino,Spanish": 0.22, + "Malay,Chinese,Indonesian,Spanish": 0.21333333333333335, + "Malay,Filipino,Indonesian,Spanish": 0.26666666666666666, + "Chinese,Filipino,Indonesian,Spanish": 0.22666666666666666 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.19333333333333333, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.16666666666666666, + "Vietnamese,English,Malay,Chinese,Spanish": 0.18, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.20666666666666667, + "Vietnamese,English,Malay,Filipino,Spanish": 0.21333333333333335, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.17333333333333334, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.20666666666666667, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.16, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.18, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.18666666666666668, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.17333333333333334, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.16666666666666666, + "English,Malay,Chinese,Filipino,Indonesian": 0.17333333333333334, + "English,Malay,Chinese,Filipino,Spanish": 0.19333333333333333, + "English,Malay,Chinese,Indonesian,Spanish": 0.16666666666666666, + "English,Malay,Filipino,Indonesian,Spanish": 0.22, + "English,Chinese,Filipino,Indonesian,Spanish": 0.18, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.14666666666666667, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.16666666666666666, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.14, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.17333333333333334, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.14, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14666666666666667, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14666666666666667 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.12666666666666668 + } + }, + "AC3_2": 0.5153662721133185, + "AC3_3": 0.4037472626084062, + "AC3_4": 0.3273971710305254, + "AC3_5": 0.27350613911498206, + "AC3_6": 0.23366995070366434, + "AC3_7": 0.2030447760875825 + }, + "prompt_4": { + "overall_acc": 0.54, + "language_acc": { + "Vietnamese": 0.5066666666666667, + "English": 0.6066666666666667, + "Malay": 0.4666666666666667, + "Chinese": 0.48, + "Filipino": 0.6066666666666667, + "Indonesian": 0.52, + "Spanish": 0.5933333333333334 + }, + "consistency_score_2": 0.5647619047619047, + "consistency_score_3": 0.3931428571428571, + "consistency_score_4": 0.2948571428571429, + "consistency_score_5": 0.22984126984126982, + "consistency_score_6": 0.18476190476190474, + "consistency_score_7": 0.15333333333333332, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.5, + "Vietnamese,Malay": 0.5533333333333333, + "Vietnamese,Chinese": 0.48, + "Vietnamese,Filipino": 0.56, + "Vietnamese,Indonesian": 0.5, + "Vietnamese,Spanish": 0.48, + "English,Malay": 0.5266666666666666, + "English,Chinese": 0.5533333333333333, + "English,Filipino": 0.6733333333333333, + "English,Indonesian": 0.6, + "English,Spanish": 0.6866666666666666, + "Malay,Chinese": 0.4866666666666667, + "Malay,Filipino": 0.5266666666666666, + "Malay,Indonesian": 0.54, + "Malay,Spanish": 0.52, + "Chinese,Filipino": 0.6533333333333333, + "Chinese,Indonesian": 0.54, + "Chinese,Spanish": 0.5866666666666667, + "Filipino,Indonesian": 0.6466666666666666, + "Filipino,Spanish": 0.6466666666666666, + "Indonesian,Spanish": 0.6 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.35333333333333333, + "Vietnamese,English,Chinese": 0.32666666666666666, + "Vietnamese,English,Filipino": 0.4, + "Vietnamese,English,Indonesian": 0.35333333333333333, + "Vietnamese,English,Spanish": 0.38, + "Vietnamese,Malay,Chinese": 0.31333333333333335, + "Vietnamese,Malay,Filipino": 0.37333333333333335, + "Vietnamese,Malay,Indonesian": 0.3466666666666667, + "Vietnamese,Malay,Spanish": 0.34, + "Vietnamese,Chinese,Filipino": 0.3933333333333333, + "Vietnamese,Chinese,Indonesian": 0.30666666666666664, + "Vietnamese,Chinese,Spanish": 0.32666666666666666, + "Vietnamese,Filipino,Indonesian": 0.41333333333333333, + "Vietnamese,Filipino,Spanish": 0.38666666666666666, + "Vietnamese,Indonesian,Spanish": 0.3466666666666667, + "English,Malay,Chinese": 0.34, + "English,Malay,Filipino": 0.4066666666666667, + "English,Malay,Indonesian": 0.36, + "English,Malay,Spanish": 0.4066666666666667, + "English,Chinese,Filipino": 0.49333333333333335, + "English,Chinese,Indonesian": 0.3933333333333333, + "English,Chinese,Spanish": 0.4533333333333333, + "English,Filipino,Indonesian": 0.49333333333333335, + "English,Filipino,Spanish": 0.5266666666666666, + "English,Indonesian,Spanish": 0.47333333333333333, + "Malay,Chinese,Filipino": 0.37333333333333335, + "Malay,Chinese,Indonesian": 0.32666666666666666, + "Malay,Chinese,Spanish": 0.3466666666666667, + "Malay,Filipino,Indonesian": 0.3933333333333333, + "Malay,Filipino,Spanish": 0.3933333333333333, + "Malay,Indonesian,Spanish": 0.36, + "Chinese,Filipino,Indonesian": 0.4666666666666667, + "Chinese,Filipino,Spanish": 0.49333333333333335, + "Chinese,Indonesian,Spanish": 0.41333333333333333, + "Filipino,Indonesian,Spanish": 0.4866666666666667 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.24, + "Vietnamese,English,Malay,Filipino": 0.2866666666666667, + "Vietnamese,English,Malay,Indonesian": 0.25333333333333335, + "Vietnamese,English,Malay,Spanish": 0.29333333333333333, + "Vietnamese,English,Chinese,Filipino": 0.30666666666666664, + "Vietnamese,English,Chinese,Indonesian": 0.24, + "Vietnamese,English,Chinese,Spanish": 0.2733333333333333, + "Vietnamese,English,Filipino,Indonesian": 0.31333333333333335, + "Vietnamese,English,Filipino,Spanish": 0.32666666666666666, + "Vietnamese,English,Indonesian,Spanish": 0.2866666666666667, + "Vietnamese,Malay,Chinese,Filipino": 0.26666666666666666, + "Vietnamese,Malay,Chinese,Indonesian": 0.20666666666666667, + "Vietnamese,Malay,Chinese,Spanish": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Indonesian": 0.2866666666666667, + "Vietnamese,Malay,Filipino,Spanish": 0.29333333333333333, + "Vietnamese,Malay,Indonesian,Spanish": 0.25333333333333335, + "Vietnamese,Chinese,Filipino,Indonesian": 0.3, + "Vietnamese,Chinese,Filipino,Spanish": 0.30666666666666664, + "Vietnamese,Chinese,Indonesian,Spanish": 0.24666666666666667, + "Vietnamese,Filipino,Indonesian,Spanish": 0.32, + "English,Malay,Chinese,Filipino": 0.30666666666666664, + "English,Malay,Chinese,Indonesian": 0.23333333333333334, + "English,Malay,Chinese,Spanish": 0.29333333333333333, + "English,Malay,Filipino,Indonesian": 0.30666666666666664, + "English,Malay,Filipino,Spanish": 0.3466666666666667, + "English,Malay,Indonesian,Spanish": 0.2866666666666667, + "English,Chinese,Filipino,Indonesian": 0.37333333333333335, + "English,Chinese,Filipino,Spanish": 0.42, + "English,Chinese,Indonesian,Spanish": 0.32666666666666666, + "English,Filipino,Indonesian,Spanish": 0.4, + "Malay,Chinese,Filipino,Indonesian": 0.28, + "Malay,Chinese,Filipino,Spanish": 0.3, + "Malay,Chinese,Indonesian,Spanish": 0.24, + "Malay,Filipino,Indonesian,Spanish": 0.3, + "Chinese,Filipino,Indonesian,Spanish": 0.37333333333333335 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.22, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.16666666666666666, + "Vietnamese,English,Malay,Chinese,Spanish": 0.21333333333333335, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.22, + "Vietnamese,English,Malay,Filipino,Spanish": 0.26, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.22, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.24, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.26666666666666666, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.26, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.2, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.22, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.16666666666666666, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.24666666666666667, + "English,Malay,Chinese,Filipino,Indonesian": 0.22666666666666666, + "English,Malay,Chinese,Filipino,Spanish": 0.2733333333333333, + "English,Malay,Chinese,Indonesian,Spanish": 0.2, + "English,Malay,Filipino,Indonesian,Spanish": 0.25333333333333335, + "English,Chinese,Filipino,Indonesian,Spanish": 0.31333333333333335, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.22 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.16666666666666666, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.20666666666666667, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.15333333333333332, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.16666666666666666, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.15333333333333332 + } + }, + "AC3_2": 0.5521034482258871, + "AC3_3": 0.45501530919802347, + "AC3_4": 0.3814373716175554, + "AC3_5": 0.3224412370715179, + "AC3_6": 0.2753219447714733, + "AC3_7": 0.23884615381170488 + }, + "prompt_5": { + "overall_acc": 0.5838095238095239, + "language_acc": { + "Vietnamese": 0.5333333333333333, + "English": 0.64, + "Malay": 0.5866666666666667, + "Chinese": 0.5733333333333334, + "Filipino": 0.6, + "Indonesian": 0.5733333333333334, + "Spanish": 0.58 + }, + "consistency_score_2": 0.654285714285714, + "consistency_score_3": 0.504, + "consistency_score_4": 0.4072380952380952, + "consistency_score_5": 0.33682539682539675, + "consistency_score_6": 0.28285714285714286, + "consistency_score_7": 0.24, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.54, + "Vietnamese,Malay": 0.5733333333333334, + "Vietnamese,Chinese": 0.58, + "Vietnamese,Filipino": 0.5933333333333334, + "Vietnamese,Indonesian": 0.5666666666666667, + "Vietnamese,Spanish": 0.58, + "English,Malay": 0.7133333333333334, + "English,Chinese": 0.64, + "English,Filipino": 0.6733333333333333, + "English,Indonesian": 0.7, + "English,Spanish": 0.7666666666666667, + "Malay,Chinese": 0.6533333333333333, + "Malay,Filipino": 0.64, + "Malay,Indonesian": 0.7333333333333333, + "Malay,Spanish": 0.7333333333333333, + "Chinese,Filipino": 0.6533333333333333, + "Chinese,Indonesian": 0.6533333333333333, + "Chinese,Spanish": 0.6866666666666666, + "Filipino,Indonesian": 0.7, + "Filipino,Spanish": 0.66, + "Indonesian,Spanish": 0.7 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.4533333333333333, + "Vietnamese,English,Chinese": 0.41333333333333333, + "Vietnamese,English,Filipino": 0.43333333333333335, + "Vietnamese,English,Indonesian": 0.4266666666666667, + "Vietnamese,English,Spanish": 0.4533333333333333, + "Vietnamese,Malay,Chinese": 0.4533333333333333, + "Vietnamese,Malay,Filipino": 0.44, + "Vietnamese,Malay,Indonesian": 0.46, + "Vietnamese,Malay,Spanish": 0.4666666666666667, + "Vietnamese,Chinese,Filipino": 0.4533333333333333, + "Vietnamese,Chinese,Indonesian": 0.43333333333333335, + "Vietnamese,Chinese,Spanish": 0.46, + "Vietnamese,Filipino,Indonesian": 0.4533333333333333, + "Vietnamese,Filipino,Spanish": 0.44666666666666666, + "Vietnamese,Indonesian,Spanish": 0.44, + "English,Malay,Chinese": 0.52, + "English,Malay,Filipino": 0.5333333333333333, + "English,Malay,Indonesian": 0.5866666666666667, + "English,Malay,Spanish": 0.62, + "English,Chinese,Filipino": 0.5066666666666667, + "English,Chinese,Indonesian": 0.5133333333333333, + "English,Chinese,Spanish": 0.5533333333333333, + "English,Filipino,Indonesian": 0.56, + "English,Filipino,Spanish": 0.58, + "English,Indonesian,Spanish": 0.5866666666666667, + "Malay,Chinese,Filipino": 0.5, + "Malay,Chinese,Indonesian": 0.54, + "Malay,Chinese,Spanish": 0.56, + "Malay,Filipino,Indonesian": 0.54, + "Malay,Filipino,Spanish": 0.5333333333333333, + "Malay,Indonesian,Spanish": 0.5933333333333334, + "Chinese,Filipino,Indonesian": 0.5266666666666666, + "Chinese,Filipino,Spanish": 0.52, + "Chinese,Indonesian,Spanish": 0.5333333333333333, + "Filipino,Indonesian,Spanish": 0.5466666666666666 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.36, + "Vietnamese,English,Malay,Filipino": 0.36, + "Vietnamese,English,Malay,Indonesian": 0.38, + "Vietnamese,English,Malay,Spanish": 0.3933333333333333, + "Vietnamese,English,Chinese,Filipino": 0.36666666666666664, + "Vietnamese,English,Chinese,Indonesian": 0.34, + "Vietnamese,English,Chinese,Spanish": 0.36666666666666664, + "Vietnamese,English,Filipino,Indonesian": 0.36, + "Vietnamese,English,Filipino,Spanish": 0.38666666666666666, + "Vietnamese,English,Indonesian,Spanish": 0.36, + "Vietnamese,Malay,Chinese,Filipino": 0.38, + "Vietnamese,Malay,Chinese,Indonesian": 0.37333333333333335, + "Vietnamese,Malay,Chinese,Spanish": 0.38666666666666666, + "Vietnamese,Malay,Filipino,Indonesian": 0.36666666666666664, + "Vietnamese,Malay,Filipino,Spanish": 0.36666666666666664, + "Vietnamese,Malay,Indonesian,Spanish": 0.37333333333333335, + "Vietnamese,Chinese,Filipino,Indonesian": 0.36666666666666664, + "Vietnamese,Chinese,Filipino,Spanish": 0.38, + "Vietnamese,Chinese,Indonesian,Spanish": 0.36666666666666664, + "Vietnamese,Filipino,Indonesian,Spanish": 0.36666666666666664, + "English,Malay,Chinese,Filipino": 0.42, + "English,Malay,Chinese,Indonesian": 0.44666666666666666, + "English,Malay,Chinese,Spanish": 0.48, + "English,Malay,Filipino,Indonesian": 0.4666666666666667, + "English,Malay,Filipino,Spanish": 0.49333333333333335, + "English,Malay,Indonesian,Spanish": 0.5266666666666666, + "English,Chinese,Filipino,Indonesian": 0.4266666666666667, + "English,Chinese,Filipino,Spanish": 0.44666666666666666, + "English,Chinese,Indonesian,Spanish": 0.44666666666666666, + "English,Filipino,Indonesian,Spanish": 0.4866666666666667, + "Malay,Chinese,Filipino,Indonesian": 0.4266666666666667, + "Malay,Chinese,Filipino,Spanish": 0.43333333333333335, + "Malay,Chinese,Indonesian,Spanish": 0.4666666666666667, + "Malay,Filipino,Indonesian,Spanish": 0.46, + "Chinese,Filipino,Indonesian,Spanish": 0.4266666666666667 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.31333333333333335, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.30666666666666664, + "Vietnamese,English,Malay,Chinese,Spanish": 0.32666666666666666, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.31333333333333335, + "Vietnamese,English,Malay,Filipino,Spanish": 0.3333333333333333, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.32666666666666666, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.3, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.32666666666666666, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.3, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.32, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.31333333333333335, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.32, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.31333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.30666666666666664, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.30666666666666664, + "English,Malay,Chinese,Filipino,Indonesian": 0.36666666666666664, + "English,Malay,Chinese,Filipino,Spanish": 0.3933333333333333, + "English,Malay,Chinese,Indonesian,Spanish": 0.41333333333333333, + "English,Malay,Filipino,Indonesian,Spanish": 0.43333333333333335, + "English,Chinese,Filipino,Indonesian,Spanish": 0.37333333333333335, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.36666666666666664 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.26666666666666666, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.2866666666666667, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.2733333333333333, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.2866666666666667, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.26666666666666666, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.26, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.34 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.24 + } + }, + "AC3_2": 0.6170417581919202, + "AC3_3": 0.5409770617608103, + "AC3_4": 0.47979425784309665, + "AC3_5": 0.4271875204790501, + "AC3_6": 0.3810800627503778, + "AC3_7": 0.3401618496696913 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.46185064935064934, + "language_acc": { + "Spanish": 0.4772727272727273, + "Chinese": 0.4431818181818182, + "Vietnamese": 0.4943181818181818, + "Indonesian": 0.42045454545454547, + "Malay": 0.4375, + "Filipino": 0.4772727272727273, + "English": 0.48295454545454547 + }, + "consistency_score_2": 0.5641233766233766, + "consistency_score_3": 0.3926948051948052, + "consistency_score_4": 0.2967532467532467, + "consistency_score_5": 0.2343073593073593, + "consistency_score_6": 0.19074675324675325, + "consistency_score_7": 0.1590909090909091, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.4715909090909091, + "Spanish,Vietnamese": 0.5738636363636364, + "Spanish,Indonesian": 0.5795454545454546, + "Spanish,Malay": 0.5965909090909091, + "Spanish,Filipino": 0.625, + "Spanish,English": 0.6079545454545454, + "Chinese,Vietnamese": 0.45454545454545453, + "Chinese,Indonesian": 0.4602272727272727, + "Chinese,Malay": 0.5227272727272727, + "Chinese,Filipino": 0.5170454545454546, + "Chinese,English": 0.45454545454545453, + "Vietnamese,Indonesian": 0.5965909090909091, + "Vietnamese,Malay": 0.5113636363636364, + "Vietnamese,Filipino": 0.5965909090909091, + "Vietnamese,English": 0.5852272727272727, + "Indonesian,Malay": 0.6136363636363636, + "Indonesian,Filipino": 0.6420454545454546, + "Indonesian,English": 0.5795454545454546, + "Malay,Filipino": 0.6534090909090909, + "Malay,English": 0.5795454545454546, + "Filipino,English": 0.625 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.30113636363636365, + "Spanish,Chinese,Indonesian": 0.3181818181818182, + "Spanish,Chinese,Malay": 0.3465909090909091, + "Spanish,Chinese,Filipino": 0.35795454545454547, + "Spanish,Chinese,English": 0.32386363636363635, + "Spanish,Vietnamese,Indonesian": 0.42613636363636365, + "Spanish,Vietnamese,Malay": 0.38636363636363635, + "Spanish,Vietnamese,Filipino": 0.4431818181818182, + "Spanish,Vietnamese,English": 0.4147727272727273, + "Spanish,Indonesian,Malay": 0.44886363636363635, + "Spanish,Indonesian,Filipino": 0.4715909090909091, + "Spanish,Indonesian,English": 0.42045454545454547, + "Spanish,Malay,Filipino": 0.4659090909090909, + "Spanish,Malay,English": 0.42613636363636365, + "Spanish,Filipino,English": 0.45454545454545453, + "Chinese,Vietnamese,Indonesian": 0.30113636363636365, + "Chinese,Vietnamese,Malay": 0.3181818181818182, + "Chinese,Vietnamese,Filipino": 0.3465909090909091, + "Chinese,Vietnamese,English": 0.3125, + "Chinese,Indonesian,Malay": 0.3522727272727273, + "Chinese,Indonesian,Filipino": 0.3522727272727273, + "Chinese,Indonesian,English": 0.3068181818181818, + "Chinese,Malay,Filipino": 0.39204545454545453, + "Chinese,Malay,English": 0.3465909090909091, + "Chinese,Filipino,English": 0.3409090909090909, + "Vietnamese,Indonesian,Malay": 0.4090909090909091, + "Vietnamese,Indonesian,Filipino": 0.4602272727272727, + "Vietnamese,Indonesian,English": 0.4090909090909091, + "Vietnamese,Malay,Filipino": 0.42045454545454547, + "Vietnamese,Malay,English": 0.3806818181818182, + "Vietnamese,Filipino,English": 0.4375, + "Indonesian,Malay,Filipino": 0.4943181818181818, + "Indonesian,Malay,English": 0.42613636363636365, + "Indonesian,Filipino,English": 0.4659090909090909, + "Malay,Filipino,English": 0.4659090909090909 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.23295454545454544, + "Spanish,Chinese,Vietnamese,Malay": 0.24431818181818182, + "Spanish,Chinese,Vietnamese,Filipino": 0.26136363636363635, + "Spanish,Chinese,Vietnamese,English": 0.2215909090909091, + "Spanish,Chinese,Indonesian,Malay": 0.2784090909090909, + "Spanish,Chinese,Indonesian,Filipino": 0.2840909090909091, + "Spanish,Chinese,Indonesian,English": 0.23863636363636365, + "Spanish,Chinese,Malay,Filipino": 0.30113636363636365, + "Spanish,Chinese,Malay,English": 0.26136363636363635, + "Spanish,Chinese,Filipino,English": 0.26136363636363635, + "Spanish,Vietnamese,Indonesian,Malay": 0.3465909090909091, + "Spanish,Vietnamese,Indonesian,Filipino": 0.3693181818181818, + "Spanish,Vietnamese,Indonesian,English": 0.32386363636363635, + "Spanish,Vietnamese,Malay,Filipino": 0.32954545454545453, + "Spanish,Vietnamese,Malay,English": 0.2897727272727273, + "Spanish,Vietnamese,Filipino,English": 0.3352272727272727, + "Spanish,Indonesian,Malay,Filipino": 0.38636363636363635, + "Spanish,Indonesian,Malay,English": 0.3352272727272727, + "Spanish,Indonesian,Filipino,English": 0.35795454545454547, + "Spanish,Malay,Filipino,English": 0.3522727272727273, + "Chinese,Vietnamese,Indonesian,Malay": 0.2556818181818182, + "Chinese,Vietnamese,Indonesian,Filipino": 0.26704545454545453, + "Chinese,Vietnamese,Indonesian,English": 0.2159090909090909, + "Chinese,Vietnamese,Malay,Filipino": 0.2784090909090909, + "Chinese,Vietnamese,Malay,English": 0.24431818181818182, + "Chinese,Vietnamese,Filipino,English": 0.2556818181818182, + "Chinese,Indonesian,Malay,Filipino": 0.3125, + "Chinese,Indonesian,Malay,English": 0.2727272727272727, + "Chinese,Indonesian,Filipino,English": 0.2556818181818182, + "Chinese,Malay,Filipino,English": 0.3068181818181818, + "Vietnamese,Indonesian,Malay,Filipino": 0.35795454545454547, + "Vietnamese,Indonesian,Malay,English": 0.3068181818181818, + "Vietnamese,Indonesian,Filipino,English": 0.3465909090909091, + "Vietnamese,Malay,Filipino,English": 0.32954545454545453, + "Indonesian,Malay,Filipino,English": 0.3693181818181818 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.2215909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.2215909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.17613636363636365, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.2215909090909091, + "Spanish,Chinese,Vietnamese,Malay,English": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.1875, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.26136363636363635, + "Spanish,Chinese,Indonesian,Malay,English": 0.2215909090909091, + "Spanish,Chinese,Indonesian,Filipino,English": 0.21022727272727273, + "Spanish,Chinese,Malay,Filipino,English": 0.23863636363636365, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.3068181818181818, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.26136363636363635, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.2840909090909091, + "Spanish,Vietnamese,Malay,Filipino,English": 0.25, + "Spanish,Indonesian,Malay,Filipino,English": 0.29545454545454547, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.23863636363636365, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.19886363636363635, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.19886363636363635, + "Chinese,Vietnamese,Malay,Filipino,English": 0.2215909090909091, + "Chinese,Indonesian,Malay,Filipino,English": 0.24431818181818182, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.2784090909090909 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.17045454545454544, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.16477272727272727, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.16477272727272727, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.20454545454545456, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.23295454545454544, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.1875 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.1590909090909091 + } + }, + "AC3_2": 0.5078895589674293, + "AC3_3": 0.42447443793635364, + "AC3_4": 0.36133660898852776, + "AC3_5": 0.3108920810572946, + "AC3_6": 0.2699873198526651, + "AC3_7": 0.23666072485790582 + }, + "prompt_2": { + "overall_acc": 0.4837662337662338, + "language_acc": { + "Spanish": 0.4943181818181818, + "Chinese": 0.4147727272727273, + "Vietnamese": 0.5, + "Indonesian": 0.4715909090909091, + "Malay": 0.4772727272727273, + "Filipino": 0.4715909090909091, + "English": 0.5568181818181818 + }, + "consistency_score_2": 0.5446428571428573, + "consistency_score_3": 0.3696428571428572, + "consistency_score_4": 0.26801948051948055, + "consistency_score_5": 0.19994588744588743, + "consistency_score_6": 0.15097402597402595, + "consistency_score_7": 0.11363636363636363, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.4090909090909091, + "Spanish,Vietnamese": 0.5681818181818182, + "Spanish,Indonesian": 0.5454545454545454, + "Spanish,Malay": 0.6079545454545454, + "Spanish,Filipino": 0.6136363636363636, + "Spanish,English": 0.6363636363636364, + "Chinese,Vietnamese": 0.4147727272727273, + "Chinese,Indonesian": 0.42045454545454547, + "Chinese,Malay": 0.375, + "Chinese,Filipino": 0.3977272727272727, + "Chinese,English": 0.4375, + "Vietnamese,Indonesian": 0.5625, + "Vietnamese,Malay": 0.5454545454545454, + "Vietnamese,Filipino": 0.5625, + "Vietnamese,English": 0.6193181818181818, + "Indonesian,Malay": 0.6534090909090909, + "Indonesian,Filipino": 0.5795454545454546, + "Indonesian,English": 0.6363636363636364, + "Malay,Filipino": 0.5909090909090909, + "Malay,English": 0.6306818181818182, + "Filipino,English": 0.6306818181818182 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.2840909090909091, + "Spanish,Chinese,Indonesian": 0.26136363636363635, + "Spanish,Chinese,Malay": 0.2784090909090909, + "Spanish,Chinese,Filipino": 0.2727272727272727, + "Spanish,Chinese,English": 0.32386363636363635, + "Spanish,Vietnamese,Indonesian": 0.3977272727272727, + "Spanish,Vietnamese,Malay": 0.4147727272727273, + "Spanish,Vietnamese,Filipino": 0.4034090909090909, + "Spanish,Vietnamese,English": 0.4431818181818182, + "Spanish,Indonesian,Malay": 0.4375, + "Spanish,Indonesian,Filipino": 0.4034090909090909, + "Spanish,Indonesian,English": 0.44886363636363635, + "Spanish,Malay,Filipino": 0.4318181818181818, + "Spanish,Malay,English": 0.4715909090909091, + "Spanish,Filipino,English": 0.4715909090909091, + "Chinese,Vietnamese,Indonesian": 0.2897727272727273, + "Chinese,Vietnamese,Malay": 0.26136363636363635, + "Chinese,Vietnamese,Filipino": 0.2556818181818182, + "Chinese,Vietnamese,English": 0.2897727272727273, + "Chinese,Indonesian,Malay": 0.29545454545454547, + "Chinese,Indonesian,Filipino": 0.26704545454545453, + "Chinese,Indonesian,English": 0.30113636363636365, + "Chinese,Malay,Filipino": 0.25, + "Chinese,Malay,English": 0.2897727272727273, + "Chinese,Filipino,English": 0.2897727272727273, + "Vietnamese,Indonesian,Malay": 0.42045454545454547, + "Vietnamese,Indonesian,Filipino": 0.39204545454545453, + "Vietnamese,Indonesian,English": 0.44886363636363635, + "Vietnamese,Malay,Filipino": 0.3977272727272727, + "Vietnamese,Malay,English": 0.44886363636363635, + "Vietnamese,Filipino,English": 0.4375, + "Indonesian,Malay,Filipino": 0.44886363636363635, + "Indonesian,Malay,English": 0.4943181818181818, + "Indonesian,Filipino,English": 0.44886363636363635, + "Malay,Filipino,English": 0.4659090909090909 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.2159090909090909, + "Spanish,Chinese,Vietnamese,Malay": 0.20454545454545456, + "Spanish,Chinese,Vietnamese,Filipino": 0.19318181818181818, + "Spanish,Chinese,Vietnamese,English": 0.23863636363636365, + "Spanish,Chinese,Indonesian,Malay": 0.21022727272727273, + "Spanish,Chinese,Indonesian,Filipino": 0.18181818181818182, + "Spanish,Chinese,Indonesian,English": 0.23295454545454544, + "Spanish,Chinese,Malay,Filipino": 0.19318181818181818, + "Spanish,Chinese,Malay,English": 0.23863636363636365, + "Spanish,Chinese,Filipino,English": 0.2215909090909091, + "Spanish,Vietnamese,Indonesian,Malay": 0.32386363636363635, + "Spanish,Vietnamese,Indonesian,Filipino": 0.29545454545454547, + "Spanish,Vietnamese,Indonesian,English": 0.3409090909090909, + "Spanish,Vietnamese,Malay,Filipino": 0.3181818181818182, + "Spanish,Vietnamese,Malay,English": 0.3465909090909091, + "Spanish,Vietnamese,Filipino,English": 0.3409090909090909, + "Spanish,Indonesian,Malay,Filipino": 0.3352272727272727, + "Spanish,Indonesian,Malay,English": 0.3806818181818182, + "Spanish,Indonesian,Filipino,English": 0.3465909090909091, + "Spanish,Malay,Filipino,English": 0.36363636363636365, + "Chinese,Vietnamese,Indonesian,Malay": 0.2159090909090909, + "Chinese,Vietnamese,Indonesian,Filipino": 0.20454545454545456, + "Chinese,Vietnamese,Indonesian,English": 0.22727272727272727, + "Chinese,Vietnamese,Malay,Filipino": 0.1875, + "Chinese,Vietnamese,Malay,English": 0.2159090909090909, + "Chinese,Vietnamese,Filipino,English": 0.20454545454545456, + "Chinese,Indonesian,Malay,Filipino": 0.21022727272727273, + "Chinese,Indonesian,Malay,English": 0.23863636363636365, + "Chinese,Indonesian,Filipino,English": 0.2159090909090909, + "Chinese,Malay,Filipino,English": 0.21022727272727273, + "Vietnamese,Indonesian,Malay,Filipino": 0.3181818181818182, + "Vietnamese,Indonesian,Malay,English": 0.36363636363636365, + "Vietnamese,Indonesian,Filipino,English": 0.32386363636363635, + "Vietnamese,Malay,Filipino,English": 0.3409090909090909, + "Indonesian,Malay,Filipino,English": 0.3806818181818182 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.17045454545454544, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.14772727272727273, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.1875, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.1534090909090909, + "Spanish,Chinese,Vietnamese,Malay,English": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.16477272727272727, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.1534090909090909, + "Spanish,Chinese,Indonesian,Malay,English": 0.19318181818181818, + "Spanish,Chinese,Indonesian,Filipino,English": 0.16477272727272727, + "Spanish,Chinese,Malay,Filipino,English": 0.17045454545454544, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.2556818181818182, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.2897727272727273, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.26136363636363635, + "Spanish,Vietnamese,Malay,Filipino,English": 0.2784090909090909, + "Spanish,Indonesian,Malay,Filipino,English": 0.30113636363636365, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.1590909090909091, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.18181818181818182, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.16477272727272727, + "Chinese,Vietnamese,Malay,Filipino,English": 0.1590909090909091, + "Chinese,Indonesian,Malay,Filipino,English": 0.18181818181818182, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.2784090909090909 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.125, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.1534090909090909, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.13068181818181818, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.13636363636363635, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.14204545454545456, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.23295454545454544, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.13636363636363635 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.11363636363636363 + } + }, + "AC3_2": 0.5124027511058862, + "AC3_3": 0.41907388782828214, + "AC3_4": 0.3449354575002427, + "AC3_5": 0.2829467722207821, + "AC3_6": 0.230129205796275, + "AC3_7": 0.18404150194547764 + }, + "prompt_3": { + "overall_acc": 0.45860389610389607, + "language_acc": { + "Spanish": 0.48863636363636365, + "Chinese": 0.39204545454545453, + "Vietnamese": 0.4318181818181818, + "Indonesian": 0.4943181818181818, + "Malay": 0.4602272727272727, + "Filipino": 0.4318181818181818, + "English": 0.5113636363636364 + }, + "consistency_score_2": 0.5462662337662338, + "consistency_score_3": 0.3678571428571428, + "consistency_score_4": 0.2683441558441559, + "consistency_score_5": 0.20535714285714285, + "consistency_score_6": 0.16396103896103895, + "consistency_score_7": 0.13636363636363635, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.44886363636363635, + "Spanish,Vietnamese": 0.5340909090909091, + "Spanish,Indonesian": 0.5284090909090909, + "Spanish,Malay": 0.5852272727272727, + "Spanish,Filipino": 0.6420454545454546, + "Spanish,English": 0.6534090909090909, + "Chinese,Vietnamese": 0.44886363636363635, + "Chinese,Indonesian": 0.44886363636363635, + "Chinese,Malay": 0.4147727272727273, + "Chinese,Filipino": 0.44886363636363635, + "Chinese,English": 0.4602272727272727, + "Vietnamese,Indonesian": 0.5227272727272727, + "Vietnamese,Malay": 0.5284090909090909, + "Vietnamese,Filipino": 0.5681818181818182, + "Vietnamese,English": 0.5795454545454546, + "Indonesian,Malay": 0.5909090909090909, + "Indonesian,Filipino": 0.5795454545454546, + "Indonesian,English": 0.5852272727272727, + "Malay,Filipino": 0.625, + "Malay,English": 0.6306818181818182, + "Filipino,English": 0.6477272727272727 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.29545454545454547, + "Spanish,Chinese,Indonesian": 0.2727272727272727, + "Spanish,Chinese,Malay": 0.2897727272727273, + "Spanish,Chinese,Filipino": 0.3181818181818182, + "Spanish,Chinese,English": 0.3352272727272727, + "Spanish,Vietnamese,Indonesian": 0.35795454545454547, + "Spanish,Vietnamese,Malay": 0.3693181818181818, + "Spanish,Vietnamese,Filipino": 0.4034090909090909, + "Spanish,Vietnamese,English": 0.4147727272727273, + "Spanish,Indonesian,Malay": 0.3977272727272727, + "Spanish,Indonesian,Filipino": 0.4034090909090909, + "Spanish,Indonesian,English": 0.42613636363636365, + "Spanish,Malay,Filipino": 0.4602272727272727, + "Spanish,Malay,English": 0.4659090909090909, + "Spanish,Filipino,English": 0.4943181818181818, + "Chinese,Vietnamese,Indonesian": 0.2840909090909091, + "Chinese,Vietnamese,Malay": 0.26704545454545453, + "Chinese,Vietnamese,Filipino": 0.2840909090909091, + "Chinese,Vietnamese,English": 0.3068181818181818, + "Chinese,Indonesian,Malay": 0.2727272727272727, + "Chinese,Indonesian,Filipino": 0.2840909090909091, + "Chinese,Indonesian,English": 0.3068181818181818, + "Chinese,Malay,Filipino": 0.30113636363636365, + "Chinese,Malay,English": 0.3125, + "Chinese,Filipino,English": 0.3409090909090909, + "Vietnamese,Indonesian,Malay": 0.38636363636363635, + "Vietnamese,Indonesian,Filipino": 0.38636363636363635, + "Vietnamese,Indonesian,English": 0.39204545454545453, + "Vietnamese,Malay,Filipino": 0.42045454545454547, + "Vietnamese,Malay,English": 0.4090909090909091, + "Vietnamese,Filipino,English": 0.42613636363636365, + "Indonesian,Malay,Filipino": 0.42613636363636365, + "Indonesian,Malay,English": 0.4375, + "Indonesian,Filipino,English": 0.4375, + "Malay,Filipino,English": 0.48863636363636365 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Malay": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Filipino": 0.22727272727272727, + "Spanish,Chinese,Vietnamese,English": 0.23863636363636365, + "Spanish,Chinese,Indonesian,Malay": 0.19318181818181818, + "Spanish,Chinese,Indonesian,Filipino": 0.19886363636363635, + "Spanish,Chinese,Indonesian,English": 0.22727272727272727, + "Spanish,Chinese,Malay,Filipino": 0.24431818181818182, + "Spanish,Chinese,Malay,English": 0.26136363636363635, + "Spanish,Chinese,Filipino,English": 0.2727272727272727, + "Spanish,Vietnamese,Indonesian,Malay": 0.2897727272727273, + "Spanish,Vietnamese,Indonesian,Filipino": 0.2784090909090909, + "Spanish,Vietnamese,Indonesian,English": 0.29545454545454547, + "Spanish,Vietnamese,Malay,Filipino": 0.3181818181818182, + "Spanish,Vietnamese,Malay,English": 0.3181818181818182, + "Spanish,Vietnamese,Filipino,English": 0.32954545454545453, + "Spanish,Indonesian,Malay,Filipino": 0.32386363636363635, + "Spanish,Indonesian,Malay,English": 0.3352272727272727, + "Spanish,Indonesian,Filipino,English": 0.3409090909090909, + "Spanish,Malay,Filipino,English": 0.3977272727272727, + "Chinese,Vietnamese,Indonesian,Malay": 0.19886363636363635, + "Chinese,Vietnamese,Indonesian,Filipino": 0.2159090909090909, + "Chinese,Vietnamese,Indonesian,English": 0.23295454545454544, + "Chinese,Vietnamese,Malay,Filipino": 0.2215909090909091, + "Chinese,Vietnamese,Malay,English": 0.22727272727272727, + "Chinese,Vietnamese,Filipino,English": 0.24431818181818182, + "Chinese,Indonesian,Malay,Filipino": 0.20454545454545456, + "Chinese,Indonesian,Malay,English": 0.2159090909090909, + "Chinese,Indonesian,Filipino,English": 0.23863636363636365, + "Chinese,Malay,Filipino,English": 0.2727272727272727, + "Vietnamese,Indonesian,Malay,Filipino": 0.3068181818181818, + "Vietnamese,Indonesian,Malay,English": 0.3125, + "Vietnamese,Indonesian,Filipino,English": 0.3068181818181818, + "Vietnamese,Malay,Filipino,English": 0.3409090909090909, + "Indonesian,Malay,Filipino,English": 0.3409090909090909 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.1875, + "Spanish,Chinese,Vietnamese,Malay,English": 0.19886363636363635, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.19886363636363635, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.1590909090909091, + "Spanish,Chinese,Indonesian,Malay,English": 0.17613636363636365, + "Spanish,Chinese,Indonesian,Filipino,English": 0.18181818181818182, + "Spanish,Chinese,Malay,Filipino,English": 0.23863636363636365, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.23863636363636365, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.25, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.23863636363636365, + "Spanish,Vietnamese,Malay,Filipino,English": 0.2784090909090909, + "Spanish,Indonesian,Malay,Filipino,English": 0.2840909090909091, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.17045454545454544, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.17613636363636365, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.19318181818181818, + "Chinese,Vietnamese,Malay,Filipino,English": 0.20454545454545456, + "Chinese,Indonesian,Malay,Filipino,English": 0.1875, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.25 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.13636363636363635, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.1534090909090909, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.14772727272727273, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.18181818181818182, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.1590909090909091, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.21022727272727273, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.1590909090909091 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.13636363636363635 + } + }, + "AC3_2": 0.49861134417965686, + "AC3_3": 0.4082484495956346, + "AC3_4": 0.33857625732713653, + "AC3_5": 0.28368407260833933, + "AC3_6": 0.24155928813065697, + "AC3_7": 0.2102195212346658 + }, + "prompt_4": { + "overall_acc": 0.476461038961039, + "language_acc": { + "Spanish": 0.4772727272727273, + "Chinese": 0.5056818181818182, + "Vietnamese": 0.4943181818181818, + "Indonesian": 0.4375, + "Malay": 0.44886363636363635, + "Filipino": 0.4659090909090909, + "English": 0.5056818181818182 + }, + "consistency_score_2": 0.5430194805194805, + "consistency_score_3": 0.37142857142857155, + "consistency_score_4": 0.27938311688311684, + "consistency_score_5": 0.22023809523809523, + "consistency_score_6": 0.17857142857142858, + "consistency_score_7": 0.14772727272727273, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.48863636363636365, + "Spanish,Vietnamese": 0.5397727272727273, + "Spanish,Indonesian": 0.5454545454545454, + "Spanish,Malay": 0.5909090909090909, + "Spanish,Filipino": 0.6193181818181818, + "Spanish,English": 0.6306818181818182, + "Chinese,Vietnamese": 0.4034090909090909, + "Chinese,Indonesian": 0.4659090909090909, + "Chinese,Malay": 0.44886363636363635, + "Chinese,Filipino": 0.4659090909090909, + "Chinese,English": 0.48863636363636365, + "Vietnamese,Indonesian": 0.5227272727272727, + "Vietnamese,Malay": 0.5397727272727273, + "Vietnamese,Filipino": 0.5852272727272727, + "Vietnamese,English": 0.5397727272727273, + "Indonesian,Malay": 0.6193181818181818, + "Indonesian,Filipino": 0.5681818181818182, + "Indonesian,English": 0.5795454545454546, + "Malay,Filipino": 0.5625, + "Malay,English": 0.5965909090909091, + "Filipino,English": 0.6022727272727273 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.30113636363636365, + "Spanish,Chinese,Indonesian": 0.3125, + "Spanish,Chinese,Malay": 0.32386363636363635, + "Spanish,Chinese,Filipino": 0.3465909090909091, + "Spanish,Chinese,English": 0.3522727272727273, + "Spanish,Vietnamese,Indonesian": 0.3693181818181818, + "Spanish,Vietnamese,Malay": 0.3977272727272727, + "Spanish,Vietnamese,Filipino": 0.4090909090909091, + "Spanish,Vietnamese,English": 0.4090909090909091, + "Spanish,Indonesian,Malay": 0.4431818181818182, + "Spanish,Indonesian,Filipino": 0.4147727272727273, + "Spanish,Indonesian,English": 0.42045454545454547, + "Spanish,Malay,Filipino": 0.42613636363636365, + "Spanish,Malay,English": 0.45454545454545453, + "Spanish,Filipino,English": 0.4715909090909091, + "Chinese,Vietnamese,Indonesian": 0.29545454545454547, + "Chinese,Vietnamese,Malay": 0.2840909090909091, + "Chinese,Vietnamese,Filipino": 0.2897727272727273, + "Chinese,Vietnamese,English": 0.2840909090909091, + "Chinese,Indonesian,Malay": 0.32954545454545453, + "Chinese,Indonesian,Filipino": 0.30113636363636365, + "Chinese,Indonesian,English": 0.3181818181818182, + "Chinese,Malay,Filipino": 0.3068181818181818, + "Chinese,Malay,English": 0.32954545454545453, + "Chinese,Filipino,English": 0.32954545454545453, + "Vietnamese,Indonesian,Malay": 0.4034090909090909, + "Vietnamese,Indonesian,Filipino": 0.38636363636363635, + "Vietnamese,Indonesian,English": 0.375, + "Vietnamese,Malay,Filipino": 0.3977272727272727, + "Vietnamese,Malay,English": 0.39204545454545453, + "Vietnamese,Filipino,English": 0.4034090909090909, + "Indonesian,Malay,Filipino": 0.4147727272727273, + "Indonesian,Malay,English": 0.44886363636363635, + "Indonesian,Filipino,English": 0.42613636363636365, + "Malay,Filipino,English": 0.4318181818181818 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.2215909090909091, + "Spanish,Chinese,Vietnamese,Malay": 0.23295454545454544, + "Spanish,Chinese,Vietnamese,Filipino": 0.23863636363636365, + "Spanish,Chinese,Vietnamese,English": 0.24431818181818182, + "Spanish,Chinese,Indonesian,Malay": 0.2556818181818182, + "Spanish,Chinese,Indonesian,Filipino": 0.23295454545454544, + "Spanish,Chinese,Indonesian,English": 0.25, + "Spanish,Chinese,Malay,Filipino": 0.2556818181818182, + "Spanish,Chinese,Malay,English": 0.26704545454545453, + "Spanish,Chinese,Filipino,English": 0.26704545454545453, + "Spanish,Vietnamese,Indonesian,Malay": 0.3181818181818182, + "Spanish,Vietnamese,Indonesian,Filipino": 0.3125, + "Spanish,Vietnamese,Indonesian,English": 0.3068181818181818, + "Spanish,Vietnamese,Malay,Filipino": 0.3181818181818182, + "Spanish,Vietnamese,Malay,English": 0.32954545454545453, + "Spanish,Vietnamese,Filipino,English": 0.3465909090909091, + "Spanish,Indonesian,Malay,Filipino": 0.3465909090909091, + "Spanish,Indonesian,Malay,English": 0.36363636363636365, + "Spanish,Indonesian,Filipino,English": 0.3465909090909091, + "Spanish,Malay,Filipino,English": 0.3693181818181818, + "Chinese,Vietnamese,Indonesian,Malay": 0.23295454545454544, + "Chinese,Vietnamese,Indonesian,Filipino": 0.2215909090909091, + "Chinese,Vietnamese,Indonesian,English": 0.2215909090909091, + "Chinese,Vietnamese,Malay,Filipino": 0.2159090909090909, + "Chinese,Vietnamese,Malay,English": 0.23863636363636365, + "Chinese,Vietnamese,Filipino,English": 0.2215909090909091, + "Chinese,Indonesian,Malay,Filipino": 0.23295454545454544, + "Chinese,Indonesian,Malay,English": 0.2556818181818182, + "Chinese,Indonesian,Filipino,English": 0.23863636363636365, + "Chinese,Malay,Filipino,English": 0.26136363636363635, + "Vietnamese,Indonesian,Malay,Filipino": 0.3125, + "Vietnamese,Indonesian,Malay,English": 0.32386363636363635, + "Vietnamese,Indonesian,Filipino,English": 0.3068181818181818, + "Vietnamese,Malay,Filipino,English": 0.32386363636363635, + "Indonesian,Malay,Filipino,English": 0.3465909090909091 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.19318181818181818, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.1875, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.19318181818181818, + "Spanish,Chinese,Vietnamese,Malay,English": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.19886363636363635, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.20454545454545456, + "Spanish,Chinese,Indonesian,Malay,English": 0.2159090909090909, + "Spanish,Chinese,Indonesian,Filipino,English": 0.19886363636363635, + "Spanish,Chinese,Malay,Filipino,English": 0.22727272727272727, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.26704545454545453, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.2727272727272727, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.2727272727272727, + "Spanish,Vietnamese,Malay,Filipino,English": 0.2897727272727273, + "Spanish,Indonesian,Malay,Filipino,English": 0.3068181818181818, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.17613636363636365, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.19318181818181818, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.17613636363636365, + "Chinese,Vietnamese,Malay,Filipino,English": 0.19318181818181818, + "Chinese,Indonesian,Malay,Filipino,English": 0.19886363636363635, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.26704545454545453 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.17045454545454544, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.18181818181818182, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.18181818181818182, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.24431818181818182, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.1534090909090909 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.14772727272727273 + } + }, + "AC3_2": 0.5075675717096639, + "AC3_3": 0.41743934788982806, + "AC3_4": 0.3522291443056106, + "AC3_5": 0.3012343966280525, + "AC3_6": 0.2597804920828404, + "AC3_7": 0.225529022306935 + }, + "prompt_5": { + "overall_acc": 0.4675324675324676, + "language_acc": { + "Spanish": 0.45454545454545453, + "Chinese": 0.44886363636363635, + "Vietnamese": 0.4602272727272727, + "Indonesian": 0.4715909090909091, + "Malay": 0.4659090909090909, + "Filipino": 0.4659090909090909, + "English": 0.5056818181818182 + }, + "consistency_score_2": 0.5625, + "consistency_score_3": 0.3925324675324674, + "consistency_score_4": 0.2983766233766234, + "consistency_score_5": 0.23566017316017318, + "consistency_score_6": 0.18912337662337664, + "consistency_score_7": 0.1534090909090909, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.4715909090909091, + "Spanish,Vietnamese": 0.5568181818181818, + "Spanish,Indonesian": 0.5852272727272727, + "Spanish,Malay": 0.6136363636363636, + "Spanish,Filipino": 0.5795454545454546, + "Spanish,English": 0.6534090909090909, + "Chinese,Vietnamese": 0.4034090909090909, + "Chinese,Indonesian": 0.45454545454545453, + "Chinese,Malay": 0.4431818181818182, + "Chinese,Filipino": 0.4602272727272727, + "Chinese,English": 0.5340909090909091, + "Vietnamese,Indonesian": 0.5965909090909091, + "Vietnamese,Malay": 0.5340909090909091, + "Vietnamese,Filipino": 0.5454545454545454, + "Vietnamese,English": 0.5795454545454546, + "Indonesian,Malay": 0.6590909090909091, + "Indonesian,Filipino": 0.625, + "Indonesian,English": 0.6590909090909091, + "Malay,Filipino": 0.5852272727272727, + "Malay,English": 0.625, + "Filipino,English": 0.6477272727272727 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.2784090909090909, + "Spanish,Chinese,Indonesian": 0.3125, + "Spanish,Chinese,Malay": 0.3181818181818182, + "Spanish,Chinese,Filipino": 0.32386363636363635, + "Spanish,Chinese,English": 0.3693181818181818, + "Spanish,Vietnamese,Indonesian": 0.4090909090909091, + "Spanish,Vietnamese,Malay": 0.4147727272727273, + "Spanish,Vietnamese,Filipino": 0.3977272727272727, + "Spanish,Vietnamese,English": 0.4375, + "Spanish,Indonesian,Malay": 0.45454545454545453, + "Spanish,Indonesian,Filipino": 0.4375, + "Spanish,Indonesian,English": 0.48295454545454547, + "Spanish,Malay,Filipino": 0.4318181818181818, + "Spanish,Malay,English": 0.48863636363636365, + "Spanish,Filipino,English": 0.4772727272727273, + "Chinese,Vietnamese,Indonesian": 0.29545454545454547, + "Chinese,Vietnamese,Malay": 0.2727272727272727, + "Chinese,Vietnamese,Filipino": 0.2784090909090909, + "Chinese,Vietnamese,English": 0.3181818181818182, + "Chinese,Indonesian,Malay": 0.3352272727272727, + "Chinese,Indonesian,Filipino": 0.32954545454545453, + "Chinese,Indonesian,English": 0.3693181818181818, + "Chinese,Malay,Filipino": 0.3068181818181818, + "Chinese,Malay,English": 0.3465909090909091, + "Chinese,Filipino,English": 0.3693181818181818, + "Vietnamese,Indonesian,Malay": 0.4375, + "Vietnamese,Indonesian,Filipino": 0.42045454545454547, + "Vietnamese,Indonesian,English": 0.4431818181818182, + "Vietnamese,Malay,Filipino": 0.38636363636363635, + "Vietnamese,Malay,English": 0.4147727272727273, + "Vietnamese,Filipino,English": 0.42613636363636365, + "Indonesian,Malay,Filipino": 0.4715909090909091, + "Indonesian,Malay,English": 0.5056818181818182, + "Indonesian,Filipino,English": 0.5, + "Malay,Filipino,English": 0.4772727272727273 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.2215909090909091, + "Spanish,Chinese,Vietnamese,Malay": 0.23295454545454544, + "Spanish,Chinese,Vietnamese,Filipino": 0.2159090909090909, + "Spanish,Chinese,Vietnamese,English": 0.25, + "Spanish,Chinese,Indonesian,Malay": 0.25, + "Spanish,Chinese,Indonesian,Filipino": 0.24431818181818182, + "Spanish,Chinese,Indonesian,English": 0.2840909090909091, + "Spanish,Chinese,Malay,Filipino": 0.25, + "Spanish,Chinese,Malay,English": 0.2784090909090909, + "Spanish,Chinese,Filipino,English": 0.2840909090909091, + "Spanish,Vietnamese,Indonesian,Malay": 0.3409090909090909, + "Spanish,Vietnamese,Indonesian,Filipino": 0.3352272727272727, + "Spanish,Vietnamese,Indonesian,English": 0.3522727272727273, + "Spanish,Vietnamese,Malay,Filipino": 0.32386363636363635, + "Spanish,Vietnamese,Malay,English": 0.3522727272727273, + "Spanish,Vietnamese,Filipino,English": 0.3409090909090909, + "Spanish,Indonesian,Malay,Filipino": 0.35795454545454547, + "Spanish,Indonesian,Malay,English": 0.3977272727272727, + "Spanish,Indonesian,Filipino,English": 0.39204545454545453, + "Spanish,Malay,Filipino,English": 0.38636363636363635, + "Chinese,Vietnamese,Indonesian,Malay": 0.23295454545454544, + "Chinese,Vietnamese,Indonesian,Filipino": 0.23863636363636365, + "Chinese,Vietnamese,Indonesian,English": 0.26136363636363635, + "Chinese,Vietnamese,Malay,Filipino": 0.2215909090909091, + "Chinese,Vietnamese,Malay,English": 0.24431818181818182, + "Chinese,Vietnamese,Filipino,English": 0.24431818181818182, + "Chinese,Indonesian,Malay,Filipino": 0.26136363636363635, + "Chinese,Indonesian,Malay,English": 0.2897727272727273, + "Chinese,Indonesian,Filipino,English": 0.30113636363636365, + "Chinese,Malay,Filipino,English": 0.2784090909090909, + "Vietnamese,Indonesian,Malay,Filipino": 0.32954545454545453, + "Vietnamese,Indonesian,Malay,English": 0.35795454545454547, + "Vietnamese,Indonesian,Filipino,English": 0.35795454545454547, + "Vietnamese,Malay,Filipino,English": 0.32954545454545453, + "Indonesian,Malay,Filipino,English": 0.4034090909090909 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.19318181818181818, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.1875, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.1875, + "Spanish,Chinese,Vietnamese,Malay,English": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.19886363636363635, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.20454545454545456, + "Spanish,Chinese,Indonesian,Malay,English": 0.23295454545454544, + "Spanish,Chinese,Indonesian,Filipino,English": 0.23863636363636365, + "Spanish,Chinese,Malay,Filipino,English": 0.23295454545454544, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.2784090909090909, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.30113636363636365, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.29545454545454547, + "Spanish,Vietnamese,Malay,Filipino,English": 0.2840909090909091, + "Spanish,Indonesian,Malay,Filipino,English": 0.32954545454545453, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.19318181818181818, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.2159090909090909, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.2215909090909091, + "Chinese,Vietnamese,Malay,Filipino,English": 0.19886363636363635, + "Chinese,Indonesian,Malay,Filipino,English": 0.24431818181818182, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.2897727272727273 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.17045454545454544, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.19886363636363635, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.25, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.18181818181818182 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.1534090909090909 + } + }, + "AC3_2": 0.5106382978227654, + "AC3_3": 0.42676236555759284, + "AC3_4": 0.3642749788946497, + "AC3_5": 0.31336727907877565, + "AC3_6": 0.2693079478825107, + "AC3_7": 0.2310160427435445 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5825242718446602 + }, + "prompt_2": { + "accuracy": 0.5728155339805825 + }, + "prompt_3": { + "accuracy": 0.6019417475728155 + }, + "prompt_4": { + "accuracy": 0.33980582524271846 + }, + "prompt_5": { + "accuracy": 0.5825242718446602 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.38095238095238093 + }, + "prompt_2": { + "accuracy": 0.3619047619047619 + }, + "prompt_3": { + "accuracy": 0.3904761904761905 + }, + "prompt_4": { + "accuracy": 0.26666666666666666 + }, + "prompt_5": { + "accuracy": 0.3142857142857143 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6822429906542056 + }, + "prompt_2": { + "accuracy": 0.6915887850467289 + }, + "prompt_3": { + "accuracy": 0.7289719626168224 + }, + "prompt_4": { + "accuracy": 0.4953271028037383 + }, + "prompt_5": { + "accuracy": 0.6635514018691588 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.52, + "category_acc": { + "brand": 0.5, + "demographics": 0.4, + "biology": 0.3, + "history": 0.6, + "literature": 0.5, + "politics": 0.7, + "culture": 0.6, + "film": 0.5, + "law": 0.5, + "geography": 0.5 + } + }, + "prompt_2": { + "accuracy": 0.5, + "category_acc": { + "brand": 0.4, + "demographics": 0.0, + "biology": 0.3, + "history": 0.6, + "literature": 0.4, + "politics": 0.6, + "culture": 0.6, + "film": 0.7, + "law": 0.6, + "geography": 0.5 + } + }, + "prompt_3": { + "accuracy": 0.54, + "category_acc": { + "brand": 0.5, + "demographics": 0.2, + "biology": 0.3, + "history": 0.5333333333333333, + "literature": 0.5, + "politics": 0.8, + "culture": 0.6, + "film": 0.6, + "law": 0.5, + "geography": 0.7 + } + }, + "prompt_4": { + "accuracy": 0.41, + "category_acc": { + "brand": 0.4, + "demographics": 0.6, + "biology": 0.2, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.9, + "culture": 0.3, + "film": 0.5, + "law": 0.5, + "geography": 0.3 + } + }, + "prompt_5": { + "accuracy": 0.52, + "category_acc": { + "brand": 0.5, + "demographics": 0.0, + "biology": 0.3, + "history": 0.4666666666666667, + "literature": 0.5, + "politics": 0.9, + "culture": 0.6, + "film": 0.6, + "law": 0.6, + "geography": 0.5 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.06750971426706272 + }, + "prompt_2": { + "bleu_score": 0.07721750285072881 + }, + "prompt_3": { + "bleu_score": 0.06821978311551388 + }, + "prompt_4": { + "bleu_score": 0.06804397274407875 + }, + "prompt_5": { + "bleu_score": 0.05429875271696506 + } }, "indommlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.38914480272381335, + "category_acc": { + "History": 0.39959839357429716, + "Geography": 0.3551020408163265, + "Lampungic": 0.32653061224489793, + "Social science": 0.4574290484140234, + "Balinese": 0.3099787685774947, + "Makassarese": 0.3118279569892473, + "Banjarese": 0.3402777777777778, + "Chemistry": 0.2846715328467153, + "Biology": 0.421301775147929, + "Science": 0.43343653250773995, + "Christian religion": 0.44776119402985076, + "Art": 0.3910149750415973, + "Islam religion": 0.41963015647226176, + "Hindu religion": 0.42, + "Madurese": 0.3050847457627119, + "Sport": 0.4594594594594595, + "Indonesian language": 0.4392901618929016, + "Physics": 0.3414141414141414, + "Minangkabau culture": 0.32663316582914576, + "Dayak language": 0.25688073394495414, + "Sociology": 0.4012096774193548, + "Economy": 0.3709016393442623, + "Sundanese": 0.34485738980121, + "Javanese": 0.3094758064516129, + "Civic education": 0.4434907010014306 + } + }, + "prompt_2": { + "accuracy": 0.45116496428333, + "category_acc": { + "History": 0.42570281124497994, + "Geography": 0.4122448979591837, + "Lampungic": 0.3469387755102041, + "Social science": 0.6243739565943238, + "Balinese": 0.31422505307855625, + "Makassarese": 0.3225806451612903, + "Banjarese": 0.3680555555555556, + "Chemistry": 0.2934306569343066, + "Biology": 0.46272189349112425, + "Science": 0.5583075335397317, + "Christian religion": 0.5323383084577115, + "Art": 0.5124792013311148, + "Islam religion": 0.5092460881934566, + "Hindu religion": 0.5, + "Madurese": 0.3254237288135593, + "Sport": 0.4594594594594595, + "Indonesian language": 0.5065379825653799, + "Physics": 0.39595959595959596, + "Minangkabau culture": 0.4020100502512563, + "Dayak language": 0.28440366972477066, + "Sociology": 0.4435483870967742, + "Economy": 0.430327868852459, + "Sundanese": 0.3863439930855661, + "Javanese": 0.34173387096774194, + "Civic education": 0.51931330472103 + } + }, + "prompt_3": { + "accuracy": 0.42285866880299083, + "category_acc": { + "History": 0.39558232931726905, + "Geography": 0.3979591836734694, + "Lampungic": 0.30612244897959184, + "Social science": 0.5709515859766278, + "Balinese": 0.31422505307855625, + "Makassarese": 0.3333333333333333, + "Banjarese": 0.3333333333333333, + "Chemistry": 0.2832116788321168, + "Biology": 0.44260355029585796, + "Science": 0.5159958720330238, + "Christian religion": 0.44776119402985076, + "Art": 0.4442595673876872, + "Islam religion": 0.45803698435277385, + "Hindu religion": 0.4266666666666667, + "Madurese": 0.29491525423728815, + "Sport": 0.44594594594594594, + "Indonesian language": 0.4853673723536737, + "Physics": 0.40404040404040403, + "Minangkabau culture": 0.36180904522613067, + "Dayak language": 0.28440366972477066, + "Sociology": 0.4153225806451613, + "Economy": 0.38114754098360654, + "Sundanese": 0.3560933448573898, + "Javanese": 0.3336693548387097, + "Civic education": 0.48068669527896996 + } + }, + "prompt_4": { + "accuracy": 0.38240202950797786, + "category_acc": { + "History": 0.3895582329317269, + "Geography": 0.35918367346938773, + "Lampungic": 0.2925170068027211, + "Social science": 0.4590984974958264, + "Balinese": 0.3333333333333333, + "Makassarese": 0.3279569892473118, + "Banjarese": 0.3194444444444444, + "Chemistry": 0.28905109489051095, + "Biology": 0.41775147928994083, + "Science": 0.3973168214654283, + "Christian religion": 0.417910447761194, + "Art": 0.3910149750415973, + "Islam religion": 0.40540540540540543, + "Hindu religion": 0.38, + "Madurese": 0.3288135593220339, + "Sport": 0.34459459459459457, + "Indonesian language": 0.42745952677459526, + "Physics": 0.3090909090909091, + "Minangkabau culture": 0.3417085427135678, + "Dayak language": 0.27522935779816515, + "Sociology": 0.39314516129032256, + "Economy": 0.3463114754098361, + "Sundanese": 0.3500432152117545, + "Javanese": 0.3316532258064516, + "Civic education": 0.44206008583690987 + } + }, + "prompt_5": { + "accuracy": 0.4477601976099873, + "category_acc": { + "History": 0.44377510040160645, + "Geography": 0.42448979591836733, + "Lampungic": 0.3129251700680272, + "Social science": 0.5893155258764607, + "Balinese": 0.28450106157112526, + "Makassarese": 0.34946236559139787, + "Banjarese": 0.3472222222222222, + "Chemistry": 0.3343065693430657, + "Biology": 0.47218934911242605, + "Science": 0.5758513931888545, + "Christian religion": 0.4925373134328358, + "Art": 0.49584026622296173, + "Islam religion": 0.4822190611664296, + "Hindu religion": 0.4533333333333333, + "Madurese": 0.29152542372881357, + "Sport": 0.4797297297297297, + "Indonesian language": 0.5003113325031133, + "Physics": 0.4121212121212121, + "Minangkabau culture": 0.3768844221105528, + "Dayak language": 0.24770642201834864, + "Sociology": 0.41935483870967744, + "Economy": 0.4323770491803279, + "Sundanese": 0.39066551426101986, + "Javanese": 0.35080645161290325, + "Civic education": 0.5021459227467812 + } + } }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.0982752747830463 + }, + "prompt_2": { + "bleu_score": 0.09693382479933621 + }, + "prompt_3": { + "bleu_score": 0.09211183542188045 + }, + "prompt_4": { + "bleu_score": 0.09162870818146696 + }, + "prompt_5": { + "bleu_score": 0.1809729930555669 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.07481805117090375 + }, + "prompt_2": { + "bleu_score": 0.12408586583177651 + }, + "prompt_3": { + "bleu_score": 0.11323612407691637 + }, + "prompt_4": { + "bleu_score": 0.07103730915658504 + }, + "prompt_5": { + "bleu_score": 0.09878667276041729 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.06792021752065504 + }, + "prompt_2": { + "bleu_score": 0.08249521942696134 + }, + "prompt_3": { + "bleu_score": 0.08084386708497976 + }, + "prompt_4": { + "bleu_score": 0.06431676202990873 + }, + "prompt_5": { + "bleu_score": 0.08100137822980161 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.10560765530945605 + }, + "prompt_2": { + "bleu_score": 0.13240900994121152 + }, + "prompt_3": { + "bleu_score": 0.1296920316364985 + }, + "prompt_4": { + "bleu_score": 0.09176703052478538 + }, + "prompt_5": { + "bleu_score": 0.16824113101991417 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5974329054842473 + }, + "prompt_2": { + "accuracy": 0.588098016336056 + }, + "prompt_3": { + "accuracy": 0.6102683780630105 + }, + "prompt_4": { + "accuracy": 0.5857642940490082 + }, + "prompt_5": { + "accuracy": 0.6149358226371062 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5864855202002145, + "category_acc": { + "high_school_european_history": 0.75, + "business_ethics": 0.5656565656565656, + "clinical_knowledge": 0.6325757575757576, + "medical_genetics": 0.6464646464646465, + "high_school_us_history": 0.7635467980295566, + "high_school_physics": 0.35333333333333333, + "high_school_world_history": 0.7838983050847458, + "virology": 0.509090909090909, + "high_school_microeconomics": 0.6286919831223629, + "econometrics": 0.4424778761061947, + "college_computer_science": 0.42424242424242425, + "high_school_biology": 0.7605177993527508, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.49110320284697506, + "philosophy": 0.632258064516129, + "professional_medicine": 0.6346863468634686, + "nutrition": 0.6950819672131148, + "global_facts": 0.37373737373737376, + "machine_learning": 0.4774774774774775, + "security_studies": 0.6639344262295082, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.6268412438625205, + "prehistory": 0.7461300309597523, + "anatomy": 0.582089552238806, + "human_sexuality": 0.6846153846153846, + "college_medicine": 0.6162790697674418, + "high_school_government_and_politics": 0.7916666666666666, + "college_chemistry": 0.41414141414141414, + "logical_fallacies": 0.6666666666666666, + "high_school_geography": 0.7614213197969543, + "elementary_mathematics": 0.3978779840848806, + "human_aging": 0.6396396396396397, + "college_mathematics": 0.3333333333333333, + "high_school_psychology": 0.8014705882352942, + "formal_logic": 0.368, + "high_school_statistics": 0.5255813953488372, + "international_law": 0.7, + "high_school_mathematics": 0.3048327137546468, + "high_school_computer_science": 0.6363636363636364, + "conceptual_physics": 0.5299145299145299, + "miscellaneous": 0.7480818414322251, + "high_school_chemistry": 0.504950495049505, + "marketing": 0.8025751072961373, + "professional_law": 0.47162426614481406, + "management": 0.7254901960784313, + "college_physics": 0.3069306930693069, + "jurisprudence": 0.7102803738317757, + "world_religions": 0.8117647058823529, + "sociology": 0.75, + "us_foreign_policy": 0.7777777777777778, + "high_school_macroeconomics": 0.5681233933161953, + "computer_security": 0.6161616161616161, + "moral_scenarios": 0.24161073825503357, + "moral_disputes": 0.6405797101449275, + "electrical_engineering": 0.5625, + "astronomy": 0.6887417218543046, + "college_biology": 0.7482517482517482 + } + }, + "prompt_2": { + "accuracy": 0.594780121558813, + "category_acc": { + "high_school_european_history": 0.774390243902439, + "business_ethics": 0.6060606060606061, + "clinical_knowledge": 0.6628787878787878, + "medical_genetics": 0.6767676767676768, + "high_school_us_history": 0.7832512315270936, + "high_school_physics": 0.36, + "high_school_world_history": 0.8220338983050848, + "virology": 0.509090909090909, + "high_school_microeconomics": 0.6286919831223629, + "econometrics": 0.39823008849557523, + "college_computer_science": 0.48484848484848486, + "high_school_biology": 0.7669902912621359, + "abstract_algebra": 0.29292929292929293, + "professional_accounting": 0.4875444839857651, + "philosophy": 0.6870967741935484, + "professional_medicine": 0.6531365313653137, + "nutrition": 0.7016393442622951, + "global_facts": 0.3939393939393939, + "machine_learning": 0.44144144144144143, + "security_studies": 0.680327868852459, + "public_relations": 0.5963302752293578, + "professional_psychology": 0.6579378068739771, + "prehistory": 0.7275541795665634, + "anatomy": 0.582089552238806, + "human_sexuality": 0.7153846153846154, + "college_medicine": 0.5988372093023255, + "high_school_government_and_politics": 0.8125, + "college_chemistry": 0.42424242424242425, + "logical_fallacies": 0.7160493827160493, + "high_school_geography": 0.7563451776649747, + "elementary_mathematics": 0.41114058355437666, + "human_aging": 0.6666666666666666, + "college_mathematics": 0.3333333333333333, + "high_school_psychology": 0.8069852941176471, + "formal_logic": 0.432, + "high_school_statistics": 0.5023255813953489, + "international_law": 0.7, + "high_school_mathematics": 0.3345724907063197, + "high_school_computer_science": 0.6262626262626263, + "conceptual_physics": 0.5128205128205128, + "miscellaneous": 0.7391304347826086, + "high_school_chemistry": 0.5297029702970297, + "marketing": 0.8025751072961373, + "professional_law": 0.4644487932159165, + "management": 0.7941176470588235, + "college_physics": 0.3465346534653465, + "jurisprudence": 0.7476635514018691, + "world_religions": 0.7823529411764706, + "sociology": 0.8, + "us_foreign_policy": 0.797979797979798, + "high_school_macroeconomics": 0.5732647814910026, + "computer_security": 0.6565656565656566, + "moral_scenarios": 0.24161073825503357, + "moral_disputes": 0.6231884057971014, + "electrical_engineering": 0.5555555555555556, + "astronomy": 0.6887417218543046, + "college_biology": 0.7552447552447552 + } + }, + "prompt_3": { + "accuracy": 0.6030747229174115, + "category_acc": { + "high_school_european_history": 0.7987804878048781, + "business_ethics": 0.6262626262626263, + "clinical_knowledge": 0.6742424242424242, + "medical_genetics": 0.7070707070707071, + "high_school_us_history": 0.7783251231527094, + "high_school_physics": 0.3333333333333333, + "high_school_world_history": 0.809322033898305, + "virology": 0.5515151515151515, + "high_school_microeconomics": 0.6286919831223629, + "econometrics": 0.4424778761061947, + "college_computer_science": 0.48484848484848486, + "high_school_biology": 0.7637540453074434, + "abstract_algebra": 0.29292929292929293, + "professional_accounting": 0.5160142348754448, + "philosophy": 0.6741935483870968, + "professional_medicine": 0.6457564575645757, + "nutrition": 0.6918032786885245, + "global_facts": 0.40404040404040403, + "machine_learning": 0.36936936936936937, + "security_studies": 0.6885245901639344, + "public_relations": 0.6422018348623854, + "professional_psychology": 0.6481178396072013, + "prehistory": 0.7120743034055728, + "anatomy": 0.5895522388059702, + "human_sexuality": 0.7076923076923077, + "college_medicine": 0.6046511627906976, + "high_school_government_and_politics": 0.828125, + "college_chemistry": 0.4444444444444444, + "logical_fallacies": 0.7283950617283951, + "high_school_geography": 0.7918781725888325, + "elementary_mathematics": 0.4509283819628647, + "human_aging": 0.6891891891891891, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.8106617647058824, + "formal_logic": 0.384, + "high_school_statistics": 0.5581395348837209, + "international_law": 0.7083333333333334, + "high_school_mathematics": 0.35687732342007433, + "high_school_computer_science": 0.6565656565656566, + "conceptual_physics": 0.5470085470085471, + "miscellaneous": 0.7442455242966752, + "high_school_chemistry": 0.5495049504950495, + "marketing": 0.8240343347639485, + "professional_law": 0.4794520547945205, + "management": 0.7843137254901961, + "college_physics": 0.36633663366336633, + "jurisprudence": 0.719626168224299, + "world_religions": 0.7823529411764706, + "sociology": 0.77, + "us_foreign_policy": 0.8686868686868687, + "high_school_macroeconomics": 0.6066838046272494, + "computer_security": 0.6565656565656566, + "moral_scenarios": 0.24272930648769575, + "moral_disputes": 0.6260869565217392, + "electrical_engineering": 0.5625, + "astronomy": 0.6821192052980133, + "college_biology": 0.7552447552447552 + } + }, + "prompt_4": { + "accuracy": 0.5804790847336432, + "category_acc": { + "high_school_european_history": 0.7804878048780488, + "business_ethics": 0.5858585858585859, + "clinical_knowledge": 0.625, + "medical_genetics": 0.6868686868686869, + "high_school_us_history": 0.7684729064039408, + "high_school_physics": 0.3466666666666667, + "high_school_world_history": 0.8008474576271186, + "virology": 0.5212121212121212, + "high_school_microeconomics": 0.6160337552742616, + "econometrics": 0.4690265486725664, + "college_computer_science": 0.41414141414141414, + "high_school_biology": 0.7702265372168284, + "abstract_algebra": 0.31313131313131315, + "professional_accounting": 0.4875444839857651, + "philosophy": 0.667741935483871, + "professional_medicine": 0.6199261992619927, + "nutrition": 0.6622950819672131, + "global_facts": 0.32323232323232326, + "machine_learning": 0.45045045045045046, + "security_studies": 0.6639344262295082, + "public_relations": 0.6238532110091743, + "professional_psychology": 0.6268412438625205, + "prehistory": 0.718266253869969, + "anatomy": 0.5522388059701493, + "human_sexuality": 0.6461538461538462, + "college_medicine": 0.622093023255814, + "high_school_government_and_politics": 0.7760416666666666, + "college_chemistry": 0.42424242424242425, + "logical_fallacies": 0.6604938271604939, + "high_school_geography": 0.7208121827411168, + "elementary_mathematics": 0.38992042440318303, + "human_aging": 0.6306306306306306, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.7867647058823529, + "formal_logic": 0.4, + "high_school_statistics": 0.4883720930232558, + "international_law": 0.6583333333333333, + "high_school_mathematics": 0.31970260223048325, + "high_school_computer_science": 0.6060606060606061, + "conceptual_physics": 0.5555555555555556, + "miscellaneous": 0.6508951406649617, + "high_school_chemistry": 0.5445544554455446, + "marketing": 0.8369098712446352, + "professional_law": 0.47423352902804955, + "management": 0.6862745098039216, + "college_physics": 0.37623762376237624, + "jurisprudence": 0.7757009345794392, + "world_religions": 0.7823529411764706, + "sociology": 0.735, + "us_foreign_policy": 0.797979797979798, + "high_school_macroeconomics": 0.570694087403599, + "computer_security": 0.6262626262626263, + "moral_scenarios": 0.23937360178970918, + "moral_disputes": 0.6231884057971014, + "electrical_engineering": 0.6111111111111112, + "astronomy": 0.6754966887417219, + "college_biology": 0.7482517482517482 + } + }, + "prompt_5": { + "accuracy": 0.5931355023239184, + "category_acc": { + "high_school_european_history": 0.75, + "business_ethics": 0.6060606060606061, + "clinical_knowledge": 0.6515151515151515, + "medical_genetics": 0.6666666666666666, + "high_school_us_history": 0.7783251231527094, + "high_school_physics": 0.32666666666666666, + "high_school_world_history": 0.8050847457627118, + "virology": 0.509090909090909, + "high_school_microeconomics": 0.6371308016877637, + "econometrics": 0.4690265486725664, + "college_computer_science": 0.48484848484848486, + "high_school_biology": 0.7508090614886731, + "abstract_algebra": 0.29292929292929293, + "professional_accounting": 0.5160142348754448, + "philosophy": 0.6580645161290323, + "professional_medicine": 0.6051660516605166, + "nutrition": 0.6852459016393443, + "global_facts": 0.3838383838383838, + "machine_learning": 0.43243243243243246, + "security_studies": 0.6475409836065574, + "public_relations": 0.5871559633027523, + "professional_psychology": 0.6268412438625205, + "prehistory": 0.7275541795665634, + "anatomy": 0.5671641791044776, + "human_sexuality": 0.7076923076923077, + "college_medicine": 0.6162790697674418, + "high_school_government_and_politics": 0.8072916666666666, + "college_chemistry": 0.42424242424242425, + "logical_fallacies": 0.6296296296296297, + "high_school_geography": 0.8020304568527918, + "elementary_mathematics": 0.4376657824933687, + "human_aging": 0.6621621621621622, + "college_mathematics": 0.32323232323232326, + "high_school_psychology": 0.7977941176470589, + "formal_logic": 0.368, + "high_school_statistics": 0.5162790697674419, + "international_law": 0.6916666666666667, + "high_school_mathematics": 0.30111524163568776, + "high_school_computer_science": 0.6161616161616161, + "conceptual_physics": 0.5811965811965812, + "miscellaneous": 0.7851662404092071, + "high_school_chemistry": 0.5099009900990099, + "marketing": 0.8454935622317596, + "professional_law": 0.4657534246575342, + "management": 0.7156862745098039, + "college_physics": 0.3465346534653465, + "jurisprudence": 0.7383177570093458, + "world_religions": 0.8176470588235294, + "sociology": 0.72, + "us_foreign_policy": 0.8080808080808081, + "high_school_macroeconomics": 0.5604113110539846, + "computer_security": 0.6666666666666666, + "moral_scenarios": 0.24272930648769575, + "moral_disputes": 0.6579710144927536, + "electrical_engineering": 0.5833333333333334, + "astronomy": 0.7019867549668874, + "college_biology": 0.7622377622377622 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4138187221396731 + }, + "prompt_2": { + "accuracy": 0.4309063893016345 + }, + "prompt_3": { + "accuracy": 0.4063893016344725 + }, + "prompt_4": { + "accuracy": 0.4026745913818722 + }, + "prompt_5": { + "accuracy": 0.3179791976225854 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.39975093399750933, + "category_acc": { + "computer_network": 0.5, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.5476190476190477, + "college_physics": 0.5, + "college_chemistry": 0.41379310344827586, + "advanced_mathematics": 0.4583333333333333, + "probability_and_statistics": 0.08695652173913043, + "discrete_mathematics": 0.09523809523809523, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.5, + "high_school_biology": 0.4583333333333333, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.46153846153846156, + "middle_school_physics": 0.625, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.5714285714285714, + "college_economics": 0.38333333333333336, + "business_administration": 0.2894736842105263, + "marxism": 0.5833333333333334, + "mao_zedong_thought": 0.6551724137931034, + "education_science": 0.5294117647058824, + "teacher_qualification": 0.5510204081632653, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.375, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.14814814814814814, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.4642857142857143, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.56, + "middle_school_history": 0.4444444444444444, + "civil_servant": 0.3269230769230769, + "sports_science": 0.5416666666666666, + "plant_protection": 0.6296296296296297, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.43137254901960786, + "accountant": 0.3148148148148148, + "fire_engineer": 0.4166666666666667, + "environmental_impact_assessment_engineer": 0.3611111111111111, + "tax_accountant": 0.2962962962962963, + "physician": 0.46296296296296297 + } + }, + "prompt_2": { + "accuracy": 0.4277708592777086, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.5833333333333334, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.5, + "college_physics": 0.5, + "college_chemistry": 0.41379310344827586, + "advanced_mathematics": 0.4583333333333333, + "probability_and_statistics": 0.13043478260869565, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.21428571428571427, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.4166666666666667, + "high_school_chemistry": 0.5, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.6153846153846154, + "middle_school_physics": 0.5416666666666666, + "middle_school_chemistry": 0.52, + "veterinary_medicine": 0.5714285714285714, + "college_economics": 0.48333333333333334, + "business_administration": 0.34210526315789475, + "marxism": 0.5833333333333334, + "mao_zedong_thought": 0.5172413793103449, + "education_science": 0.5882352941176471, + "teacher_qualification": 0.5306122448979592, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.5769230769230769, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.2962962962962963, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.39473684210526316, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.52, + "middle_school_history": 0.5185185185185185, + "civil_servant": 0.3269230769230769, + "sports_science": 0.5833333333333334, + "plant_protection": 0.5555555555555556, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.48148148148148145, + "urban_and_rural_planner": 0.5294117647058824, + "accountant": 0.37037037037037035, + "fire_engineer": 0.4166666666666667, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.4074074074074074, + "physician": 0.5 + } + }, + "prompt_3": { + "accuracy": 0.4122042341220423, + "category_acc": { + "computer_network": 0.5, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.5238095238095238, + "college_physics": 0.5, + "college_chemistry": 0.41379310344827586, + "advanced_mathematics": 0.4583333333333333, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.19047619047619047, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.375, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.6153846153846154, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.5357142857142857, + "college_economics": 0.45, + "business_administration": 0.34210526315789475, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.5517241379310345, + "education_science": 0.5294117647058824, + "teacher_qualification": 0.5306122448979592, + "high_school_politics": 0.25, + "high_school_geography": 0.5833333333333334, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.39285714285714285, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.25925925925925924, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.5, + "art_studies": 0.4473684210526316, + "professional_tour_guide": 0.4117647058823529, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.52, + "middle_school_history": 0.5555555555555556, + "civil_servant": 0.3269230769230769, + "sports_science": 0.5, + "plant_protection": 0.5925925925925926, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.5490196078431373, + "accountant": 0.3333333333333333, + "fire_engineer": 0.4444444444444444, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.3333333333333333, + "physician": 0.42592592592592593 + } + }, + "prompt_4": { + "accuracy": 0.4202988792029888, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.5416666666666666, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.5238095238095238, + "college_physics": 0.5, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.4583333333333333, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.4166666666666667, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.5, + "middle_school_physics": 0.5416666666666666, + "middle_school_chemistry": 0.52, + "veterinary_medicine": 0.6071428571428571, + "college_economics": 0.4, + "business_administration": 0.34210526315789475, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.4827586206896552, + "education_science": 0.5, + "teacher_qualification": 0.5102040816326531, + "high_school_politics": 0.20833333333333334, + "high_school_geography": 0.5416666666666666, + "middle_school_politics": 0.5769230769230769, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.2222222222222222, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.5, + "art_studies": 0.47368421052631576, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.56, + "middle_school_history": 0.48148148148148145, + "civil_servant": 0.40384615384615385, + "sports_science": 0.4583333333333333, + "plant_protection": 0.5185185185185185, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.45098039215686275, + "accountant": 0.37037037037037035, + "fire_engineer": 0.5, + "environmental_impact_assessment_engineer": 0.3611111111111111, + "tax_accountant": 0.3148148148148148, + "physician": 0.46296296296296297 + } + }, + "prompt_5": { + "accuracy": 0.3150684931506849, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.25, + "computer_architecture": 0.5384615384615384, + "college_programming": 0.47619047619047616, + "college_physics": 0.25, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.4166666666666667, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.375, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.38461538461538464, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.35, + "business_administration": 0.15789473684210525, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.5172413793103449, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.3469387755102041, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.375, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.17647058823529413, + "modern_chinese_history": 0.17857142857142858, + "ideological_and_moral_cultivation": 0.375, + "logic": 0.14814814814814814, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.4473684210526316, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.25, + "high_school_history": 0.44, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.3076923076923077, + "sports_science": 0.3333333333333333, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.375, + "clinical_medicine": 0.14814814814814814, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.25925925925925924, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.2777777777777778, + "physician": 0.37037037037037035 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.48028673835125446 + }, + "prompt_2": { + "accuracy": 0.5089605734767025 + }, + "prompt_3": { + "accuracy": 0.4767025089605735 + }, + "prompt_4": { + "accuracy": 0.4731182795698925 + }, + "prompt_5": { + "accuracy": 0.3906810035842294 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3975133828354343, + "category_acc": { + "agronomy": 0.3609467455621302, + "anatomy": 0.30405405405405406, + "ancient_chinese": 0.22560975609756098, + "arts": 0.46875, + "astronomy": 0.30303030303030304, + "business_ethics": 0.3923444976076555, + "chinese_civil_service_exam": 0.3625, + "chinese_driving_rule": 0.5648854961832062, + "chinese_food_culture": 0.3088235294117647, + "chinese_foreign_policy": 0.45794392523364486, + "chinese_history": 0.4117647058823529, + "chinese_literature": 0.3333333333333333, + "chinese_teacher_qualification": 0.5027932960893855, + "clinical_knowledge": 0.38396624472573837, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.45794392523364486, + "college_engineering_hydrology": 0.49056603773584906, + "college_law": 0.2962962962962963, + "college_mathematics": 0.29523809523809524, + "college_medical_statistics": 0.41509433962264153, + "college_medicine": 0.42857142857142855, + "computer_science": 0.4264705882352941, + "computer_security": 0.5146198830409356, + "conceptual_physics": 0.3333333333333333, + "construction_project_management": 0.3597122302158273, + "economics": 0.42138364779874216, + "education": 0.4171779141104294, + "electrical_engineering": 0.46511627906976744, + "elementary_chinese": 0.2857142857142857, + "elementary_commonsense": 0.3838383838383838, + "elementary_information_and_technology": 0.5672268907563025, + "elementary_mathematics": 0.33043478260869563, + "ethnology": 0.34074074074074073, + "food_science": 0.44755244755244755, + "genetics": 0.42045454545454547, + "global_facts": 0.37583892617449666, + "high_school_biology": 0.3076923076923077, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.3389830508474576, + "high_school_mathematics": 0.2926829268292683, + "high_school_physics": 0.35454545454545455, + "high_school_politics": 0.3706293706293706, + "human_sexuality": 0.4365079365079365, + "international_law": 0.3675675675675676, + "journalism": 0.3953488372093023, + "jurisprudence": 0.36009732360097324, + "legal_and_moral_basis": 0.5654205607476636, + "logical": 0.5203252032520326, + "machine_learning": 0.4344262295081967, + "management": 0.4, + "marketing": 0.40555555555555556, + "marxist_theory": 0.41798941798941797, + "modern_chinese": 0.3706896551724138, + "nutrition": 0.43448275862068964, + "philosophy": 0.41904761904761906, + "professional_accounting": 0.4342857142857143, + "professional_law": 0.33649289099526064, + "professional_medicine": 0.34308510638297873, + "professional_psychology": 0.41379310344827586, + "public_relations": 0.5172413793103449, + "security_study": 0.4888888888888889, + "sociology": 0.47345132743362833, + "sports_science": 0.3878787878787879, + "traditional_chinese_medicine": 0.35135135135135137, + "virology": 0.47337278106508873, + "world_history": 0.40993788819875776, + "world_religions": 0.46875 + } + }, + "prompt_2": { + "accuracy": 0.4317043688482127, + "category_acc": { + "agronomy": 0.4556213017751479, + "anatomy": 0.2905405405405405, + "ancient_chinese": 0.1951219512195122, + "arts": 0.5, + "astronomy": 0.4, + "business_ethics": 0.4449760765550239, + "chinese_civil_service_exam": 0.35625, + "chinese_driving_rule": 0.5114503816793893, + "chinese_food_culture": 0.36764705882352944, + "chinese_foreign_policy": 0.5046728971962616, + "chinese_history": 0.4458204334365325, + "chinese_literature": 0.3382352941176471, + "chinese_teacher_qualification": 0.5586592178770949, + "clinical_knowledge": 0.43037974683544306, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.514018691588785, + "college_engineering_hydrology": 0.4339622641509434, + "college_law": 0.37962962962962965, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.4056603773584906, + "college_medicine": 0.4358974358974359, + "computer_science": 0.45098039215686275, + "computer_security": 0.6198830409356725, + "conceptual_physics": 0.38095238095238093, + "construction_project_management": 0.38848920863309355, + "economics": 0.5220125786163522, + "education": 0.4601226993865031, + "electrical_engineering": 0.48255813953488375, + "elementary_chinese": 0.3055555555555556, + "elementary_commonsense": 0.3484848484848485, + "elementary_information_and_technology": 0.6722689075630253, + "elementary_mathematics": 0.34347826086956523, + "ethnology": 0.362962962962963, + "food_science": 0.4965034965034965, + "genetics": 0.4715909090909091, + "global_facts": 0.4429530201342282, + "high_school_biology": 0.3668639053254438, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.4067796610169492, + "high_school_mathematics": 0.34146341463414637, + "high_school_physics": 0.32727272727272727, + "high_school_politics": 0.44755244755244755, + "human_sexuality": 0.42857142857142855, + "international_law": 0.3783783783783784, + "journalism": 0.45930232558139533, + "jurisprudence": 0.38686131386861317, + "legal_and_moral_basis": 0.677570093457944, + "logical": 0.44715447154471544, + "machine_learning": 0.4344262295081967, + "management": 0.49523809523809526, + "marketing": 0.48333333333333334, + "marxist_theory": 0.4656084656084656, + "modern_chinese": 0.35344827586206895, + "nutrition": 0.5241379310344828, + "philosophy": 0.49523809523809526, + "professional_accounting": 0.5085714285714286, + "professional_law": 0.3222748815165877, + "professional_medicine": 0.3829787234042553, + "professional_psychology": 0.4525862068965517, + "public_relations": 0.5287356321839081, + "security_study": 0.5259259259259259, + "sociology": 0.4557522123893805, + "sports_science": 0.4, + "traditional_chinese_medicine": 0.34594594594594597, + "virology": 0.5207100591715976, + "world_history": 0.45962732919254656, + "world_religions": 0.46875 + } + }, + "prompt_3": { + "accuracy": 0.4194439647729235, + "category_acc": { + "agronomy": 0.4556213017751479, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.21341463414634146, + "arts": 0.54375, + "astronomy": 0.3575757575757576, + "business_ethics": 0.4258373205741627, + "chinese_civil_service_exam": 0.33125, + "chinese_driving_rule": 0.549618320610687, + "chinese_food_culture": 0.375, + "chinese_foreign_policy": 0.5607476635514018, + "chinese_history": 0.43343653250773995, + "chinese_literature": 0.31862745098039214, + "chinese_teacher_qualification": 0.4972067039106145, + "clinical_knowledge": 0.3755274261603376, + "college_actuarial_science": 0.22641509433962265, + "college_education": 0.514018691588785, + "college_engineering_hydrology": 0.41509433962264153, + "college_law": 0.3611111111111111, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.4339622641509434, + "college_medicine": 0.41025641025641024, + "computer_science": 0.45098039215686275, + "computer_security": 0.5321637426900585, + "conceptual_physics": 0.36054421768707484, + "construction_project_management": 0.3597122302158273, + "economics": 0.4968553459119497, + "education": 0.49693251533742333, + "electrical_engineering": 0.47093023255813954, + "elementary_chinese": 0.2976190476190476, + "elementary_commonsense": 0.4292929292929293, + "elementary_information_and_technology": 0.5840336134453782, + "elementary_mathematics": 0.36086956521739133, + "ethnology": 0.3925925925925926, + "food_science": 0.4755244755244755, + "genetics": 0.4715909090909091, + "global_facts": 0.47651006711409394, + "high_school_biology": 0.27218934911242604, + "high_school_chemistry": 0.2196969696969697, + "high_school_geography": 0.4067796610169492, + "high_school_mathematics": 0.2926829268292683, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.32867132867132864, + "human_sexuality": 0.40476190476190477, + "international_law": 0.3675675675675676, + "journalism": 0.46511627906976744, + "jurisprudence": 0.3722627737226277, + "legal_and_moral_basis": 0.6728971962616822, + "logical": 0.43902439024390244, + "machine_learning": 0.4098360655737705, + "management": 0.49523809523809526, + "marketing": 0.5111111111111111, + "marxist_theory": 0.4708994708994709, + "modern_chinese": 0.3017241379310345, + "nutrition": 0.42758620689655175, + "philosophy": 0.5333333333333333, + "professional_accounting": 0.5028571428571429, + "professional_law": 0.35071090047393366, + "professional_medicine": 0.35106382978723405, + "professional_psychology": 0.4482758620689655, + "public_relations": 0.4885057471264368, + "security_study": 0.5185185185185185, + "sociology": 0.47345132743362833, + "sports_science": 0.4121212121212121, + "traditional_chinese_medicine": 0.3783783783783784, + "virology": 0.5088757396449705, + "world_history": 0.43478260869565216, + "world_religions": 0.48125 + } + }, + "prompt_4": { + "accuracy": 0.4064064928337075, + "category_acc": { + "agronomy": 0.4319526627218935, + "anatomy": 0.2702702702702703, + "ancient_chinese": 0.21341463414634146, + "arts": 0.55625, + "astronomy": 0.3393939393939394, + "business_ethics": 0.3923444976076555, + "chinese_civil_service_exam": 0.34375, + "chinese_driving_rule": 0.5419847328244275, + "chinese_food_culture": 0.29411764705882354, + "chinese_foreign_policy": 0.4953271028037383, + "chinese_history": 0.3684210526315789, + "chinese_literature": 0.29901960784313725, + "chinese_teacher_qualification": 0.4972067039106145, + "clinical_knowledge": 0.35864978902953587, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.5046728971962616, + "college_engineering_hydrology": 0.4716981132075472, + "college_law": 0.3425925925925926, + "college_mathematics": 0.3142857142857143, + "college_medical_statistics": 0.4056603773584906, + "college_medicine": 0.40293040293040294, + "computer_science": 0.44607843137254904, + "computer_security": 0.4853801169590643, + "conceptual_physics": 0.41496598639455784, + "construction_project_management": 0.37410071942446044, + "economics": 0.5220125786163522, + "education": 0.39263803680981596, + "electrical_engineering": 0.47093023255813954, + "elementary_chinese": 0.3134920634920635, + "elementary_commonsense": 0.398989898989899, + "elementary_information_and_technology": 0.6260504201680672, + "elementary_mathematics": 0.33043478260869563, + "ethnology": 0.37777777777777777, + "food_science": 0.5034965034965035, + "genetics": 0.44886363636363635, + "global_facts": 0.4228187919463087, + "high_school_biology": 0.3076923076923077, + "high_school_chemistry": 0.25, + "high_school_geography": 0.3474576271186441, + "high_school_mathematics": 0.31097560975609756, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.3706293706293706, + "human_sexuality": 0.47619047619047616, + "international_law": 0.31351351351351353, + "journalism": 0.4418604651162791, + "jurisprudence": 0.340632603406326, + "legal_and_moral_basis": 0.5654205607476636, + "logical": 0.4878048780487805, + "machine_learning": 0.45901639344262296, + "management": 0.45714285714285713, + "marketing": 0.43333333333333335, + "marxist_theory": 0.4656084656084656, + "modern_chinese": 0.3706896551724138, + "nutrition": 0.496551724137931, + "philosophy": 0.41904761904761906, + "professional_accounting": 0.4514285714285714, + "professional_law": 0.35071090047393366, + "professional_medicine": 0.3377659574468085, + "professional_psychology": 0.44396551724137934, + "public_relations": 0.47126436781609193, + "security_study": 0.562962962962963, + "sociology": 0.3805309734513274, + "sports_science": 0.4484848484848485, + "traditional_chinese_medicine": 0.32432432432432434, + "virology": 0.5029585798816568, + "world_history": 0.43478260869565216, + "world_religions": 0.46875 + } + }, + "prompt_5": { + "accuracy": 0.32844068381972025, + "category_acc": { + "agronomy": 0.28402366863905326, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.2926829268292683, + "arts": 0.4375, + "astronomy": 0.2727272727272727, + "business_ethics": 0.3253588516746411, + "chinese_civil_service_exam": 0.325, + "chinese_driving_rule": 0.3893129770992366, + "chinese_food_culture": 0.3602941176470588, + "chinese_foreign_policy": 0.32710280373831774, + "chinese_history": 0.34055727554179566, + "chinese_literature": 0.3137254901960784, + "chinese_teacher_qualification": 0.3575418994413408, + "clinical_knowledge": 0.31223628691983124, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.3925233644859813, + "college_engineering_hydrology": 0.3018867924528302, + "college_law": 0.28703703703703703, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.37735849056603776, + "college_medicine": 0.3772893772893773, + "computer_science": 0.38235294117647056, + "computer_security": 0.3333333333333333, + "conceptual_physics": 0.3401360544217687, + "construction_project_management": 0.28776978417266186, + "economics": 0.31446540880503143, + "education": 0.3496932515337423, + "electrical_engineering": 0.36627906976744184, + "elementary_chinese": 0.3134920634920635, + "elementary_commonsense": 0.3434343434343434, + "elementary_information_and_technology": 0.3319327731092437, + "elementary_mathematics": 0.3173913043478261, + "ethnology": 0.362962962962963, + "food_science": 0.34965034965034963, + "genetics": 0.30113636363636365, + "global_facts": 0.348993288590604, + "high_school_biology": 0.2485207100591716, + "high_school_chemistry": 0.23484848484848486, + "high_school_geography": 0.3389830508474576, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.2636363636363636, + "high_school_politics": 0.2867132867132867, + "human_sexuality": 0.35714285714285715, + "international_law": 0.2864864864864865, + "journalism": 0.4127906976744186, + "jurisprudence": 0.30656934306569344, + "legal_and_moral_basis": 0.4485981308411215, + "logical": 0.34146341463414637, + "machine_learning": 0.319672131147541, + "management": 0.3523809523809524, + "marketing": 0.34444444444444444, + "marxist_theory": 0.3544973544973545, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.3103448275862069, + "philosophy": 0.3904761904761905, + "professional_accounting": 0.36, + "professional_law": 0.2796208530805687, + "professional_medicine": 0.2553191489361702, + "professional_psychology": 0.35344827586206895, + "public_relations": 0.3850574712643678, + "security_study": 0.3333333333333333, + "sociology": 0.3672566371681416, + "sports_science": 0.2787878787878788, + "traditional_chinese_medicine": 0.3081081081081081, + "virology": 0.378698224852071, + "world_history": 0.2981366459627329, + "world_religions": 0.40625 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.2727272727272727 + }, + "prompt_2": { + "accuracy": 0.21212121212121213 + }, + "prompt_3": { + "accuracy": 0.24242424242424243 + }, + "prompt_4": { + "accuracy": 0.12121212121212122 + }, + "prompt_5": { + "accuracy": 0.2727272727272727 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5522727272727272 + }, + "prompt_2": { + "accuracy": 0.22045454545454546 + }, + "prompt_3": { + "accuracy": 0.19318181818181818 + }, + "prompt_4": { + "accuracy": 0.6022727272727273 + }, + "prompt_5": { + "accuracy": 0.5363636363636364 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3423728813559322 + }, + "prompt_2": { + "accuracy": 0.34576271186440677 + }, + "prompt_3": { + "accuracy": 0.32610169491525426 + }, + "prompt_4": { + "accuracy": 0.3423728813559322 + }, + "prompt_5": { + "accuracy": 0.32508474576271185 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6619296933433059 + }, + "prompt_2": { + "accuracy": 0.6264023934181002 + }, + "prompt_3": { + "accuracy": 0.6024682124158564 + }, + "prompt_4": { + "accuracy": 0.6510845175766642 + }, + "prompt_5": { + "accuracy": 0.5314136125654451 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8677119059284665 + }, + "prompt_2": { + "accuracy": 0.8162665360117589 + }, + "prompt_3": { + "accuracy": 0.8451739343459088 + }, + "prompt_4": { + "accuracy": 0.8784909358157765 + }, + "prompt_5": { + "accuracy": 0.8784909358157765 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.3148466900867728, + "rouge2": 0.13439425963384796, + "rougeL": 0.2493723840004376, + "avg_rouge": 0.23287111124035278 + }, + "prompt_2": { + "rouge1": 0.3760284317692511, + "rouge2": 0.1703326819107988, + "rougeL": 0.30239763225528216, + "avg_rouge": 0.282919581978444 + }, + "prompt_3": { + "rouge1": 0.321710625396653, + "rouge2": 0.13296655744055622, + "rougeL": 0.2552227836573773, + "avg_rouge": 0.2366333221648622 + }, + "prompt_4": { + "rouge1": 0.3492817033666253, + "rouge2": 0.1568905106341149, + "rougeL": 0.27974950173421925, + "avg_rouge": 0.2619739052449865 + }, + "prompt_5": { + "rouge1": 0.3596885364704884, + "rouge2": 0.15550623284418724, + "rougeL": 0.2853950264312206, + "avg_rouge": 0.2668632652486321 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2180485502100887, + "rouge2": 0.06097714440972195, + "rougeL": 0.16243716383480442, + "avg_rouge": 0.14715428615153836 + }, + "prompt_2": { + "rouge1": 0.21923428230963032, + "rouge2": 0.06078194919283872, + "rougeL": 0.16199188272866893, + "avg_rouge": 0.14733603807704598 + }, + "prompt_3": { + "rouge1": 0.21698867466943603, + "rouge2": 0.062382836391260764, + "rougeL": 0.16111354597032204, + "avg_rouge": 0.14682835234367295 + }, + "prompt_4": { + "rouge1": 0.22413012734377513, + "rouge2": 0.06340741342797845, + "rougeL": 0.16625918343029397, + "avg_rouge": 0.15126557473401583 + }, + "prompt_5": { + "rouge1": 0.19968281716481368, + "rouge2": 0.06461254234379582, + "rougeL": 0.15056622645650905, + "avg_rouge": 0.1382871953217062 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 - }, - "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8738532110091743 + }, + "prompt_2": { + "accuracy": 0.7809633027522935 + }, + "prompt_3": { + "accuracy": 0.8772935779816514 + }, + "prompt_4": { + "accuracy": 0.8692660550458715 + }, + "prompt_5": { + "accuracy": 0.5527522935779816 + } }, - "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "cola": { + "prompt_1": { + "accuracy": 0.3317353787152445 + }, + "prompt_2": { + "accuracy": 0.3211888782358581 + }, + "prompt_3": { + "accuracy": 0.36145733461169705 + }, + "prompt_4": { + "accuracy": 0.3231064237775647 + }, + "prompt_5": { + "accuracy": 0.5589645254074784 + } + }, + "qqp": { + "prompt_1": { + "accuracy": 0.602 + }, + "prompt_2": { + "accuracy": 0.533 + }, + "prompt_3": { + "accuracy": 0.553 + }, + "prompt_4": { + "accuracy": 0.596 + }, + "prompt_5": { + "accuracy": 0.569 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.406 + }, + "prompt_2": { + "accuracy": 0.3745 + }, + "prompt_3": { + "accuracy": 0.404 + }, + "prompt_4": { + "accuracy": 0.463 + }, + "prompt_5": { + "accuracy": 0.447 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.513 + }, + "prompt_2": { + "accuracy": 0.5325 + }, + "prompt_3": { + "accuracy": 0.524 + }, + "prompt_4": { + "accuracy": 0.5205 + }, + "prompt_5": { + "accuracy": 0.531 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6056338028169014 + }, + "prompt_2": { + "accuracy": 0.5774647887323944 + }, + "prompt_3": { + "accuracy": 0.5915492957746479 + }, + "prompt_4": { + "accuracy": 0.43661971830985913 + }, + "prompt_5": { + "accuracy": 0.5070422535211268 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6462093862815884 + }, + "prompt_2": { + "accuracy": 0.5703971119133574 + }, + "prompt_3": { + "accuracy": 0.6173285198555957 + }, + "prompt_4": { + "accuracy": 0.5595667870036101 + }, + "prompt_5": { + "accuracy": 0.628158844765343 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6838235294117647 + }, + "prompt_2": { + "accuracy": 0.6053921568627451 + }, + "prompt_3": { + "accuracy": 0.6642156862745098 + }, + "prompt_4": { + "accuracy": 0.6715686274509803 + }, + "prompt_5": { + "accuracy": 0.6740196078431373 + } } }, "five_shot": { @@ -13862,235 +122964,3250 @@ "model_link": "https://huggingface.co/google/gemma-7b-it", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.43523809523809526, + "language_acc": { + "Vietnamese": 0.4066666666666667, + "English": 0.5666666666666667, + "Malay": 0.44, + "Chinese": 0.36, + "Filipino": 0.3933333333333333, + "Indonesian": 0.41333333333333333, + "Spanish": 0.4666666666666667 + }, + "consistency_score_2": 0.493015873015873, + "consistency_score_3": 0.3224761904761904, + "consistency_score_4": 0.2441904761904762, + "consistency_score_5": 0.2, + "consistency_score_6": 0.17047619047619045, + "consistency_score_7": 0.14666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.54, + "Vietnamese,Malay": 0.5066666666666667, + "Vietnamese,Chinese": 0.37333333333333335, + "Vietnamese,Filipino": 0.49333333333333335, + "Vietnamese,Indonesian": 0.5466666666666666, + "Vietnamese,Spanish": 0.5266666666666666, + "English,Malay": 0.5466666666666666, + "English,Chinese": 0.44666666666666666, + "English,Filipino": 0.52, + "English,Indonesian": 0.52, + "English,Spanish": 0.6, + "Malay,Chinese": 0.43333333333333335, + "Malay,Filipino": 0.52, + "Malay,Indonesian": 0.5466666666666666, + "Malay,Spanish": 0.4866666666666667, + "Chinese,Filipino": 0.38, + "Chinese,Indonesian": 0.4266666666666667, + "Chinese,Spanish": 0.46, + "Filipino,Indonesian": 0.52, + "Filipino,Spanish": 0.4666666666666667, + "Indonesian,Spanish": 0.49333333333333335 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.37333333333333335, + "Vietnamese,English,Chinese": 0.28, + "Vietnamese,English,Filipino": 0.34, + "Vietnamese,English,Indonesian": 0.37333333333333335, + "Vietnamese,English,Spanish": 0.4, + "Vietnamese,Malay,Chinese": 0.25333333333333335, + "Vietnamese,Malay,Filipino": 0.3333333333333333, + "Vietnamese,Malay,Indonesian": 0.38666666666666666, + "Vietnamese,Malay,Spanish": 0.3466666666666667, + "Vietnamese,Chinese,Filipino": 0.24, + "Vietnamese,Chinese,Indonesian": 0.2733333333333333, + "Vietnamese,Chinese,Spanish": 0.26666666666666666, + "Vietnamese,Filipino,Indonesian": 0.35333333333333333, + "Vietnamese,Filipino,Spanish": 0.32666666666666666, + "Vietnamese,Indonesian,Spanish": 0.38, + "English,Malay,Chinese": 0.2866666666666667, + "English,Malay,Filipino": 0.35333333333333333, + "English,Malay,Indonesian": 0.38, + "English,Malay,Spanish": 0.3933333333333333, + "English,Chinese,Filipino": 0.26666666666666666, + "English,Chinese,Indonesian": 0.28, + "English,Chinese,Spanish": 0.32, + "English,Filipino,Indonesian": 0.36666666666666664, + "English,Filipino,Spanish": 0.36, + "English,Indonesian,Spanish": 0.38666666666666666, + "Malay,Chinese,Filipino": 0.26666666666666666, + "Malay,Chinese,Indonesian": 0.28, + "Malay,Chinese,Spanish": 0.2733333333333333, + "Malay,Filipino,Indonesian": 0.35333333333333333, + "Malay,Filipino,Spanish": 0.31333333333333335, + "Malay,Indonesian,Spanish": 0.3466666666666667, + "Chinese,Filipino,Indonesian": 0.2733333333333333, + "Chinese,Filipino,Spanish": 0.26, + "Chinese,Indonesian,Spanish": 0.2733333333333333, + "Filipino,Indonesian,Spanish": 0.32666666666666666 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.21333333333333335, + "Vietnamese,English,Malay,Filipino": 0.2733333333333333, + "Vietnamese,English,Malay,Indonesian": 0.32, + "Vietnamese,English,Malay,Spanish": 0.3, + "Vietnamese,English,Chinese,Filipino": 0.2, + "Vietnamese,English,Chinese,Indonesian": 0.21333333333333335, + "Vietnamese,English,Chinese,Spanish": 0.23333333333333334, + "Vietnamese,English,Filipino,Indonesian": 0.2866666666666667, + "Vietnamese,English,Filipino,Spanish": 0.2733333333333333, + "Vietnamese,English,Indonesian,Spanish": 0.30666666666666664, + "Vietnamese,Malay,Chinese,Filipino": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Indonesian": 0.21333333333333335, + "Vietnamese,Malay,Chinese,Spanish": 0.2, + "Vietnamese,Malay,Filipino,Indonesian": 0.28, + "Vietnamese,Malay,Filipino,Spanish": 0.24666666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.30666666666666664, + "Vietnamese,Chinese,Filipino,Indonesian": 0.21333333333333335, + "Vietnamese,Chinese,Filipino,Spanish": 0.19333333333333333, + "Vietnamese,Chinese,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Filipino,Indonesian,Spanish": 0.28, + "English,Malay,Chinese,Filipino": 0.21333333333333335, + "English,Malay,Chinese,Indonesian": 0.21333333333333335, + "English,Malay,Chinese,Spanish": 0.21333333333333335, + "English,Malay,Filipino,Indonesian": 0.29333333333333333, + "English,Malay,Filipino,Spanish": 0.28, + "English,Malay,Indonesian,Spanish": 0.3, + "English,Chinese,Filipino,Indonesian": 0.22666666666666666, + "English,Chinese,Filipino,Spanish": 0.22666666666666666, + "English,Chinese,Indonesian,Spanish": 0.23333333333333334, + "English,Filipino,Indonesian,Spanish": 0.2866666666666667, + "Malay,Chinese,Filipino,Indonesian": 0.21333333333333335, + "Malay,Chinese,Filipino,Spanish": 0.2, + "Malay,Chinese,Indonesian,Spanish": 0.21333333333333335, + "Malay,Filipino,Indonesian,Spanish": 0.26666666666666666, + "Chinese,Filipino,Indonesian,Spanish": 0.21333333333333335 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.17333333333333334, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.18666666666666668, + "Vietnamese,English,Malay,Chinese,Spanish": 0.17333333333333334, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.25333333333333335, + "Vietnamese,English,Malay,Filipino,Spanish": 0.23333333333333334, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.2733333333333333, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.18, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.18, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.24666666666666667, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.18, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.16666666666666666, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.18, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.24, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.18, + "English,Malay,Chinese,Filipino,Indonesian": 0.18666666666666668, + "English,Malay,Chinese,Filipino,Spanish": 0.18, + "English,Malay,Chinese,Indonesian,Spanish": 0.18, + "English,Malay,Filipino,Indonesian,Spanish": 0.24666666666666667, + "English,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.18 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.16666666666666666, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.15333333333333332, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.16, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.16666666666666666, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.16, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.16 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14666666666666667 + } + }, + "AC3_2": 0.46232883846234274, + "AC3_3": 0.3704666139471331, + "AC3_4": 0.3128540723370417, + "AC3_5": 0.2740629684725988, + "AC3_6": 0.2449925126879149, + "AC3_7": 0.21939989085154882 + }, + "prompt_2": { + "overall_acc": 0.4485714285714285, + "language_acc": { + "Vietnamese": 0.41333333333333333, + "English": 0.5466666666666666, + "Malay": 0.4066666666666667, + "Chinese": 0.48, + "Filipino": 0.42, + "Indonesian": 0.42, + "Spanish": 0.4533333333333333 + }, + "consistency_score_2": 0.48730158730158746, + "consistency_score_3": 0.32476190476190475, + "consistency_score_4": 0.2554285714285714, + "consistency_score_5": 0.21714285714285717, + "consistency_score_6": 0.1923809523809524, + "consistency_score_7": 0.17333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.52, + "Vietnamese,Malay": 0.48, + "Vietnamese,Chinese": 0.5, + "Vietnamese,Filipino": 0.49333333333333335, + "Vietnamese,Indonesian": 0.5133333333333333, + "Vietnamese,Spanish": 0.5066666666666667, + "English,Malay": 0.4666666666666667, + "English,Chinese": 0.5, + "English,Filipino": 0.48, + "English,Indonesian": 0.4866666666666667, + "English,Spanish": 0.54, + "Malay,Chinese": 0.4666666666666667, + "Malay,Filipino": 0.4666666666666667, + "Malay,Indonesian": 0.5066666666666667, + "Malay,Spanish": 0.49333333333333335, + "Chinese,Filipino": 0.47333333333333333, + "Chinese,Indonesian": 0.4866666666666667, + "Chinese,Spanish": 0.5266666666666666, + "Filipino,Indonesian": 0.41333333333333333, + "Filipino,Spanish": 0.46, + "Indonesian,Spanish": 0.4533333333333333 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.3333333333333333, + "Vietnamese,English,Chinese": 0.36666666666666664, + "Vietnamese,English,Filipino": 0.32, + "Vietnamese,English,Indonesian": 0.36, + "Vietnamese,English,Spanish": 0.36666666666666664, + "Vietnamese,Malay,Chinese": 0.32, + "Vietnamese,Malay,Filipino": 0.30666666666666664, + "Vietnamese,Malay,Indonesian": 0.3333333333333333, + "Vietnamese,Malay,Spanish": 0.3333333333333333, + "Vietnamese,Chinese,Filipino": 0.32666666666666666, + "Vietnamese,Chinese,Indonesian": 0.32666666666666666, + "Vietnamese,Chinese,Spanish": 0.3466666666666667, + "Vietnamese,Filipino,Indonesian": 0.30666666666666664, + "Vietnamese,Filipino,Spanish": 0.32666666666666666, + "Vietnamese,Indonesian,Spanish": 0.35333333333333333, + "English,Malay,Chinese": 0.32, + "English,Malay,Filipino": 0.31333333333333335, + "English,Malay,Indonesian": 0.32, + "English,Malay,Spanish": 0.34, + "English,Chinese,Filipino": 0.32, + "English,Chinese,Indonesian": 0.3333333333333333, + "English,Chinese,Spanish": 0.36, + "English,Filipino,Indonesian": 0.29333333333333333, + "English,Filipino,Spanish": 0.3333333333333333, + "English,Indonesian,Spanish": 0.32, + "Malay,Chinese,Filipino": 0.30666666666666664, + "Malay,Chinese,Indonesian": 0.31333333333333335, + "Malay,Chinese,Spanish": 0.31333333333333335, + "Malay,Filipino,Indonesian": 0.2866666666666667, + "Malay,Filipino,Spanish": 0.30666666666666664, + "Malay,Indonesian,Spanish": 0.31333333333333335, + "Chinese,Filipino,Indonesian": 0.29333333333333333, + "Chinese,Filipino,Spanish": 0.32666666666666666, + "Chinese,Indonesian,Spanish": 0.32666666666666666, + "Filipino,Indonesian,Spanish": 0.3 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.28, + "Vietnamese,English,Malay,Filipino": 0.25333333333333335, + "Vietnamese,English,Malay,Indonesian": 0.2733333333333333, + "Vietnamese,English,Malay,Spanish": 0.2733333333333333, + "Vietnamese,English,Chinese,Filipino": 0.26666666666666666, + "Vietnamese,English,Chinese,Indonesian": 0.28, + "Vietnamese,English,Chinese,Spanish": 0.29333333333333333, + "Vietnamese,English,Filipino,Indonesian": 0.24666666666666667, + "Vietnamese,English,Filipino,Spanish": 0.26, + "Vietnamese,English,Indonesian,Spanish": 0.2733333333333333, + "Vietnamese,Malay,Chinese,Filipino": 0.24666666666666667, + "Vietnamese,Malay,Chinese,Indonesian": 0.24666666666666667, + "Vietnamese,Malay,Chinese,Spanish": 0.26666666666666666, + "Vietnamese,Malay,Filipino,Indonesian": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Spanish": 0.24, + "Vietnamese,Malay,Indonesian,Spanish": 0.26, + "Vietnamese,Chinese,Filipino,Indonesian": 0.24, + "Vietnamese,Chinese,Filipino,Spanish": 0.26, + "Vietnamese,Chinese,Indonesian,Spanish": 0.26666666666666666, + "Vietnamese,Filipino,Indonesian,Spanish": 0.26, + "English,Malay,Chinese,Filipino": 0.24666666666666667, + "English,Malay,Chinese,Indonesian": 0.24, + "English,Malay,Chinese,Spanish": 0.25333333333333335, + "English,Malay,Filipino,Indonesian": 0.24, + "English,Malay,Filipino,Spanish": 0.26666666666666666, + "English,Malay,Indonesian,Spanish": 0.25333333333333335, + "English,Chinese,Filipino,Indonesian": 0.24666666666666667, + "English,Chinese,Filipino,Spanish": 0.26666666666666666, + "English,Chinese,Indonesian,Spanish": 0.26, + "English,Filipino,Indonesian,Spanish": 0.23333333333333334, + "Malay,Chinese,Filipino,Indonesian": 0.22666666666666666, + "Malay,Chinese,Filipino,Spanish": 0.25333333333333335, + "Malay,Chinese,Indonesian,Spanish": 0.24666666666666667, + "Malay,Filipino,Indonesian,Spanish": 0.23333333333333334, + "Chinese,Filipino,Indonesian,Spanish": 0.25333333333333335 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.22666666666666666, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.22, + "Vietnamese,English,Malay,Chinese,Spanish": 0.24, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.21333333333333335, + "Vietnamese,English,Malay,Filipino,Spanish": 0.22, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.22, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.23333333333333334, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.2, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.22, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.22, + "English,Malay,Chinese,Filipino,Indonesian": 0.2, + "English,Malay,Chinese,Filipino,Spanish": 0.22, + "English,Malay,Chinese,Indonesian,Spanish": 0.20666666666666667, + "English,Malay,Filipino,Indonesian,Spanish": 0.20666666666666667, + "English,Chinese,Filipino,Indonesian,Spanish": 0.21333333333333335, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.21333333333333335 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.20666666666666667, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.18666666666666668, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.18666666666666668 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334 + } + }, + "AC3_2": 0.46713510365235356, + "AC3_3": 0.3767558057218656, + "AC3_4": 0.32550556581647205, + "AC3_5": 0.2926302881228115, + "AC3_6": 0.2692761621316243, + "AC3_7": 0.2500459417668379 + }, + "prompt_3": { + "overall_acc": 0.44, + "language_acc": { + "Vietnamese": 0.38, + "English": 0.5333333333333333, + "Malay": 0.38666666666666666, + "Chinese": 0.4266666666666667, + "Filipino": 0.41333333333333333, + "Indonesian": 0.46, + "Spanish": 0.48 + }, + "consistency_score_2": 0.4825396825396825, + "consistency_score_3": 0.3135238095238095, + "consistency_score_4": 0.24000000000000002, + "consistency_score_5": 0.1980952380952381, + "consistency_score_6": 0.1695238095238095, + "consistency_score_7": 0.14666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.5066666666666667, + "Vietnamese,Malay": 0.4866666666666667, + "Vietnamese,Chinese": 0.37333333333333335, + "Vietnamese,Filipino": 0.44666666666666666, + "Vietnamese,Indonesian": 0.48, + "Vietnamese,Spanish": 0.5266666666666666, + "English,Malay": 0.47333333333333333, + "English,Chinese": 0.43333333333333335, + "English,Filipino": 0.5, + "English,Indonesian": 0.5533333333333333, + "English,Spanish": 0.5733333333333334, + "Malay,Chinese": 0.4533333333333333, + "Malay,Filipino": 0.49333333333333335, + "Malay,Indonesian": 0.54, + "Malay,Spanish": 0.48, + "Chinese,Filipino": 0.44666666666666666, + "Chinese,Indonesian": 0.4666666666666667, + "Chinese,Spanish": 0.46, + "Filipino,Indonesian": 0.4533333333333333, + "Filipino,Spanish": 0.47333333333333333, + "Indonesian,Spanish": 0.5133333333333333 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.34, + "Vietnamese,English,Chinese": 0.2866666666666667, + "Vietnamese,English,Filipino": 0.30666666666666664, + "Vietnamese,English,Indonesian": 0.34, + "Vietnamese,English,Spanish": 0.36666666666666664, + "Vietnamese,Malay,Chinese": 0.28, + "Vietnamese,Malay,Filipino": 0.31333333333333335, + "Vietnamese,Malay,Indonesian": 0.3333333333333333, + "Vietnamese,Malay,Spanish": 0.3466666666666667, + "Vietnamese,Chinese,Filipino": 0.26, + "Vietnamese,Chinese,Indonesian": 0.2733333333333333, + "Vietnamese,Chinese,Spanish": 0.2733333333333333, + "Vietnamese,Filipino,Indonesian": 0.28, + "Vietnamese,Filipino,Spanish": 0.3, + "Vietnamese,Indonesian,Spanish": 0.34, + "English,Malay,Chinese": 0.29333333333333333, + "English,Malay,Filipino": 0.32, + "English,Malay,Indonesian": 0.35333333333333333, + "English,Malay,Spanish": 0.34, + "English,Chinese,Filipino": 0.2866666666666667, + "English,Chinese,Indonesian": 0.32, + "English,Chinese,Spanish": 0.31333333333333335, + "English,Filipino,Indonesian": 0.32666666666666666, + "English,Filipino,Spanish": 0.35333333333333333, + "English,Indonesian,Spanish": 0.4, + "Malay,Chinese,Filipino": 0.28, + "Malay,Chinese,Indonesian": 0.30666666666666664, + "Malay,Chinese,Spanish": 0.2733333333333333, + "Malay,Filipino,Indonesian": 0.31333333333333335, + "Malay,Filipino,Spanish": 0.30666666666666664, + "Malay,Indonesian,Spanish": 0.34, + "Chinese,Filipino,Indonesian": 0.28, + "Chinese,Filipino,Spanish": 0.29333333333333333, + "Chinese,Indonesian,Spanish": 0.31333333333333335, + "Filipino,Indonesian,Spanish": 0.32 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.24, + "Vietnamese,English,Malay,Filipino": 0.25333333333333335, + "Vietnamese,English,Malay,Indonesian": 0.26666666666666666, + "Vietnamese,English,Malay,Spanish": 0.2733333333333333, + "Vietnamese,English,Chinese,Filipino": 0.22, + "Vietnamese,English,Chinese,Indonesian": 0.24, + "Vietnamese,English,Chinese,Spanish": 0.24666666666666667, + "Vietnamese,English,Filipino,Indonesian": 0.24, + "Vietnamese,English,Filipino,Spanish": 0.24, + "Vietnamese,English,Indonesian,Spanish": 0.29333333333333333, + "Vietnamese,Malay,Chinese,Filipino": 0.20666666666666667, + "Vietnamese,Malay,Chinese,Indonesian": 0.22, + "Vietnamese,Malay,Chinese,Spanish": 0.22, + "Vietnamese,Malay,Filipino,Indonesian": 0.22666666666666666, + "Vietnamese,Malay,Filipino,Spanish": 0.24, + "Vietnamese,Malay,Indonesian,Spanish": 0.26666666666666666, + "Vietnamese,Chinese,Filipino,Indonesian": 0.20666666666666667, + "Vietnamese,Chinese,Filipino,Spanish": 0.21333333333333335, + "Vietnamese,Chinese,Indonesian,Spanish": 0.22, + "Vietnamese,Filipino,Indonesian,Spanish": 0.22666666666666666, + "English,Malay,Chinese,Filipino": 0.22, + "English,Malay,Chinese,Indonesian": 0.23333333333333334, + "English,Malay,Chinese,Spanish": 0.23333333333333334, + "English,Malay,Filipino,Indonesian": 0.26, + "English,Malay,Filipino,Spanish": 0.26, + "English,Malay,Indonesian,Spanish": 0.2866666666666667, + "English,Chinese,Filipino,Indonesian": 0.22666666666666666, + "English,Chinese,Filipino,Spanish": 0.24666666666666667, + "English,Chinese,Indonesian,Spanish": 0.26, + "English,Filipino,Indonesian,Spanish": 0.2733333333333333, + "Malay,Chinese,Filipino,Indonesian": 0.21333333333333335, + "Malay,Chinese,Filipino,Spanish": 0.20666666666666667, + "Malay,Chinese,Indonesian,Spanish": 0.23333333333333334, + "Malay,Filipino,Indonesian,Spanish": 0.24666666666666667, + "Chinese,Filipino,Indonesian,Spanish": 0.24 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.18666666666666668, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.2, + "Vietnamese,English,Malay,Chinese,Spanish": 0.20666666666666667, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.20666666666666667, + "Vietnamese,English,Malay,Filipino,Spanish": 0.20666666666666667, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.2, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.17333333333333334, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.17333333333333334, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.18, + "English,Malay,Chinese,Filipino,Indonesian": 0.18666666666666668, + "English,Malay,Chinese,Filipino,Spanish": 0.19333333333333333, + "English,Malay,Chinese,Indonesian,Spanish": 0.20666666666666667, + "English,Malay,Filipino,Indonesian,Spanish": 0.22666666666666666, + "English,Chinese,Filipino,Indonesian,Spanish": 0.20666666666666667, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.18666666666666668 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.16, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.16666666666666666, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.18, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.18, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.15333333333333332, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14666666666666667 + } + }, + "AC3_2": 0.46028905707329965, + "AC3_3": 0.366147623813896, + "AC3_4": 0.3105882352484429, + "AC3_5": 0.2731940298079323, + "AC3_6": 0.2447499999598457, + "AC3_7": 0.2199999999625 + }, + "prompt_4": { + "overall_acc": 0.45809523809523817, + "language_acc": { + "Vietnamese": 0.4266666666666667, + "English": 0.54, + "Malay": 0.4066666666666667, + "Chinese": 0.43333333333333335, + "Filipino": 0.46, + "Indonesian": 0.4533333333333333, + "Spanish": 0.4866666666666667 + }, + "consistency_score_2": 0.49142857142857144, + "consistency_score_3": 0.33219047619047615, + "consistency_score_4": 0.2678095238095239, + "consistency_score_5": 0.233968253968254, + "consistency_score_6": 0.21142857142857144, + "consistency_score_7": 0.19333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.49333333333333335, + "Vietnamese,Malay": 0.5066666666666667, + "Vietnamese,Chinese": 0.5, + "Vietnamese,Filipino": 0.47333333333333333, + "Vietnamese,Indonesian": 0.47333333333333333, + "Vietnamese,Spanish": 0.49333333333333335, + "English,Malay": 0.4533333333333333, + "English,Chinese": 0.5066666666666667, + "English,Filipino": 0.5, + "English,Indonesian": 0.5266666666666666, + "English,Spanish": 0.58, + "Malay,Chinese": 0.43333333333333335, + "Malay,Filipino": 0.5, + "Malay,Indonesian": 0.5133333333333333, + "Malay,Spanish": 0.49333333333333335, + "Chinese,Filipino": 0.46, + "Chinese,Indonesian": 0.49333333333333335, + "Chinese,Spanish": 0.4866666666666667, + "Filipino,Indonesian": 0.46, + "Filipino,Spanish": 0.47333333333333333, + "Indonesian,Spanish": 0.5 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.32666666666666666, + "Vietnamese,English,Chinese": 0.3466666666666667, + "Vietnamese,English,Filipino": 0.32, + "Vietnamese,English,Indonesian": 0.34, + "Vietnamese,English,Spanish": 0.36, + "Vietnamese,Malay,Chinese": 0.31333333333333335, + "Vietnamese,Malay,Filipino": 0.34, + "Vietnamese,Malay,Indonesian": 0.34, + "Vietnamese,Malay,Spanish": 0.34, + "Vietnamese,Chinese,Filipino": 0.3333333333333333, + "Vietnamese,Chinese,Indonesian": 0.3333333333333333, + "Vietnamese,Chinese,Spanish": 0.34, + "Vietnamese,Filipino,Indonesian": 0.30666666666666664, + "Vietnamese,Filipino,Spanish": 0.32, + "Vietnamese,Indonesian,Spanish": 0.3466666666666667, + "English,Malay,Chinese": 0.3, + "English,Malay,Filipino": 0.32, + "English,Malay,Indonesian": 0.32666666666666666, + "English,Malay,Spanish": 0.36, + "English,Chinese,Filipino": 0.32666666666666666, + "English,Chinese,Indonesian": 0.36, + "English,Chinese,Spanish": 0.36666666666666664, + "English,Filipino,Indonesian": 0.34, + "English,Filipino,Spanish": 0.3466666666666667, + "English,Indonesian,Spanish": 0.35333333333333333, + "Malay,Chinese,Filipino": 0.31333333333333335, + "Malay,Chinese,Indonesian": 0.32666666666666666, + "Malay,Chinese,Spanish": 0.3, + "Malay,Filipino,Indonesian": 0.32666666666666666, + "Malay,Filipino,Spanish": 0.32666666666666666, + "Malay,Indonesian,Spanish": 0.3466666666666667, + "Chinese,Filipino,Indonesian": 0.32666666666666666, + "Chinese,Filipino,Spanish": 0.31333333333333335, + "Chinese,Indonesian,Spanish": 0.32666666666666666, + "Filipino,Indonesian,Spanish": 0.31333333333333335 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.26666666666666666, + "Vietnamese,English,Malay,Filipino": 0.26, + "Vietnamese,English,Malay,Indonesian": 0.2733333333333333, + "Vietnamese,English,Malay,Spanish": 0.28, + "Vietnamese,English,Chinese,Filipino": 0.28, + "Vietnamese,English,Chinese,Indonesian": 0.28, + "Vietnamese,English,Chinese,Spanish": 0.29333333333333333, + "Vietnamese,English,Filipino,Indonesian": 0.2733333333333333, + "Vietnamese,English,Filipino,Spanish": 0.26666666666666666, + "Vietnamese,English,Indonesian,Spanish": 0.2866666666666667, + "Vietnamese,Malay,Chinese,Filipino": 0.26666666666666666, + "Vietnamese,Malay,Chinese,Indonesian": 0.25333333333333335, + "Vietnamese,Malay,Chinese,Spanish": 0.26666666666666666, + "Vietnamese,Malay,Filipino,Indonesian": 0.25333333333333335, + "Vietnamese,Malay,Filipino,Spanish": 0.25333333333333335, + "Vietnamese,Malay,Indonesian,Spanish": 0.28, + "Vietnamese,Chinese,Filipino,Indonesian": 0.26, + "Vietnamese,Chinese,Filipino,Spanish": 0.26666666666666666, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2733333333333333, + "Vietnamese,Filipino,Indonesian,Spanish": 0.26, + "English,Malay,Chinese,Filipino": 0.24666666666666667, + "English,Malay,Chinese,Indonesian": 0.24666666666666667, + "English,Malay,Chinese,Spanish": 0.26666666666666666, + "English,Malay,Filipino,Indonesian": 0.2733333333333333, + "English,Malay,Filipino,Spanish": 0.2733333333333333, + "English,Malay,Indonesian,Spanish": 0.28, + "English,Chinese,Filipino,Indonesian": 0.28, + "English,Chinese,Filipino,Spanish": 0.26666666666666666, + "English,Chinese,Indonesian,Spanish": 0.28, + "English,Filipino,Indonesian,Spanish": 0.2733333333333333, + "Malay,Chinese,Filipino,Indonesian": 0.25333333333333335, + "Malay,Chinese,Filipino,Spanish": 0.24666666666666667, + "Malay,Chinese,Indonesian,Spanish": 0.26, + "Malay,Filipino,Indonesian,Spanish": 0.26666666666666666, + "Chinese,Filipino,Indonesian,Spanish": 0.26666666666666666 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.23333333333333334, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.22666666666666666, + "Vietnamese,English,Malay,Chinese,Spanish": 0.24666666666666667, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.23333333333333334, + "Vietnamese,English,Malay,Filipino,Spanish": 0.22666666666666666, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.24666666666666667, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.24666666666666667, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.24666666666666667, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.24666666666666667, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.24, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.22, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.22, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.24, + "English,Malay,Chinese,Filipino,Indonesian": 0.22, + "English,Malay,Chinese,Filipino,Spanish": 0.22666666666666666, + "English,Malay,Chinese,Indonesian,Spanish": 0.22666666666666666, + "English,Malay,Filipino,Indonesian,Spanish": 0.24, + "English,Chinese,Filipino,Indonesian,Spanish": 0.24, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.22666666666666666 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.20666666666666667, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.21333333333333335, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.20666666666666667, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.20666666666666667 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333 + } + }, + "AC3_2": 0.47417681611283563, + "AC3_3": 0.3851135671906498, + "AC3_4": 0.3380120202955533, + "AC3_5": 0.3097396242453271, + "AC3_6": 0.2893233082274634, + "AC3_7": 0.271910331342275 + }, + "prompt_5": { + "overall_acc": 0.4476190476190475, + "language_acc": { + "Vietnamese": 0.4266666666666667, + "English": 0.5466666666666666, + "Malay": 0.42, + "Chinese": 0.44666666666666666, + "Filipino": 0.41333333333333333, + "Indonesian": 0.42, + "Spanish": 0.46 + }, + "consistency_score_2": 0.47777777777777786, + "consistency_score_3": 0.3095238095238096, + "consistency_score_4": 0.24209523809523809, + "consistency_score_5": 0.20761904761904765, + "consistency_score_6": 0.18476190476190477, + "consistency_score_7": 0.16666666666666666, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.47333333333333333, + "Vietnamese,Malay": 0.5266666666666666, + "Vietnamese,Chinese": 0.4666666666666667, + "Vietnamese,Filipino": 0.4266666666666667, + "Vietnamese,Indonesian": 0.47333333333333333, + "Vietnamese,Spanish": 0.46, + "English,Malay": 0.4866666666666667, + "English,Chinese": 0.44666666666666666, + "English,Filipino": 0.5133333333333333, + "English,Indonesian": 0.5, + "English,Spanish": 0.5666666666666667, + "Malay,Chinese": 0.44, + "Malay,Filipino": 0.44666666666666666, + "Malay,Indonesian": 0.5466666666666666, + "Malay,Spanish": 0.5266666666666666, + "Chinese,Filipino": 0.38666666666666666, + "Chinese,Indonesian": 0.5066666666666667, + "Chinese,Spanish": 0.46, + "Filipino,Indonesian": 0.48, + "Filipino,Spanish": 0.44666666666666666, + "Indonesian,Spanish": 0.4533333333333333 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.32, + "Vietnamese,English,Chinese": 0.31333333333333335, + "Vietnamese,English,Filipino": 0.29333333333333333, + "Vietnamese,English,Indonesian": 0.32, + "Vietnamese,English,Spanish": 0.32, + "Vietnamese,Malay,Chinese": 0.32, + "Vietnamese,Malay,Filipino": 0.31333333333333335, + "Vietnamese,Malay,Indonesian": 0.3333333333333333, + "Vietnamese,Malay,Spanish": 0.3466666666666667, + "Vietnamese,Chinese,Filipino": 0.28, + "Vietnamese,Chinese,Indonesian": 0.32, + "Vietnamese,Chinese,Spanish": 0.31333333333333335, + "Vietnamese,Filipino,Indonesian": 0.2866666666666667, + "Vietnamese,Filipino,Spanish": 0.2866666666666667, + "Vietnamese,Indonesian,Spanish": 0.30666666666666664, + "English,Malay,Chinese": 0.2866666666666667, + "English,Malay,Filipino": 0.31333333333333335, + "English,Malay,Indonesian": 0.36, + "English,Malay,Spanish": 0.35333333333333333, + "English,Chinese,Filipino": 0.28, + "English,Chinese,Indonesian": 0.32, + "English,Chinese,Spanish": 0.3333333333333333, + "English,Filipino,Indonesian": 0.32666666666666666, + "English,Filipino,Spanish": 0.3466666666666667, + "English,Indonesian,Spanish": 0.3333333333333333, + "Malay,Chinese,Filipino": 0.26, + "Malay,Chinese,Indonesian": 0.3, + "Malay,Chinese,Spanish": 0.28, + "Malay,Filipino,Indonesian": 0.32, + "Malay,Filipino,Spanish": 0.29333333333333333, + "Malay,Indonesian,Spanish": 0.32666666666666666, + "Chinese,Filipino,Indonesian": 0.2866666666666667, + "Chinese,Filipino,Spanish": 0.26666666666666666, + "Chinese,Indonesian,Spanish": 0.2866666666666667, + "Filipino,Indonesian,Spanish": 0.2866666666666667 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.26, + "Vietnamese,English,Malay,Filipino": 0.24666666666666667, + "Vietnamese,English,Malay,Indonesian": 0.26666666666666666, + "Vietnamese,English,Malay,Spanish": 0.25333333333333335, + "Vietnamese,English,Chinese,Filipino": 0.24, + "Vietnamese,English,Chinese,Indonesian": 0.26, + "Vietnamese,English,Chinese,Spanish": 0.26666666666666666, + "Vietnamese,English,Filipino,Indonesian": 0.24666666666666667, + "Vietnamese,English,Filipino,Spanish": 0.24, + "Vietnamese,English,Indonesian,Spanish": 0.26, + "Vietnamese,Malay,Chinese,Filipino": 0.22666666666666666, + "Vietnamese,Malay,Chinese,Indonesian": 0.24666666666666667, + "Vietnamese,Malay,Chinese,Spanish": 0.24666666666666667, + "Vietnamese,Malay,Filipino,Indonesian": 0.24666666666666667, + "Vietnamese,Malay,Filipino,Spanish": 0.23333333333333334, + "Vietnamese,Malay,Indonesian,Spanish": 0.25333333333333335, + "Vietnamese,Chinese,Filipino,Indonesian": 0.22666666666666666, + "Vietnamese,Chinese,Filipino,Spanish": 0.23333333333333334, + "Vietnamese,Chinese,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Filipino,Indonesian,Spanish": 0.23333333333333334, + "English,Malay,Chinese,Filipino": 0.22666666666666666, + "English,Malay,Chinese,Indonesian": 0.22666666666666666, + "English,Malay,Chinese,Spanish": 0.24, + "English,Malay,Filipino,Indonesian": 0.26666666666666666, + "English,Malay,Filipino,Spanish": 0.26, + "English,Malay,Indonesian,Spanish": 0.26, + "English,Chinese,Filipino,Indonesian": 0.23333333333333334, + "English,Chinese,Filipino,Spanish": 0.23333333333333334, + "English,Chinese,Indonesian,Spanish": 0.24666666666666667, + "English,Filipino,Indonesian,Spanish": 0.25333333333333335, + "Malay,Chinese,Filipino,Indonesian": 0.21333333333333335, + "Malay,Chinese,Filipino,Spanish": 0.21333333333333335, + "Malay,Chinese,Indonesian,Spanish": 0.22666666666666666, + "Malay,Filipino,Indonesian,Spanish": 0.24, + "Chinese,Filipino,Indonesian,Spanish": 0.21333333333333335 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.21333333333333335, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.22, + "Vietnamese,English,Malay,Chinese,Spanish": 0.22666666666666666, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.22, + "Vietnamese,English,Malay,Filipino,Spanish": 0.20666666666666667, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.22, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.21333333333333335, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.21333333333333335, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.22, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.2, + "English,Malay,Chinese,Filipino,Indonesian": 0.19333333333333333, + "English,Malay,Chinese,Filipino,Spanish": 0.2, + "English,Malay,Chinese,Indonesian,Spanish": 0.2, + "English,Malay,Filipino,Indonesian,Spanish": 0.22, + "English,Chinese,Filipino,Indonesian,Spanish": 0.2, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.18666666666666668 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.18666666666666668, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.16666666666666666 + } + }, + "AC3_2": 0.46220697536457567, + "AC3_3": 0.3659778376276142, + "AC3_4": 0.3142357411992944, + "AC3_5": 0.2836655592036626, + "AC3_6": 0.26156052778422684, + "AC3_7": 0.2428940568080043 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.3774350649350649, + "language_acc": { + "Spanish": 0.3806818181818182, + "Chinese": 0.39204545454545453, + "Vietnamese": 0.3977272727272727, + "Indonesian": 0.39204545454545453, + "Malay": 0.38636363636363635, + "Filipino": 0.26136363636363635, + "English": 0.4318181818181818 + }, + "consistency_score_2": 0.48376623376623373, + "consistency_score_3": 0.2928571428571429, + "consistency_score_4": 0.20081168831168825, + "consistency_score_5": 0.1482683982683983, + "consistency_score_6": 0.11444805194805195, + "consistency_score_7": 0.09090909090909091, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.44886363636363635, + "Spanish,Vietnamese": 0.5625, + "Spanish,Indonesian": 0.5284090909090909, + "Spanish,Malay": 0.5170454545454546, + "Spanish,Filipino": 0.4375, + "Spanish,English": 0.5965909090909091, + "Chinese,Vietnamese": 0.4431818181818182, + "Chinese,Indonesian": 0.3409090909090909, + "Chinese,Malay": 0.39204545454545453, + "Chinese,Filipino": 0.3352272727272727, + "Chinese,English": 0.4318181818181818, + "Vietnamese,Indonesian": 0.5397727272727273, + "Vietnamese,Malay": 0.625, + "Vietnamese,Filipino": 0.4715909090909091, + "Vietnamese,English": 0.5625, + "Indonesian,Malay": 0.5454545454545454, + "Indonesian,Filipino": 0.4431818181818182, + "Indonesian,English": 0.48863636363636365, + "Malay,Filipino": 0.48863636363636365, + "Malay,English": 0.5227272727272727, + "Filipino,English": 0.4375 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.2784090909090909, + "Spanish,Chinese,Indonesian": 0.23295454545454544, + "Spanish,Chinese,Malay": 0.25, + "Spanish,Chinese,Filipino": 0.19886363636363635, + "Spanish,Chinese,English": 0.3125, + "Spanish,Vietnamese,Indonesian": 0.38636363636363635, + "Spanish,Vietnamese,Malay": 0.39204545454545453, + "Spanish,Vietnamese,Filipino": 0.3068181818181818, + "Spanish,Vietnamese,English": 0.42613636363636365, + "Spanish,Indonesian,Malay": 0.3522727272727273, + "Spanish,Indonesian,Filipino": 0.2840909090909091, + "Spanish,Indonesian,English": 0.375, + "Spanish,Malay,Filipino": 0.2897727272727273, + "Spanish,Malay,English": 0.39204545454545453, + "Spanish,Filipino,English": 0.30113636363636365, + "Chinese,Vietnamese,Indonesian": 0.23863636363636365, + "Chinese,Vietnamese,Malay": 0.2784090909090909, + "Chinese,Vietnamese,Filipino": 0.19318181818181818, + "Chinese,Vietnamese,English": 0.29545454545454547, + "Chinese,Indonesian,Malay": 0.21022727272727273, + "Chinese,Indonesian,Filipino": 0.1590909090909091, + "Chinese,Indonesian,English": 0.2215909090909091, + "Chinese,Malay,Filipino": 0.1875, + "Chinese,Malay,English": 0.25, + "Chinese,Filipino,English": 0.17613636363636365, + "Vietnamese,Indonesian,Malay": 0.4034090909090909, + "Vietnamese,Indonesian,Filipino": 0.29545454545454547, + "Vietnamese,Indonesian,English": 0.35795454545454547, + "Vietnamese,Malay,Filipino": 0.3352272727272727, + "Vietnamese,Malay,English": 0.38636363636363635, + "Vietnamese,Filipino,English": 0.30113636363636365, + "Indonesian,Malay,Filipino": 0.30113636363636365, + "Indonesian,Malay,English": 0.32954545454545453, + "Indonesian,Filipino,English": 0.2727272727272727, + "Malay,Filipino,English": 0.2784090909090909 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Malay": 0.19886363636363635, + "Spanish,Chinese,Vietnamese,Filipino": 0.14772727272727273, + "Spanish,Chinese,Vietnamese,English": 0.23295454545454544, + "Spanish,Chinese,Indonesian,Malay": 0.16477272727272727, + "Spanish,Chinese,Indonesian,Filipino": 0.13636363636363635, + "Spanish,Chinese,Indonesian,English": 0.18181818181818182, + "Spanish,Chinese,Malay,Filipino": 0.14204545454545456, + "Spanish,Chinese,Malay,English": 0.19886363636363635, + "Spanish,Chinese,Filipino,English": 0.1534090909090909, + "Spanish,Vietnamese,Indonesian,Malay": 0.30113636363636365, + "Spanish,Vietnamese,Indonesian,Filipino": 0.23295454545454544, + "Spanish,Vietnamese,Indonesian,English": 0.29545454545454547, + "Spanish,Vietnamese,Malay,Filipino": 0.23295454545454544, + "Spanish,Vietnamese,Malay,English": 0.3181818181818182, + "Spanish,Vietnamese,Filipino,English": 0.2556818181818182, + "Spanish,Indonesian,Malay,Filipino": 0.21022727272727273, + "Spanish,Indonesian,Malay,English": 0.2840909090909091, + "Spanish,Indonesian,Filipino,English": 0.23863636363636365, + "Spanish,Malay,Filipino,English": 0.23863636363636365, + "Chinese,Vietnamese,Indonesian,Malay": 0.18181818181818182, + "Chinese,Vietnamese,Indonesian,Filipino": 0.125, + "Chinese,Vietnamese,Indonesian,English": 0.18181818181818182, + "Chinese,Vietnamese,Malay,Filipino": 0.14772727272727273, + "Chinese,Vietnamese,Malay,English": 0.19886363636363635, + "Chinese,Vietnamese,Filipino,English": 0.14772727272727273, + "Chinese,Indonesian,Malay,Filipino": 0.11363636363636363, + "Chinese,Indonesian,Malay,English": 0.16477272727272727, + "Chinese,Indonesian,Filipino,English": 0.125, + "Chinese,Malay,Filipino,English": 0.125, + "Vietnamese,Indonesian,Malay,Filipino": 0.24431818181818182, + "Vietnamese,Indonesian,Malay,English": 0.2727272727272727, + "Vietnamese,Indonesian,Filipino,English": 0.21022727272727273, + "Vietnamese,Malay,Filipino,English": 0.23863636363636365, + "Indonesian,Malay,Filipino,English": 0.20454545454545456 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.14772727272727273, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.11363636363636363, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.1534090909090909, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.11931818181818182, + "Spanish,Chinese,Vietnamese,Malay,English": 0.17045454545454544, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.13636363636363635, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.10227272727272728, + "Spanish,Chinese,Indonesian,Malay,English": 0.14204545454545456, + "Spanish,Chinese,Indonesian,Filipino,English": 0.11931818181818182, + "Spanish,Chinese,Malay,Filipino,English": 0.11931818181818182, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.1875, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.24431818181818182, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.20454545454545456, + "Spanish,Vietnamese,Malay,Filipino,English": 0.21022727272727273, + "Spanish,Indonesian,Malay,Filipino,English": 0.19318181818181818, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.10795454545454546, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.14204545454545456, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.10795454545454546, + "Chinese,Vietnamese,Malay,Filipino,English": 0.11931818181818182, + "Chinese,Indonesian,Malay,Filipino,English": 0.09659090909090909, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.17613636363636365 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.09659090909090909, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.125, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.10795454545454546, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.11363636363636363, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.09659090909090909, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.17045454545454544, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.09090909090909091 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.09090909090909091 + } + }, + "AC3_2": 0.42403637827554763, + "AC3_3": 0.32981005427041693, + "AC3_4": 0.26214889120939544, + "AC3_5": 0.2129021261400631, + "AC3_6": 0.17563809948853085, + "AC3_7": 0.14652591772513443 + }, + "prompt_2": { + "overall_acc": 0.372564935064935, + "language_acc": { + "Spanish": 0.375, + "Chinese": 0.35795454545454547, + "Vietnamese": 0.38636363636363635, + "Indonesian": 0.3465909090909091, + "Malay": 0.38636363636363635, + "Filipino": 0.32386363636363635, + "English": 0.4318181818181818 + }, + "consistency_score_2": 0.48430735930735935, + "consistency_score_3": 0.2982142857142857, + "consistency_score_4": 0.210064935064935, + "consistency_score_5": 0.1609848484848485, + "consistency_score_6": 0.1314935064935065, + "consistency_score_7": 0.11363636363636363, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.4659090909090909, + "Spanish,Vietnamese": 0.5625, + "Spanish,Indonesian": 0.5, + "Spanish,Malay": 0.4715909090909091, + "Spanish,Filipino": 0.4772727272727273, + "Spanish,English": 0.6022727272727273, + "Chinese,Vietnamese": 0.4431818181818182, + "Chinese,Indonesian": 0.3977272727272727, + "Chinese,Malay": 0.4431818181818182, + "Chinese,Filipino": 0.38636363636363635, + "Chinese,English": 0.4602272727272727, + "Vietnamese,Indonesian": 0.5511363636363636, + "Vietnamese,Malay": 0.5454545454545454, + "Vietnamese,Filipino": 0.48295454545454547, + "Vietnamese,English": 0.5795454545454546, + "Indonesian,Malay": 0.5625, + "Indonesian,Filipino": 0.3806818181818182, + "Indonesian,English": 0.5284090909090909, + "Malay,Filipino": 0.4147727272727273, + "Malay,English": 0.4659090909090909, + "Filipino,English": 0.44886363636363635 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.29545454545454547, + "Spanish,Chinese,Indonesian": 0.2556818181818182, + "Spanish,Chinese,Malay": 0.2784090909090909, + "Spanish,Chinese,Filipino": 0.24431818181818182, + "Spanish,Chinese,English": 0.3181818181818182, + "Spanish,Vietnamese,Indonesian": 0.375, + "Spanish,Vietnamese,Malay": 0.3465909090909091, + "Spanish,Vietnamese,Filipino": 0.32386363636363635, + "Spanish,Vietnamese,English": 0.42613636363636365, + "Spanish,Indonesian,Malay": 0.3409090909090909, + "Spanish,Indonesian,Filipino": 0.2727272727272727, + "Spanish,Indonesian,English": 0.3806818181818182, + "Spanish,Malay,Filipino": 0.25, + "Spanish,Malay,English": 0.3409090909090909, + "Spanish,Filipino,English": 0.32954545454545453, + "Chinese,Vietnamese,Indonesian": 0.2840909090909091, + "Chinese,Vietnamese,Malay": 0.29545454545454547, + "Chinese,Vietnamese,Filipino": 0.23863636363636365, + "Chinese,Vietnamese,English": 0.3068181818181818, + "Chinese,Indonesian,Malay": 0.2784090909090909, + "Chinese,Indonesian,Filipino": 0.18181818181818182, + "Chinese,Indonesian,English": 0.26136363636363635, + "Chinese,Malay,Filipino": 0.22727272727272727, + "Chinese,Malay,English": 0.2556818181818182, + "Chinese,Filipino,English": 0.22727272727272727, + "Vietnamese,Indonesian,Malay": 0.39204545454545453, + "Vietnamese,Indonesian,Filipino": 0.2840909090909091, + "Vietnamese,Indonesian,English": 0.3806818181818182, + "Vietnamese,Malay,Filipino": 0.2897727272727273, + "Vietnamese,Malay,English": 0.3522727272727273, + "Vietnamese,Filipino,English": 0.32386363636363635, + "Indonesian,Malay,Filipino": 0.26136363636363635, + "Indonesian,Malay,English": 0.32386363636363635, + "Indonesian,Filipino,English": 0.26136363636363635, + "Malay,Filipino,English": 0.23295454545454544 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.2215909090909091, + "Spanish,Chinese,Vietnamese,Malay": 0.2159090909090909, + "Spanish,Chinese,Vietnamese,Filipino": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,English": 0.23863636363636365, + "Spanish,Chinese,Indonesian,Malay": 0.20454545454545456, + "Spanish,Chinese,Indonesian,Filipino": 0.1534090909090909, + "Spanish,Chinese,Indonesian,English": 0.21022727272727273, + "Spanish,Chinese,Malay,Filipino": 0.17613636363636365, + "Spanish,Chinese,Malay,English": 0.21022727272727273, + "Spanish,Chinese,Filipino,English": 0.1875, + "Spanish,Vietnamese,Indonesian,Malay": 0.2727272727272727, + "Spanish,Vietnamese,Indonesian,Filipino": 0.2215909090909091, + "Spanish,Vietnamese,Indonesian,English": 0.3125, + "Spanish,Vietnamese,Malay,Filipino": 0.21022727272727273, + "Spanish,Vietnamese,Malay,English": 0.2727272727272727, + "Spanish,Vietnamese,Filipino,English": 0.2784090909090909, + "Spanish,Indonesian,Malay,Filipino": 0.19318181818181818, + "Spanish,Indonesian,Malay,English": 0.26136363636363635, + "Spanish,Indonesian,Filipino,English": 0.23295454545454544, + "Spanish,Malay,Filipino,English": 0.20454545454545456, + "Chinese,Vietnamese,Indonesian,Malay": 0.2215909090909091, + "Chinese,Vietnamese,Indonesian,Filipino": 0.1590909090909091, + "Chinese,Vietnamese,Indonesian,English": 0.20454545454545456, + "Chinese,Vietnamese,Malay,Filipino": 0.17613636363636365, + "Chinese,Vietnamese,Malay,English": 0.21022727272727273, + "Chinese,Vietnamese,Filipino,English": 0.18181818181818182, + "Chinese,Indonesian,Malay,Filipino": 0.1590909090909091, + "Chinese,Indonesian,Malay,English": 0.1875, + "Chinese,Indonesian,Filipino,English": 0.14772727272727273, + "Chinese,Malay,Filipino,English": 0.1590909090909091, + "Vietnamese,Indonesian,Malay,Filipino": 0.2159090909090909, + "Vietnamese,Indonesian,Malay,English": 0.26704545454545453, + "Vietnamese,Indonesian,Filipino,English": 0.2159090909090909, + "Vietnamese,Malay,Filipino,English": 0.20454545454545456, + "Indonesian,Malay,Filipino,English": 0.18181818181818182 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.17613636363636365, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.13636363636363635, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.14772727272727273, + "Spanish,Chinese,Vietnamese,Malay,English": 0.17613636363636365, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.16477272727272727, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.13636363636363635, + "Spanish,Chinese,Indonesian,Malay,English": 0.16477272727272727, + "Spanish,Chinese,Indonesian,Filipino,English": 0.14204545454545456, + "Spanish,Chinese,Malay,Filipino,English": 0.14772727272727273, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.16477272727272727, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.2215909090909091, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.20454545454545456, + "Spanish,Vietnamese,Malay,Filipino,English": 0.1875, + "Spanish,Indonesian,Malay,Filipino,English": 0.17045454545454544, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.14204545454545456, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.1590909090909091, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.13068181818181818, + "Chinese,Vietnamese,Malay,Filipino,English": 0.14204545454545456, + "Chinese,Indonesian,Malay,Filipino,English": 0.125, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.1590909090909091 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.11931818181818182, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.14204545454545456, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.13068181818181818, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.13636363636363635, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.125, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.1534090909090909, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.11363636363636363 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.11363636363636363 + } + }, + "AC3_2": 0.42115013178933064, + "AC3_3": 0.3312690153011882, + "AC3_4": 0.26865367840138227, + "AC3_5": 0.22482366766946138, + "AC3_6": 0.19438170521270712, + "AC3_7": 0.17415389281590324 + }, + "prompt_3": { + "overall_acc": 0.3547077922077922, + "language_acc": { + "Spanish": 0.38636363636363635, + "Chinese": 0.375, + "Vietnamese": 0.36363636363636365, + "Indonesian": 0.32954545454545453, + "Malay": 0.35795454545454547, + "Filipino": 0.26136363636363635, + "English": 0.4090909090909091 + }, + "consistency_score_2": 0.4862012987012986, + "consistency_score_3": 0.3025974025974026, + "consistency_score_4": 0.21120129870129875, + "consistency_score_5": 0.15557359307359309, + "consistency_score_6": 0.11931818181818181, + "consistency_score_7": 0.09659090909090909, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.4090909090909091, + "Spanish,Vietnamese": 0.5454545454545454, + "Spanish,Indonesian": 0.5454545454545454, + "Spanish,Malay": 0.5340909090909091, + "Spanish,Filipino": 0.5227272727272727, + "Spanish,English": 0.6022727272727273, + "Chinese,Vietnamese": 0.5056818181818182, + "Chinese,Indonesian": 0.39204545454545453, + "Chinese,Malay": 0.4147727272727273, + "Chinese,Filipino": 0.36363636363636365, + "Chinese,English": 0.4602272727272727, + "Vietnamese,Indonesian": 0.5227272727272727, + "Vietnamese,Malay": 0.5340909090909091, + "Vietnamese,Filipino": 0.4943181818181818, + "Vietnamese,English": 0.5511363636363636, + "Indonesian,Malay": 0.5568181818181818, + "Indonesian,Filipino": 0.4147727272727273, + "Indonesian,English": 0.48863636363636365, + "Malay,Filipino": 0.4318181818181818, + "Malay,English": 0.4715909090909091, + "Filipino,English": 0.44886363636363635 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.30113636363636365, + "Spanish,Chinese,Indonesian": 0.26136363636363635, + "Spanish,Chinese,Malay": 0.2556818181818182, + "Spanish,Chinese,Filipino": 0.2215909090909091, + "Spanish,Chinese,English": 0.3068181818181818, + "Spanish,Vietnamese,Indonesian": 0.375, + "Spanish,Vietnamese,Malay": 0.35795454545454547, + "Spanish,Vietnamese,Filipino": 0.32954545454545453, + "Spanish,Vietnamese,English": 0.4034090909090909, + "Spanish,Indonesian,Malay": 0.4034090909090909, + "Spanish,Indonesian,Filipino": 0.30113636363636365, + "Spanish,Indonesian,English": 0.39204545454545453, + "Spanish,Malay,Filipino": 0.3125, + "Spanish,Malay,English": 0.36363636363636365, + "Spanish,Filipino,English": 0.3352272727272727, + "Chinese,Vietnamese,Indonesian": 0.29545454545454547, + "Chinese,Vietnamese,Malay": 0.30113636363636365, + "Chinese,Vietnamese,Filipino": 0.2556818181818182, + "Chinese,Vietnamese,English": 0.32386363636363635, + "Chinese,Indonesian,Malay": 0.26136363636363635, + "Chinese,Indonesian,Filipino": 0.19886363636363635, + "Chinese,Indonesian,English": 0.25, + "Chinese,Malay,Filipino": 0.21022727272727273, + "Chinese,Malay,English": 0.24431818181818182, + "Chinese,Filipino,English": 0.2159090909090909, + "Vietnamese,Indonesian,Malay": 0.39204545454545453, + "Vietnamese,Indonesian,Filipino": 0.30113636363636365, + "Vietnamese,Indonesian,English": 0.35795454545454547, + "Vietnamese,Malay,Filipino": 0.29545454545454547, + "Vietnamese,Malay,English": 0.32954545454545453, + "Vietnamese,Filipino,English": 0.3125, + "Indonesian,Malay,Filipino": 0.2897727272727273, + "Indonesian,Malay,English": 0.32386363636363635, + "Indonesian,Filipino,English": 0.26704545454545453, + "Malay,Filipino,English": 0.24431818181818182 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Malay": 0.20454545454545456, + "Spanish,Chinese,Vietnamese,Filipino": 0.17613636363636365, + "Spanish,Chinese,Vietnamese,English": 0.23863636363636365, + "Spanish,Chinese,Indonesian,Malay": 0.20454545454545456, + "Spanish,Chinese,Indonesian,Filipino": 0.1590909090909091, + "Spanish,Chinese,Indonesian,English": 0.20454545454545456, + "Spanish,Chinese,Malay,Filipino": 0.16477272727272727, + "Spanish,Chinese,Malay,English": 0.19318181818181818, + "Spanish,Chinese,Filipino,English": 0.1875, + "Spanish,Vietnamese,Indonesian,Malay": 0.29545454545454547, + "Spanish,Vietnamese,Indonesian,Filipino": 0.23295454545454544, + "Spanish,Vietnamese,Indonesian,English": 0.30113636363636365, + "Spanish,Vietnamese,Malay,Filipino": 0.2215909090909091, + "Spanish,Vietnamese,Malay,English": 0.2727272727272727, + "Spanish,Vietnamese,Filipino,English": 0.26136363636363635, + "Spanish,Indonesian,Malay,Filipino": 0.23295454545454544, + "Spanish,Indonesian,Malay,English": 0.29545454545454547, + "Spanish,Indonesian,Filipino,English": 0.24431818181818182, + "Spanish,Malay,Filipino,English": 0.22727272727272727, + "Chinese,Vietnamese,Indonesian,Malay": 0.2215909090909091, + "Chinese,Vietnamese,Indonesian,Filipino": 0.17613636363636365, + "Chinese,Vietnamese,Indonesian,English": 0.20454545454545456, + "Chinese,Vietnamese,Malay,Filipino": 0.17045454545454544, + "Chinese,Vietnamese,Malay,English": 0.19318181818181818, + "Chinese,Vietnamese,Filipino,English": 0.1875, + "Chinese,Indonesian,Malay,Filipino": 0.1590909090909091, + "Chinese,Indonesian,Malay,English": 0.17045454545454544, + "Chinese,Indonesian,Filipino,English": 0.14772727272727273, + "Chinese,Malay,Filipino,English": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,Filipino": 0.22727272727272727, + "Vietnamese,Indonesian,Malay,English": 0.25, + "Vietnamese,Indonesian,Filipino,English": 0.2215909090909091, + "Vietnamese,Malay,Filipino,English": 0.19886363636363635, + "Indonesian,Malay,Filipino,English": 0.19318181818181818 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.16477272727272727, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.13636363636363635, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.17045454545454544, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.13068181818181818, + "Spanish,Chinese,Vietnamese,Malay,English": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.1590909090909091, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.13068181818181818, + "Spanish,Chinese,Indonesian,Malay,English": 0.1534090909090909, + "Spanish,Chinese,Indonesian,Filipino,English": 0.14204545454545456, + "Spanish,Chinese,Malay,Filipino,English": 0.13636363636363635, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.18181818181818182, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.2215909090909091, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.20454545454545456, + "Spanish,Vietnamese,Malay,Filipino,English": 0.1875, + "Spanish,Indonesian,Malay,Filipino,English": 0.1875, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.13636363636363635, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.14204545454545456, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.13068181818181818, + "Chinese,Vietnamese,Malay,Filipino,English": 0.11931818181818182, + "Chinese,Indonesian,Malay,Filipino,English": 0.11363636363636363, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.1590909090909091 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.10795454545454546, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.125, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.125, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.11363636363636363, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.11363636363636363, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.1534090909090909, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.09659090909090909 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.09659090909090909 + } + }, + "AC3_2": 0.41017368244633357, + "AC3_3": 0.3265869718818157, + "AC3_4": 0.26475894296086744, + "AC3_5": 0.21628523910870737, + "AC3_6": 0.17856864877926584, + "AC3_7": 0.15183534986825256 + }, + "prompt_4": { + "overall_acc": 0.37905844155844154, + "language_acc": { + "Spanish": 0.39204545454545453, + "Chinese": 0.4034090909090909, + "Vietnamese": 0.375, + "Indonesian": 0.38636363636363635, + "Malay": 0.375, + "Filipino": 0.3068181818181818, + "English": 0.4147727272727273 + }, + "consistency_score_2": 0.5016233766233766, + "consistency_score_3": 0.3220779220779221, + "consistency_score_4": 0.23522727272727276, + "consistency_score_5": 0.18560606060606064, + "consistency_score_6": 0.15503246753246755, + "consistency_score_7": 0.13636363636363635, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.48295454545454547, + "Spanish,Vietnamese": 0.5454545454545454, + "Spanish,Indonesian": 0.5170454545454546, + "Spanish,Malay": 0.5, + "Spanish,Filipino": 0.4772727272727273, + "Spanish,English": 0.5795454545454546, + "Chinese,Vietnamese": 0.5170454545454546, + "Chinese,Indonesian": 0.42045454545454547, + "Chinese,Malay": 0.4659090909090909, + "Chinese,Filipino": 0.36363636363636365, + "Chinese,English": 0.5056818181818182, + "Vietnamese,Indonesian": 0.5852272727272727, + "Vietnamese,Malay": 0.5511363636363636, + "Vietnamese,Filipino": 0.4943181818181818, + "Vietnamese,English": 0.5909090909090909, + "Indonesian,Malay": 0.5852272727272727, + "Indonesian,Filipino": 0.44886363636363635, + "Indonesian,English": 0.48295454545454547, + "Malay,Filipino": 0.44886363636363635, + "Malay,English": 0.5, + "Filipino,English": 0.4715909090909091 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.32954545454545453, + "Spanish,Chinese,Indonesian": 0.2897727272727273, + "Spanish,Chinese,Malay": 0.3125, + "Spanish,Chinese,Filipino": 0.24431818181818182, + "Spanish,Chinese,English": 0.36363636363636365, + "Spanish,Vietnamese,Indonesian": 0.39204545454545453, + "Spanish,Vietnamese,Malay": 0.36363636363636365, + "Spanish,Vietnamese,Filipino": 0.32954545454545453, + "Spanish,Vietnamese,English": 0.4147727272727273, + "Spanish,Indonesian,Malay": 0.3693181818181818, + "Spanish,Indonesian,Filipino": 0.3125, + "Spanish,Indonesian,English": 0.375, + "Spanish,Malay,Filipino": 0.2727272727272727, + "Spanish,Malay,English": 0.375, + "Spanish,Filipino,English": 0.32954545454545453, + "Chinese,Vietnamese,Indonesian": 0.3181818181818182, + "Chinese,Vietnamese,Malay": 0.32954545454545453, + "Chinese,Vietnamese,Filipino": 0.26136363636363635, + "Chinese,Vietnamese,English": 0.35795454545454547, + "Chinese,Indonesian,Malay": 0.3125, + "Chinese,Indonesian,Filipino": 0.2159090909090909, + "Chinese,Indonesian,English": 0.30113636363636365, + "Chinese,Malay,Filipino": 0.23295454545454544, + "Chinese,Malay,English": 0.3125, + "Chinese,Filipino,English": 0.23863636363636365, + "Vietnamese,Indonesian,Malay": 0.4090909090909091, + "Vietnamese,Indonesian,Filipino": 0.32954545454545453, + "Vietnamese,Indonesian,English": 0.38636363636363635, + "Vietnamese,Malay,Filipino": 0.3068181818181818, + "Vietnamese,Malay,English": 0.35795454545454547, + "Vietnamese,Filipino,English": 0.3409090909090909, + "Indonesian,Malay,Filipino": 0.29545454545454547, + "Indonesian,Malay,English": 0.3352272727272727, + "Indonesian,Filipino,English": 0.2840909090909091, + "Malay,Filipino,English": 0.2727272727272727 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.23863636363636365, + "Spanish,Chinese,Vietnamese,Malay": 0.23295454545454544, + "Spanish,Chinese,Vietnamese,Filipino": 0.20454545454545456, + "Spanish,Chinese,Vietnamese,English": 0.2784090909090909, + "Spanish,Chinese,Indonesian,Malay": 0.23295454545454544, + "Spanish,Chinese,Indonesian,Filipino": 0.17613636363636365, + "Spanish,Chinese,Indonesian,English": 0.25, + "Spanish,Chinese,Malay,Filipino": 0.19318181818181818, + "Spanish,Chinese,Malay,English": 0.26136363636363635, + "Spanish,Chinese,Filipino,English": 0.20454545454545456, + "Spanish,Vietnamese,Indonesian,Malay": 0.30113636363636365, + "Spanish,Vietnamese,Indonesian,Filipino": 0.2556818181818182, + "Spanish,Vietnamese,Indonesian,English": 0.3181818181818182, + "Spanish,Vietnamese,Malay,Filipino": 0.22727272727272727, + "Spanish,Vietnamese,Malay,English": 0.30113636363636365, + "Spanish,Vietnamese,Filipino,English": 0.2784090909090909, + "Spanish,Indonesian,Malay,Filipino": 0.2159090909090909, + "Spanish,Indonesian,Malay,English": 0.2897727272727273, + "Spanish,Indonesian,Filipino,English": 0.25, + "Spanish,Malay,Filipino,English": 0.23295454545454544, + "Chinese,Vietnamese,Indonesian,Malay": 0.25, + "Chinese,Vietnamese,Indonesian,Filipino": 0.18181818181818182, + "Chinese,Vietnamese,Indonesian,English": 0.25, + "Chinese,Vietnamese,Malay,Filipino": 0.19886363636363635, + "Chinese,Vietnamese,Malay,English": 0.23863636363636365, + "Chinese,Vietnamese,Filipino,English": 0.19886363636363635, + "Chinese,Indonesian,Malay,Filipino": 0.18181818181818182, + "Chinese,Indonesian,Malay,English": 0.23295454545454544, + "Chinese,Indonesian,Filipino,English": 0.17045454545454544, + "Chinese,Malay,Filipino,English": 0.18181818181818182, + "Vietnamese,Indonesian,Malay,Filipino": 0.23863636363636365, + "Vietnamese,Indonesian,Malay,English": 0.2840909090909091, + "Vietnamese,Indonesian,Filipino,English": 0.24431818181818182, + "Vietnamese,Malay,Filipino,English": 0.22727272727272727, + "Indonesian,Malay,Filipino,English": 0.21022727272727273 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.19318181818181818, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.2159090909090909, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.16477272727272727, + "Spanish,Chinese,Vietnamese,Malay,English": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.18181818181818182, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.1534090909090909, + "Spanish,Chinese,Indonesian,Malay,English": 0.21022727272727273, + "Spanish,Chinese,Indonesian,Filipino,English": 0.1590909090909091, + "Spanish,Chinese,Malay,Filipino,English": 0.17045454545454544, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.1875, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.2556818181818182, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.22727272727272727, + "Spanish,Vietnamese,Malay,Filipino,English": 0.21022727272727273, + "Spanish,Indonesian,Malay,Filipino,English": 0.19318181818181818, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.1590909090909091, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.19886363636363635, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.1534090909090909, + "Chinese,Vietnamese,Malay,Filipino,English": 0.1590909090909091, + "Chinese,Indonesian,Malay,Filipino,English": 0.14772727272727273, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.1875 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.13636363636363635, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.1534090909090909, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.1534090909090909, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.14204545454545456, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.18181818181818182, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.13636363636363635 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.13636363636363635 + } + }, + "AC3_2": 0.43181219697053874, + "AC3_3": 0.3482528121994379, + "AC3_4": 0.290304271526833, + "AC3_5": 0.24919414553238128, + "AC3_6": 0.2200612836598984, + "AC3_7": 0.20057265565185162 + }, + "prompt_5": { + "overall_acc": 0.37987012987012997, + "language_acc": { + "Spanish": 0.4375, + "Chinese": 0.4147727272727273, + "Vietnamese": 0.39204545454545453, + "Indonesian": 0.3465909090909091, + "Malay": 0.38636363636363635, + "Filipino": 0.30113636363636365, + "English": 0.3806818181818182 + }, + "consistency_score_2": 0.510551948051948, + "consistency_score_3": 0.3287337662337663, + "consistency_score_4": 0.23912337662337663, + "consistency_score_5": 0.18641774891774893, + "consistency_score_6": 0.15422077922077923, + "consistency_score_7": 0.13636363636363635, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.5056818181818182, + "Spanish,Vietnamese": 0.5681818181818182, + "Spanish,Indonesian": 0.5284090909090909, + "Spanish,Malay": 0.5284090909090909, + "Spanish,Filipino": 0.5113636363636364, + "Spanish,English": 0.6363636363636364, + "Chinese,Vietnamese": 0.5340909090909091, + "Chinese,Indonesian": 0.4715909090909091, + "Chinese,Malay": 0.5056818181818182, + "Chinese,Filipino": 0.375, + "Chinese,English": 0.5113636363636364, + "Vietnamese,Indonesian": 0.5738636363636364, + "Vietnamese,Malay": 0.5511363636363636, + "Vietnamese,Filipino": 0.4943181818181818, + "Vietnamese,English": 0.5909090909090909, + "Indonesian,Malay": 0.5568181818181818, + "Indonesian,Filipino": 0.4090909090909091, + "Indonesian,English": 0.4772727272727273, + "Malay,Filipino": 0.45454545454545453, + "Malay,English": 0.4659090909090909, + "Filipino,English": 0.4715909090909091 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.35795454545454547, + "Spanish,Chinese,Indonesian": 0.3125, + "Spanish,Chinese,Malay": 0.32954545454545453, + "Spanish,Chinese,Filipino": 0.26704545454545453, + "Spanish,Chinese,English": 0.3806818181818182, + "Spanish,Vietnamese,Indonesian": 0.39204545454545453, + "Spanish,Vietnamese,Malay": 0.39204545454545453, + "Spanish,Vietnamese,Filipino": 0.32954545454545453, + "Spanish,Vietnamese,English": 0.4602272727272727, + "Spanish,Indonesian,Malay": 0.3693181818181818, + "Spanish,Indonesian,Filipino": 0.30113636363636365, + "Spanish,Indonesian,English": 0.375, + "Spanish,Malay,Filipino": 0.3068181818181818, + "Spanish,Malay,English": 0.38636363636363635, + "Spanish,Filipino,English": 0.3522727272727273, + "Chinese,Vietnamese,Indonesian": 0.3465909090909091, + "Chinese,Vietnamese,Malay": 0.3409090909090909, + "Chinese,Vietnamese,Filipino": 0.2727272727272727, + "Chinese,Vietnamese,English": 0.38636363636363635, + "Chinese,Indonesian,Malay": 0.3181818181818182, + "Chinese,Indonesian,Filipino": 0.23295454545454544, + "Chinese,Indonesian,English": 0.3068181818181818, + "Chinese,Malay,Filipino": 0.24431818181818182, + "Chinese,Malay,English": 0.29545454545454547, + "Chinese,Filipino,English": 0.23863636363636365, + "Vietnamese,Indonesian,Malay": 0.39204545454545453, + "Vietnamese,Indonesian,Filipino": 0.30113636363636365, + "Vietnamese,Indonesian,English": 0.39204545454545453, + "Vietnamese,Malay,Filipino": 0.3068181818181818, + "Vietnamese,Malay,English": 0.3693181818181818, + "Vietnamese,Filipino,English": 0.32386363636363635, + "Indonesian,Malay,Filipino": 0.2727272727272727, + "Indonesian,Malay,English": 0.32954545454545453, + "Indonesian,Filipino,English": 0.26136363636363635, + "Malay,Filipino,English": 0.26136363636363635 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.26704545454545453, + "Spanish,Chinese,Vietnamese,Malay": 0.24431818181818182, + "Spanish,Chinese,Vietnamese,Filipino": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,English": 0.3125, + "Spanish,Chinese,Indonesian,Malay": 0.23863636363636365, + "Spanish,Chinese,Indonesian,Filipino": 0.19886363636363635, + "Spanish,Chinese,Indonesian,English": 0.2556818181818182, + "Spanish,Chinese,Malay,Filipino": 0.21022727272727273, + "Spanish,Chinese,Malay,English": 0.25, + "Spanish,Chinese,Filipino,English": 0.2215909090909091, + "Spanish,Vietnamese,Indonesian,Malay": 0.29545454545454547, + "Spanish,Vietnamese,Indonesian,Filipino": 0.23295454545454544, + "Spanish,Vietnamese,Indonesian,English": 0.32954545454545453, + "Spanish,Vietnamese,Malay,Filipino": 0.23863636363636365, + "Spanish,Vietnamese,Malay,English": 0.3181818181818182, + "Spanish,Vietnamese,Filipino,English": 0.2840909090909091, + "Spanish,Indonesian,Malay,Filipino": 0.2215909090909091, + "Spanish,Indonesian,Malay,English": 0.2784090909090909, + "Spanish,Indonesian,Filipino,English": 0.23863636363636365, + "Spanish,Malay,Filipino,English": 0.24431818181818182, + "Chinese,Vietnamese,Indonesian,Malay": 0.26136363636363635, + "Chinese,Vietnamese,Indonesian,Filipino": 0.19886363636363635, + "Chinese,Vietnamese,Indonesian,English": 0.26704545454545453, + "Chinese,Vietnamese,Malay,Filipino": 0.19318181818181818, + "Chinese,Vietnamese,Malay,English": 0.24431818181818182, + "Chinese,Vietnamese,Filipino,English": 0.21022727272727273, + "Chinese,Indonesian,Malay,Filipino": 0.18181818181818182, + "Chinese,Indonesian,Malay,English": 0.22727272727272727, + "Chinese,Indonesian,Filipino,English": 0.18181818181818182, + "Chinese,Malay,Filipino,English": 0.17613636363636365, + "Vietnamese,Indonesian,Malay,Filipino": 0.21022727272727273, + "Vietnamese,Indonesian,Malay,English": 0.2784090909090909, + "Vietnamese,Indonesian,Filipino,English": 0.22727272727272727, + "Vietnamese,Malay,Filipino,English": 0.2215909090909091, + "Indonesian,Malay,Filipino,English": 0.19886363636363635 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.19886363636363635, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.17045454545454544, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.23295454545454544, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.16477272727272727, + "Spanish,Chinese,Vietnamese,Malay,English": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.19886363636363635, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.16477272727272727, + "Spanish,Chinese,Indonesian,Malay,English": 0.19318181818181818, + "Spanish,Chinese,Indonesian,Filipino,English": 0.17613636363636365, + "Spanish,Chinese,Malay,Filipino,English": 0.17613636363636365, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.17613636363636365, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.23863636363636365, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.2159090909090909, + "Spanish,Vietnamese,Malay,Filipino,English": 0.21022727272727273, + "Spanish,Indonesian,Malay,Filipino,English": 0.1875, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.1534090909090909, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.19886363636363635, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.17045454545454544, + "Chinese,Vietnamese,Malay,Filipino,English": 0.1590909090909091, + "Chinese,Indonesian,Malay,Filipino,English": 0.14772727272727273, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.17045454545454544 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.13636363636363635, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.17045454545454544, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.16477272727272727, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.1590909090909091, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.14772727272727273, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.16477272727272727, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.13636363636363635 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.13636363636363635 + } + }, + "AC3_2": 0.4356213521631312, + "AC3_3": 0.35245682147924695, + "AC3_4": 0.2934952537153448, + "AC3_5": 0.2501008308020075, + "AC3_6": 0.21937788651006385, + "AC3_7": 0.20068610630760866 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5145631067961165 + }, + "prompt_2": { + "accuracy": 0.5242718446601942 + }, + "prompt_3": { + "accuracy": 0.5533980582524272 + }, + "prompt_4": { + "accuracy": 0.5242718446601942 + }, + "prompt_5": { + "accuracy": 0.5339805825242718 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.37142857142857144 + }, + "prompt_2": { + "accuracy": 0.37142857142857144 + }, + "prompt_3": { + "accuracy": 0.3619047619047619 + }, + "prompt_4": { + "accuracy": 0.3523809523809524 + }, + "prompt_5": { + "accuracy": 0.38095238095238093 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4672897196261682 + }, + "prompt_2": { + "accuracy": 0.4485981308411215 + }, + "prompt_3": { + "accuracy": 0.48598130841121495 + }, + "prompt_4": { + "accuracy": 0.4766355140186916 + }, + "prompt_5": { + "accuracy": 0.4205607476635514 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.6, + "history": 0.4666666666666667, + "literature": 0.4, + "politics": 0.4, + "culture": 0.7, + "film": 0.4, + "law": 0.1, + "geography": 0.4 + } + }, + "prompt_2": { + "accuracy": 0.41, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.7, + "history": 0.4, + "literature": 0.4, + "politics": 0.2, + "culture": 0.7, + "film": 0.6, + "law": 0.2, + "geography": 0.4 + } + }, + "prompt_3": { + "accuracy": 0.44, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.5, + "history": 0.5333333333333333, + "literature": 0.4, + "politics": 0.3, + "culture": 0.7, + "film": 0.4, + "law": 0.3, + "geography": 0.5 + } + }, + "prompt_4": { + "accuracy": 0.4, + "category_acc": { + "brand": 0.2, + "demographics": 0.0, + "biology": 0.6, + "history": 0.3333333333333333, + "literature": 0.3, + "politics": 0.3, + "culture": 0.6, + "film": 0.5, + "law": 0.6, + "geography": 0.4 + } + }, + "prompt_5": { + "accuracy": 0.38, + "category_acc": { + "brand": 0.4, + "demographics": 0.2, + "biology": 0.5, + "history": 0.4666666666666667, + "literature": 0.3, + "politics": 0.3, + "culture": 0.6, + "film": 0.4, + "law": 0.1, + "geography": 0.4 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.15501284774251278 + }, + "prompt_2": { + "bleu_score": 0.15198585128851483 + }, + "prompt_3": { + "bleu_score": 0.15081158203998288 + }, + "prompt_4": { + "bleu_score": 0.15210517965664572 + }, + "prompt_5": { + "bleu_score": 0.12416601209312363 + } }, "indommlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4008278256225382, + "category_acc": { + "History": 0.37349397590361444, + "Geography": 0.37755102040816324, + "Lampungic": 0.272108843537415, + "Social science": 0.6010016694490818, + "Balinese": 0.3227176220806794, + "Makassarese": 0.3010752688172043, + "Banjarese": 0.3819444444444444, + "Chemistry": 0.26277372262773724, + "Biology": 0.3526627218934911, + "Science": 0.4953560371517028, + "Christian religion": 0.4427860696517413, + "Art": 0.5058236272878536, + "Islam religion": 0.42816500711237554, + "Hindu religion": 0.41333333333333333, + "Madurese": 0.288135593220339, + "Sport": 0.4594594594594595, + "Indonesian language": 0.4383561643835616, + "Physics": 0.37575757575757573, + "Minangkabau culture": 0.32160804020100503, + "Dayak language": 0.29357798165137616, + "Sociology": 0.41935483870967744, + "Economy": 0.3524590163934426, + "Sundanese": 0.3509075194468453, + "Javanese": 0.30544354838709675, + "Civic education": 0.463519313304721 + } + }, + "prompt_2": { + "accuracy": 0.3907470458642099, + "category_acc": { + "History": 0.3232931726907631, + "Geography": 0.3979591836734694, + "Lampungic": 0.29931972789115646, + "Social science": 0.5742904841402338, + "Balinese": 0.35668789808917195, + "Makassarese": 0.26344086021505375, + "Banjarese": 0.2638888888888889, + "Chemistry": 0.23065693430656933, + "Biology": 0.36923076923076925, + "Science": 0.47678018575851394, + "Christian religion": 0.39800995024875624, + "Art": 0.47753743760399336, + "Islam religion": 0.42105263157894735, + "Hindu religion": 0.44, + "Madurese": 0.25084745762711863, + "Sport": 0.4594594594594595, + "Indonesian language": 0.43711083437110837, + "Physics": 0.3595959595959596, + "Minangkabau culture": 0.34673366834170855, + "Dayak language": 0.25688073394495414, + "Sociology": 0.4274193548387097, + "Economy": 0.3790983606557377, + "Sundanese": 0.331028522039758, + "Javanese": 0.26713709677419356, + "Civic education": 0.4678111587982833 + } + }, + "prompt_3": { + "accuracy": 0.4028973896788838, + "category_acc": { + "History": 0.3755020080321285, + "Geography": 0.3836734693877551, + "Lampungic": 0.3469387755102041, + "Social science": 0.5976627712854758, + "Balinese": 0.3503184713375796, + "Makassarese": 0.3172043010752688, + "Banjarese": 0.3541666666666667, + "Chemistry": 0.2, + "Biology": 0.37514792899408284, + "Science": 0.47678018575851394, + "Christian religion": 0.4129353233830846, + "Art": 0.4908485856905158, + "Islam religion": 0.43812233285917496, + "Hindu religion": 0.44, + "Madurese": 0.3016949152542373, + "Sport": 0.46621621621621623, + "Indonesian language": 0.45080946450809467, + "Physics": 0.34545454545454546, + "Minangkabau culture": 0.36180904522613067, + "Dayak language": 0.27522935779816515, + "Sociology": 0.4254032258064516, + "Economy": 0.35655737704918034, + "Sundanese": 0.3647363872082973, + "Javanese": 0.3215725806451613, + "Civic education": 0.4334763948497854 + } + }, + "prompt_4": { + "accuracy": 0.40016022431403964, + "category_acc": { + "History": 0.3534136546184739, + "Geography": 0.37142857142857144, + "Lampungic": 0.3401360544217687, + "Social science": 0.6176961602671118, + "Balinese": 0.35668789808917195, + "Makassarese": 0.3172043010752688, + "Banjarese": 0.3680555555555556, + "Chemistry": 0.2291970802919708, + "Biology": 0.3609467455621302, + "Science": 0.48813209494324045, + "Christian religion": 0.43283582089552236, + "Art": 0.5041597337770383, + "Islam religion": 0.4366998577524893, + "Hindu religion": 0.44, + "Madurese": 0.28135593220338984, + "Sport": 0.4527027027027027, + "Indonesian language": 0.44333748443337484, + "Physics": 0.33131313131313134, + "Minangkabau culture": 0.3417085427135678, + "Dayak language": 0.27522935779816515, + "Sociology": 0.42338709677419356, + "Economy": 0.36065573770491804, + "Sundanese": 0.34917891097666376, + "Javanese": 0.3074596774193548, + "Civic education": 0.43919885550786836 + } + }, + "prompt_5": { + "accuracy": 0.3991588223512918, + "category_acc": { + "History": 0.37349397590361444, + "Geography": 0.3836734693877551, + "Lampungic": 0.3197278911564626, + "Social science": 0.6060100166944908, + "Balinese": 0.36942675159235666, + "Makassarese": 0.3064516129032258, + "Banjarese": 0.4097222222222222, + "Chemistry": 0.2364963503649635, + "Biology": 0.36923076923076925, + "Science": 0.4932920536635707, + "Christian religion": 0.4079601990049751, + "Art": 0.47587354409317806, + "Islam religion": 0.4366998577524893, + "Hindu religion": 0.44666666666666666, + "Madurese": 0.31186440677966104, + "Sport": 0.43243243243243246, + "Indonesian language": 0.4336861768368618, + "Physics": 0.3434343434343434, + "Minangkabau culture": 0.35678391959798994, + "Dayak language": 0.26605504587155965, + "Sociology": 0.39919354838709675, + "Economy": 0.3463114754098361, + "Sundanese": 0.35350043215211757, + "Javanese": 0.3074596774193548, + "Civic education": 0.4449213161659514 + } + } }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.12538511120752693 + }, + "prompt_2": { + "bleu_score": 0.16192990491157527 + }, + "prompt_3": { + "bleu_score": 0.15766460992501238 + }, + "prompt_4": { + "bleu_score": 0.14443671193775645 + }, + "prompt_5": { + "bleu_score": 0.14643957455420825 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.10643823932021736 + }, + "prompt_2": { + "bleu_score": 0.13501374407353342 + }, + "prompt_3": { + "bleu_score": 0.1323068208710863 + }, + "prompt_4": { + "bleu_score": 0.12773445658677324 + }, + "prompt_5": { + "bleu_score": 0.12708312423895515 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.10376840053827412 + }, + "prompt_2": { + "bleu_score": 0.11120920584140642 + }, + "prompt_3": { + "bleu_score": 0.10930912476728209 + }, + "prompt_4": { + "bleu_score": 0.10573670220898011 + }, + "prompt_5": { + "bleu_score": 0.11153238338109045 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.10950210929702053 + }, + "prompt_2": { + "bleu_score": 0.15023363197616493 + }, + "prompt_3": { + "bleu_score": 0.1393033398727237 + }, + "prompt_4": { + "bleu_score": 0.13261968387717776 + }, + "prompt_5": { + "bleu_score": 0.12920092254734256 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4515752625437573 + }, + "prompt_2": { + "accuracy": 0.3652275379229872 + }, + "prompt_3": { + "accuracy": 0.4574095682613769 + }, + "prompt_4": { + "accuracy": 0.47141190198366395 + }, + "prompt_5": { + "accuracy": 0.4632438739789965 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4366821594565606, + "category_acc": { + "high_school_european_history": 0.20121951219512196, + "business_ethics": 0.48484848484848486, + "clinical_knowledge": 0.4734848484848485, + "medical_genetics": 0.48484848484848486, + "high_school_us_history": 0.22660098522167488, + "high_school_physics": 0.29333333333333333, + "high_school_world_history": 0.2966101694915254, + "virology": 0.3878787878787879, + "high_school_microeconomics": 0.459915611814346, + "econometrics": 0.35398230088495575, + "college_computer_science": 0.32323232323232326, + "high_school_biology": 0.5598705501618123, + "abstract_algebra": 0.16161616161616163, + "professional_accounting": 0.3807829181494662, + "philosophy": 0.5451612903225806, + "professional_medicine": 0.22878228782287824, + "nutrition": 0.4557377049180328, + "global_facts": 0.31313131313131315, + "machine_learning": 0.36036036036036034, + "security_studies": 0.4385245901639344, + "public_relations": 0.5963302752293578, + "professional_psychology": 0.4369885433715221, + "prehistory": 0.5046439628482973, + "anatomy": 0.47761194029850745, + "human_sexuality": 0.5230769230769231, + "college_medicine": 0.38372093023255816, + "high_school_government_and_politics": 0.5989583333333334, + "college_chemistry": 0.36363636363636365, + "logical_fallacies": 0.5493827160493827, + "high_school_geography": 0.6395939086294417, + "elementary_mathematics": 0.2440318302387268, + "human_aging": 0.5900900900900901, + "college_mathematics": 0.20202020202020202, + "high_school_psychology": 0.6727941176470589, + "formal_logic": 0.336, + "high_school_statistics": 0.2651162790697674, + "international_law": 0.6, + "high_school_mathematics": 0.22676579925650558, + "high_school_computer_science": 0.42424242424242425, + "conceptual_physics": 0.44017094017094016, + "miscellaneous": 0.6355498721227621, + "high_school_chemistry": 0.3465346534653465, + "marketing": 0.7553648068669528, + "professional_law": 0.34833659491193736, + "management": 0.6176470588235294, + "college_physics": 0.27722772277227725, + "jurisprudence": 0.6074766355140186, + "world_religions": 0.6411764705882353, + "sociology": 0.54, + "us_foreign_policy": 0.5858585858585859, + "high_school_macroeconomics": 0.4652956298200514, + "computer_security": 0.5757575757575758, + "moral_scenarios": 0.2606263982102908, + "moral_disputes": 0.4927536231884058, + "electrical_engineering": 0.3819444444444444, + "astronomy": 0.47019867549668876, + "college_biology": 0.5874125874125874 + } + }, + "prompt_2": { + "accuracy": 0.3429388630675724, + "category_acc": { + "high_school_european_history": 0.24390243902439024, + "business_ethics": 0.5454545454545454, + "clinical_knowledge": 0.3446969696969697, + "medical_genetics": 0.3939393939393939, + "high_school_us_history": 0.27586206896551724, + "high_school_physics": 0.2733333333333333, + "high_school_world_history": 0.2584745762711864, + "virology": 0.2727272727272727, + "high_school_microeconomics": 0.4177215189873418, + "econometrics": 0.3185840707964602, + "college_computer_science": 0.32323232323232326, + "high_school_biology": 0.4110032362459547, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.35587188612099646, + "philosophy": 0.31290322580645163, + "professional_medicine": 0.24723247232472326, + "nutrition": 0.3442622950819672, + "global_facts": 0.30303030303030304, + "machine_learning": 0.32432432432432434, + "security_studies": 0.45491803278688525, + "public_relations": 0.43119266055045874, + "professional_psychology": 0.36661211129296234, + "prehistory": 0.33126934984520123, + "anatomy": 0.23880597014925373, + "human_sexuality": 0.3076923076923077, + "college_medicine": 0.29651162790697677, + "high_school_government_and_politics": 0.4635416666666667, + "college_chemistry": 0.23232323232323232, + "logical_fallacies": 0.4691358024691358, + "high_school_geography": 0.4619289340101523, + "elementary_mathematics": 0.20424403183023873, + "human_aging": 0.31981981981981983, + "college_mathematics": 0.29292929292929293, + "high_school_psychology": 0.4834558823529412, + "formal_logic": 0.344, + "high_school_statistics": 0.31627906976744186, + "international_law": 0.5416666666666666, + "high_school_mathematics": 0.24535315985130113, + "high_school_computer_science": 0.41414141414141414, + "conceptual_physics": 0.26495726495726496, + "miscellaneous": 0.3145780051150895, + "high_school_chemistry": 0.297029702970297, + "marketing": 0.6223175965665236, + "professional_law": 0.3359425962165688, + "management": 0.38235294117647056, + "college_physics": 0.24752475247524752, + "jurisprudence": 0.4953271028037383, + "world_religions": 0.35294117647058826, + "sociology": 0.485, + "us_foreign_policy": 0.43434343434343436, + "high_school_macroeconomics": 0.3393316195372751, + "computer_security": 0.494949494949495, + "moral_scenarios": 0.22483221476510068, + "moral_disputes": 0.3884057971014493, + "electrical_engineering": 0.2916666666666667, + "astronomy": 0.33774834437086093, + "college_biology": 0.3356643356643357 + } + }, + "prompt_3": { + "accuracy": 0.4423310690025027, + "category_acc": { + "high_school_european_history": 0.22560975609756098, + "business_ethics": 0.5353535353535354, + "clinical_knowledge": 0.48484848484848486, + "medical_genetics": 0.46464646464646464, + "high_school_us_history": 0.2561576354679803, + "high_school_physics": 0.26666666666666666, + "high_school_world_history": 0.2584745762711864, + "virology": 0.3878787878787879, + "high_school_microeconomics": 0.48523206751054854, + "econometrics": 0.30973451327433627, + "college_computer_science": 0.3434343434343434, + "high_school_biology": 0.5760517799352751, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.33807829181494664, + "philosophy": 0.5548387096774193, + "professional_medicine": 0.25092250922509224, + "nutrition": 0.4459016393442623, + "global_facts": 0.23232323232323232, + "machine_learning": 0.40540540540540543, + "security_studies": 0.46311475409836067, + "public_relations": 0.6697247706422018, + "professional_psychology": 0.44844517184942717, + "prehistory": 0.5108359133126935, + "anatomy": 0.47761194029850745, + "human_sexuality": 0.5153846153846153, + "college_medicine": 0.38372093023255816, + "high_school_government_and_politics": 0.6041666666666666, + "college_chemistry": 0.3838383838383838, + "logical_fallacies": 0.5493827160493827, + "high_school_geography": 0.6446700507614214, + "elementary_mathematics": 0.22811671087533156, + "human_aging": 0.6486486486486487, + "college_mathematics": 0.24242424242424243, + "high_school_psychology": 0.6727941176470589, + "formal_logic": 0.352, + "high_school_statistics": 0.26976744186046514, + "international_law": 0.6083333333333333, + "high_school_mathematics": 0.24907063197026022, + "high_school_computer_science": 0.42424242424242425, + "conceptual_physics": 0.45726495726495725, + "miscellaneous": 0.6662404092071611, + "high_school_chemistry": 0.3465346534653465, + "marketing": 0.7725321888412017, + "professional_law": 0.33268101761252444, + "management": 0.6764705882352942, + "college_physics": 0.32673267326732675, + "jurisprudence": 0.6448598130841121, + "world_religions": 0.6882352941176471, + "sociology": 0.53, + "us_foreign_policy": 0.6262626262626263, + "high_school_macroeconomics": 0.4601542416452442, + "computer_security": 0.5656565656565656, + "moral_scenarios": 0.24496644295302014, + "moral_disputes": 0.527536231884058, + "electrical_engineering": 0.4027777777777778, + "astronomy": 0.40397350993377484, + "college_biology": 0.5944055944055944 + } + }, + "prompt_4": { + "accuracy": 0.44869503038970326, + "category_acc": { + "high_school_european_history": 0.27439024390243905, + "business_ethics": 0.5555555555555556, + "clinical_knowledge": 0.4659090909090909, + "medical_genetics": 0.5858585858585859, + "high_school_us_history": 0.30049261083743845, + "high_school_physics": 0.35333333333333333, + "high_school_world_history": 0.288135593220339, + "virology": 0.38181818181818183, + "high_school_microeconomics": 0.4767932489451477, + "econometrics": 0.30973451327433627, + "college_computer_science": 0.41414141414141414, + "high_school_biology": 0.5857605177993528, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.3665480427046263, + "philosophy": 0.5580645161290323, + "professional_medicine": 0.2656826568265683, + "nutrition": 0.4491803278688525, + "global_facts": 0.36363636363636365, + "machine_learning": 0.36936936936936937, + "security_studies": 0.45081967213114754, + "public_relations": 0.6788990825688074, + "professional_psychology": 0.45662847790507366, + "prehistory": 0.5232198142414861, + "anatomy": 0.47761194029850745, + "human_sexuality": 0.5692307692307692, + "college_medicine": 0.4127906976744186, + "high_school_government_and_politics": 0.6041666666666666, + "college_chemistry": 0.3434343434343434, + "logical_fallacies": 0.5493827160493827, + "high_school_geography": 0.6598984771573604, + "elementary_mathematics": 0.27586206896551724, + "human_aging": 0.5990990990990991, + "college_mathematics": 0.1919191919191919, + "high_school_psychology": 0.6746323529411765, + "formal_logic": 0.328, + "high_school_statistics": 0.31627906976744186, + "international_law": 0.5583333333333333, + "high_school_mathematics": 0.23048327137546468, + "high_school_computer_science": 0.4444444444444444, + "conceptual_physics": 0.43162393162393164, + "miscellaneous": 0.6521739130434783, + "high_school_chemistry": 0.3415841584158416, + "marketing": 0.759656652360515, + "professional_law": 0.3385518590998043, + "management": 0.6764705882352942, + "college_physics": 0.2871287128712871, + "jurisprudence": 0.6542056074766355, + "world_religions": 0.711764705882353, + "sociology": 0.58, + "us_foreign_policy": 0.6767676767676768, + "high_school_macroeconomics": 0.4473007712082262, + "computer_security": 0.5555555555555556, + "moral_scenarios": 0.23154362416107382, + "moral_disputes": 0.5217391304347826, + "electrical_engineering": 0.4513888888888889, + "astronomy": 0.4105960264900662, + "college_biology": 0.5804195804195804 + } + }, + "prompt_5": { + "accuracy": 0.4449052556310332, + "category_acc": { + "high_school_european_history": 0.29878048780487804, + "business_ethics": 0.5252525252525253, + "clinical_knowledge": 0.4810606060606061, + "medical_genetics": 0.5252525252525253, + "high_school_us_history": 0.24630541871921183, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.23728813559322035, + "virology": 0.37575757575757573, + "high_school_microeconomics": 0.45147679324894513, + "econometrics": 0.3185840707964602, + "college_computer_science": 0.37373737373737376, + "high_school_biology": 0.5566343042071198, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.38434163701067614, + "philosophy": 0.5580645161290323, + "professional_medicine": 0.2693726937269373, + "nutrition": 0.4360655737704918, + "global_facts": 0.2828282828282828, + "machine_learning": 0.3783783783783784, + "security_studies": 0.45081967213114754, + "public_relations": 0.6605504587155964, + "professional_psychology": 0.4533551554828151, + "prehistory": 0.5015479876160991, + "anatomy": 0.4701492537313433, + "human_sexuality": 0.5461538461538461, + "college_medicine": 0.4127906976744186, + "high_school_government_and_politics": 0.5989583333333334, + "college_chemistry": 0.41414141414141414, + "logical_fallacies": 0.5679012345679012, + "high_school_geography": 0.6446700507614214, + "elementary_mathematics": 0.2519893899204244, + "human_aging": 0.5900900900900901, + "college_mathematics": 0.18181818181818182, + "high_school_psychology": 0.6617647058823529, + "formal_logic": 0.336, + "high_school_statistics": 0.29767441860465116, + "international_law": 0.6, + "high_school_mathematics": 0.20817843866171004, + "high_school_computer_science": 0.45454545454545453, + "conceptual_physics": 0.43162393162393164, + "miscellaneous": 0.6317135549872123, + "high_school_chemistry": 0.33663366336633666, + "marketing": 0.7725321888412017, + "professional_law": 0.3515981735159817, + "management": 0.6568627450980392, + "college_physics": 0.33663366336633666, + "jurisprudence": 0.6542056074766355, + "world_religions": 0.6941176470588235, + "sociology": 0.57, + "us_foreign_policy": 0.6363636363636364, + "high_school_macroeconomics": 0.43444730077120824, + "computer_security": 0.5555555555555556, + "moral_scenarios": 0.2807606263982103, + "moral_disputes": 0.518840579710145, + "electrical_engineering": 0.3958333333333333, + "astronomy": 0.4900662251655629, + "college_biology": 0.5804195804195804 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.37444279346210996 + }, + "prompt_2": { + "accuracy": 0.3655274888558692 + }, + "prompt_3": { + "accuracy": 0.3424962852897474 + }, + "prompt_4": { + "accuracy": 0.3551263001485884 + }, + "prompt_5": { + "accuracy": 0.33580980683506684 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3368617683686177, + "category_acc": { + "computer_network": 0.20833333333333334, + "operating_system": 0.5416666666666666, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.38095238095238093, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.06896551724137931, + "advanced_mathematics": 0.125, + "probability_and_statistics": 0.08695652173913043, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.4166666666666667, + "high_school_chemistry": 0.375, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.5384615384615384, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.31666666666666665, + "business_administration": 0.2894736842105263, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.5588235294117647, + "teacher_qualification": 0.4897959183673469, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.14285714285714285, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.2962962962962963, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.25, + "art_studies": 0.2631578947368421, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.25, + "high_school_chinese": 0.16666666666666666, + "high_school_history": 0.32, + "middle_school_history": 0.2962962962962963, + "civil_servant": 0.23076923076923078, + "sports_science": 0.375, + "plant_protection": 0.5555555555555556, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.49019607843137253, + "accountant": 0.2962962962962963, + "fire_engineer": 0.3055555555555556, + "environmental_impact_assessment_engineer": 0.2777777777777778, + "tax_accountant": 0.3148148148148148, + "physician": 0.3888888888888889 + } + }, + "prompt_2": { + "accuracy": 0.35990037359900373, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.38095238095238093, + "college_physics": 0.16666666666666666, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.5769230769230769, + "middle_school_physics": 0.4583333333333333, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.35, + "business_administration": 0.3157894736842105, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.3793103448275862, + "education_science": 0.5294117647058824, + "teacher_qualification": 0.5306122448979592, + "high_school_politics": 0.375, + "high_school_geography": 0.5416666666666666, + "middle_school_politics": 0.46153846153846156, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.17857142857142858, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.3333333333333333, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.25, + "high_school_history": 0.28, + "middle_school_history": 0.2222222222222222, + "civil_servant": 0.3076923076923077, + "sports_science": 0.4583333333333333, + "plant_protection": 0.6296296296296297, + "basic_medicine": 0.16666666666666666, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.3333333333333333, + "accountant": 0.42592592592592593, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.4444444444444444, + "tax_accountant": 0.48148148148148145, + "physician": 0.4444444444444444 + } + }, + "prompt_3": { + "accuracy": 0.33872976338729766, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.5416666666666666, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.2619047619047619, + "college_physics": 0.25, + "college_chemistry": 0.13793103448275862, + "advanced_mathematics": 0.25, + "probability_and_statistics": 0.13043478260869565, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.35714285714285715, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.375, + "high_school_chemistry": 0.25, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.5769230769230769, + "middle_school_physics": 0.4583333333333333, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.35, + "business_administration": 0.34210526315789475, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.4482758620689655, + "education_science": 0.5588235294117647, + "teacher_qualification": 0.40816326530612246, + "high_school_politics": 0.3333333333333333, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.4230769230769231, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.4583333333333333, + "logic": 0.37037037037037035, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.25, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.25, + "high_school_chinese": 0.16666666666666666, + "high_school_history": 0.24, + "middle_school_history": 0.14814814814814814, + "civil_servant": 0.25, + "sports_science": 0.375, + "plant_protection": 0.5555555555555556, + "basic_medicine": 0.25, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.45098039215686275, + "accountant": 0.35185185185185186, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.2962962962962963, + "physician": 0.3888888888888889 + } + }, + "prompt_4": { + "accuracy": 0.3455790784557908, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.4523809523809524, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.21428571428571427, + "metrology_engineer": 0.5172413793103449, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.375, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.08333333333333333, + "middle_school_mathematics": 0.125, + "middle_school_biology": 0.5384615384615384, + "middle_school_physics": 0.4166666666666667, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.4166666666666667, + "business_administration": 0.3684210526315789, + "marxism": 0.375, + "mao_zedong_thought": 0.5172413793103449, + "education_science": 0.5588235294117647, + "teacher_qualification": 0.42857142857142855, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.375, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.35294117647058826, + "modern_chinese_history": 0.17857142857142858, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.4074074074074074, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.24, + "middle_school_history": 0.14814814814814814, + "civil_servant": 0.36538461538461536, + "sports_science": 0.375, + "plant_protection": 0.5555555555555556, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.4117647058823529, + "accountant": 0.2777777777777778, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.3888888888888889, + "physician": 0.4444444444444444 + } + }, + "prompt_5": { + "accuracy": 0.3306351183063512, + "category_acc": { + "computer_network": 0.25, + "operating_system": 0.5, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.2619047619047619, + "college_physics": 0.25, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.4827586206896552, + "high_school_mathematics": 0.34782608695652173, + "high_school_physics": 0.4166666666666667, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.125, + "middle_school_mathematics": 0.08333333333333333, + "middle_school_biology": 0.5, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.5, + "college_economics": 0.31666666666666665, + "business_administration": 0.21052631578947367, + "marxism": 0.375, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.5, + "teacher_qualification": 0.4489795918367347, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.2916666666666667, + "middle_school_politics": 0.4230769230769231, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.2857142857142857, + "ideological_and_moral_cultivation": 0.4583333333333333, + "logic": 0.37037037037037035, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.2894736842105263, + "professional_tour_guide": 0.2647058823529412, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.16666666666666666, + "high_school_history": 0.2, + "middle_school_history": 0.14814814814814814, + "civil_servant": 0.25, + "sports_science": 0.3333333333333333, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.24074074074074073, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.3888888888888889, + "tax_accountant": 0.3888888888888889, + "physician": 0.42592592592592593 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4157706093189964 + }, + "prompt_2": { + "accuracy": 0.41935483870967744 + }, + "prompt_3": { + "accuracy": 0.4050179211469534 + }, + "prompt_4": { + "accuracy": 0.3906810035842294 + }, + "prompt_5": { + "accuracy": 0.36200716845878134 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3699706441029183, + "category_acc": { + "agronomy": 0.40236686390532544, + "anatomy": 0.2972972972972973, + "ancient_chinese": 0.2926829268292683, + "arts": 0.4, + "astronomy": 0.3575757575757576, + "business_ethics": 0.45933014354066987, + "chinese_civil_service_exam": 0.325, + "chinese_driving_rule": 0.5038167938931297, + "chinese_food_culture": 0.39705882352941174, + "chinese_foreign_policy": 0.38317757009345793, + "chinese_history": 0.35294117647058826, + "chinese_literature": 0.2647058823529412, + "chinese_teacher_qualification": 0.43575418994413406, + "clinical_knowledge": 0.25316455696202533, + "college_actuarial_science": 0.19811320754716982, + "college_education": 0.5327102803738317, + "college_engineering_hydrology": 0.36792452830188677, + "college_law": 0.25, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.36792452830188677, + "college_medicine": 0.28205128205128205, + "computer_science": 0.3627450980392157, + "computer_security": 0.49707602339181284, + "conceptual_physics": 0.36054421768707484, + "construction_project_management": 0.35251798561151076, + "economics": 0.4339622641509434, + "education": 0.44785276073619634, + "electrical_engineering": 0.37790697674418605, + "elementary_chinese": 0.2777777777777778, + "elementary_commonsense": 0.35353535353535354, + "elementary_information_and_technology": 0.6218487394957983, + "elementary_mathematics": 0.2782608695652174, + "ethnology": 0.3037037037037037, + "food_science": 0.42657342657342656, + "genetics": 0.2784090909090909, + "global_facts": 0.35570469798657717, + "high_school_biology": 0.33136094674556216, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.2627118644067797, + "high_school_mathematics": 0.2621951219512195, + "high_school_physics": 0.32727272727272727, + "high_school_politics": 0.3146853146853147, + "human_sexuality": 0.36507936507936506, + "international_law": 0.34594594594594597, + "journalism": 0.42441860465116277, + "jurisprudence": 0.35036496350364965, + "legal_and_moral_basis": 0.616822429906542, + "logical": 0.3902439024390244, + "machine_learning": 0.3114754098360656, + "management": 0.4714285714285714, + "marketing": 0.5333333333333333, + "marxist_theory": 0.4021164021164021, + "modern_chinese": 0.23275862068965517, + "nutrition": 0.35172413793103446, + "philosophy": 0.4857142857142857, + "professional_accounting": 0.48, + "professional_law": 0.2985781990521327, + "professional_medicine": 0.2925531914893617, + "professional_psychology": 0.41810344827586204, + "public_relations": 0.41954022988505746, + "security_study": 0.37777777777777777, + "sociology": 0.42920353982300885, + "sports_science": 0.4303030303030303, + "traditional_chinese_medicine": 0.2756756756756757, + "virology": 0.42011834319526625, + "world_history": 0.32298136645962733, + "world_religions": 0.41875 + } + }, + "prompt_2": { + "accuracy": 0.36375410119150403, + "category_acc": { + "agronomy": 0.3609467455621302, + "anatomy": 0.25, + "ancient_chinese": 0.2804878048780488, + "arts": 0.40625, + "astronomy": 0.2909090909090909, + "business_ethics": 0.47368421052631576, + "chinese_civil_service_exam": 0.2125, + "chinese_driving_rule": 0.4732824427480916, + "chinese_food_culture": 0.3382352941176471, + "chinese_foreign_policy": 0.2897196261682243, + "chinese_history": 0.3746130030959752, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.4245810055865922, + "clinical_knowledge": 0.2489451476793249, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.5420560747663551, + "college_engineering_hydrology": 0.4056603773584906, + "college_law": 0.28703703703703703, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.2641509433962264, + "college_medicine": 0.30036630036630035, + "computer_science": 0.37745098039215685, + "computer_security": 0.4619883040935672, + "conceptual_physics": 0.3333333333333333, + "construction_project_management": 0.33093525179856115, + "economics": 0.4528301886792453, + "education": 0.4294478527607362, + "electrical_engineering": 0.4186046511627907, + "elementary_chinese": 0.25396825396825395, + "elementary_commonsense": 0.3434343434343434, + "elementary_information_and_technology": 0.6134453781512605, + "elementary_mathematics": 0.2826086956521739, + "ethnology": 0.31851851851851853, + "food_science": 0.4125874125874126, + "genetics": 0.2840909090909091, + "global_facts": 0.33557046979865773, + "high_school_biology": 0.3668639053254438, + "high_school_chemistry": 0.3106060606060606, + "high_school_geography": 0.3135593220338983, + "high_school_mathematics": 0.27439024390243905, + "high_school_physics": 0.2818181818181818, + "high_school_politics": 0.23776223776223776, + "human_sexuality": 0.4523809523809524, + "international_law": 0.35135135135135137, + "journalism": 0.45930232558139533, + "jurisprudence": 0.35036496350364965, + "legal_and_moral_basis": 0.6448598130841121, + "logical": 0.3983739837398374, + "machine_learning": 0.3442622950819672, + "management": 0.4523809523809524, + "marketing": 0.4888888888888889, + "marxist_theory": 0.43915343915343913, + "modern_chinese": 0.21551724137931033, + "nutrition": 0.2896551724137931, + "philosophy": 0.4857142857142857, + "professional_accounting": 0.46285714285714286, + "professional_law": 0.2796208530805687, + "professional_medicine": 0.26063829787234044, + "professional_psychology": 0.41379310344827586, + "public_relations": 0.41954022988505746, + "security_study": 0.37777777777777777, + "sociology": 0.4026548672566372, + "sports_science": 0.3878787878787879, + "traditional_chinese_medicine": 0.2972972972972973, + "virology": 0.47928994082840237, + "world_history": 0.30434782608695654, + "world_religions": 0.41875 + } + }, + "prompt_3": { + "accuracy": 0.36763944051113795, + "category_acc": { + "agronomy": 0.3609467455621302, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.2926829268292683, + "arts": 0.4125, + "astronomy": 0.23030303030303031, + "business_ethics": 0.44976076555023925, + "chinese_civil_service_exam": 0.23125, + "chinese_driving_rule": 0.4732824427480916, + "chinese_food_culture": 0.34558823529411764, + "chinese_foreign_policy": 0.2897196261682243, + "chinese_history": 0.34674922600619196, + "chinese_literature": 0.25, + "chinese_teacher_qualification": 0.4134078212290503, + "clinical_knowledge": 0.2742616033755274, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.5514018691588785, + "college_engineering_hydrology": 0.37735849056603776, + "college_law": 0.28703703703703703, + "college_mathematics": 0.20952380952380953, + "college_medical_statistics": 0.3867924528301887, + "college_medicine": 0.2783882783882784, + "computer_science": 0.39215686274509803, + "computer_security": 0.47953216374269003, + "conceptual_physics": 0.35374149659863946, + "construction_project_management": 0.34532374100719426, + "economics": 0.4528301886792453, + "education": 0.4171779141104294, + "electrical_engineering": 0.37790697674418605, + "elementary_chinese": 0.30158730158730157, + "elementary_commonsense": 0.35353535353535354, + "elementary_information_and_technology": 0.5966386554621849, + "elementary_mathematics": 0.2565217391304348, + "ethnology": 0.3333333333333333, + "food_science": 0.4125874125874126, + "genetics": 0.26704545454545453, + "global_facts": 0.3691275167785235, + "high_school_biology": 0.3431952662721893, + "high_school_chemistry": 0.29545454545454547, + "high_school_geography": 0.3474576271186441, + "high_school_mathematics": 0.25, + "high_school_physics": 0.39090909090909093, + "high_school_politics": 0.2517482517482518, + "human_sexuality": 0.373015873015873, + "international_law": 0.372972972972973, + "journalism": 0.42441860465116277, + "jurisprudence": 0.34306569343065696, + "legal_and_moral_basis": 0.6495327102803738, + "logical": 0.43902439024390244, + "machine_learning": 0.3360655737704918, + "management": 0.45714285714285713, + "marketing": 0.48333333333333334, + "marxist_theory": 0.4656084656084656, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.31724137931034485, + "philosophy": 0.5047619047619047, + "professional_accounting": 0.4685714285714286, + "professional_law": 0.3127962085308057, + "professional_medicine": 0.2872340425531915, + "professional_psychology": 0.4396551724137931, + "public_relations": 0.41379310344827586, + "security_study": 0.37037037037037035, + "sociology": 0.4247787610619469, + "sports_science": 0.3878787878787879, + "traditional_chinese_medicine": 0.2972972972972973, + "virology": 0.48520710059171596, + "world_history": 0.3167701863354037, + "world_religions": 0.41875 + } + }, + "prompt_4": { + "accuracy": 0.36107753410464516, + "category_acc": { + "agronomy": 0.39644970414201186, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.2865853658536585, + "arts": 0.4125, + "astronomy": 0.24242424242424243, + "business_ethics": 0.430622009569378, + "chinese_civil_service_exam": 0.2875, + "chinese_driving_rule": 0.48854961832061067, + "chinese_food_culture": 0.36764705882352944, + "chinese_foreign_policy": 0.308411214953271, + "chinese_history": 0.38699690402476783, + "chinese_literature": 0.27941176470588236, + "chinese_teacher_qualification": 0.4134078212290503, + "clinical_knowledge": 0.2616033755274262, + "college_actuarial_science": 0.16981132075471697, + "college_education": 0.4766355140186916, + "college_engineering_hydrology": 0.3867924528301887, + "college_law": 0.25, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.27358490566037735, + "college_medicine": 0.25274725274725274, + "computer_science": 0.39705882352941174, + "computer_security": 0.4619883040935672, + "conceptual_physics": 0.3401360544217687, + "construction_project_management": 0.34532374100719426, + "economics": 0.4339622641509434, + "education": 0.44171779141104295, + "electrical_engineering": 0.37790697674418605, + "elementary_chinese": 0.28174603174603174, + "elementary_commonsense": 0.35353535353535354, + "elementary_information_and_technology": 0.6428571428571429, + "elementary_mathematics": 0.23478260869565218, + "ethnology": 0.32592592592592595, + "food_science": 0.38461538461538464, + "genetics": 0.2897727272727273, + "global_facts": 0.348993288590604, + "high_school_biology": 0.3254437869822485, + "high_school_chemistry": 0.2878787878787879, + "high_school_geography": 0.3050847457627119, + "high_school_mathematics": 0.2682926829268293, + "high_school_physics": 0.33636363636363636, + "high_school_politics": 0.2727272727272727, + "human_sexuality": 0.36507936507936506, + "international_law": 0.3891891891891892, + "journalism": 0.4186046511627907, + "jurisprudence": 0.3309002433090024, + "legal_and_moral_basis": 0.6401869158878505, + "logical": 0.42276422764227645, + "machine_learning": 0.29508196721311475, + "management": 0.43333333333333335, + "marketing": 0.49444444444444446, + "marxist_theory": 0.41798941798941797, + "modern_chinese": 0.25862068965517243, + "nutrition": 0.36551724137931035, + "philosophy": 0.4666666666666667, + "professional_accounting": 0.45714285714285713, + "professional_law": 0.2890995260663507, + "professional_medicine": 0.26861702127659576, + "professional_psychology": 0.4051724137931034, + "public_relations": 0.40804597701149425, + "security_study": 0.3333333333333333, + "sociology": 0.415929203539823, + "sports_science": 0.42424242424242425, + "traditional_chinese_medicine": 0.2972972972972973, + "virology": 0.42011834319526625, + "world_history": 0.30434782608695654, + "world_religions": 0.4375 + } + }, + "prompt_5": { + "accuracy": 0.3274909342082542, + "category_acc": { + "agronomy": 0.34911242603550297, + "anatomy": 0.27702702702702703, + "ancient_chinese": 0.2926829268292683, + "arts": 0.34375, + "astronomy": 0.3151515151515151, + "business_ethics": 0.3444976076555024, + "chinese_civil_service_exam": 0.3, + "chinese_driving_rule": 0.35877862595419846, + "chinese_food_culture": 0.3088235294117647, + "chinese_foreign_policy": 0.2616822429906542, + "chinese_history": 0.3560371517027864, + "chinese_literature": 0.24019607843137256, + "chinese_teacher_qualification": 0.35195530726256985, + "clinical_knowledge": 0.2616033755274262, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.5233644859813084, + "college_engineering_hydrology": 0.3113207547169811, + "college_law": 0.25925925925925924, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.39622641509433965, + "college_medicine": 0.2600732600732601, + "computer_science": 0.3284313725490196, + "computer_security": 0.3567251461988304, + "conceptual_physics": 0.32653061224489793, + "construction_project_management": 0.2949640287769784, + "economics": 0.4025157232704403, + "education": 0.34355828220858897, + "electrical_engineering": 0.4011627906976744, + "elementary_chinese": 0.25793650793650796, + "elementary_commonsense": 0.29797979797979796, + "elementary_information_and_technology": 0.38235294117647056, + "elementary_mathematics": 0.30869565217391304, + "ethnology": 0.35555555555555557, + "food_science": 0.38461538461538464, + "genetics": 0.2727272727272727, + "global_facts": 0.2953020134228188, + "high_school_biology": 0.22485207100591717, + "high_school_chemistry": 0.29545454545454547, + "high_school_geography": 0.288135593220339, + "high_school_mathematics": 0.17073170731707318, + "high_school_physics": 0.33636363636363636, + "high_school_politics": 0.2517482517482518, + "human_sexuality": 0.3492063492063492, + "international_law": 0.34054054054054056, + "journalism": 0.4011627906976744, + "jurisprudence": 0.291970802919708, + "legal_and_moral_basis": 0.5233644859813084, + "logical": 0.34959349593495936, + "machine_learning": 0.3442622950819672, + "management": 0.3523809523809524, + "marketing": 0.4166666666666667, + "marxist_theory": 0.3862433862433862, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.2896551724137931, + "philosophy": 0.45714285714285713, + "professional_accounting": 0.3485714285714286, + "professional_law": 0.2843601895734597, + "professional_medicine": 0.2393617021276596, + "professional_psychology": 0.4051724137931034, + "public_relations": 0.3850574712643678, + "security_study": 0.362962962962963, + "sociology": 0.3672566371681416, + "sports_science": 0.3515151515151515, + "traditional_chinese_medicine": 0.2594594594594595, + "virology": 0.4260355029585799, + "world_history": 0.32298136645962733, + "world_religions": 0.33125 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.24242424242424243 + }, + "prompt_2": { + "accuracy": 0.24242424242424243 + }, + "prompt_3": { + "accuracy": 0.24242424242424243 + }, + "prompt_4": { + "accuracy": 0.18181818181818182 + }, + "prompt_5": { + "accuracy": 0.2727272727272727 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.45 + }, + "prompt_2": { + "accuracy": 0.45227272727272727 + }, + "prompt_3": { + "accuracy": 0.4340909090909091 + }, + "prompt_4": { + "accuracy": 0.44545454545454544 + }, + "prompt_5": { + "accuracy": 0.4113636363636364 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.42033898305084744 + }, + "prompt_2": { + "accuracy": 0.39220338983050845 + }, + "prompt_3": { + "accuracy": 0.4006779661016949 + }, + "prompt_4": { + "accuracy": 0.3579661016949153 + }, + "prompt_5": { + "accuracy": 0.4135593220338983 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6178010471204188 + }, + "prompt_2": { + "accuracy": 0.631264023934181 + }, + "prompt_3": { + "accuracy": 0.618922961854899 + }, + "prompt_4": { + "accuracy": 0.6207928197456993 + }, + "prompt_5": { + "accuracy": 0.5964846671652955 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6761391474767271 + }, + "prompt_2": { + "accuracy": 0.6815286624203821 + }, + "prompt_3": { + "accuracy": 0.699657030867222 + }, + "prompt_4": { + "accuracy": 0.6883880450759432 + }, + "prompt_5": { + "accuracy": 0.679078882900539 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.15549086207762408, + "rouge2": 0.05856047064940938, + "rougeL": 0.11721291727305654, + "avg_rouge": 0.11042141666669668 + }, + "prompt_2": { + "rouge1": 0.1561604721300699, + "rouge2": 0.056604867522829866, + "rougeL": 0.11779266446571382, + "avg_rouge": 0.1101860013728712 + }, + "prompt_3": { + "rouge1": 0.15848435746925935, + "rouge2": 0.05751922805889506, + "rougeL": 0.11825043935494871, + "avg_rouge": 0.11141800829436771 + }, + "prompt_4": { + "rouge1": 0.14862530342477928, + "rouge2": 0.051499107780456924, + "rougeL": 0.11085909016872368, + "avg_rouge": 0.1036611671246533 + }, + "prompt_5": { + "rouge1": 0.15833000081632223, + "rouge2": 0.05575315181996886, + "rougeL": 0.1171455537326825, + "avg_rouge": 0.11040956878965785 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.14307168996351716, + "rouge2": 0.04293583239044555, + "rougeL": 0.10790821364621331, + "avg_rouge": 0.09797191200005867 + }, + "prompt_2": { + "rouge1": 0.1581518512626931, + "rouge2": 0.05133101461044942, + "rougeL": 0.122266711273308, + "avg_rouge": 0.11058319238215018 + }, + "prompt_3": { + "rouge1": 0.15316223807401713, + "rouge2": 0.04939228118577924, + "rougeL": 0.12009615552998361, + "avg_rouge": 0.10755022492992666 + }, + "prompt_4": { + "rouge1": 0.13963198214134287, + "rouge2": 0.04128555645392314, + "rougeL": 0.10474062390007628, + "avg_rouge": 0.09521938749844744 + }, + "prompt_5": { + "rouge1": 0.16002077402889608, + "rouge2": 0.03981780247525948, + "rougeL": 0.11794685139818367, + "avg_rouge": 0.10592847596744641 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.875 + }, + "prompt_2": { + "accuracy": 0.8715596330275229 + }, + "prompt_3": { + "accuracy": 0.8830275229357798 + }, + "prompt_4": { + "accuracy": 0.8681192660550459 + }, + "prompt_5": { + "accuracy": 0.8727064220183486 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6116970278044104 + }, + "prompt_2": { + "accuracy": 0.62320230105465 + }, + "prompt_3": { + "accuracy": 0.6145733461169702 + }, + "prompt_4": { + "accuracy": 0.5877277085330777 + }, + "prompt_5": { + "accuracy": 0.5819750719079578 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4985 + }, + "prompt_2": { + "accuracy": 0.4925 + }, + "prompt_3": { + "accuracy": 0.505 + }, + "prompt_4": { + "accuracy": 0.5015 + }, + "prompt_5": { + "accuracy": 0.5 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.52 + }, + "prompt_2": { + "accuracy": 0.509 + }, + "prompt_3": { + "accuracy": 0.512 + }, + "prompt_4": { + "accuracy": 0.489 + }, + "prompt_5": { + "accuracy": 0.4995 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5415 + }, + "prompt_2": { + "accuracy": 0.571 + }, + "prompt_3": { + "accuracy": 0.586 + }, + "prompt_4": { + "accuracy": 0.531 + }, + "prompt_5": { + "accuracy": 0.5705 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4084507042253521 + }, + "prompt_2": { + "accuracy": 0.5070422535211268 + }, + "prompt_3": { + "accuracy": 0.4225352112676056 + }, + "prompt_4": { + "accuracy": 0.4225352112676056 + }, + "prompt_5": { + "accuracy": 0.5352112676056338 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.49097472924187724 + }, + "prompt_2": { + "accuracy": 0.4729241877256318 + }, + "prompt_3": { + "accuracy": 0.5595667870036101 + }, + "prompt_4": { + "accuracy": 0.4620938628158845 + }, + "prompt_5": { + "accuracy": 0.5415162454873647 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5220588235294118 + }, + "prompt_2": { + "accuracy": 0.4803921568627451 + }, + "prompt_3": { + "accuracy": 0.5490196078431373 + }, + "prompt_4": { + "accuracy": 0.5220588235294118 + }, + "prompt_5": { + "accuracy": 0.4877450980392157 + } } }, "five_shot": { @@ -14200,95 +126317,1825 @@ "model_link": "https://openai.com/blog/chatgpt", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.661904761904762, + "language_acc": { + "Vietnamese": 0.6133333333333333, + "Malay": 0.6133333333333333, + "Filipino": 0.6266666666666667, + "Indonesian": 0.66, + "Chinese": 0.6533333333333333, + "Spanish": 0.72, + "English": 0.7466666666666667 + }, + "consistency_score_2": 0.6663492063492062, + "consistency_score_3": 0.5316190476190475, + "consistency_score_4": 0.449904761904762, + "consistency_score_5": 0.3923809523809524, + "consistency_score_6": 0.3485714285714286, + "consistency_score_7": 0.31333333333333335, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.5666666666666667, + "Vietnamese,Filipino": 0.6133333333333333, + "Vietnamese,Indonesian": 0.6466666666666666, + "Vietnamese,Chinese": 0.5933333333333334, + "Vietnamese,Spanish": 0.6666666666666666, + "Vietnamese,English": 0.64, + "Malay,Filipino": 0.62, + "Malay,Indonesian": 0.6866666666666666, + "Malay,Chinese": 0.64, + "Malay,Spanish": 0.6933333333333334, + "Malay,English": 0.6733333333333333, + "Filipino,Indonesian": 0.6733333333333333, + "Filipino,Chinese": 0.6133333333333333, + "Filipino,Spanish": 0.72, + "Filipino,English": 0.6666666666666666, + "Indonesian,Chinese": 0.6666666666666666, + "Indonesian,Spanish": 0.76, + "Indonesian,English": 0.7333333333333333, + "Chinese,Spanish": 0.6666666666666666, + "Chinese,English": 0.6533333333333333, + "Spanish,English": 0.8 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.4533333333333333, + "Vietnamese,Malay,Indonesian": 0.4866666666666667, + "Vietnamese,Malay,Chinese": 0.4533333333333333, + "Vietnamese,Malay,Spanish": 0.49333333333333335, + "Vietnamese,Malay,English": 0.4866666666666667, + "Vietnamese,Filipino,Indonesian": 0.5133333333333333, + "Vietnamese,Filipino,Chinese": 0.4533333333333333, + "Vietnamese,Filipino,Spanish": 0.54, + "Vietnamese,Filipino,English": 0.5133333333333333, + "Vietnamese,Indonesian,Chinese": 0.5, + "Vietnamese,Indonesian,Spanish": 0.5733333333333334, + "Vietnamese,Indonesian,English": 0.56, + "Vietnamese,Chinese,Spanish": 0.5, + "Vietnamese,Chinese,English": 0.4866666666666667, + "Vietnamese,Spanish,English": 0.58, + "Malay,Filipino,Indonesian": 0.5266666666666666, + "Malay,Filipino,Chinese": 0.48, + "Malay,Filipino,Spanish": 0.54, + "Malay,Filipino,English": 0.52, + "Malay,Indonesian,Chinese": 0.5266666666666666, + "Malay,Indonesian,Spanish": 0.5933333333333334, + "Malay,Indonesian,English": 0.5733333333333334, + "Malay,Chinese,Spanish": 0.5133333333333333, + "Malay,Chinese,English": 0.5133333333333333, + "Malay,Spanish,English": 0.6066666666666667, + "Filipino,Indonesian,Chinese": 0.49333333333333335, + "Filipino,Indonesian,Spanish": 0.5866666666666667, + "Filipino,Indonesian,English": 0.5533333333333333, + "Filipino,Chinese,Spanish": 0.5133333333333333, + "Filipino,Chinese,English": 0.5, + "Filipino,Spanish,English": 0.6066666666666667, + "Indonesian,Chinese,Spanish": 0.5666666666666667, + "Indonesian,Chinese,English": 0.5466666666666666, + "Indonesian,Spanish,English": 0.6733333333333333, + "Chinese,Spanish,English": 0.58 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.41333333333333333, + "Vietnamese,Malay,Filipino,Chinese": 0.38, + "Vietnamese,Malay,Filipino,Spanish": 0.4266666666666667, + "Vietnamese,Malay,Filipino,English": 0.4066666666666667, + "Vietnamese,Malay,Indonesian,Chinese": 0.4066666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.4533333333333333, + "Vietnamese,Malay,Indonesian,English": 0.4533333333333333, + "Vietnamese,Malay,Chinese,Spanish": 0.4066666666666667, + "Vietnamese,Malay,Chinese,English": 0.4, + "Vietnamese,Malay,Spanish,English": 0.46, + "Vietnamese,Filipino,Indonesian,Chinese": 0.4, + "Vietnamese,Filipino,Indonesian,Spanish": 0.48, + "Vietnamese,Filipino,Indonesian,English": 0.46, + "Vietnamese,Filipino,Chinese,Spanish": 0.41333333333333333, + "Vietnamese,Filipino,Chinese,English": 0.4, + "Vietnamese,Filipino,Spanish,English": 0.48, + "Vietnamese,Indonesian,Chinese,Spanish": 0.46, + "Vietnamese,Indonesian,Chinese,English": 0.4533333333333333, + "Vietnamese,Indonesian,Spanish,English": 0.5266666666666666, + "Vietnamese,Chinese,Spanish,English": 0.4533333333333333, + "Malay,Filipino,Indonesian,Chinese": 0.41333333333333333, + "Malay,Filipino,Indonesian,Spanish": 0.4866666666666667, + "Malay,Filipino,Indonesian,English": 0.4666666666666667, + "Malay,Filipino,Chinese,Spanish": 0.43333333333333335, + "Malay,Filipino,Chinese,English": 0.42, + "Malay,Filipino,Spanish,English": 0.48, + "Malay,Indonesian,Chinese,Spanish": 0.46, + "Malay,Indonesian,Chinese,English": 0.46, + "Malay,Indonesian,Spanish,English": 0.54, + "Malay,Chinese,Spanish,English": 0.4666666666666667, + "Filipino,Indonesian,Chinese,Spanish": 0.44666666666666666, + "Filipino,Indonesian,Chinese,English": 0.43333333333333335, + "Filipino,Indonesian,Spanish,English": 0.5266666666666666, + "Filipino,Chinese,Spanish,English": 0.4666666666666667, + "Indonesian,Chinese,Spanish,English": 0.5133333333333333 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.34, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.4, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.38666666666666666, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.36, + "Vietnamese,Malay,Filipino,Chinese,English": 0.34, + "Vietnamese,Malay,Filipino,Spanish,English": 0.3933333333333333, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.38, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.38, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.4266666666666667, + "Vietnamese,Malay,Chinese,Spanish,English": 0.38, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.38, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.36666666666666664, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.44, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.38, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.4266666666666667, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.38666666666666666, + "Malay,Filipino,Indonesian,Chinese,English": 0.38, + "Malay,Filipino,Indonesian,Spanish,English": 0.44666666666666666, + "Malay,Filipino,Chinese,Spanish,English": 0.4, + "Malay,Indonesian,Chinese,Spanish,English": 0.43333333333333335, + "Filipino,Indonesian,Chinese,Spanish,English": 0.41333333333333333 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.3333333333333333, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.32, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.37333333333333335, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.3333333333333333, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.36, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.35333333333333333, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.36666666666666664 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.31333333333333335 + } + }, + "AC3_2": 0.6641195483429715, + "AC3_3": 0.5896508746637111, + "AC3_4": 0.5356926666021684, + "AC3_5": 0.49269153004317184, + "AC3_6": 0.45665813917656617, + "AC3_7": 0.4253255207897209 + }, + "prompt_2": { + "overall_acc": 0.6885714285714286, + "language_acc": { + "Vietnamese": 0.6333333333333333, + "Malay": 0.6533333333333333, + "Filipino": 0.6666666666666666, + "Indonesian": 0.6933333333333334, + "Chinese": 0.6533333333333333, + "Spanish": 0.7333333333333333, + "English": 0.7866666666666666 + }, + "consistency_score_2": 0.6926984126984127, + "consistency_score_3": 0.5647619047619048, + "consistency_score_4": 0.4864761904761906, + "consistency_score_5": 0.43142857142857144, + "consistency_score_6": 0.3904761904761905, + "consistency_score_7": 0.36, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.62, + "Vietnamese,Filipino": 0.66, + "Vietnamese,Indonesian": 0.7066666666666667, + "Vietnamese,Chinese": 0.6533333333333333, + "Vietnamese,Spanish": 0.6666666666666666, + "Vietnamese,English": 0.68, + "Malay,Filipino": 0.6466666666666666, + "Malay,Indonesian": 0.6866666666666666, + "Malay,Chinese": 0.66, + "Malay,Spanish": 0.68, + "Malay,English": 0.6733333333333333, + "Filipino,Indonesian": 0.72, + "Filipino,Chinese": 0.6066666666666667, + "Filipino,Spanish": 0.7066666666666667, + "Filipino,English": 0.7333333333333333, + "Indonesian,Chinese": 0.6933333333333334, + "Indonesian,Spanish": 0.7666666666666667, + "Indonesian,English": 0.7933333333333333, + "Chinese,Spanish": 0.6533333333333333, + "Chinese,English": 0.7, + "Spanish,English": 0.84 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.49333333333333335, + "Vietnamese,Malay,Indonesian": 0.5333333333333333, + "Vietnamese,Malay,Chinese": 0.5, + "Vietnamese,Malay,Spanish": 0.5066666666666667, + "Vietnamese,Malay,English": 0.52, + "Vietnamese,Filipino,Indonesian": 0.5666666666666667, + "Vietnamese,Filipino,Chinese": 0.4866666666666667, + "Vietnamese,Filipino,Spanish": 0.54, + "Vietnamese,Filipino,English": 0.5666666666666667, + "Vietnamese,Indonesian,Chinese": 0.5466666666666666, + "Vietnamese,Indonesian,Spanish": 0.58, + "Vietnamese,Indonesian,English": 0.6133333333333333, + "Vietnamese,Chinese,Spanish": 0.5066666666666667, + "Vietnamese,Chinese,English": 0.54, + "Vietnamese,Spanish,English": 0.6066666666666667, + "Malay,Filipino,Indonesian": 0.56, + "Malay,Filipino,Chinese": 0.49333333333333335, + "Malay,Filipino,Spanish": 0.5533333333333333, + "Malay,Filipino,English": 0.56, + "Malay,Indonesian,Chinese": 0.54, + "Malay,Indonesian,Spanish": 0.5933333333333334, + "Malay,Indonesian,English": 0.6066666666666667, + "Malay,Chinese,Spanish": 0.5333333333333333, + "Malay,Chinese,English": 0.5533333333333333, + "Malay,Spanish,English": 0.6133333333333333, + "Filipino,Indonesian,Chinese": 0.5333333333333333, + "Filipino,Indonesian,Spanish": 0.6333333333333333, + "Filipino,Indonesian,English": 0.6533333333333333, + "Filipino,Chinese,Spanish": 0.52, + "Filipino,Chinese,English": 0.5533333333333333, + "Filipino,Spanish,English": 0.66, + "Indonesian,Chinese,Spanish": 0.5733333333333334, + "Indonesian,Chinese,English": 0.6133333333333333, + "Indonesian,Spanish,English": 0.7066666666666667, + "Chinese,Spanish,English": 0.6066666666666667 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.46, + "Vietnamese,Malay,Filipino,Chinese": 0.4066666666666667, + "Vietnamese,Malay,Filipino,Spanish": 0.44, + "Vietnamese,Malay,Filipino,English": 0.46, + "Vietnamese,Malay,Indonesian,Chinese": 0.44, + "Vietnamese,Malay,Indonesian,Spanish": 0.48, + "Vietnamese,Malay,Indonesian,English": 0.4866666666666667, + "Vietnamese,Malay,Chinese,Spanish": 0.4266666666666667, + "Vietnamese,Malay,Chinese,English": 0.44666666666666666, + "Vietnamese,Malay,Spanish,English": 0.47333333333333333, + "Vietnamese,Filipino,Indonesian,Chinese": 0.44, + "Vietnamese,Filipino,Indonesian,Spanish": 0.5066666666666667, + "Vietnamese,Filipino,Indonesian,English": 0.52, + "Vietnamese,Filipino,Chinese,Spanish": 0.4266666666666667, + "Vietnamese,Filipino,Chinese,English": 0.46, + "Vietnamese,Filipino,Spanish,English": 0.52, + "Vietnamese,Indonesian,Chinese,Spanish": 0.4666666666666667, + "Vietnamese,Indonesian,Chinese,English": 0.5, + "Vietnamese,Indonesian,Spanish,English": 0.5533333333333333, + "Vietnamese,Chinese,Spanish,English": 0.4866666666666667, + "Malay,Filipino,Indonesian,Chinese": 0.44666666666666666, + "Malay,Filipino,Indonesian,Spanish": 0.5133333333333333, + "Malay,Filipino,Indonesian,English": 0.5266666666666666, + "Malay,Filipino,Chinese,Spanish": 0.44666666666666666, + "Malay,Filipino,Chinese,English": 0.47333333333333333, + "Malay,Filipino,Spanish,English": 0.5133333333333333, + "Malay,Indonesian,Chinese,Spanish": 0.4866666666666667, + "Malay,Indonesian,Chinese,English": 0.5066666666666667, + "Malay,Indonesian,Spanish,English": 0.56, + "Malay,Chinese,Spanish,English": 0.5133333333333333, + "Filipino,Indonesian,Chinese,Spanish": 0.47333333333333333, + "Filipino,Indonesian,Chinese,English": 0.5, + "Filipino,Indonesian,Spanish,English": 0.6066666666666667, + "Filipino,Chinese,Spanish,English": 0.5066666666666667, + "Indonesian,Chinese,Spanish,English": 0.5533333333333333 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.38, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.4266666666666667, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.43333333333333335, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.37333333333333335, + "Vietnamese,Malay,Filipino,Chinese,English": 0.4, + "Vietnamese,Malay,Filipino,Spanish,English": 0.42, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.4066666666666667, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.42, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.4533333333333333, + "Vietnamese,Malay,Chinese,Spanish,English": 0.41333333333333333, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.4, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.42, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.4866666666666667, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.4266666666666667, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.46, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.42, + "Malay,Filipino,Indonesian,Chinese,English": 0.44, + "Malay,Filipino,Indonesian,Spanish,English": 0.49333333333333335, + "Malay,Filipino,Chinese,Spanish,English": 0.44, + "Malay,Indonesian,Chinese,Spanish,English": 0.48, + "Filipino,Indonesian,Chinese,Spanish,English": 0.4666666666666667 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.36, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.37333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.4066666666666667, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.37333333333333335, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.4, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.4, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.42 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.36 + } + }, + "AC3_2": 0.6906287552443499, + "AC3_3": 0.6205514545748909, + "AC3_4": 0.570144732832474, + "AC3_5": 0.5304810495153178, + "AC3_6": 0.49834825364187546, + "AC3_7": 0.4728065394644463 + }, + "prompt_3": { + "overall_acc": 0.7009523809523809, + "language_acc": { + "Vietnamese": 0.6266666666666667, + "Malay": 0.68, + "Filipino": 0.6933333333333334, + "Indonesian": 0.6933333333333334, + "Chinese": 0.6533333333333333, + "Spanish": 0.7333333333333333, + "English": 0.8266666666666667 + }, + "consistency_score_2": 0.6974603174603174, + "consistency_score_3": 0.5767619047619048, + "consistency_score_4": 0.5036190476190475, + "consistency_score_5": 0.45269841269841266, + "consistency_score_6": 0.41523809523809524, + "consistency_score_7": 0.38666666666666666, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.6466666666666666, + "Vietnamese,Filipino": 0.68, + "Vietnamese,Indonesian": 0.64, + "Vietnamese,Chinese": 0.62, + "Vietnamese,Spanish": 0.6533333333333333, + "Vietnamese,English": 0.64, + "Malay,Filipino": 0.7, + "Malay,Indonesian": 0.7466666666666667, + "Malay,Chinese": 0.6466666666666666, + "Malay,Spanish": 0.7666666666666667, + "Malay,English": 0.74, + "Filipino,Indonesian": 0.7, + "Filipino,Chinese": 0.6466666666666666, + "Filipino,Spanish": 0.7266666666666667, + "Filipino,English": 0.7466666666666667, + "Indonesian,Chinese": 0.6466666666666666, + "Indonesian,Spanish": 0.74, + "Indonesian,English": 0.76, + "Chinese,Spanish": 0.68, + "Chinese,English": 0.6866666666666666, + "Spanish,English": 0.8333333333333334 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.5466666666666666, + "Vietnamese,Malay,Indonesian": 0.5533333333333333, + "Vietnamese,Malay,Chinese": 0.5133333333333333, + "Vietnamese,Malay,Spanish": 0.56, + "Vietnamese,Malay,English": 0.5466666666666666, + "Vietnamese,Filipino,Indonesian": 0.5533333333333333, + "Vietnamese,Filipino,Chinese": 0.5133333333333333, + "Vietnamese,Filipino,Spanish": 0.5666666666666667, + "Vietnamese,Filipino,English": 0.5666666666666667, + "Vietnamese,Indonesian,Chinese": 0.49333333333333335, + "Vietnamese,Indonesian,Spanish": 0.5466666666666666, + "Vietnamese,Indonesian,English": 0.56, + "Vietnamese,Chinese,Spanish": 0.52, + "Vietnamese,Chinese,English": 0.52, + "Vietnamese,Spanish,English": 0.5933333333333334, + "Malay,Filipino,Indonesian": 0.6, + "Malay,Filipino,Chinese": 0.5266666666666666, + "Malay,Filipino,Spanish": 0.62, + "Malay,Filipino,English": 0.62, + "Malay,Indonesian,Chinese": 0.5466666666666666, + "Malay,Indonesian,Spanish": 0.64, + "Malay,Indonesian,English": 0.6466666666666666, + "Malay,Chinese,Spanish": 0.5733333333333334, + "Malay,Chinese,English": 0.5666666666666667, + "Malay,Spanish,English": 0.6866666666666666, + "Filipino,Indonesian,Chinese": 0.5266666666666666, + "Filipino,Indonesian,Spanish": 0.62, + "Filipino,Indonesian,English": 0.6333333333333333, + "Filipino,Chinese,Spanish": 0.5666666666666667, + "Filipino,Chinese,English": 0.56, + "Filipino,Spanish,English": 0.6666666666666666, + "Indonesian,Chinese,Spanish": 0.56, + "Indonesian,Chinese,English": 0.5666666666666667, + "Indonesian,Spanish,English": 0.6866666666666666, + "Chinese,Spanish,English": 0.62 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.49333333333333335, + "Vietnamese,Malay,Filipino,Chinese": 0.44666666666666666, + "Vietnamese,Malay,Filipino,Spanish": 0.5066666666666667, + "Vietnamese,Malay,Filipino,English": 0.5, + "Vietnamese,Malay,Indonesian,Chinese": 0.44, + "Vietnamese,Malay,Indonesian,Spanish": 0.5, + "Vietnamese,Malay,Indonesian,English": 0.49333333333333335, + "Vietnamese,Malay,Chinese,Spanish": 0.47333333333333333, + "Vietnamese,Malay,Chinese,English": 0.4666666666666667, + "Vietnamese,Malay,Spanish,English": 0.5266666666666666, + "Vietnamese,Filipino,Indonesian,Chinese": 0.44666666666666666, + "Vietnamese,Filipino,Indonesian,Spanish": 0.5, + "Vietnamese,Filipino,Indonesian,English": 0.5133333333333333, + "Vietnamese,Filipino,Chinese,Spanish": 0.47333333333333333, + "Vietnamese,Filipino,Chinese,English": 0.47333333333333333, + "Vietnamese,Filipino,Spanish,English": 0.54, + "Vietnamese,Indonesian,Chinese,Spanish": 0.4533333333333333, + "Vietnamese,Indonesian,Chinese,English": 0.46, + "Vietnamese,Indonesian,Spanish,English": 0.5266666666666666, + "Vietnamese,Chinese,Spanish,English": 0.49333333333333335, + "Malay,Filipino,Indonesian,Chinese": 0.4666666666666667, + "Malay,Filipino,Indonesian,Spanish": 0.54, + "Malay,Filipino,Indonesian,English": 0.5533333333333333, + "Malay,Filipino,Chinese,Spanish": 0.5, + "Malay,Filipino,Chinese,English": 0.4866666666666667, + "Malay,Filipino,Spanish,English": 0.58, + "Malay,Indonesian,Chinese,Spanish": 0.5066666666666667, + "Malay,Indonesian,Chinese,English": 0.5066666666666667, + "Malay,Indonesian,Spanish,English": 0.6066666666666667, + "Malay,Chinese,Spanish,English": 0.54, + "Filipino,Indonesian,Chinese,Spanish": 0.4866666666666667, + "Filipino,Indonesian,Chinese,English": 0.4866666666666667, + "Filipino,Indonesian,Spanish,English": 0.5866666666666667, + "Filipino,Chinese,Spanish,English": 0.5266666666666666, + "Indonesian,Chinese,Spanish,English": 0.5266666666666666 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.4066666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.46, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.46, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.43333333333333335, + "Vietnamese,Malay,Filipino,Chinese,English": 0.4266666666666667, + "Vietnamese,Malay,Filipino,Spanish,English": 0.4866666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.4266666666666667, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.42, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.48, + "Vietnamese,Malay,Chinese,Spanish,English": 0.4533333333333333, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.42, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.4266666666666667, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.4866666666666667, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.4533333333333333, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.44, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.44666666666666666, + "Malay,Filipino,Indonesian,Chinese,English": 0.44, + "Malay,Filipino,Indonesian,Spanish,English": 0.52, + "Malay,Filipino,Chinese,Spanish,English": 0.47333333333333333, + "Malay,Indonesian,Chinese,Spanish,English": 0.4866666666666667, + "Filipino,Indonesian,Chinese,Spanish,English": 0.46 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.4, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.3933333333333333, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.44666666666666666, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.42, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.41333333333333333, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.4066666666666667, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.4266666666666667 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.38666666666666666 + } + }, + "AC3_2": 0.6992019890316715, + "AC3_3": 0.6328216485860745, + "AC3_4": 0.5861221047072226, + "AC3_5": 0.5501145267757028, + "AC3_6": 0.5215277100134094, + "AC3_7": 0.4984004669711045 + }, + "prompt_4": { + "overall_acc": 0.6647619047619048, + "language_acc": { + "Vietnamese": 0.5733333333333334, + "Malay": 0.6333333333333333, + "Filipino": 0.6266666666666667, + "Indonesian": 0.6933333333333334, + "Chinese": 0.6266666666666667, + "Spanish": 0.72, + "English": 0.78 + }, + "consistency_score_2": 0.6571428571428571, + "consistency_score_3": 0.520952380952381, + "consistency_score_4": 0.44552380952380943, + "consistency_score_5": 0.39587301587301593, + "consistency_score_6": 0.35999999999999993, + "consistency_score_7": 0.3333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.58, + "Vietnamese,Filipino": 0.6333333333333333, + "Vietnamese,Indonesian": 0.6266666666666667, + "Vietnamese,Chinese": 0.6533333333333333, + "Vietnamese,Spanish": 0.6133333333333333, + "Vietnamese,English": 0.6, + "Malay,Filipino": 0.62, + "Malay,Indonesian": 0.6666666666666666, + "Malay,Chinese": 0.5666666666666667, + "Malay,Spanish": 0.68, + "Malay,English": 0.6266666666666667, + "Filipino,Indonesian": 0.7, + "Filipino,Chinese": 0.6, + "Filipino,Spanish": 0.68, + "Filipino,English": 0.68, + "Indonesian,Chinese": 0.64, + "Indonesian,Spanish": 0.72, + "Indonesian,English": 0.7466666666666667, + "Chinese,Spanish": 0.6333333333333333, + "Chinese,English": 0.7, + "Spanish,English": 0.8333333333333334 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.46, + "Vietnamese,Malay,Indonesian": 0.47333333333333333, + "Vietnamese,Malay,Chinese": 0.4533333333333333, + "Vietnamese,Malay,Spanish": 0.49333333333333335, + "Vietnamese,Malay,English": 0.4533333333333333, + "Vietnamese,Filipino,Indonesian": 0.5133333333333333, + "Vietnamese,Filipino,Chinese": 0.48, + "Vietnamese,Filipino,Spanish": 0.5, + "Vietnamese,Filipino,English": 0.5, + "Vietnamese,Indonesian,Chinese": 0.5133333333333333, + "Vietnamese,Indonesian,Spanish": 0.5066666666666667, + "Vietnamese,Indonesian,English": 0.5333333333333333, + "Vietnamese,Chinese,Spanish": 0.4866666666666667, + "Vietnamese,Chinese,English": 0.5133333333333333, + "Vietnamese,Spanish,English": 0.54, + "Malay,Filipino,Indonesian": 0.5333333333333333, + "Malay,Filipino,Chinese": 0.44, + "Malay,Filipino,Spanish": 0.5133333333333333, + "Malay,Filipino,English": 0.5066666666666667, + "Malay,Indonesian,Chinese": 0.4666666666666667, + "Malay,Indonesian,Spanish": 0.5733333333333334, + "Malay,Indonesian,English": 0.56, + "Malay,Chinese,Spanish": 0.4866666666666667, + "Malay,Chinese,English": 0.4866666666666667, + "Malay,Spanish,English": 0.5933333333333334, + "Filipino,Indonesian,Chinese": 0.5066666666666667, + "Filipino,Indonesian,Spanish": 0.5733333333333334, + "Filipino,Indonesian,English": 0.5866666666666667, + "Filipino,Chinese,Spanish": 0.5, + "Filipino,Chinese,English": 0.52, + "Filipino,Spanish,English": 0.6133333333333333, + "Indonesian,Chinese,Spanish": 0.5266666666666666, + "Indonesian,Chinese,English": 0.56, + "Indonesian,Spanish,English": 0.6666666666666666, + "Chinese,Spanish,English": 0.6 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.41333333333333333, + "Vietnamese,Malay,Filipino,Chinese": 0.38666666666666666, + "Vietnamese,Malay,Filipino,Spanish": 0.4266666666666667, + "Vietnamese,Malay,Filipino,English": 0.4, + "Vietnamese,Malay,Indonesian,Chinese": 0.3933333333333333, + "Vietnamese,Malay,Indonesian,Spanish": 0.44, + "Vietnamese,Malay,Indonesian,English": 0.4266666666666667, + "Vietnamese,Malay,Chinese,Spanish": 0.4066666666666667, + "Vietnamese,Malay,Chinese,English": 0.4, + "Vietnamese,Malay,Spanish,English": 0.44666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese": 0.42, + "Vietnamese,Filipino,Indonesian,Spanish": 0.4533333333333333, + "Vietnamese,Filipino,Indonesian,English": 0.46, + "Vietnamese,Filipino,Chinese,Spanish": 0.41333333333333333, + "Vietnamese,Filipino,Chinese,English": 0.43333333333333335, + "Vietnamese,Filipino,Spanish,English": 0.4666666666666667, + "Vietnamese,Indonesian,Chinese,Spanish": 0.42, + "Vietnamese,Indonesian,Chinese,English": 0.46, + "Vietnamese,Indonesian,Spanish,English": 0.48, + "Vietnamese,Chinese,Spanish,English": 0.4666666666666667, + "Malay,Filipino,Indonesian,Chinese": 0.3933333333333333, + "Malay,Filipino,Indonesian,Spanish": 0.48, + "Malay,Filipino,Indonesian,English": 0.47333333333333333, + "Malay,Filipino,Chinese,Spanish": 0.41333333333333333, + "Malay,Filipino,Chinese,English": 0.4, + "Malay,Filipino,Spanish,English": 0.4866666666666667, + "Malay,Indonesian,Chinese,Spanish": 0.44, + "Malay,Indonesian,Chinese,English": 0.44, + "Malay,Indonesian,Spanish,English": 0.54, + "Malay,Chinese,Spanish,English": 0.47333333333333333, + "Filipino,Indonesian,Chinese,Spanish": 0.4533333333333333, + "Filipino,Indonesian,Chinese,English": 0.47333333333333333, + "Filipino,Indonesian,Spanish,English": 0.5333333333333333, + "Filipino,Chinese,Spanish,English": 0.47333333333333333, + "Indonesian,Chinese,Spanish,English": 0.5066666666666667 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.3466666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.4, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.38, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.36666666666666664, + "Vietnamese,Malay,Filipino,Chinese,English": 0.35333333333333333, + "Vietnamese,Malay,Filipino,Spanish,English": 0.4, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.37333333333333335, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.37333333333333335, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.42, + "Vietnamese,Malay,Chinese,Spanish,English": 0.3933333333333333, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.37333333333333335, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.4, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.4266666666666667, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.4, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.41333333333333333, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.38666666666666666, + "Malay,Filipino,Indonesian,Chinese,English": 0.38, + "Malay,Filipino,Indonesian,Spanish,English": 0.46, + "Malay,Filipino,Chinese,Spanish,English": 0.4, + "Malay,Indonesian,Chinese,Spanish,English": 0.43333333333333335, + "Filipino,Indonesian,Chinese,Spanish,English": 0.43333333333333335 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.34, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.3333333333333333, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.38, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.35333333333333333, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.36666666666666664, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.36666666666666664, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.38 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.3333333333333333 + } + }, + "AC3_2": 0.6609304239928177, + "AC3_3": 0.5841361636531642, + "AC3_4": 0.5334973735110832, + "AC3_5": 0.4962335199999633, + "AC3_6": 0.4670631969804445, + "AC3_7": 0.4440203561896099 + }, + "prompt_5": { + "overall_acc": 0.699047619047619, + "language_acc": { + "Vietnamese": 0.62, + "Malay": 0.68, + "Filipino": 0.6733333333333333, + "Indonesian": 0.7, + "Chinese": 0.6933333333333334, + "Spanish": 0.7266666666666667, + "English": 0.8 + }, + "consistency_score_2": 0.7066666666666666, + "consistency_score_3": 0.5843809523809523, + "consistency_score_4": 0.5060952380952382, + "consistency_score_5": 0.448888888888889, + "consistency_score_6": 0.4038095238095238, + "consistency_score_7": 0.36666666666666664, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.6533333333333333, + "Vietnamese,Filipino": 0.6533333333333333, + "Vietnamese,Indonesian": 0.66, + "Vietnamese,Chinese": 0.64, + "Vietnamese,Spanish": 0.6333333333333333, + "Vietnamese,English": 0.6533333333333333, + "Malay,Filipino": 0.6866666666666666, + "Malay,Indonesian": 0.7466666666666667, + "Malay,Chinese": 0.6733333333333333, + "Malay,Spanish": 0.72, + "Malay,English": 0.74, + "Filipino,Indonesian": 0.72, + "Filipino,Chinese": 0.68, + "Filipino,Spanish": 0.7466666666666667, + "Filipino,English": 0.7333333333333333, + "Indonesian,Chinese": 0.6933333333333334, + "Indonesian,Spanish": 0.7466666666666667, + "Indonesian,English": 0.76, + "Chinese,Spanish": 0.7066666666666667, + "Chinese,English": 0.7666666666666667, + "Spanish,English": 0.8266666666666667 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.52, + "Vietnamese,Malay,Indonesian": 0.5466666666666666, + "Vietnamese,Malay,Chinese": 0.5066666666666667, + "Vietnamese,Malay,Spanish": 0.5466666666666666, + "Vietnamese,Malay,English": 0.56, + "Vietnamese,Filipino,Indonesian": 0.54, + "Vietnamese,Filipino,Chinese": 0.5133333333333333, + "Vietnamese,Filipino,Spanish": 0.5466666666666666, + "Vietnamese,Filipino,English": 0.5533333333333333, + "Vietnamese,Indonesian,Chinese": 0.5133333333333333, + "Vietnamese,Indonesian,Spanish": 0.5466666666666666, + "Vietnamese,Indonesian,English": 0.5666666666666667, + "Vietnamese,Chinese,Spanish": 0.5133333333333333, + "Vietnamese,Chinese,English": 0.5533333333333333, + "Vietnamese,Spanish,English": 0.5866666666666667, + "Malay,Filipino,Indonesian": 0.6066666666666667, + "Malay,Filipino,Chinese": 0.54, + "Malay,Filipino,Spanish": 0.6066666666666667, + "Malay,Filipino,English": 0.6066666666666667, + "Malay,Indonesian,Chinese": 0.58, + "Malay,Indonesian,Spanish": 0.6333333333333333, + "Malay,Indonesian,English": 0.6533333333333333, + "Malay,Chinese,Spanish": 0.5666666666666667, + "Malay,Chinese,English": 0.6066666666666667, + "Malay,Spanish,English": 0.66, + "Filipino,Indonesian,Chinese": 0.5666666666666667, + "Filipino,Indonesian,Spanish": 0.6333333333333333, + "Filipino,Indonesian,English": 0.64, + "Filipino,Chinese,Spanish": 0.5933333333333334, + "Filipino,Chinese,English": 0.6133333333333333, + "Filipino,Spanish,English": 0.68, + "Indonesian,Chinese,Spanish": 0.5866666666666667, + "Indonesian,Chinese,English": 0.6266666666666667, + "Indonesian,Spanish,English": 0.68, + "Chinese,Spanish,English": 0.66 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.4666666666666667, + "Vietnamese,Malay,Filipino,Chinese": 0.43333333333333335, + "Vietnamese,Malay,Filipino,Spanish": 0.47333333333333333, + "Vietnamese,Malay,Filipino,English": 0.48, + "Vietnamese,Malay,Indonesian,Chinese": 0.44666666666666666, + "Vietnamese,Malay,Indonesian,Spanish": 0.4866666666666667, + "Vietnamese,Malay,Indonesian,English": 0.5, + "Vietnamese,Malay,Chinese,Spanish": 0.4533333333333333, + "Vietnamese,Malay,Chinese,English": 0.48, + "Vietnamese,Malay,Spanish,English": 0.5133333333333333, + "Vietnamese,Filipino,Indonesian,Chinese": 0.44, + "Vietnamese,Filipino,Indonesian,Spanish": 0.4866666666666667, + "Vietnamese,Filipino,Indonesian,English": 0.5, + "Vietnamese,Filipino,Chinese,Spanish": 0.46, + "Vietnamese,Filipino,Chinese,English": 0.4866666666666667, + "Vietnamese,Filipino,Spanish,English": 0.52, + "Vietnamese,Indonesian,Chinese,Spanish": 0.44666666666666666, + "Vietnamese,Indonesian,Chinese,English": 0.4866666666666667, + "Vietnamese,Indonesian,Spanish,English": 0.52, + "Vietnamese,Chinese,Spanish,English": 0.5, + "Malay,Filipino,Indonesian,Chinese": 0.4866666666666667, + "Malay,Filipino,Indonesian,Spanish": 0.5533333333333333, + "Malay,Filipino,Indonesian,English": 0.5533333333333333, + "Malay,Filipino,Chinese,Spanish": 0.5, + "Malay,Filipino,Chinese,English": 0.52, + "Malay,Filipino,Spanish,English": 0.5666666666666667, + "Malay,Indonesian,Chinese,Spanish": 0.5133333333333333, + "Malay,Indonesian,Chinese,English": 0.5466666666666666, + "Malay,Indonesian,Spanish,English": 0.5933333333333334, + "Malay,Chinese,Spanish,English": 0.54, + "Filipino,Indonesian,Chinese,Spanish": 0.5133333333333333, + "Filipino,Indonesian,Chinese,English": 0.5333333333333333, + "Filipino,Indonesian,Spanish,English": 0.6, + "Filipino,Chinese,Spanish,English": 0.56, + "Indonesian,Chinese,Spanish,English": 0.5533333333333333 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.38666666666666666, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.43333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.44, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.4066666666666667, + "Vietnamese,Malay,Filipino,Chinese,English": 0.4266666666666667, + "Vietnamese,Malay,Filipino,Spanish,English": 0.4533333333333333, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.4066666666666667, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.43333333333333335, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.4666666666666667, + "Vietnamese,Malay,Chinese,Spanish,English": 0.44, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.4066666666666667, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.43333333333333335, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.47333333333333333, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.4533333333333333, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.44, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.46, + "Malay,Filipino,Indonesian,Chinese,English": 0.47333333333333333, + "Malay,Filipino,Indonesian,Spanish,English": 0.5266666666666666, + "Malay,Filipino,Chinese,Spanish,English": 0.48, + "Malay,Indonesian,Chinese,Spanish,English": 0.49333333333333335, + "Filipino,Indonesian,Chinese,Spanish,English": 0.49333333333333335 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.36666666666666664, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.38666666666666666, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.42, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.4, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.4, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.4066666666666667, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.44666666666666666 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.36666666666666664 + } + }, + "AC3_2": 0.7028364949816184, + "AC3_3": 0.6365918952422053, + "AC3_4": 0.5871248691814143, + "AC3_5": 0.5467109144066519, + "AC3_6": 0.5119105189107344, + "AC3_7": 0.48102472441122346 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.47970779220779225, + "language_acc": { + "English": 0.5625, + "Filipino": 0.3806818181818182, + "Vietnamese": 0.4375, + "Chinese": 0.5568181818181818, + "Indonesian": 0.4375, + "Malay": 0.4431818181818182, + "Spanish": 0.5397727272727273 + }, + "consistency_score_2": 0.549512987012987, + "consistency_score_3": 0.37905844155844165, + "consistency_score_4": 0.29107142857142865, + "consistency_score_5": 0.2372835497835498, + "consistency_score_6": 0.2012987012987013, + "consistency_score_7": 0.17613636363636365, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.4772727272727273, + "English,Vietnamese": 0.5738636363636364, + "English,Chinese": 0.5795454545454546, + "English,Indonesian": 0.6079545454545454, + "English,Malay": 0.6193181818181818, + "English,Spanish": 0.7443181818181818, + "Filipino,Vietnamese": 0.4602272727272727, + "Filipino,Chinese": 0.44886363636363635, + "Filipino,Indonesian": 0.44886363636363635, + "Filipino,Malay": 0.48863636363636365, + "Filipino,Spanish": 0.4602272727272727, + "Vietnamese,Chinese": 0.5170454545454546, + "Vietnamese,Indonesian": 0.5397727272727273, + "Vietnamese,Malay": 0.5170454545454546, + "Vietnamese,Spanish": 0.5965909090909091, + "Chinese,Indonesian": 0.5511363636363636, + "Chinese,Malay": 0.5170454545454546, + "Chinese,Spanish": 0.6022727272727273, + "Indonesian,Malay": 0.5909090909090909, + "Indonesian,Spanish": 0.625, + "Malay,Spanish": 0.5738636363636364 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.3068181818181818, + "English,Filipino,Chinese": 0.3125, + "English,Filipino,Indonesian": 0.3409090909090909, + "English,Filipino,Malay": 0.35795454545454547, + "English,Filipino,Spanish": 0.3693181818181818, + "English,Vietnamese,Chinese": 0.375, + "English,Vietnamese,Indonesian": 0.42045454545454547, + "English,Vietnamese,Malay": 0.4090909090909091, + "English,Vietnamese,Spanish": 0.48295454545454547, + "English,Chinese,Indonesian": 0.42613636363636365, + "English,Chinese,Malay": 0.4034090909090909, + "English,Chinese,Spanish": 0.48863636363636365, + "English,Indonesian,Malay": 0.4659090909090909, + "English,Indonesian,Spanish": 0.5170454545454546, + "English,Malay,Spanish": 0.5, + "Filipino,Vietnamese,Chinese": 0.2840909090909091, + "Filipino,Vietnamese,Indonesian": 0.29545454545454547, + "Filipino,Vietnamese,Malay": 0.30113636363636365, + "Filipino,Vietnamese,Spanish": 0.3125, + "Filipino,Chinese,Indonesian": 0.30113636363636365, + "Filipino,Chinese,Malay": 0.29545454545454547, + "Filipino,Chinese,Spanish": 0.3181818181818182, + "Filipino,Indonesian,Malay": 0.32954545454545453, + "Filipino,Indonesian,Spanish": 0.3181818181818182, + "Filipino,Malay,Spanish": 0.32386363636363635, + "Vietnamese,Chinese,Indonesian": 0.375, + "Vietnamese,Chinese,Malay": 0.32954545454545453, + "Vietnamese,Chinese,Spanish": 0.4090909090909091, + "Vietnamese,Indonesian,Malay": 0.3806818181818182, + "Vietnamese,Indonesian,Spanish": 0.4431818181818182, + "Vietnamese,Malay,Spanish": 0.38636363636363635, + "Chinese,Indonesian,Malay": 0.3977272727272727, + "Chinese,Indonesian,Spanish": 0.4431818181818182, + "Chinese,Malay,Spanish": 0.38636363636363635, + "Indonesian,Malay,Spanish": 0.4602272727272727 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.23295454545454544, + "English,Filipino,Vietnamese,Indonesian": 0.2556818181818182, + "English,Filipino,Vietnamese,Malay": 0.2556818181818182, + "English,Filipino,Vietnamese,Spanish": 0.2727272727272727, + "English,Filipino,Chinese,Indonesian": 0.25, + "English,Filipino,Chinese,Malay": 0.25, + "English,Filipino,Chinese,Spanish": 0.2727272727272727, + "English,Filipino,Indonesian,Malay": 0.29545454545454547, + "English,Filipino,Indonesian,Spanish": 0.2840909090909091, + "English,Filipino,Malay,Spanish": 0.29545454545454547, + "English,Vietnamese,Chinese,Indonesian": 0.3125, + "English,Vietnamese,Chinese,Malay": 0.2840909090909091, + "English,Vietnamese,Chinese,Spanish": 0.3409090909090909, + "English,Vietnamese,Indonesian,Malay": 0.32954545454545453, + "English,Vietnamese,Indonesian,Spanish": 0.375, + "English,Vietnamese,Malay,Spanish": 0.3465909090909091, + "English,Chinese,Indonesian,Malay": 0.3465909090909091, + "English,Chinese,Indonesian,Spanish": 0.38636363636363635, + "English,Chinese,Malay,Spanish": 0.35795454545454547, + "English,Indonesian,Malay,Spanish": 0.4090909090909091, + "Filipino,Vietnamese,Chinese,Indonesian": 0.22727272727272727, + "Filipino,Vietnamese,Chinese,Malay": 0.2215909090909091, + "Filipino,Vietnamese,Chinese,Spanish": 0.25, + "Filipino,Vietnamese,Indonesian,Malay": 0.25, + "Filipino,Vietnamese,Indonesian,Spanish": 0.24431818181818182, + "Filipino,Vietnamese,Malay,Spanish": 0.23863636363636365, + "Filipino,Chinese,Indonesian,Malay": 0.2556818181818182, + "Filipino,Chinese,Indonesian,Spanish": 0.24431818181818182, + "Filipino,Chinese,Malay,Spanish": 0.24431818181818182, + "Filipino,Indonesian,Malay,Spanish": 0.2727272727272727, + "Vietnamese,Chinese,Indonesian,Malay": 0.2840909090909091, + "Vietnamese,Chinese,Indonesian,Spanish": 0.3409090909090909, + "Vietnamese,Chinese,Malay,Spanish": 0.2897727272727273, + "Vietnamese,Indonesian,Malay,Spanish": 0.32954545454545453, + "Chinese,Indonesian,Malay,Spanish": 0.3409090909090909 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.20454545454545456, + "English,Filipino,Vietnamese,Chinese,Malay": 0.19886363636363635, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.2215909090909091, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.23295454545454544, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.22727272727272727, + "English,Filipino,Vietnamese,Malay,Spanish": 0.2215909090909091, + "English,Filipino,Chinese,Indonesian,Malay": 0.22727272727272727, + "English,Filipino,Chinese,Indonesian,Spanish": 0.22727272727272727, + "English,Filipino,Chinese,Malay,Spanish": 0.22727272727272727, + "English,Filipino,Indonesian,Malay,Spanish": 0.2556818181818182, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.2556818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.2897727272727273, + "English,Vietnamese,Chinese,Malay,Spanish": 0.26136363636363635, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.29545454545454547, + "English,Chinese,Indonesian,Malay,Spanish": 0.3181818181818182, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.20454545454545456, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.21022727272727273, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.20454545454545456, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.2159090909090909, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.2215909090909091, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.26136363636363635 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.1875, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.19318181818181818, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.1875, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.20454545454545456, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.21022727272727273, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.23863636363636365, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.1875 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.17613636363636365 + } + }, + "AC3_2": 0.5122431786957747, + "AC3_3": 0.42348495072769815, + "AC3_4": 0.3623066854747182, + "AC3_5": 0.3175122518547604, + "AC3_6": 0.2835936411205318, + "AC3_7": 0.2576648289436108 + }, + "prompt_2": { + "overall_acc": 0.4878246753246754, + "language_acc": { + "English": 0.5909090909090909, + "Filipino": 0.4147727272727273, + "Vietnamese": 0.4318181818181818, + "Chinese": 0.5625, + "Indonesian": 0.4772727272727273, + "Malay": 0.4318181818181818, + "Spanish": 0.5056818181818182 + }, + "consistency_score_2": 0.5744047619047619, + "consistency_score_3": 0.40649350649350646, + "consistency_score_4": 0.3180194805194805, + "consistency_score_5": 0.26379870129870125, + "consistency_score_6": 0.22646103896103895, + "consistency_score_7": 0.19886363636363635, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.5284090909090909, + "English,Vietnamese": 0.5454545454545454, + "English,Chinese": 0.5795454545454546, + "English,Indonesian": 0.6818181818181818, + "English,Malay": 0.5795454545454546, + "English,Spanish": 0.6875, + "Filipino,Vietnamese": 0.5170454545454546, + "Filipino,Chinese": 0.5227272727272727, + "Filipino,Indonesian": 0.5568181818181818, + "Filipino,Malay": 0.5965909090909091, + "Filipino,Spanish": 0.5511363636363636, + "Vietnamese,Chinese": 0.5170454545454546, + "Vietnamese,Indonesian": 0.5625, + "Vietnamese,Malay": 0.5284090909090909, + "Vietnamese,Spanish": 0.5681818181818182, + "Chinese,Indonesian": 0.5795454545454546, + "Chinese,Malay": 0.5681818181818182, + "Chinese,Spanish": 0.5852272727272727, + "Indonesian,Malay": 0.6420454545454546, + "Indonesian,Spanish": 0.6136363636363636, + "Malay,Spanish": 0.5511363636363636 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.3522727272727273, + "English,Filipino,Chinese": 0.36363636363636365, + "English,Filipino,Indonesian": 0.42613636363636365, + "English,Filipino,Malay": 0.4034090909090909, + "English,Filipino,Spanish": 0.42045454545454547, + "English,Vietnamese,Chinese": 0.375, + "English,Vietnamese,Indonesian": 0.44886363636363635, + "English,Vietnamese,Malay": 0.39204545454545453, + "English,Vietnamese,Spanish": 0.42613636363636365, + "English,Chinese,Indonesian": 0.44886363636363635, + "English,Chinese,Malay": 0.3977272727272727, + "English,Chinese,Spanish": 0.4602272727272727, + "English,Indonesian,Malay": 0.4772727272727273, + "English,Indonesian,Spanish": 0.5227272727272727, + "English,Malay,Spanish": 0.4318181818181818, + "Filipino,Vietnamese,Chinese": 0.32954545454545453, + "Filipino,Vietnamese,Indonesian": 0.3806818181818182, + "Filipino,Vietnamese,Malay": 0.375, + "Filipino,Vietnamese,Spanish": 0.375, + "Filipino,Chinese,Indonesian": 0.3806818181818182, + "Filipino,Chinese,Malay": 0.39204545454545453, + "Filipino,Chinese,Spanish": 0.3806818181818182, + "Filipino,Indonesian,Malay": 0.4431818181818182, + "Filipino,Indonesian,Spanish": 0.3977272727272727, + "Filipino,Malay,Spanish": 0.4034090909090909, + "Vietnamese,Chinese,Indonesian": 0.38636363636363635, + "Vietnamese,Chinese,Malay": 0.35795454545454547, + "Vietnamese,Chinese,Spanish": 0.38636363636363635, + "Vietnamese,Indonesian,Malay": 0.4090909090909091, + "Vietnamese,Indonesian,Spanish": 0.42045454545454547, + "Vietnamese,Malay,Spanish": 0.39204545454545453, + "Chinese,Indonesian,Malay": 0.4147727272727273, + "Chinese,Indonesian,Spanish": 0.42613636363636365, + "Chinese,Malay,Spanish": 0.39204545454545453, + "Indonesian,Malay,Spanish": 0.4375 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.2556818181818182, + "English,Filipino,Vietnamese,Indonesian": 0.3181818181818182, + "English,Filipino,Vietnamese,Malay": 0.29545454545454547, + "English,Filipino,Vietnamese,Spanish": 0.30113636363636365, + "English,Filipino,Chinese,Indonesian": 0.3068181818181818, + "English,Filipino,Chinese,Malay": 0.29545454545454547, + "English,Filipino,Chinese,Spanish": 0.30113636363636365, + "English,Filipino,Indonesian,Malay": 0.3465909090909091, + "English,Filipino,Indonesian,Spanish": 0.3522727272727273, + "English,Filipino,Malay,Spanish": 0.3409090909090909, + "English,Vietnamese,Chinese,Indonesian": 0.32386363636363635, + "English,Vietnamese,Chinese,Malay": 0.2897727272727273, + "English,Vietnamese,Chinese,Spanish": 0.32954545454545453, + "English,Vietnamese,Indonesian,Malay": 0.3352272727272727, + "English,Vietnamese,Indonesian,Spanish": 0.36363636363636365, + "English,Vietnamese,Malay,Spanish": 0.3181818181818182, + "English,Chinese,Indonesian,Malay": 0.32954545454545453, + "English,Chinese,Indonesian,Spanish": 0.3806818181818182, + "English,Chinese,Malay,Spanish": 0.32386363636363635, + "English,Indonesian,Malay,Spanish": 0.38636363636363635, + "Filipino,Vietnamese,Chinese,Indonesian": 0.26704545454545453, + "Filipino,Vietnamese,Chinese,Malay": 0.2784090909090909, + "Filipino,Vietnamese,Chinese,Spanish": 0.2840909090909091, + "Filipino,Vietnamese,Indonesian,Malay": 0.3125, + "Filipino,Vietnamese,Indonesian,Spanish": 0.3181818181818182, + "Filipino,Vietnamese,Malay,Spanish": 0.3125, + "Filipino,Chinese,Indonesian,Malay": 0.3125, + "Filipino,Chinese,Indonesian,Spanish": 0.30113636363636365, + "Filipino,Chinese,Malay,Spanish": 0.3125, + "Filipino,Indonesian,Malay,Spanish": 0.3465909090909091, + "Vietnamese,Chinese,Indonesian,Malay": 0.30113636363636365, + "Vietnamese,Chinese,Indonesian,Spanish": 0.32954545454545453, + "Vietnamese,Chinese,Malay,Spanish": 0.30113636363636365, + "Vietnamese,Indonesian,Malay,Spanish": 0.32954545454545453, + "Chinese,Indonesian,Malay,Spanish": 0.32954545454545453 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.22727272727272727, + "English,Filipino,Vietnamese,Chinese,Malay": 0.22727272727272727, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.23863636363636365, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.26136363636363635, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.2784090909090909, + "English,Filipino,Vietnamese,Malay,Spanish": 0.26704545454545453, + "English,Filipino,Chinese,Indonesian,Malay": 0.2556818181818182, + "English,Filipino,Chinese,Indonesian,Spanish": 0.26704545454545453, + "English,Filipino,Chinese,Malay,Spanish": 0.2556818181818182, + "English,Filipino,Indonesian,Malay,Spanish": 0.3068181818181818, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.2556818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.29545454545454547, + "English,Vietnamese,Chinese,Malay,Spanish": 0.26136363636363635, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.2897727272727273, + "English,Chinese,Indonesian,Malay,Spanish": 0.29545454545454547, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.23295454545454544, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.25, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.2556818181818182, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.2784090909090909, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.26704545454545453, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.2727272727272727 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.19886363636363635, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.2215909090909091, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.2159090909090909, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.24431818181818182, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.23295454545454544, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.24431818181818182, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.22727272727272727 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.19886363636363635 + } + }, + "AC3_2": 0.5275862382153784, + "AC3_3": 0.44346087742835405, + "AC3_4": 0.38503164336923823, + "AC3_5": 0.3424255279901211, + "AC3_6": 0.30932519181029544, + "AC3_7": 0.28254620670718816 + }, + "prompt_3": { + "overall_acc": 0.4805194805194805, + "language_acc": { + "English": 0.5454545454545454, + "Filipino": 0.4375, + "Vietnamese": 0.4715909090909091, + "Chinese": 0.5397727272727273, + "Indonesian": 0.4431818181818182, + "Malay": 0.4375, + "Spanish": 0.48863636363636365 + }, + "consistency_score_2": 0.5708874458874458, + "consistency_score_3": 0.40568181818181825, + "consistency_score_4": 0.3199675324675324, + "consistency_score_5": 0.2673160173160173, + "consistency_score_6": 0.23133116883116883, + "consistency_score_7": 0.20454545454545456, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.5284090909090909, + "English,Vietnamese": 0.5738636363636364, + "English,Chinese": 0.5738636363636364, + "English,Indonesian": 0.5795454545454546, + "English,Malay": 0.5852272727272727, + "English,Spanish": 0.6931818181818182, + "Filipino,Vietnamese": 0.5170454545454546, + "Filipino,Chinese": 0.5340909090909091, + "Filipino,Indonesian": 0.5340909090909091, + "Filipino,Malay": 0.5909090909090909, + "Filipino,Spanish": 0.5909090909090909, + "Vietnamese,Chinese": 0.5284090909090909, + "Vietnamese,Indonesian": 0.5625, + "Vietnamese,Malay": 0.5397727272727273, + "Vietnamese,Spanish": 0.6079545454545454, + "Chinese,Indonesian": 0.5397727272727273, + "Chinese,Malay": 0.5397727272727273, + "Chinese,Spanish": 0.5625, + "Indonesian,Malay": 0.6136363636363636, + "Indonesian,Spanish": 0.5909090909090909, + "Malay,Spanish": 0.6022727272727273 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.36363636363636365, + "English,Filipino,Chinese": 0.3693181818181818, + "English,Filipino,Indonesian": 0.3693181818181818, + "English,Filipino,Malay": 0.4090909090909091, + "English,Filipino,Spanish": 0.4375, + "English,Vietnamese,Chinese": 0.3806818181818182, + "English,Vietnamese,Indonesian": 0.4034090909090909, + "English,Vietnamese,Malay": 0.4034090909090909, + "English,Vietnamese,Spanish": 0.4943181818181818, + "English,Chinese,Indonesian": 0.3977272727272727, + "English,Chinese,Malay": 0.39204545454545453, + "English,Chinese,Spanish": 0.4772727272727273, + "English,Indonesian,Malay": 0.4431818181818182, + "English,Indonesian,Spanish": 0.4715909090909091, + "English,Malay,Spanish": 0.48863636363636365, + "Filipino,Vietnamese,Chinese": 0.3465909090909091, + "Filipino,Vietnamese,Indonesian": 0.3693181818181818, + "Filipino,Vietnamese,Malay": 0.375, + "Filipino,Vietnamese,Spanish": 0.42613636363636365, + "Filipino,Chinese,Indonesian": 0.36363636363636365, + "Filipino,Chinese,Malay": 0.38636363636363635, + "Filipino,Chinese,Spanish": 0.3977272727272727, + "Filipino,Indonesian,Malay": 0.4147727272727273, + "Filipino,Indonesian,Spanish": 0.4034090909090909, + "Filipino,Malay,Spanish": 0.4431818181818182, + "Vietnamese,Chinese,Indonesian": 0.3522727272727273, + "Vietnamese,Chinese,Malay": 0.3465909090909091, + "Vietnamese,Chinese,Spanish": 0.39204545454545453, + "Vietnamese,Indonesian,Malay": 0.4034090909090909, + "Vietnamese,Indonesian,Spanish": 0.42613636363636365, + "Vietnamese,Malay,Spanish": 0.4375, + "Chinese,Indonesian,Malay": 0.38636363636363635, + "Chinese,Indonesian,Spanish": 0.39204545454545453, + "Chinese,Malay,Spanish": 0.3977272727272727, + "Indonesian,Malay,Spanish": 0.4375 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.2727272727272727, + "English,Filipino,Vietnamese,Indonesian": 0.2897727272727273, + "English,Filipino,Vietnamese,Malay": 0.29545454545454547, + "English,Filipino,Vietnamese,Spanish": 0.3522727272727273, + "English,Filipino,Chinese,Indonesian": 0.29545454545454547, + "English,Filipino,Chinese,Malay": 0.3068181818181818, + "English,Filipino,Chinese,Spanish": 0.3409090909090909, + "English,Filipino,Indonesian,Malay": 0.32954545454545453, + "English,Filipino,Indonesian,Spanish": 0.32954545454545453, + "English,Filipino,Malay,Spanish": 0.3693181818181818, + "English,Vietnamese,Chinese,Indonesian": 0.30113636363636365, + "English,Vietnamese,Chinese,Malay": 0.2897727272727273, + "English,Vietnamese,Chinese,Spanish": 0.3522727272727273, + "English,Vietnamese,Indonesian,Malay": 0.32954545454545453, + "English,Vietnamese,Indonesian,Spanish": 0.36363636363636365, + "English,Vietnamese,Malay,Spanish": 0.375, + "English,Chinese,Indonesian,Malay": 0.32386363636363635, + "English,Chinese,Indonesian,Spanish": 0.35795454545454547, + "English,Chinese,Malay,Spanish": 0.36363636363636365, + "English,Indonesian,Malay,Spanish": 0.375, + "Filipino,Vietnamese,Chinese,Indonesian": 0.26136363636363635, + "Filipino,Vietnamese,Chinese,Malay": 0.2556818181818182, + "Filipino,Vietnamese,Chinese,Spanish": 0.30113636363636365, + "Filipino,Vietnamese,Indonesian,Malay": 0.30113636363636365, + "Filipino,Vietnamese,Indonesian,Spanish": 0.3181818181818182, + "Filipino,Vietnamese,Malay,Spanish": 0.3352272727272727, + "Filipino,Chinese,Indonesian,Malay": 0.29545454545454547, + "Filipino,Chinese,Indonesian,Spanish": 0.3125, + "Filipino,Chinese,Malay,Spanish": 0.3125, + "Filipino,Indonesian,Malay,Spanish": 0.35795454545454547, + "Vietnamese,Chinese,Indonesian,Malay": 0.2784090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.30113636363636365, + "Vietnamese,Chinese,Malay,Spanish": 0.29545454545454547, + "Vietnamese,Indonesian,Malay,Spanish": 0.3465909090909091, + "Chinese,Indonesian,Malay,Spanish": 0.3125 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.23295454545454544, + "English,Filipino,Vietnamese,Chinese,Malay": 0.2215909090909091, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.2727272727272727, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.2556818181818182, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.2784090909090909, + "English,Filipino,Vietnamese,Malay,Spanish": 0.2897727272727273, + "English,Filipino,Chinese,Indonesian,Malay": 0.26704545454545453, + "English,Filipino,Chinese,Indonesian,Spanish": 0.2840909090909091, + "English,Filipino,Chinese,Malay,Spanish": 0.2897727272727273, + "English,Filipino,Indonesian,Malay,Spanish": 0.30113636363636365, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.2556818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.2840909090909091, + "English,Vietnamese,Chinese,Malay,Spanish": 0.2784090909090909, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.3068181818181818, + "English,Chinese,Indonesian,Malay,Spanish": 0.30113636363636365, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.2159090909090909, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.24431818181818182, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.23295454545454544, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.2840909090909091, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.26704545454545453, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.25 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.20454545454545456, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.23295454545454544, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.2215909090909091, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.25, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.2556818181818182, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.24431818181818182, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.21022727272727273 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.20454545454545456 + } + }, + "AC3_2": 0.5218199196086831, + "AC3_3": 0.43994071503303905, + "AC3_4": 0.38414272804120053, + "AC3_5": 0.3435262277060514, + "AC3_6": 0.31231026665635947, + "AC3_7": 0.2869452821640599 + }, + "prompt_4": { + "overall_acc": 0.5137987012987013, + "language_acc": { + "English": 0.5681818181818182, + "Filipino": 0.4772727272727273, + "Vietnamese": 0.4318181818181818, + "Chinese": 0.5397727272727273, + "Indonesian": 0.5284090909090909, + "Malay": 0.5056818181818182, + "Spanish": 0.5454545454545454 + }, + "consistency_score_2": 0.5868506493506492, + "consistency_score_3": 0.4237012987012987, + "consistency_score_4": 0.34139610389610386, + "consistency_score_5": 0.2924783549783549, + "consistency_score_6": 0.25892857142857145, + "consistency_score_7": 0.23295454545454544, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.5625, + "English,Vietnamese": 0.5852272727272727, + "English,Chinese": 0.5681818181818182, + "English,Indonesian": 0.6136363636363636, + "English,Malay": 0.6022727272727273, + "English,Spanish": 0.7443181818181818, + "Filipino,Vietnamese": 0.5625, + "Filipino,Chinese": 0.5454545454545454, + "Filipino,Indonesian": 0.5454545454545454, + "Filipino,Malay": 0.5852272727272727, + "Filipino,Spanish": 0.5625, + "Vietnamese,Chinese": 0.5113636363636364, + "Vietnamese,Indonesian": 0.5056818181818182, + "Vietnamese,Malay": 0.5454545454545454, + "Vietnamese,Spanish": 0.6079545454545454, + "Chinese,Indonesian": 0.5454545454545454, + "Chinese,Malay": 0.5795454545454546, + "Chinese,Spanish": 0.5511363636363636, + "Indonesian,Malay": 0.6931818181818182, + "Indonesian,Spanish": 0.6306818181818182, + "Malay,Spanish": 0.6761363636363636 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.3977272727272727, + "English,Filipino,Chinese": 0.38636363636363635, + "English,Filipino,Indonesian": 0.4090909090909091, + "English,Filipino,Malay": 0.42613636363636365, + "English,Filipino,Spanish": 0.4659090909090909, + "English,Vietnamese,Chinese": 0.3693181818181818, + "English,Vietnamese,Indonesian": 0.4090909090909091, + "English,Vietnamese,Malay": 0.4147727272727273, + "English,Vietnamese,Spanish": 0.4943181818181818, + "English,Chinese,Indonesian": 0.3977272727272727, + "English,Chinese,Malay": 0.42045454545454547, + "English,Chinese,Spanish": 0.4772727272727273, + "English,Indonesian,Malay": 0.4943181818181818, + "English,Indonesian,Spanish": 0.5170454545454546, + "English,Malay,Spanish": 0.5397727272727273, + "Filipino,Vietnamese,Chinese": 0.3465909090909091, + "Filipino,Vietnamese,Indonesian": 0.3806818181818182, + "Filipino,Vietnamese,Malay": 0.3977272727272727, + "Filipino,Vietnamese,Spanish": 0.4147727272727273, + "Filipino,Chinese,Indonesian": 0.375, + "Filipino,Chinese,Malay": 0.4090909090909091, + "Filipino,Chinese,Spanish": 0.3806818181818182, + "Filipino,Indonesian,Malay": 0.4602272727272727, + "Filipino,Indonesian,Spanish": 0.4318181818181818, + "Filipino,Malay,Spanish": 0.4602272727272727, + "Vietnamese,Chinese,Indonesian": 0.32386363636363635, + "Vietnamese,Chinese,Malay": 0.36363636363636365, + "Vietnamese,Chinese,Spanish": 0.3806818181818182, + "Vietnamese,Indonesian,Malay": 0.4090909090909091, + "Vietnamese,Indonesian,Spanish": 0.4090909090909091, + "Vietnamese,Malay,Spanish": 0.45454545454545453, + "Chinese,Indonesian,Malay": 0.4375, + "Chinese,Indonesian,Spanish": 0.4034090909090909, + "Chinese,Malay,Spanish": 0.4375, + "Indonesian,Malay,Spanish": 0.5340909090909091 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.2784090909090909, + "English,Filipino,Vietnamese,Indonesian": 0.32954545454545453, + "English,Filipino,Vietnamese,Malay": 0.3409090909090909, + "English,Filipino,Vietnamese,Spanish": 0.35795454545454547, + "English,Filipino,Chinese,Indonesian": 0.3125, + "English,Filipino,Chinese,Malay": 0.32954545454545453, + "English,Filipino,Chinese,Spanish": 0.3465909090909091, + "English,Filipino,Indonesian,Malay": 0.3693181818181818, + "English,Filipino,Indonesian,Spanish": 0.3806818181818182, + "English,Filipino,Malay,Spanish": 0.3977272727272727, + "English,Vietnamese,Chinese,Indonesian": 0.2784090909090909, + "English,Vietnamese,Chinese,Malay": 0.29545454545454547, + "English,Vietnamese,Chinese,Spanish": 0.32954545454545453, + "English,Vietnamese,Indonesian,Malay": 0.3465909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.36363636363636365, + "English,Vietnamese,Malay,Spanish": 0.39204545454545453, + "English,Chinese,Indonesian,Malay": 0.36363636363636365, + "English,Chinese,Indonesian,Spanish": 0.36363636363636365, + "English,Chinese,Malay,Spanish": 0.39204545454545453, + "English,Indonesian,Malay,Spanish": 0.44886363636363635, + "Filipino,Vietnamese,Chinese,Indonesian": 0.2784090909090909, + "Filipino,Vietnamese,Chinese,Malay": 0.3068181818181818, + "Filipino,Vietnamese,Chinese,Spanish": 0.2840909090909091, + "Filipino,Vietnamese,Indonesian,Malay": 0.3409090909090909, + "Filipino,Vietnamese,Indonesian,Spanish": 0.32954545454545453, + "Filipino,Vietnamese,Malay,Spanish": 0.36363636363636365, + "Filipino,Chinese,Indonesian,Malay": 0.32954545454545453, + "Filipino,Chinese,Indonesian,Spanish": 0.3125, + "Filipino,Chinese,Malay,Spanish": 0.3352272727272727, + "Filipino,Indonesian,Malay,Spanish": 0.4090909090909091, + "Vietnamese,Chinese,Indonesian,Malay": 0.29545454545454547, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2897727272727273, + "Vietnamese,Chinese,Malay,Spanish": 0.3181818181818182, + "Vietnamese,Indonesian,Malay,Spanish": 0.36363636363636365, + "Chinese,Indonesian,Malay,Spanish": 0.375 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.25, + "English,Filipino,Vietnamese,Chinese,Malay": 0.26136363636363635, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.26136363636363635, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.30113636363636365, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.3068181818181818, + "English,Filipino,Vietnamese,Malay,Spanish": 0.32954545454545453, + "English,Filipino,Chinese,Indonesian,Malay": 0.29545454545454547, + "English,Filipino,Chinese,Indonesian,Spanish": 0.30113636363636365, + "English,Filipino,Chinese,Malay,Spanish": 0.3125, + "English,Filipino,Indonesian,Malay,Spanish": 0.35795454545454547, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.26136363636363635, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.26136363636363635, + "English,Vietnamese,Chinese,Malay,Spanish": 0.2840909090909091, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.32954545454545453, + "English,Chinese,Indonesian,Malay,Spanish": 0.3465909090909091, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.26704545454545453, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.25, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.2727272727272727, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.3181818181818182, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.30113636363636365, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.2727272727272727 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.23863636363636365, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.23863636363636365, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.25, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.29545454545454547, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.2897727272727273, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.2556818181818182, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.24431818181818182 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.23295454545454544 + } + }, + "AC3_2": 0.5479003849634371, + "AC3_3": 0.46442064424123597, + "AC3_4": 0.4102196919912001, + "AC3_5": 0.3727626819026909, + "AC3_6": 0.3443314825484767, + "AC3_7": 0.32056571141952267 + }, + "prompt_5": { + "overall_acc": 0.49025974025974023, + "language_acc": { + "English": 0.5454545454545454, + "Filipino": 0.4090909090909091, + "Vietnamese": 0.48295454545454547, + "Chinese": 0.5397727272727273, + "Indonesian": 0.42613636363636365, + "Malay": 0.4715909090909091, + "Spanish": 0.5568181818181818 + }, + "consistency_score_2": 0.5622294372294371, + "consistency_score_3": 0.39285714285714296, + "consistency_score_4": 0.30909090909090897, + "consistency_score_5": 0.26136363636363635, + "consistency_score_6": 0.23133116883116883, + "consistency_score_7": 0.21022727272727273, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.48863636363636365, + "English,Vietnamese": 0.5852272727272727, + "English,Chinese": 0.5625, + "English,Indonesian": 0.5738636363636364, + "English,Malay": 0.6193181818181818, + "English,Spanish": 0.7045454545454546, + "Filipino,Vietnamese": 0.5340909090909091, + "Filipino,Chinese": 0.5113636363636364, + "Filipino,Indonesian": 0.5340909090909091, + "Filipino,Malay": 0.5568181818181818, + "Filipino,Spanish": 0.5056818181818182, + "Vietnamese,Chinese": 0.5113636363636364, + "Vietnamese,Indonesian": 0.48863636363636365, + "Vietnamese,Malay": 0.5909090909090909, + "Vietnamese,Spanish": 0.5738636363636364, + "Chinese,Indonesian": 0.5397727272727273, + "Chinese,Malay": 0.5511363636363636, + "Chinese,Spanish": 0.5511363636363636, + "Indonesian,Malay": 0.625, + "Indonesian,Spanish": 0.6022727272727273, + "Malay,Spanish": 0.5965909090909091 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.35795454545454547, + "English,Filipino,Chinese": 0.3409090909090909, + "English,Filipino,Indonesian": 0.3465909090909091, + "English,Filipino,Malay": 0.3693181818181818, + "English,Filipino,Spanish": 0.38636363636363635, + "English,Vietnamese,Chinese": 0.3693181818181818, + "English,Vietnamese,Indonesian": 0.3806818181818182, + "English,Vietnamese,Malay": 0.4375, + "English,Vietnamese,Spanish": 0.4659090909090909, + "English,Chinese,Indonesian": 0.39204545454545453, + "English,Chinese,Malay": 0.3977272727272727, + "English,Chinese,Spanish": 0.44886363636363635, + "English,Indonesian,Malay": 0.44886363636363635, + "English,Indonesian,Spanish": 0.4715909090909091, + "English,Malay,Spanish": 0.48863636363636365, + "Filipino,Vietnamese,Chinese": 0.3465909090909091, + "Filipino,Vietnamese,Indonesian": 0.3465909090909091, + "Filipino,Vietnamese,Malay": 0.39204545454545453, + "Filipino,Vietnamese,Spanish": 0.36363636363636365, + "Filipino,Chinese,Indonesian": 0.3465909090909091, + "Filipino,Chinese,Malay": 0.375, + "Filipino,Chinese,Spanish": 0.3352272727272727, + "Filipino,Indonesian,Malay": 0.4034090909090909, + "Filipino,Indonesian,Spanish": 0.375, + "Filipino,Malay,Spanish": 0.3693181818181818, + "Vietnamese,Chinese,Indonesian": 0.3352272727272727, + "Vietnamese,Chinese,Malay": 0.36363636363636365, + "Vietnamese,Chinese,Spanish": 0.38636363636363635, + "Vietnamese,Indonesian,Malay": 0.3977272727272727, + "Vietnamese,Indonesian,Spanish": 0.3977272727272727, + "Vietnamese,Malay,Spanish": 0.42045454545454547, + "Chinese,Indonesian,Malay": 0.4147727272727273, + "Chinese,Indonesian,Spanish": 0.42045454545454547, + "Chinese,Malay,Spanish": 0.39204545454545453, + "Indonesian,Malay,Spanish": 0.4659090909090909 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.26704545454545453, + "English,Filipino,Vietnamese,Indonesian": 0.2840909090909091, + "English,Filipino,Vietnamese,Malay": 0.30113636363636365, + "English,Filipino,Vietnamese,Spanish": 0.3125, + "English,Filipino,Chinese,Indonesian": 0.2727272727272727, + "English,Filipino,Chinese,Malay": 0.2784090909090909, + "English,Filipino,Chinese,Spanish": 0.2840909090909091, + "English,Filipino,Indonesian,Malay": 0.30113636363636365, + "English,Filipino,Indonesian,Spanish": 0.3068181818181818, + "English,Filipino,Malay,Spanish": 0.3068181818181818, + "English,Vietnamese,Chinese,Indonesian": 0.2840909090909091, + "English,Vietnamese,Chinese,Malay": 0.2897727272727273, + "English,Vietnamese,Chinese,Spanish": 0.3352272727272727, + "English,Vietnamese,Indonesian,Malay": 0.32954545454545453, + "English,Vietnamese,Indonesian,Spanish": 0.3409090909090909, + "English,Vietnamese,Malay,Spanish": 0.35795454545454547, + "English,Chinese,Indonesian,Malay": 0.3352272727272727, + "English,Chinese,Indonesian,Spanish": 0.36363636363636365, + "English,Chinese,Malay,Spanish": 0.3465909090909091, + "English,Indonesian,Malay,Spanish": 0.39204545454545453, + "Filipino,Vietnamese,Chinese,Indonesian": 0.2727272727272727, + "Filipino,Vietnamese,Chinese,Malay": 0.2840909090909091, + "Filipino,Vietnamese,Chinese,Spanish": 0.2727272727272727, + "Filipino,Vietnamese,Indonesian,Malay": 0.30113636363636365, + "Filipino,Vietnamese,Indonesian,Spanish": 0.30113636363636365, + "Filipino,Vietnamese,Malay,Spanish": 0.3068181818181818, + "Filipino,Chinese,Indonesian,Malay": 0.30113636363636365, + "Filipino,Chinese,Indonesian,Spanish": 0.29545454545454547, + "Filipino,Chinese,Malay,Spanish": 0.2727272727272727, + "Filipino,Indonesian,Malay,Spanish": 0.3125, + "Vietnamese,Chinese,Indonesian,Malay": 0.29545454545454547, + "Vietnamese,Chinese,Indonesian,Spanish": 0.3125, + "Vietnamese,Chinese,Malay,Spanish": 0.3068181818181818, + "Vietnamese,Indonesian,Malay,Spanish": 0.3409090909090909, + "Chinese,Indonesian,Malay,Spanish": 0.3522727272727273 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.23863636363636365, + "English,Filipino,Vietnamese,Chinese,Malay": 0.23295454545454544, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.24431818181818182, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.2556818181818182, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.26704545454545453, + "English,Filipino,Vietnamese,Malay,Spanish": 0.26704545454545453, + "English,Filipino,Chinese,Indonesian,Malay": 0.24431818181818182, + "English,Filipino,Chinese,Indonesian,Spanish": 0.26136363636363635, + "English,Filipino,Chinese,Malay,Spanish": 0.24431818181818182, + "English,Filipino,Indonesian,Malay,Spanish": 0.2727272727272727, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.2556818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.2784090909090909, + "English,Vietnamese,Chinese,Malay,Spanish": 0.26704545454545453, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.29545454545454547, + "English,Chinese,Indonesian,Malay,Spanish": 0.32386363636363635, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.24431818181818182, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.2556818181818182, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.23863636363636365, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.26704545454545453, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.2784090909090909 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.2159090909090909, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.23295454545454544, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.2159090909090909, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.23863636363636365, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.23863636363636365, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.25, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.22727272727272727 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.21022727272727273 + } + }, + "AC3_2": 0.5237839281043418, + "AC3_3": 0.43618697474052426, + "AC3_4": 0.37914481938983285, + "AC3_5": 0.3409581778458603, + "AC3_6": 0.31433976592725005, + "AC3_7": 0.29426946166661816 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6407766990291263 + }, + "prompt_2": { + "accuracy": 0.6213592233009708 + }, + "prompt_3": { + "accuracy": 0.6407766990291263 + }, + "prompt_4": { + "accuracy": 0.6699029126213593 + }, + "prompt_5": { + "accuracy": 0.6407766990291263 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4666666666666667 + }, + "prompt_2": { + "accuracy": 0.4666666666666667 + }, + "prompt_3": { + "accuracy": 0.47619047619047616 + }, + "prompt_4": { + "accuracy": 0.49523809523809526 + }, + "prompt_5": { + "accuracy": 0.5142857142857142 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7570093457943925 + }, + "prompt_2": { + "accuracy": 0.7476635514018691 + }, + "prompt_3": { + "accuracy": 0.7570093457943925 + }, + "prompt_4": { + "accuracy": 0.7476635514018691 + }, + "prompt_5": { + "accuracy": 0.7383177570093458 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.61, + "category_acc": { + "brand": 0.5, + "demographics": 0.2, + "biology": 0.7, + "history": 0.4666666666666667, + "literature": 0.6, + "politics": 0.9, + "culture": 0.8, + "film": 0.5, + "law": 0.6, + "geography": 0.7 + } + }, + "prompt_2": { + "accuracy": 0.58, + "category_acc": { + "brand": 0.4, + "demographics": 0.0, + "biology": 0.8, + "history": 0.4666666666666667, + "literature": 0.5, + "politics": 0.9, + "culture": 0.8, + "film": 0.4, + "law": 0.6, + "geography": 0.7 + } + }, + "prompt_3": { + "accuracy": 0.55, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.7, + "history": 0.4, + "literature": 0.5, + "politics": 0.8, + "culture": 0.7, + "film": 0.6, + "law": 0.6, + "geography": 0.6 + } + }, + "prompt_4": { + "accuracy": 0.6, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.7, + "history": 0.5333333333333333, + "literature": 0.4, + "politics": 0.8, + "culture": 0.8, + "film": 0.6, + "law": 0.7, + "geography": 0.6 + } + }, + "prompt_5": { + "accuracy": 0.58, + "category_acc": { + "brand": 0.4, + "demographics": 0.2, + "biology": 0.8, + "history": 0.5333333333333333, + "literature": 0.5, + "politics": 0.8, + "culture": 0.6, + "film": 0.4, + "law": 0.6, + "geography": 0.8 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 - }, - "indommlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 - }, - "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 - }, - "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 - }, - "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.2794452574404052 + }, + "prompt_2": { + "bleu_score": 0.2674775026836445 + }, + "prompt_3": { + "bleu_score": 0.2722339627774794 + }, + "prompt_4": { + "bleu_score": 0.26992687523049974 + }, + "prompt_5": { + "bleu_score": 0.2339828457497402 + } }, - "flores_zsm2eng": { + "indommlu": { "prompt_1": -1, "prompt_2": -1, "prompt_3": -1, "prompt_4": -1, "prompt_5": -1 }, + "flores_ind2eng": { + "prompt_1": { + "bleu_score": 0.38084676314905563 + }, + "prompt_2": { + "bleu_score": 0.3911982896928193 + }, + "prompt_3": { + "bleu_score": 0.39332694536667584 + }, + "prompt_4": { + "bleu_score": 0.3948636422462026 + }, + "prompt_5": { + "bleu_score": 0.36809815124796963 + } + }, + "flores_vie2eng": { + "prompt_1": { + "bleu_score": 0.3111744174894432 + }, + "prompt_2": { + "bleu_score": 0.3175458861102973 + }, + "prompt_3": { + "bleu_score": 0.31221700320284107 + }, + "prompt_4": { + "bleu_score": 0.31870733530187106 + }, + "prompt_5": { + "bleu_score": 0.2972648972980948 + } + }, + "flores_zho2eng": { + "prompt_1": { + "bleu_score": 0.24954533306484694 + }, + "prompt_2": { + "bleu_score": 0.24872110088444996 + }, + "prompt_3": { + "bleu_score": 0.24951988951653503 + }, + "prompt_4": { + "bleu_score": 0.2497913431912669 + }, + "prompt_5": { + "bleu_score": 0.23957916096630588 + } + }, + "flores_zsm2eng": { + "prompt_1": { + "bleu_score": 0.3901065940971776 + }, + "prompt_2": { + "bleu_score": 0.4022235711140328 + }, + "prompt_3": { + "bleu_score": 0.4029939240859687 + }, + "prompt_4": { + "bleu_score": 0.4027192292749885 + }, + "prompt_5": { + "bleu_score": 0.3765211117066765 + } + }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6721120186697783 + }, + "prompt_2": { + "accuracy": 0.6779463243873979 + }, + "prompt_3": { + "accuracy": 0.661610268378063 + }, + "prompt_4": { + "accuracy": 0.6779463243873979 + }, + "prompt_5": { + "accuracy": 0.6837806301050176 + } }, "mmlu_full": { "prompt_1": -1, @@ -14538,235 +128385,3250 @@ "model_link": "https://huggingface.co/SeaLLMs/SeaLLM-7B-v2", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.6, + "language_acc": { + "Vietnamese": 0.5733333333333334, + "Malay": 0.52, + "Filipino": 0.56, + "Indonesian": 0.6, + "Chinese": 0.5866666666666667, + "Spanish": 0.6333333333333333, + "English": 0.7266666666666667 + }, + "consistency_score_2": 0.6171428571428572, + "consistency_score_3": 0.4723809523809524, + "consistency_score_4": 0.39066666666666655, + "consistency_score_5": 0.3358730158730159, + "consistency_score_6": 0.2942857142857143, + "consistency_score_7": 0.26, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.5466666666666666, + "Vietnamese,Filipino": 0.5266666666666666, + "Vietnamese,Indonesian": 0.6, + "Vietnamese,Chinese": 0.6066666666666667, + "Vietnamese,Spanish": 0.58, + "Vietnamese,English": 0.5933333333333334, + "Malay,Filipino": 0.5533333333333333, + "Malay,Indonesian": 0.6466666666666666, + "Malay,Chinese": 0.5866666666666667, + "Malay,Spanish": 0.64, + "Malay,English": 0.6, + "Filipino,Indonesian": 0.5933333333333334, + "Filipino,Chinese": 0.58, + "Filipino,Spanish": 0.64, + "Filipino,English": 0.6266666666666667, + "Indonesian,Chinese": 0.6866666666666666, + "Indonesian,Spanish": 0.6666666666666666, + "Indonesian,English": 0.6466666666666666, + "Chinese,Spanish": 0.66, + "Chinese,English": 0.6266666666666667, + "Spanish,English": 0.7533333333333333 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.3933333333333333, + "Vietnamese,Malay,Indonesian": 0.44, + "Vietnamese,Malay,Chinese": 0.41333333333333333, + "Vietnamese,Malay,Spanish": 0.4266666666666667, + "Vietnamese,Malay,English": 0.4266666666666667, + "Vietnamese,Filipino,Indonesian": 0.4066666666666667, + "Vietnamese,Filipino,Chinese": 0.42, + "Vietnamese,Filipino,Spanish": 0.4266666666666667, + "Vietnamese,Filipino,English": 0.4266666666666667, + "Vietnamese,Indonesian,Chinese": 0.49333333333333335, + "Vietnamese,Indonesian,Spanish": 0.46, + "Vietnamese,Indonesian,English": 0.47333333333333333, + "Vietnamese,Chinese,Spanish": 0.4666666666666667, + "Vietnamese,Chinese,English": 0.46, + "Vietnamese,Spanish,English": 0.5, + "Malay,Filipino,Indonesian": 0.46, + "Malay,Filipino,Chinese": 0.43333333333333335, + "Malay,Filipino,Spanish": 0.48, + "Malay,Filipino,English": 0.4533333333333333, + "Malay,Indonesian,Chinese": 0.48, + "Malay,Indonesian,Spanish": 0.52, + "Malay,Indonesian,English": 0.4866666666666667, + "Malay,Chinese,Spanish": 0.4866666666666667, + "Malay,Chinese,English": 0.46, + "Malay,Spanish,English": 0.5333333333333333, + "Filipino,Indonesian,Chinese": 0.4866666666666667, + "Filipino,Indonesian,Spanish": 0.49333333333333335, + "Filipino,Indonesian,English": 0.47333333333333333, + "Filipino,Chinese,Spanish": 0.49333333333333335, + "Filipino,Chinese,English": 0.46, + "Filipino,Spanish,English": 0.54, + "Indonesian,Chinese,Spanish": 0.5466666666666666, + "Indonesian,Chinese,English": 0.52, + "Indonesian,Spanish,English": 0.56, + "Chinese,Spanish,English": 0.5333333333333333 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.3333333333333333, + "Vietnamese,Malay,Filipino,Chinese": 0.34, + "Vietnamese,Malay,Filipino,Spanish": 0.36, + "Vietnamese,Malay,Filipino,English": 0.3466666666666667, + "Vietnamese,Malay,Indonesian,Chinese": 0.36666666666666664, + "Vietnamese,Malay,Indonesian,Spanish": 0.36666666666666664, + "Vietnamese,Malay,Indonesian,English": 0.36666666666666664, + "Vietnamese,Malay,Chinese,Spanish": 0.36666666666666664, + "Vietnamese,Malay,Chinese,English": 0.36, + "Vietnamese,Malay,Spanish,English": 0.38666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese": 0.38, + "Vietnamese,Filipino,Indonesian,Spanish": 0.35333333333333333, + "Vietnamese,Filipino,Indonesian,English": 0.35333333333333333, + "Vietnamese,Filipino,Chinese,Spanish": 0.35333333333333333, + "Vietnamese,Filipino,Chinese,English": 0.35333333333333333, + "Vietnamese,Filipino,Spanish,English": 0.3933333333333333, + "Vietnamese,Indonesian,Chinese,Spanish": 0.41333333333333333, + "Vietnamese,Indonesian,Chinese,English": 0.41333333333333333, + "Vietnamese,Indonesian,Spanish,English": 0.4066666666666667, + "Vietnamese,Chinese,Spanish,English": 0.4066666666666667, + "Malay,Filipino,Indonesian,Chinese": 0.38, + "Malay,Filipino,Indonesian,Spanish": 0.42, + "Malay,Filipino,Indonesian,English": 0.3933333333333333, + "Malay,Filipino,Chinese,Spanish": 0.4, + "Malay,Filipino,Chinese,English": 0.37333333333333335, + "Malay,Filipino,Spanish,English": 0.4266666666666667, + "Malay,Indonesian,Chinese,Spanish": 0.4266666666666667, + "Malay,Indonesian,Chinese,English": 0.4, + "Malay,Indonesian,Spanish,English": 0.4533333333333333, + "Malay,Chinese,Spanish,English": 0.4066666666666667, + "Filipino,Indonesian,Chinese,Spanish": 0.4266666666666667, + "Filipino,Indonesian,Chinese,English": 0.41333333333333333, + "Filipino,Indonesian,Spanish,English": 0.44, + "Filipino,Chinese,Spanish,English": 0.4266666666666667, + "Indonesian,Chinese,Spanish,English": 0.4666666666666667 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.30666666666666664, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.31333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.3, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.31333333333333335, + "Vietnamese,Malay,Filipino,Chinese,English": 0.3, + "Vietnamese,Malay,Filipino,Spanish,English": 0.3333333333333333, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.3333333333333333, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.32666666666666666, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.3333333333333333, + "Vietnamese,Malay,Chinese,Spanish,English": 0.32666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.32666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.32666666666666666, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.32666666666666666, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.32666666666666666, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.36666666666666664, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.36, + "Malay,Filipino,Indonesian,Chinese,English": 0.34, + "Malay,Filipino,Indonesian,Spanish,English": 0.38, + "Malay,Filipino,Chinese,Spanish,English": 0.35333333333333333, + "Malay,Indonesian,Chinese,Spanish,English": 0.37333333333333335, + "Filipino,Indonesian,Chinese,Spanish,English": 0.38666666666666666 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.2866666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.2733333333333333, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.2866666666666667, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.2866666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.3, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.3, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.32666666666666666 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.26 + } + }, + "AC3_2": 0.6084507041753622, + "AC3_3": 0.5285968027926263, + "AC3_4": 0.4732166890504827, + "AC3_5": 0.4306648574845117, + "AC3_6": 0.39488817886958116, + "AC3_7": 0.36279069763223365 + }, + "prompt_2": { + "overall_acc": 0.5838095238095239, + "language_acc": { + "Vietnamese": 0.5466666666666666, + "Malay": 0.54, + "Filipino": 0.5066666666666667, + "Indonesian": 0.6, + "Chinese": 0.58, + "Spanish": 0.6133333333333333, + "English": 0.7 + }, + "consistency_score_2": 0.5666666666666667, + "consistency_score_3": 0.40647619047619044, + "consistency_score_4": 0.3211428571428572, + "consistency_score_5": 0.266984126984127, + "consistency_score_6": 0.23047619047619047, + "consistency_score_7": 0.20666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.5533333333333333, + "Vietnamese,Filipino": 0.46, + "Vietnamese,Indonesian": 0.54, + "Vietnamese,Chinese": 0.62, + "Vietnamese,Spanish": 0.5533333333333333, + "Vietnamese,English": 0.5933333333333334, + "Malay,Filipino": 0.46, + "Malay,Indonesian": 0.6333333333333333, + "Malay,Chinese": 0.56, + "Malay,Spanish": 0.5533333333333333, + "Malay,English": 0.5533333333333333, + "Filipino,Indonesian": 0.52, + "Filipino,Chinese": 0.5, + "Filipino,Spanish": 0.5333333333333333, + "Filipino,English": 0.5933333333333334, + "Indonesian,Chinese": 0.6333333333333333, + "Indonesian,Spanish": 0.6133333333333333, + "Indonesian,English": 0.5933333333333334, + "Chinese,Spanish": 0.5533333333333333, + "Chinese,English": 0.5733333333333334, + "Spanish,English": 0.7066666666666667 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.30666666666666664, + "Vietnamese,Malay,Indonesian": 0.42, + "Vietnamese,Malay,Chinese": 0.4266666666666667, + "Vietnamese,Malay,Spanish": 0.4, + "Vietnamese,Malay,English": 0.4066666666666667, + "Vietnamese,Filipino,Indonesian": 0.32, + "Vietnamese,Filipino,Chinese": 0.3466666666666667, + "Vietnamese,Filipino,Spanish": 0.32666666666666666, + "Vietnamese,Filipino,English": 0.38, + "Vietnamese,Indonesian,Chinese": 0.44, + "Vietnamese,Indonesian,Spanish": 0.4066666666666667, + "Vietnamese,Indonesian,English": 0.42, + "Vietnamese,Chinese,Spanish": 0.41333333333333333, + "Vietnamese,Chinese,English": 0.43333333333333335, + "Vietnamese,Spanish,English": 0.46, + "Malay,Filipino,Indonesian": 0.36666666666666664, + "Malay,Filipino,Chinese": 0.32666666666666666, + "Malay,Filipino,Spanish": 0.37333333333333335, + "Malay,Filipino,English": 0.36666666666666664, + "Malay,Indonesian,Chinese": 0.46, + "Malay,Indonesian,Spanish": 0.4533333333333333, + "Malay,Indonesian,English": 0.4533333333333333, + "Malay,Chinese,Spanish": 0.4066666666666667, + "Malay,Chinese,English": 0.41333333333333333, + "Malay,Spanish,English": 0.4533333333333333, + "Filipino,Indonesian,Chinese": 0.37333333333333335, + "Filipino,Indonesian,Spanish": 0.4066666666666667, + "Filipino,Indonesian,English": 0.3933333333333333, + "Filipino,Chinese,Spanish": 0.36, + "Filipino,Chinese,English": 0.3933333333333333, + "Filipino,Spanish,English": 0.4533333333333333, + "Indonesian,Chinese,Spanish": 0.44666666666666666, + "Indonesian,Chinese,English": 0.46, + "Indonesian,Spanish,English": 0.49333333333333335, + "Chinese,Spanish,English": 0.4666666666666667 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.25333333333333335, + "Vietnamese,Malay,Filipino,Chinese": 0.26, + "Vietnamese,Malay,Filipino,Spanish": 0.2733333333333333, + "Vietnamese,Malay,Filipino,English": 0.26666666666666666, + "Vietnamese,Malay,Indonesian,Chinese": 0.36, + "Vietnamese,Malay,Indonesian,Spanish": 0.3333333333333333, + "Vietnamese,Malay,Indonesian,English": 0.3333333333333333, + "Vietnamese,Malay,Chinese,Spanish": 0.34, + "Vietnamese,Malay,Chinese,English": 0.3466666666666667, + "Vietnamese,Malay,Spanish,English": 0.3466666666666667, + "Vietnamese,Filipino,Indonesian,Chinese": 0.2866666666666667, + "Vietnamese,Filipino,Indonesian,Spanish": 0.28, + "Vietnamese,Filipino,Indonesian,English": 0.2866666666666667, + "Vietnamese,Filipino,Chinese,Spanish": 0.26666666666666666, + "Vietnamese,Filipino,Chinese,English": 0.3, + "Vietnamese,Filipino,Spanish,English": 0.30666666666666664, + "Vietnamese,Indonesian,Chinese,Spanish": 0.3466666666666667, + "Vietnamese,Indonesian,Chinese,English": 0.36, + "Vietnamese,Indonesian,Spanish,English": 0.3466666666666667, + "Vietnamese,Chinese,Spanish,English": 0.3466666666666667, + "Malay,Filipino,Indonesian,Chinese": 0.29333333333333333, + "Malay,Filipino,Indonesian,Spanish": 0.32666666666666666, + "Malay,Filipino,Indonesian,English": 0.30666666666666664, + "Malay,Filipino,Chinese,Spanish": 0.28, + "Malay,Filipino,Chinese,English": 0.2866666666666667, + "Malay,Filipino,Spanish,English": 0.34, + "Malay,Indonesian,Chinese,Spanish": 0.36, + "Malay,Indonesian,Chinese,English": 0.35333333333333333, + "Malay,Indonesian,Spanish,English": 0.3933333333333333, + "Malay,Chinese,Spanish,English": 0.36, + "Filipino,Indonesian,Chinese,Spanish": 0.3, + "Filipino,Indonesian,Chinese,English": 0.32, + "Filipino,Indonesian,Spanish,English": 0.35333333333333333, + "Filipino,Chinese,Spanish,English": 0.3333333333333333, + "Indonesian,Chinese,Spanish,English": 0.3933333333333333 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.24, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.22, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Chinese,English": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Spanish,English": 0.25333333333333335, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.3, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.29333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.29333333333333333, + "Vietnamese,Malay,Chinese,Spanish,English": 0.3, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.24666666666666667, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.26, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.26, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.25333333333333335, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.3, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.26, + "Malay,Filipino,Indonesian,Chinese,English": 0.25333333333333335, + "Malay,Filipino,Indonesian,Spanish,English": 0.3, + "Malay,Filipino,Chinese,Spanish,English": 0.26666666666666666, + "Malay,Indonesian,Chinese,Spanish,English": 0.32, + "Filipino,Indonesian,Chinese,Spanish,English": 0.2866666666666667 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.22, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.20666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.22, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.22, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.26666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.23333333333333334, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.24666666666666667 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.20666666666666667 + } + }, + "AC3_2": 0.5751103752259492, + "AC3_3": 0.47926506008221453, + "AC3_4": 0.4143560753669075, + "AC3_5": 0.36640582795977655, + "AC3_6": 0.3304839877065599, + "AC3_7": 0.3052690762666025 + }, + "prompt_3": { + "overall_acc": 0.5828571428571427, + "language_acc": { + "Vietnamese": 0.5733333333333334, + "Malay": 0.5466666666666666, + "Filipino": 0.5133333333333333, + "Indonesian": 0.5866666666666667, + "Chinese": 0.5266666666666666, + "Spanish": 0.6333333333333333, + "English": 0.7 + }, + "consistency_score_2": 0.5625396825396826, + "consistency_score_3": 0.40628571428571425, + "consistency_score_4": 0.32228571428571434, + "consistency_score_5": 0.2666666666666667, + "consistency_score_6": 0.2257142857142857, + "consistency_score_7": 0.19333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.5466666666666666, + "Vietnamese,Filipino": 0.5266666666666666, + "Vietnamese,Indonesian": 0.5733333333333334, + "Vietnamese,Chinese": 0.5666666666666667, + "Vietnamese,Spanish": 0.5533333333333333, + "Vietnamese,English": 0.5866666666666667, + "Malay,Filipino": 0.5133333333333333, + "Malay,Indonesian": 0.5666666666666667, + "Malay,Chinese": 0.52, + "Malay,Spanish": 0.5733333333333334, + "Malay,English": 0.5133333333333333, + "Filipino,Indonesian": 0.56, + "Filipino,Chinese": 0.49333333333333335, + "Filipino,Spanish": 0.56, + "Filipino,English": 0.5733333333333334, + "Indonesian,Chinese": 0.58, + "Indonesian,Spanish": 0.6133333333333333, + "Indonesian,English": 0.6266666666666667, + "Chinese,Spanish": 0.5266666666666666, + "Chinese,English": 0.5533333333333333, + "Spanish,English": 0.6866666666666666 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.35333333333333333, + "Vietnamese,Malay,Indonesian": 0.4066666666666667, + "Vietnamese,Malay,Chinese": 0.38666666666666666, + "Vietnamese,Malay,Spanish": 0.38666666666666666, + "Vietnamese,Malay,English": 0.38666666666666666, + "Vietnamese,Filipino,Indonesian": 0.38666666666666666, + "Vietnamese,Filipino,Chinese": 0.35333333333333333, + "Vietnamese,Filipino,Spanish": 0.37333333333333335, + "Vietnamese,Filipino,English": 0.4, + "Vietnamese,Indonesian,Chinese": 0.43333333333333335, + "Vietnamese,Indonesian,Spanish": 0.4266666666666667, + "Vietnamese,Indonesian,English": 0.44666666666666666, + "Vietnamese,Chinese,Spanish": 0.3933333333333333, + "Vietnamese,Chinese,English": 0.4, + "Vietnamese,Spanish,English": 0.44666666666666666, + "Malay,Filipino,Indonesian": 0.4066666666666667, + "Malay,Filipino,Chinese": 0.35333333333333333, + "Malay,Filipino,Spanish": 0.3933333333333333, + "Malay,Filipino,English": 0.37333333333333335, + "Malay,Indonesian,Chinese": 0.42, + "Malay,Indonesian,Spanish": 0.43333333333333335, + "Malay,Indonesian,English": 0.4266666666666667, + "Malay,Chinese,Spanish": 0.3933333333333333, + "Malay,Chinese,English": 0.36666666666666664, + "Malay,Spanish,English": 0.43333333333333335, + "Filipino,Indonesian,Chinese": 0.4, + "Filipino,Indonesian,Spanish": 0.41333333333333333, + "Filipino,Indonesian,English": 0.4266666666666667, + "Filipino,Chinese,Spanish": 0.36, + "Filipino,Chinese,English": 0.38666666666666666, + "Filipino,Spanish,English": 0.4533333333333333, + "Indonesian,Chinese,Spanish": 0.41333333333333333, + "Indonesian,Chinese,English": 0.44666666666666666, + "Indonesian,Spanish,English": 0.5066666666666667, + "Chinese,Spanish,English": 0.43333333333333335 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.29333333333333333, + "Vietnamese,Malay,Filipino,Chinese": 0.28, + "Vietnamese,Malay,Filipino,Spanish": 0.2866666666666667, + "Vietnamese,Malay,Filipino,English": 0.2866666666666667, + "Vietnamese,Malay,Indonesian,Chinese": 0.3466666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.32666666666666666, + "Vietnamese,Malay,Indonesian,English": 0.32666666666666666, + "Vietnamese,Malay,Chinese,Spanish": 0.31333333333333335, + "Vietnamese,Malay,Chinese,English": 0.3, + "Vietnamese,Malay,Spanish,English": 0.32, + "Vietnamese,Filipino,Indonesian,Chinese": 0.30666666666666664, + "Vietnamese,Filipino,Indonesian,Spanish": 0.30666666666666664, + "Vietnamese,Filipino,Indonesian,English": 0.32666666666666666, + "Vietnamese,Filipino,Chinese,Spanish": 0.28, + "Vietnamese,Filipino,Chinese,English": 0.29333333333333333, + "Vietnamese,Filipino,Spanish,English": 0.32, + "Vietnamese,Indonesian,Chinese,Spanish": 0.34, + "Vietnamese,Indonesian,Chinese,English": 0.36, + "Vietnamese,Indonesian,Spanish,English": 0.36666666666666664, + "Vietnamese,Chinese,Spanish,English": 0.32, + "Malay,Filipino,Indonesian,Chinese": 0.32, + "Malay,Filipino,Indonesian,Spanish": 0.34, + "Malay,Filipino,Indonesian,English": 0.3333333333333333, + "Malay,Filipino,Chinese,Spanish": 0.29333333333333333, + "Malay,Filipino,Chinese,English": 0.2866666666666667, + "Malay,Filipino,Spanish,English": 0.32666666666666666, + "Malay,Indonesian,Chinese,Spanish": 0.3333333333333333, + "Malay,Indonesian,Chinese,English": 0.32666666666666666, + "Malay,Indonesian,Spanish,English": 0.38, + "Malay,Chinese,Spanish,English": 0.3333333333333333, + "Filipino,Indonesian,Chinese,Spanish": 0.31333333333333335, + "Filipino,Indonesian,Chinese,English": 0.34, + "Filipino,Indonesian,Spanish,English": 0.36666666666666664, + "Filipino,Chinese,Spanish,English": 0.32, + "Indonesian,Chinese,Spanish,English": 0.36666666666666664 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.25333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.25333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.25333333333333335, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.24, + "Vietnamese,Malay,Filipino,Chinese,English": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Spanish,English": 0.24666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.2866666666666667, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.28, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.2866666666666667, + "Vietnamese,Malay,Chinese,Spanish,English": 0.26666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.24666666666666667, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.2733333333333333, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.2733333333333333, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.24, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.29333333333333333, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.26666666666666666, + "Malay,Filipino,Indonesian,Chinese,English": 0.26666666666666666, + "Malay,Filipino,Indonesian,Spanish,English": 0.3, + "Malay,Filipino,Chinese,Spanish,English": 0.26, + "Malay,Indonesian,Chinese,Spanish,English": 0.29333333333333333, + "Filipino,Indonesian,Chinese,Spanish,English": 0.2866666666666667 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.22, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.22, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.22666666666666666, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.20666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.24666666666666667, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.22, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.24 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.19333333333333333 + } + }, + "AC3_2": 0.5725182134438388, + "AC3_3": 0.4788115869780849, + "AC3_4": 0.4150649350190788, + "AC3_5": 0.3659192824681373, + "AC3_6": 0.325411408339361, + "AC3_7": 0.29035582818345107 + }, + "prompt_4": { + "overall_acc": 0.5942857142857143, + "language_acc": { + "Vietnamese": 0.5933333333333334, + "Malay": 0.5333333333333333, + "Filipino": 0.5266666666666666, + "Indonesian": 0.58, + "Chinese": 0.5933333333333334, + "Spanish": 0.62, + "English": 0.7133333333333334 + }, + "consistency_score_2": 0.5923809523809523, + "consistency_score_3": 0.4419047619047618, + "consistency_score_4": 0.3563809523809524, + "consistency_score_5": 0.29873015873015873, + "consistency_score_6": 0.2571428571428572, + "consistency_score_7": 0.22666666666666666, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.54, + "Vietnamese,Filipino": 0.5066666666666667, + "Vietnamese,Indonesian": 0.58, + "Vietnamese,Chinese": 0.58, + "Vietnamese,Spanish": 0.6, + "Vietnamese,English": 0.6133333333333333, + "Malay,Filipino": 0.47333333333333333, + "Malay,Indonesian": 0.62, + "Malay,Chinese": 0.5266666666666666, + "Malay,Spanish": 0.6, + "Malay,English": 0.5933333333333334, + "Filipino,Indonesian": 0.5333333333333333, + "Filipino,Chinese": 0.5333333333333333, + "Filipino,Spanish": 0.56, + "Filipino,English": 0.6133333333333333, + "Indonesian,Chinese": 0.66, + "Indonesian,Spanish": 0.6333333333333333, + "Indonesian,English": 0.66, + "Chinese,Spanish": 0.6533333333333333, + "Chinese,English": 0.6066666666666667, + "Spanish,English": 0.7533333333333333 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.34, + "Vietnamese,Malay,Indonesian": 0.41333333333333333, + "Vietnamese,Malay,Chinese": 0.37333333333333335, + "Vietnamese,Malay,Spanish": 0.41333333333333333, + "Vietnamese,Malay,English": 0.4266666666666667, + "Vietnamese,Filipino,Indonesian": 0.37333333333333335, + "Vietnamese,Filipino,Chinese": 0.38, + "Vietnamese,Filipino,Spanish": 0.38666666666666666, + "Vietnamese,Filipino,English": 0.41333333333333333, + "Vietnamese,Indonesian,Chinese": 0.4533333333333333, + "Vietnamese,Indonesian,Spanish": 0.4533333333333333, + "Vietnamese,Indonesian,English": 0.47333333333333333, + "Vietnamese,Chinese,Spanish": 0.47333333333333333, + "Vietnamese,Chinese,English": 0.44666666666666666, + "Vietnamese,Spanish,English": 0.52, + "Malay,Filipino,Indonesian": 0.4, + "Malay,Filipino,Chinese": 0.36, + "Malay,Filipino,Spanish": 0.3933333333333333, + "Malay,Filipino,English": 0.41333333333333333, + "Malay,Indonesian,Chinese": 0.43333333333333335, + "Malay,Indonesian,Spanish": 0.47333333333333333, + "Malay,Indonesian,English": 0.4866666666666667, + "Malay,Chinese,Spanish": 0.44666666666666666, + "Malay,Chinese,English": 0.42, + "Malay,Spanish,English": 0.52, + "Filipino,Indonesian,Chinese": 0.42, + "Filipino,Indonesian,Spanish": 0.4266666666666667, + "Filipino,Indonesian,English": 0.44666666666666666, + "Filipino,Chinese,Spanish": 0.43333333333333335, + "Filipino,Chinese,English": 0.4266666666666667, + "Filipino,Spanish,English": 0.5066666666666667, + "Indonesian,Chinese,Spanish": 0.5133333333333333, + "Indonesian,Chinese,English": 0.5, + "Indonesian,Spanish,English": 0.5533333333333333, + "Chinese,Spanish,English": 0.5533333333333333 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.29333333333333333, + "Vietnamese,Malay,Filipino,Chinese": 0.2733333333333333, + "Vietnamese,Malay,Filipino,Spanish": 0.3, + "Vietnamese,Malay,Filipino,English": 0.31333333333333335, + "Vietnamese,Malay,Indonesian,Chinese": 0.32666666666666666, + "Vietnamese,Malay,Indonesian,Spanish": 0.34, + "Vietnamese,Malay,Indonesian,English": 0.36, + "Vietnamese,Malay,Chinese,Spanish": 0.3466666666666667, + "Vietnamese,Malay,Chinese,English": 0.3333333333333333, + "Vietnamese,Malay,Spanish,English": 0.3933333333333333, + "Vietnamese,Filipino,Indonesian,Chinese": 0.31333333333333335, + "Vietnamese,Filipino,Indonesian,Spanish": 0.31333333333333335, + "Vietnamese,Filipino,Indonesian,English": 0.3333333333333333, + "Vietnamese,Filipino,Chinese,Spanish": 0.32666666666666666, + "Vietnamese,Filipino,Chinese,English": 0.32666666666666666, + "Vietnamese,Filipino,Spanish,English": 0.35333333333333333, + "Vietnamese,Indonesian,Chinese,Spanish": 0.4, + "Vietnamese,Indonesian,Chinese,English": 0.38666666666666666, + "Vietnamese,Indonesian,Spanish,English": 0.41333333333333333, + "Vietnamese,Chinese,Spanish,English": 0.41333333333333333, + "Malay,Filipino,Indonesian,Chinese": 0.32, + "Malay,Filipino,Indonesian,Spanish": 0.34, + "Malay,Filipino,Indonesian,English": 0.35333333333333333, + "Malay,Filipino,Chinese,Spanish": 0.3333333333333333, + "Malay,Filipino,Chinese,English": 0.32666666666666666, + "Malay,Filipino,Spanish,English": 0.37333333333333335, + "Malay,Indonesian,Chinese,Spanish": 0.38666666666666666, + "Malay,Indonesian,Chinese,English": 0.37333333333333335, + "Malay,Indonesian,Spanish,English": 0.43333333333333335, + "Malay,Chinese,Spanish,English": 0.4066666666666667, + "Filipino,Indonesian,Chinese,Spanish": 0.36666666666666664, + "Filipino,Indonesian,Chinese,English": 0.36666666666666664, + "Filipino,Indonesian,Spanish,English": 0.38666666666666666, + "Filipino,Chinese,Spanish,English": 0.38666666666666666, + "Indonesian,Chinese,Spanish,English": 0.46 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.24666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.25333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.26666666666666666, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.26666666666666666, + "Vietnamese,Malay,Filipino,Chinese,English": 0.26, + "Vietnamese,Malay,Filipino,Spanish,English": 0.2866666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.30666666666666664, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.3, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.32666666666666666, + "Vietnamese,Malay,Chinese,Spanish,English": 0.32666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.2866666666666667, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.2866666666666667, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.2866666666666667, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.29333333333333333, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.36, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.3, + "Malay,Filipino,Indonesian,Chinese,English": 0.29333333333333333, + "Malay,Filipino,Indonesian,Spanish,English": 0.32, + "Malay,Filipino,Chinese,Spanish,English": 0.31333333333333335, + "Malay,Indonesian,Chinese,Spanish,English": 0.36, + "Filipino,Indonesian,Chinese,Spanish,English": 0.3333333333333333 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.24, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.24, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.25333333333333335, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.29333333333333333, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.26, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.28 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.22666666666666666 + } + }, + "AC3_2": 0.5933318045819653, + "AC3_3": 0.5068907562536022, + "AC3_4": 0.4455654463280347, + "AC3_5": 0.3975989030076327, + "AC3_6": 0.35896452536531825, + "AC3_7": 0.3281670533242952 + }, + "prompt_5": { + "overall_acc": 0.5885714285714286, + "language_acc": { + "Vietnamese": 0.5533333333333333, + "Malay": 0.5533333333333333, + "Filipino": 0.5533333333333333, + "Indonesian": 0.58, + "Chinese": 0.58, + "Spanish": 0.5733333333333334, + "English": 0.7266666666666667 + }, + "consistency_score_2": 0.5828571428571429, + "consistency_score_3": 0.42723809523809525, + "consistency_score_4": 0.3434285714285715, + "consistency_score_5": 0.2911111111111111, + "consistency_score_6": 0.25428571428571434, + "consistency_score_7": 0.22666666666666666, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.5466666666666666, + "Vietnamese,Filipino": 0.5266666666666666, + "Vietnamese,Indonesian": 0.5866666666666667, + "Vietnamese,Chinese": 0.5266666666666666, + "Vietnamese,Spanish": 0.58, + "Vietnamese,English": 0.5866666666666667, + "Malay,Filipino": 0.5333333333333333, + "Malay,Indonesian": 0.6266666666666667, + "Malay,Chinese": 0.58, + "Malay,Spanish": 0.6066666666666667, + "Malay,English": 0.5866666666666667, + "Filipino,Indonesian": 0.56, + "Filipino,Chinese": 0.5333333333333333, + "Filipino,Spanish": 0.6133333333333333, + "Filipino,English": 0.6, + "Indonesian,Chinese": 0.64, + "Indonesian,Spanish": 0.5733333333333334, + "Indonesian,English": 0.62, + "Chinese,Spanish": 0.5866666666666667, + "Chinese,English": 0.56, + "Spanish,English": 0.6666666666666666 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.37333333333333335, + "Vietnamese,Malay,Indonesian": 0.42, + "Vietnamese,Malay,Chinese": 0.37333333333333335, + "Vietnamese,Malay,Spanish": 0.41333333333333333, + "Vietnamese,Malay,English": 0.4066666666666667, + "Vietnamese,Filipino,Indonesian": 0.3933333333333333, + "Vietnamese,Filipino,Chinese": 0.37333333333333335, + "Vietnamese,Filipino,Spanish": 0.41333333333333333, + "Vietnamese,Filipino,English": 0.4066666666666667, + "Vietnamese,Indonesian,Chinese": 0.4266666666666667, + "Vietnamese,Indonesian,Spanish": 0.4066666666666667, + "Vietnamese,Indonesian,English": 0.44666666666666666, + "Vietnamese,Chinese,Spanish": 0.3933333333333333, + "Vietnamese,Chinese,English": 0.4, + "Vietnamese,Spanish,English": 0.46, + "Malay,Filipino,Indonesian": 0.43333333333333335, + "Malay,Filipino,Chinese": 0.4, + "Malay,Filipino,Spanish": 0.43333333333333335, + "Malay,Filipino,English": 0.41333333333333333, + "Malay,Indonesian,Chinese": 0.46, + "Malay,Indonesian,Spanish": 0.4666666666666667, + "Malay,Indonesian,English": 0.47333333333333333, + "Malay,Chinese,Spanish": 0.44, + "Malay,Chinese,English": 0.41333333333333333, + "Malay,Spanish,English": 0.4866666666666667, + "Filipino,Indonesian,Chinese": 0.42, + "Filipino,Indonesian,Spanish": 0.4266666666666667, + "Filipino,Indonesian,English": 0.43333333333333335, + "Filipino,Chinese,Spanish": 0.4266666666666667, + "Filipino,Chinese,English": 0.41333333333333333, + "Filipino,Spanish,English": 0.48, + "Indonesian,Chinese,Spanish": 0.4533333333333333, + "Indonesian,Chinese,English": 0.46, + "Indonesian,Spanish,English": 0.47333333333333333, + "Chinese,Spanish,English": 0.44 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.32, + "Vietnamese,Malay,Filipino,Chinese": 0.30666666666666664, + "Vietnamese,Malay,Filipino,Spanish": 0.32666666666666666, + "Vietnamese,Malay,Filipino,English": 0.31333333333333335, + "Vietnamese,Malay,Indonesian,Chinese": 0.32, + "Vietnamese,Malay,Indonesian,Spanish": 0.34, + "Vietnamese,Malay,Indonesian,English": 0.3466666666666667, + "Vietnamese,Malay,Chinese,Spanish": 0.31333333333333335, + "Vietnamese,Malay,Chinese,English": 0.3, + "Vietnamese,Malay,Spanish,English": 0.36, + "Vietnamese,Filipino,Indonesian,Chinese": 0.32, + "Vietnamese,Filipino,Indonesian,Spanish": 0.31333333333333335, + "Vietnamese,Filipino,Indonesian,English": 0.34, + "Vietnamese,Filipino,Chinese,Spanish": 0.30666666666666664, + "Vietnamese,Filipino,Chinese,English": 0.31333333333333335, + "Vietnamese,Filipino,Spanish,English": 0.3466666666666667, + "Vietnamese,Indonesian,Chinese,Spanish": 0.34, + "Vietnamese,Indonesian,Chinese,English": 0.36, + "Vietnamese,Indonesian,Spanish,English": 0.35333333333333333, + "Vietnamese,Chinese,Spanish,English": 0.32, + "Malay,Filipino,Indonesian,Chinese": 0.34, + "Malay,Filipino,Indonesian,Spanish": 0.38, + "Malay,Filipino,Indonesian,English": 0.35333333333333333, + "Malay,Filipino,Chinese,Spanish": 0.3466666666666667, + "Malay,Filipino,Chinese,English": 0.32666666666666666, + "Malay,Filipino,Spanish,English": 0.38, + "Malay,Indonesian,Chinese,Spanish": 0.37333333333333335, + "Malay,Indonesian,Chinese,English": 0.36666666666666664, + "Malay,Indonesian,Spanish,English": 0.41333333333333333, + "Malay,Chinese,Spanish,English": 0.36666666666666664, + "Filipino,Indonesian,Chinese,Spanish": 0.35333333333333333, + "Filipino,Indonesian,Chinese,English": 0.35333333333333333, + "Filipino,Indonesian,Spanish,English": 0.36666666666666664, + "Filipino,Chinese,Spanish,English": 0.36, + "Indonesian,Chinese,Spanish,English": 0.38 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.26666666666666666, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.2866666666666667, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.28, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.2733333333333333, + "Vietnamese,Malay,Filipino,Chinese,English": 0.26, + "Vietnamese,Malay,Filipino,Spanish,English": 0.29333333333333333, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.28, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.28, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.30666666666666664, + "Vietnamese,Malay,Chinese,Spanish,English": 0.2733333333333333, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.2733333333333333, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.29333333333333333, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.2866666666666667, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.2733333333333333, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.29333333333333333, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.31333333333333335, + "Malay,Filipino,Indonesian,Chinese,English": 0.29333333333333333, + "Malay,Filipino,Indonesian,Spanish,English": 0.3333333333333333, + "Malay,Filipino,Chinese,Spanish,English": 0.30666666666666664, + "Malay,Indonesian,Chinese,Spanish,English": 0.3333333333333333, + "Filipino,Indonesian,Chinese,Spanish,English": 0.31333333333333335 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.24666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.24, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.26, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.24666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.25333333333333335, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.25333333333333335, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.28 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.22666666666666666 + } + }, + "AC3_2": 0.585700348382057, + "AC3_3": 0.49509308612629, + "AC3_4": 0.43376018213792106, + "AC3_5": 0.38954889927004405, + "AC3_6": 0.3551380144857101, + "AC3_7": 0.32728971958602165 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.44237012987012986, + "language_acc": { + "English": 0.5284090909090909, + "Filipino": 0.4602272727272727, + "Vietnamese": 0.38636363636363635, + "Chinese": 0.39204545454545453, + "Indonesian": 0.45454545454545453, + "Malay": 0.4147727272727273, + "Spanish": 0.4602272727272727 + }, + "consistency_score_2": 0.5186688311688311, + "consistency_score_3": 0.33766233766233766, + "consistency_score_4": 0.24139610389610397, + "consistency_score_5": 0.1812770562770563, + "consistency_score_6": 0.14123376623376624, + "consistency_score_7": 0.11363636363636363, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.5681818181818182, + "English,Vietnamese": 0.5056818181818182, + "English,Chinese": 0.48863636363636365, + "English,Indonesian": 0.6193181818181818, + "English,Malay": 0.5852272727272727, + "English,Spanish": 0.7215909090909091, + "Filipino,Vietnamese": 0.4147727272727273, + "Filipino,Chinese": 0.4659090909090909, + "Filipino,Indonesian": 0.5340909090909091, + "Filipino,Malay": 0.48863636363636365, + "Filipino,Spanish": 0.5397727272727273, + "Vietnamese,Chinese": 0.42613636363636365, + "Vietnamese,Indonesian": 0.4943181818181818, + "Vietnamese,Malay": 0.4772727272727273, + "Vietnamese,Spanish": 0.5170454545454546, + "Chinese,Indonesian": 0.4318181818181818, + "Chinese,Malay": 0.4147727272727273, + "Chinese,Spanish": 0.4431818181818182, + "Indonesian,Malay": 0.6420454545454546, + "Indonesian,Spanish": 0.5511363636363636, + "Malay,Spanish": 0.5625 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.30113636363636365, + "English,Filipino,Chinese": 0.32386363636363635, + "English,Filipino,Indonesian": 0.4090909090909091, + "English,Filipino,Malay": 0.39204545454545453, + "English,Filipino,Spanish": 0.4602272727272727, + "English,Vietnamese,Chinese": 0.2840909090909091, + "English,Vietnamese,Indonesian": 0.3693181818181818, + "English,Vietnamese,Malay": 0.3522727272727273, + "English,Vietnamese,Spanish": 0.4090909090909091, + "English,Chinese,Indonesian": 0.32954545454545453, + "English,Chinese,Malay": 0.30113636363636365, + "English,Chinese,Spanish": 0.35795454545454547, + "English,Indonesian,Malay": 0.45454545454545453, + "English,Indonesian,Spanish": 0.48863636363636365, + "English,Malay,Spanish": 0.4715909090909091, + "Filipino,Vietnamese,Chinese": 0.23863636363636365, + "Filipino,Vietnamese,Indonesian": 0.29545454545454547, + "Filipino,Vietnamese,Malay": 0.26704545454545453, + "Filipino,Vietnamese,Spanish": 0.29545454545454547, + "Filipino,Chinese,Indonesian": 0.2840909090909091, + "Filipino,Chinese,Malay": 0.2727272727272727, + "Filipino,Chinese,Spanish": 0.30113636363636365, + "Filipino,Indonesian,Malay": 0.38636363636363635, + "Filipino,Indonesian,Spanish": 0.3693181818181818, + "Filipino,Malay,Spanish": 0.3693181818181818, + "Vietnamese,Chinese,Indonesian": 0.24431818181818182, + "Vietnamese,Chinese,Malay": 0.23863636363636365, + "Vietnamese,Chinese,Spanish": 0.26704545454545453, + "Vietnamese,Indonesian,Malay": 0.36363636363636365, + "Vietnamese,Indonesian,Spanish": 0.3352272727272727, + "Vietnamese,Malay,Spanish": 0.32386363636363635, + "Chinese,Indonesian,Malay": 0.30113636363636365, + "Chinese,Indonesian,Spanish": 0.2727272727272727, + "Chinese,Malay,Spanish": 0.2727272727272727, + "Indonesian,Malay,Spanish": 0.4147727272727273 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.19886363636363635, + "English,Filipino,Vietnamese,Indonesian": 0.25, + "English,Filipino,Vietnamese,Malay": 0.22727272727272727, + "English,Filipino,Vietnamese,Spanish": 0.26136363636363635, + "English,Filipino,Chinese,Indonesian": 0.24431818181818182, + "English,Filipino,Chinese,Malay": 0.23295454545454544, + "English,Filipino,Chinese,Spanish": 0.26704545454545453, + "English,Filipino,Indonesian,Malay": 0.3125, + "English,Filipino,Indonesian,Spanish": 0.3409090909090909, + "English,Filipino,Malay,Spanish": 0.3352272727272727, + "English,Vietnamese,Chinese,Indonesian": 0.20454545454545456, + "English,Vietnamese,Chinese,Malay": 0.19318181818181818, + "English,Vietnamese,Chinese,Spanish": 0.2159090909090909, + "English,Vietnamese,Indonesian,Malay": 0.2897727272727273, + "English,Vietnamese,Indonesian,Spanish": 0.3125, + "English,Vietnamese,Malay,Spanish": 0.2897727272727273, + "English,Chinese,Indonesian,Malay": 0.23863636363636365, + "English,Chinese,Indonesian,Spanish": 0.26136363636363635, + "English,Chinese,Malay,Spanish": 0.23863636363636365, + "English,Indonesian,Malay,Spanish": 0.3806818181818182, + "Filipino,Vietnamese,Chinese,Indonesian": 0.17613636363636365, + "Filipino,Vietnamese,Chinese,Malay": 0.18181818181818182, + "Filipino,Vietnamese,Chinese,Spanish": 0.18181818181818182, + "Filipino,Vietnamese,Indonesian,Malay": 0.23295454545454544, + "Filipino,Vietnamese,Indonesian,Spanish": 0.23295454545454544, + "Filipino,Vietnamese,Malay,Spanish": 0.21022727272727273, + "Filipino,Chinese,Indonesian,Malay": 0.2159090909090909, + "Filipino,Chinese,Indonesian,Spanish": 0.2215909090909091, + "Filipino,Chinese,Malay,Spanish": 0.2159090909090909, + "Filipino,Indonesian,Malay,Spanish": 0.2897727272727273, + "Vietnamese,Chinese,Indonesian,Malay": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Malay,Spanish": 0.17045454545454544, + "Vietnamese,Indonesian,Malay,Spanish": 0.26704545454545453, + "Chinese,Indonesian,Malay,Spanish": 0.21022727272727273 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.1590909090909091, + "English,Filipino,Vietnamese,Chinese,Malay": 0.1590909090909091, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.16477272727272727, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.19886363636363635, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.2215909090909091, + "English,Filipino,Vietnamese,Malay,Spanish": 0.19886363636363635, + "English,Filipino,Chinese,Indonesian,Malay": 0.1875, + "English,Filipino,Chinese,Indonesian,Spanish": 0.21022727272727273, + "English,Filipino,Chinese,Malay,Spanish": 0.19886363636363635, + "English,Filipino,Indonesian,Malay,Spanish": 0.26704545454545453, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.1590909090909091, + "English,Vietnamese,Chinese,Malay,Spanish": 0.1534090909090909, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.25, + "English,Chinese,Indonesian,Malay,Spanish": 0.19886363636363635, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.14772727272727273, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.14204545454545456, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.14204545454545456, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.1875, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.17613636363636365, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.13068181818181818 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.13068181818181818, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.13636363636363635, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.13636363636363635, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.17613636363636365, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.16477272727272727, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.125, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11931818181818182 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11363636363636363 + } + }, + "AC3_2": 0.4774907313472965, + "AC3_3": 0.38298849948466585, + "AC3_4": 0.3123477602187465, + "AC3_5": 0.2571696199266482, + "AC3_6": 0.2141096038503851, + "AC3_7": 0.18082282677570657 + }, + "prompt_2": { + "overall_acc": 0.4391233766233767, + "language_acc": { + "English": 0.5284090909090909, + "Filipino": 0.4318181818181818, + "Vietnamese": 0.3977272727272727, + "Chinese": 0.39204545454545453, + "Indonesian": 0.45454545454545453, + "Malay": 0.3977272727272727, + "Spanish": 0.4715909090909091 + }, + "consistency_score_2": 0.5048701298701299, + "consistency_score_3": 0.3222402597402597, + "consistency_score_4": 0.22662337662337673, + "consistency_score_5": 0.16856060606060605, + "consistency_score_6": 0.1314935064935065, + "consistency_score_7": 0.10795454545454546, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.5454545454545454, + "English,Vietnamese": 0.5170454545454546, + "English,Chinese": 0.5227272727272727, + "English,Indonesian": 0.5965909090909091, + "English,Malay": 0.5795454545454546, + "English,Spanish": 0.7556818181818182, + "Filipino,Vietnamese": 0.42045454545454547, + "Filipino,Chinese": 0.4090909090909091, + "Filipino,Indonesian": 0.5227272727272727, + "Filipino,Malay": 0.4375, + "Filipino,Spanish": 0.5113636363636364, + "Vietnamese,Chinese": 0.38636363636363635, + "Vietnamese,Indonesian": 0.5056818181818182, + "Vietnamese,Malay": 0.4659090909090909, + "Vietnamese,Spanish": 0.5, + "Chinese,Indonesian": 0.42613636363636365, + "Chinese,Malay": 0.3977272727272727, + "Chinese,Spanish": 0.4659090909090909, + "Indonesian,Malay": 0.5965909090909091, + "Indonesian,Spanish": 0.5340909090909091, + "Malay,Spanish": 0.5056818181818182 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.3068181818181818, + "English,Filipino,Chinese": 0.29545454545454547, + "English,Filipino,Indonesian": 0.38636363636363635, + "English,Filipino,Malay": 0.3409090909090909, + "English,Filipino,Spanish": 0.44886363636363635, + "English,Vietnamese,Chinese": 0.2840909090909091, + "English,Vietnamese,Indonesian": 0.375, + "English,Vietnamese,Malay": 0.3522727272727273, + "English,Vietnamese,Spanish": 0.4318181818181818, + "English,Chinese,Indonesian": 0.32954545454545453, + "English,Chinese,Malay": 0.3068181818181818, + "English,Chinese,Spanish": 0.4090909090909091, + "English,Indonesian,Malay": 0.42613636363636365, + "English,Indonesian,Spanish": 0.48863636363636365, + "English,Malay,Spanish": 0.4659090909090909, + "Filipino,Vietnamese,Chinese": 0.20454545454545456, + "Filipino,Vietnamese,Indonesian": 0.2897727272727273, + "Filipino,Vietnamese,Malay": 0.24431818181818182, + "Filipino,Vietnamese,Spanish": 0.29545454545454547, + "Filipino,Chinese,Indonesian": 0.26136363636363635, + "Filipino,Chinese,Malay": 0.20454545454545456, + "Filipino,Chinese,Spanish": 0.2840909090909091, + "Filipino,Indonesian,Malay": 0.3125, + "Filipino,Indonesian,Spanish": 0.3409090909090909, + "Filipino,Malay,Spanish": 0.3068181818181818, + "Vietnamese,Chinese,Indonesian": 0.24431818181818182, + "Vietnamese,Chinese,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Spanish": 0.26136363636363635, + "Vietnamese,Indonesian,Malay": 0.3409090909090909, + "Vietnamese,Indonesian,Spanish": 0.32954545454545453, + "Vietnamese,Malay,Spanish": 0.3068181818181818, + "Chinese,Indonesian,Malay": 0.2897727272727273, + "Chinese,Indonesian,Spanish": 0.2840909090909091, + "Chinese,Malay,Spanish": 0.26704545454545453, + "Indonesian,Malay,Spanish": 0.35795454545454547 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.17613636363636365, + "English,Filipino,Vietnamese,Indonesian": 0.2556818181818182, + "English,Filipino,Vietnamese,Malay": 0.20454545454545456, + "English,Filipino,Vietnamese,Spanish": 0.2784090909090909, + "English,Filipino,Chinese,Indonesian": 0.2215909090909091, + "English,Filipino,Chinese,Malay": 0.18181818181818182, + "English,Filipino,Chinese,Spanish": 0.26704545454545453, + "English,Filipino,Indonesian,Malay": 0.25, + "English,Filipino,Indonesian,Spanish": 0.32954545454545453, + "English,Filipino,Malay,Spanish": 0.29545454545454547, + "English,Vietnamese,Chinese,Indonesian": 0.20454545454545456, + "English,Vietnamese,Chinese,Malay": 0.18181818181818182, + "English,Vietnamese,Chinese,Spanish": 0.24431818181818182, + "English,Vietnamese,Indonesian,Malay": 0.2784090909090909, + "English,Vietnamese,Indonesian,Spanish": 0.3181818181818182, + "English,Vietnamese,Malay,Spanish": 0.2897727272727273, + "English,Chinese,Indonesian,Malay": 0.24431818181818182, + "English,Chinese,Indonesian,Spanish": 0.2840909090909091, + "English,Chinese,Malay,Spanish": 0.26136363636363635, + "English,Indonesian,Malay,Spanish": 0.3522727272727273, + "Filipino,Vietnamese,Chinese,Indonesian": 0.17045454545454544, + "Filipino,Vietnamese,Chinese,Malay": 0.11931818181818182, + "Filipino,Vietnamese,Chinese,Spanish": 0.17045454545454544, + "Filipino,Vietnamese,Indonesian,Malay": 0.19318181818181818, + "Filipino,Vietnamese,Indonesian,Spanish": 0.23295454545454544, + "Filipino,Vietnamese,Malay,Spanish": 0.19886363636363635, + "Filipino,Chinese,Indonesian,Malay": 0.17045454545454544, + "Filipino,Chinese,Indonesian,Spanish": 0.20454545454545456, + "Filipino,Chinese,Malay,Spanish": 0.17045454545454544, + "Filipino,Indonesian,Malay,Spanish": 0.2215909090909091, + "Vietnamese,Chinese,Indonesian,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Spanish": 0.18181818181818182, + "Vietnamese,Chinese,Malay,Spanish": 0.1590909090909091, + "Vietnamese,Indonesian,Malay,Spanish": 0.23295454545454544, + "Chinese,Indonesian,Malay,Spanish": 0.2159090909090909 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.1590909090909091, + "English,Filipino,Vietnamese,Chinese,Malay": 0.11363636363636363, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.17045454545454544, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.17045454545454544, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.23295454545454544, + "English,Filipino,Vietnamese,Malay,Spanish": 0.1875, + "English,Filipino,Chinese,Indonesian,Malay": 0.14772727272727273, + "English,Filipino,Chinese,Indonesian,Spanish": 0.20454545454545456, + "English,Filipino,Chinese,Malay,Spanish": 0.17045454545454544, + "English,Filipino,Indonesian,Malay,Spanish": 0.2215909090909091, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.18181818181818182, + "English,Vietnamese,Chinese,Malay,Spanish": 0.1590909090909091, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.23295454545454544, + "English,Chinese,Indonesian,Malay,Spanish": 0.2159090909090909, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.11363636363636363, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.1534090909090909, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.11363636363636363, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.1590909090909091, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.14204545454545456, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.13636363636363635 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.10795454545454546, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.1534090909090909, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.11363636363636363, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.1590909090909091, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.14204545454545456, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.13636363636363635, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.10795454545454546 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.10795454545454546 + } + }, + "AC3_2": 0.46970720589992493, + "AC3_3": 0.3717099797368511, + "AC3_4": 0.2989593921844207, + "AC3_5": 0.24360985181776007, + "AC3_6": 0.20238403129593216, + "AC3_7": 0.17330388451107887 + }, + "prompt_3": { + "overall_acc": 0.43019480519480513, + "language_acc": { + "English": 0.5113636363636364, + "Filipino": 0.4090909090909091, + "Vietnamese": 0.39204545454545453, + "Chinese": 0.3806818181818182, + "Indonesian": 0.45454545454545453, + "Malay": 0.4034090909090909, + "Spanish": 0.4602272727272727 + }, + "consistency_score_2": 0.503517316017316, + "consistency_score_3": 0.32840909090909093, + "consistency_score_4": 0.24107142857142858, + "consistency_score_5": 0.18912337662337664, + "consistency_score_6": 0.15503246753246752, + "consistency_score_7": 0.13068181818181818, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.5284090909090909, + "English,Vietnamese": 0.5056818181818182, + "English,Chinese": 0.5, + "English,Indonesian": 0.625, + "English,Malay": 0.5852272727272727, + "English,Spanish": 0.6988636363636364, + "Filipino,Vietnamese": 0.4375, + "Filipino,Chinese": 0.39204545454545453, + "Filipino,Indonesian": 0.48863636363636365, + "Filipino,Malay": 0.45454545454545453, + "Filipino,Spanish": 0.4772727272727273, + "Vietnamese,Chinese": 0.4375, + "Vietnamese,Indonesian": 0.48863636363636365, + "Vietnamese,Malay": 0.48863636363636365, + "Vietnamese,Spanish": 0.5227272727272727, + "Chinese,Indonesian": 0.4659090909090909, + "Chinese,Malay": 0.4375, + "Chinese,Spanish": 0.4375, + "Indonesian,Malay": 0.5965909090909091, + "Indonesian,Spanish": 0.5284090909090909, + "Malay,Spanish": 0.4772727272727273 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.2897727272727273, + "English,Filipino,Chinese": 0.2897727272727273, + "English,Filipino,Indonesian": 0.3806818181818182, + "English,Filipino,Malay": 0.32954545454545453, + "English,Filipino,Spanish": 0.4090909090909091, + "English,Vietnamese,Chinese": 0.3068181818181818, + "English,Vietnamese,Indonesian": 0.375, + "English,Vietnamese,Malay": 0.3522727272727273, + "English,Vietnamese,Spanish": 0.4147727272727273, + "English,Chinese,Indonesian": 0.35795454545454547, + "English,Chinese,Malay": 0.32954545454545453, + "English,Chinese,Spanish": 0.375, + "English,Indonesian,Malay": 0.4375, + "English,Indonesian,Spanish": 0.48295454545454547, + "English,Malay,Spanish": 0.4375, + "Filipino,Vietnamese,Chinese": 0.24431818181818182, + "Filipino,Vietnamese,Indonesian": 0.2897727272727273, + "Filipino,Vietnamese,Malay": 0.2727272727272727, + "Filipino,Vietnamese,Spanish": 0.30113636363636365, + "Filipino,Chinese,Indonesian": 0.2784090909090909, + "Filipino,Chinese,Malay": 0.25, + "Filipino,Chinese,Spanish": 0.26136363636363635, + "Filipino,Indonesian,Malay": 0.3181818181818182, + "Filipino,Indonesian,Spanish": 0.32954545454545453, + "Filipino,Malay,Spanish": 0.2897727272727273, + "Vietnamese,Chinese,Indonesian": 0.2727272727272727, + "Vietnamese,Chinese,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Spanish": 0.2897727272727273, + "Vietnamese,Indonesian,Malay": 0.3465909090909091, + "Vietnamese,Indonesian,Spanish": 0.3352272727272727, + "Vietnamese,Malay,Spanish": 0.32386363636363635, + "Chinese,Indonesian,Malay": 0.3181818181818182, + "Chinese,Indonesian,Spanish": 0.29545454545454547, + "Chinese,Malay,Spanish": 0.2840909090909091, + "Indonesian,Malay,Spanish": 0.36363636363636365 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.19318181818181818, + "English,Filipino,Vietnamese,Indonesian": 0.24431818181818182, + "English,Filipino,Vietnamese,Malay": 0.2159090909090909, + "English,Filipino,Vietnamese,Spanish": 0.2556818181818182, + "English,Filipino,Chinese,Indonesian": 0.24431818181818182, + "English,Filipino,Chinese,Malay": 0.20454545454545456, + "English,Filipino,Chinese,Spanish": 0.25, + "English,Filipino,Indonesian,Malay": 0.26704545454545453, + "English,Filipino,Indonesian,Spanish": 0.3125, + "English,Filipino,Malay,Spanish": 0.26704545454545453, + "English,Vietnamese,Chinese,Indonesian": 0.23295454545454544, + "English,Vietnamese,Chinese,Malay": 0.22727272727272727, + "English,Vietnamese,Chinese,Spanish": 0.26136363636363635, + "English,Vietnamese,Indonesian,Malay": 0.2840909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.3181818181818182, + "English,Vietnamese,Malay,Spanish": 0.29545454545454547, + "English,Chinese,Indonesian,Malay": 0.2727272727272727, + "English,Chinese,Indonesian,Spanish": 0.29545454545454547, + "English,Chinese,Malay,Spanish": 0.2727272727272727, + "English,Indonesian,Malay,Spanish": 0.35795454545454547, + "Filipino,Vietnamese,Chinese,Indonesian": 0.19886363636363635, + "Filipino,Vietnamese,Chinese,Malay": 0.17613636363636365, + "Filipino,Vietnamese,Chinese,Spanish": 0.1875, + "Filipino,Vietnamese,Indonesian,Malay": 0.2159090909090909, + "Filipino,Vietnamese,Indonesian,Spanish": 0.22727272727272727, + "Filipino,Vietnamese,Malay,Spanish": 0.21022727272727273, + "Filipino,Chinese,Indonesian,Malay": 0.19886363636363635, + "Filipino,Chinese,Indonesian,Spanish": 0.2159090909090909, + "Filipino,Chinese,Malay,Spanish": 0.1875, + "Filipino,Indonesian,Malay,Spanish": 0.23863636363636365, + "Vietnamese,Chinese,Indonesian,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Spanish": 0.20454545454545456, + "Vietnamese,Chinese,Malay,Spanish": 0.21022727272727273, + "Vietnamese,Indonesian,Malay,Spanish": 0.25, + "Chinese,Indonesian,Malay,Spanish": 0.23863636363636365 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.17613636363636365, + "English,Filipino,Vietnamese,Chinese,Malay": 0.1534090909090909, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.17613636363636365, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.19318181818181818, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.2159090909090909, + "English,Filipino,Vietnamese,Malay,Spanish": 0.1875, + "English,Filipino,Chinese,Indonesian,Malay": 0.18181818181818182, + "English,Filipino,Chinese,Indonesian,Spanish": 0.2159090909090909, + "English,Filipino,Chinese,Malay,Spanish": 0.18181818181818182, + "English,Filipino,Indonesian,Malay,Spanish": 0.23295454545454544, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.1875, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.20454545454545456, + "English,Vietnamese,Chinese,Malay,Spanish": 0.20454545454545456, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.24431818181818182, + "English,Chinese,Indonesian,Malay,Spanish": 0.23863636363636365, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.1590909090909091, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.14772727272727273, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.17613636363636365, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.17045454545454544 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.14204545454545456, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.1590909090909091, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.14204545454545456, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.17045454545454544, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.17045454545454544, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.17045454545454544, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.13068181818181818 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.13068181818181818 + } + }, + "AC3_2": 0.4639771268512254, + "AC3_3": 0.3724733964452681, + "AC3_4": 0.308991190142257, + "AC3_5": 0.262740208634875, + "AC3_6": 0.22792568038326644, + "AC3_7": 0.2004670437742832 + }, + "prompt_4": { + "overall_acc": 0.43181818181818177, + "language_acc": { + "English": 0.5340909090909091, + "Filipino": 0.42613636363636365, + "Vietnamese": 0.3806818181818182, + "Chinese": 0.4090909090909091, + "Indonesian": 0.4318181818181818, + "Malay": 0.4090909090909091, + "Spanish": 0.4318181818181818 + }, + "consistency_score_2": 0.5105519480519481, + "consistency_score_3": 0.3303571428571429, + "consistency_score_4": 0.23717532467532462, + "consistency_score_5": 0.1810064935064935, + "consistency_score_6": 0.1444805194805195, + "consistency_score_7": 0.11931818181818182, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.5170454545454546, + "English,Vietnamese": 0.5113636363636364, + "English,Chinese": 0.5227272727272727, + "English,Indonesian": 0.6022727272727273, + "English,Malay": 0.5738636363636364, + "English,Spanish": 0.7102272727272727, + "Filipino,Vietnamese": 0.4318181818181818, + "Filipino,Chinese": 0.4375, + "Filipino,Indonesian": 0.5170454545454546, + "Filipino,Malay": 0.4431818181818182, + "Filipino,Spanish": 0.5113636363636364, + "Vietnamese,Chinese": 0.44886363636363635, + "Vietnamese,Indonesian": 0.5113636363636364, + "Vietnamese,Malay": 0.4772727272727273, + "Vietnamese,Spanish": 0.5227272727272727, + "Chinese,Indonesian": 0.42613636363636365, + "Chinese,Malay": 0.4318181818181818, + "Chinese,Spanish": 0.45454545454545453, + "Indonesian,Malay": 0.6193181818181818, + "Indonesian,Spanish": 0.5397727272727273, + "Malay,Spanish": 0.5113636363636364 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.2897727272727273, + "English,Filipino,Chinese": 0.3068181818181818, + "English,Filipino,Indonesian": 0.375, + "English,Filipino,Malay": 0.3352272727272727, + "English,Filipino,Spanish": 0.4318181818181818, + "English,Vietnamese,Chinese": 0.29545454545454547, + "English,Vietnamese,Indonesian": 0.3693181818181818, + "English,Vietnamese,Malay": 0.36363636363636365, + "English,Vietnamese,Spanish": 0.4147727272727273, + "English,Chinese,Indonesian": 0.3409090909090909, + "English,Chinese,Malay": 0.32954545454545453, + "English,Chinese,Spanish": 0.375, + "English,Indonesian,Malay": 0.4431818181818182, + "English,Indonesian,Spanish": 0.4772727272727273, + "English,Malay,Spanish": 0.4431818181818182, + "Filipino,Vietnamese,Chinese": 0.24431818181818182, + "Filipino,Vietnamese,Indonesian": 0.2897727272727273, + "Filipino,Vietnamese,Malay": 0.2556818181818182, + "Filipino,Vietnamese,Spanish": 0.3125, + "Filipino,Chinese,Indonesian": 0.26704545454545453, + "Filipino,Chinese,Malay": 0.24431818181818182, + "Filipino,Chinese,Spanish": 0.29545454545454547, + "Filipino,Indonesian,Malay": 0.3465909090909091, + "Filipino,Indonesian,Spanish": 0.3352272727272727, + "Filipino,Malay,Spanish": 0.3125, + "Vietnamese,Chinese,Indonesian": 0.26704545454545453, + "Vietnamese,Chinese,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Spanish": 0.2784090909090909, + "Vietnamese,Indonesian,Malay": 0.35795454545454547, + "Vietnamese,Indonesian,Spanish": 0.3522727272727273, + "Vietnamese,Malay,Spanish": 0.3181818181818182, + "Chinese,Indonesian,Malay": 0.3125, + "Chinese,Indonesian,Spanish": 0.2784090909090909, + "Chinese,Malay,Spanish": 0.26704545454545453, + "Indonesian,Malay,Spanish": 0.375 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.1875, + "English,Filipino,Vietnamese,Indonesian": 0.23863636363636365, + "English,Filipino,Vietnamese,Malay": 0.21022727272727273, + "English,Filipino,Vietnamese,Spanish": 0.26704545454545453, + "English,Filipino,Chinese,Indonesian": 0.23863636363636365, + "English,Filipino,Chinese,Malay": 0.2159090909090909, + "English,Filipino,Chinese,Spanish": 0.26136363636363635, + "English,Filipino,Indonesian,Malay": 0.26704545454545453, + "English,Filipino,Indonesian,Spanish": 0.3181818181818182, + "English,Filipino,Malay,Spanish": 0.2897727272727273, + "English,Vietnamese,Chinese,Indonesian": 0.2215909090909091, + "English,Vietnamese,Chinese,Malay": 0.2159090909090909, + "English,Vietnamese,Chinese,Spanish": 0.23863636363636365, + "English,Vietnamese,Indonesian,Malay": 0.2897727272727273, + "English,Vietnamese,Indonesian,Spanish": 0.32386363636363635, + "English,Vietnamese,Malay,Spanish": 0.2897727272727273, + "English,Chinese,Indonesian,Malay": 0.2727272727272727, + "English,Chinese,Indonesian,Spanish": 0.2727272727272727, + "English,Chinese,Malay,Spanish": 0.24431818181818182, + "English,Indonesian,Malay,Spanish": 0.3522727272727273, + "Filipino,Vietnamese,Chinese,Indonesian": 0.17613636363636365, + "Filipino,Vietnamese,Chinese,Malay": 0.1590909090909091, + "Filipino,Vietnamese,Chinese,Spanish": 0.19886363636363635, + "Filipino,Vietnamese,Indonesian,Malay": 0.21022727272727273, + "Filipino,Vietnamese,Indonesian,Spanish": 0.23295454545454544, + "Filipino,Vietnamese,Malay,Spanish": 0.20454545454545456, + "Filipino,Chinese,Indonesian,Malay": 0.19886363636363635, + "Filipino,Chinese,Indonesian,Spanish": 0.20454545454545456, + "Filipino,Chinese,Malay,Spanish": 0.19318181818181818, + "Filipino,Indonesian,Malay,Spanish": 0.25, + "Vietnamese,Chinese,Indonesian,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Malay,Spanish": 0.18181818181818182, + "Vietnamese,Indonesian,Malay,Spanish": 0.2556818181818182, + "Chinese,Indonesian,Malay,Spanish": 0.2215909090909091 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.1590909090909091, + "English,Filipino,Vietnamese,Chinese,Malay": 0.14204545454545456, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.17613636363636365, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.18181818181818182, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.2215909090909091, + "English,Filipino,Vietnamese,Malay,Spanish": 0.1875, + "English,Filipino,Chinese,Indonesian,Malay": 0.1875, + "English,Filipino,Chinese,Indonesian,Spanish": 0.20454545454545456, + "English,Filipino,Chinese,Malay,Spanish": 0.1875, + "English,Filipino,Indonesian,Malay,Spanish": 0.23295454545454544, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.18181818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.19318181818181818, + "English,Vietnamese,Chinese,Malay,Spanish": 0.17045454545454544, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.24431818181818182, + "English,Chinese,Indonesian,Malay,Spanish": 0.2159090909090909, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.13636363636363635, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.14772727272727273, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.13636363636363635, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.17613636363636365, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.1534090909090909 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.13068181818181818, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.14772727272727273, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.13068181818181818, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.16477272727272727, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.16477272727272727, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.1534090909090909, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11931818181818182 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11931818181818182 + } + }, + "AC3_2": 0.46789601435799133, + "AC3_3": 0.37433439824694564, + "AC3_4": 0.30618120045719954, + "AC3_5": 0.25508729676752623, + "AC3_6": 0.2165172854937997, + "AC3_7": 0.18697282095951462 + }, + "prompt_5": { + "overall_acc": 0.4391233766233767, + "language_acc": { + "English": 0.5284090909090909, + "Filipino": 0.4318181818181818, + "Vietnamese": 0.3977272727272727, + "Chinese": 0.42045454545454547, + "Indonesian": 0.4431818181818182, + "Malay": 0.42613636363636365, + "Spanish": 0.42613636363636365 + }, + "consistency_score_2": 0.5170454545454546, + "consistency_score_3": 0.34042207792207796, + "consistency_score_4": 0.2521103896103896, + "consistency_score_5": 0.20102813852813858, + "consistency_score_6": 0.16883116883116883, + "consistency_score_7": 0.14772727272727273, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.5340909090909091, + "English,Vietnamese": 0.5454545454545454, + "English,Chinese": 0.5170454545454546, + "English,Indonesian": 0.5738636363636364, + "English,Malay": 0.5568181818181818, + "English,Spanish": 0.7159090909090909, + "Filipino,Vietnamese": 0.48295454545454547, + "Filipino,Chinese": 0.4602272727272727, + "Filipino,Indonesian": 0.5340909090909091, + "Filipino,Malay": 0.48295454545454547, + "Filipino,Spanish": 0.5397727272727273, + "Vietnamese,Chinese": 0.4147727272727273, + "Vietnamese,Indonesian": 0.5170454545454546, + "Vietnamese,Malay": 0.4772727272727273, + "Vietnamese,Spanish": 0.5397727272727273, + "Chinese,Indonesian": 0.4659090909090909, + "Chinese,Malay": 0.42613636363636365, + "Chinese,Spanish": 0.45454545454545453, + "Indonesian,Malay": 0.5795454545454546, + "Indonesian,Spanish": 0.5397727272727273, + "Malay,Spanish": 0.5 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.32954545454545453, + "English,Filipino,Chinese": 0.3181818181818182, + "English,Filipino,Indonesian": 0.3693181818181818, + "English,Filipino,Malay": 0.35795454545454547, + "English,Filipino,Spanish": 0.4318181818181818, + "English,Vietnamese,Chinese": 0.30113636363636365, + "English,Vietnamese,Indonesian": 0.3806818181818182, + "English,Vietnamese,Malay": 0.3693181818181818, + "English,Vietnamese,Spanish": 0.4431818181818182, + "English,Chinese,Indonesian": 0.3465909090909091, + "English,Chinese,Malay": 0.32386363636363635, + "English,Chinese,Spanish": 0.38636363636363635, + "English,Indonesian,Malay": 0.4034090909090909, + "English,Indonesian,Spanish": 0.4659090909090909, + "English,Malay,Spanish": 0.4431818181818182, + "Filipino,Vietnamese,Chinese": 0.26136363636363635, + "Filipino,Vietnamese,Indonesian": 0.3409090909090909, + "Filipino,Vietnamese,Malay": 0.29545454545454547, + "Filipino,Vietnamese,Spanish": 0.3465909090909091, + "Filipino,Chinese,Indonesian": 0.30113636363636365, + "Filipino,Chinese,Malay": 0.2727272727272727, + "Filipino,Chinese,Spanish": 0.30113636363636365, + "Filipino,Indonesian,Malay": 0.3409090909090909, + "Filipino,Indonesian,Spanish": 0.36363636363636365, + "Filipino,Malay,Spanish": 0.3181818181818182, + "Vietnamese,Chinese,Indonesian": 0.2727272727272727, + "Vietnamese,Chinese,Malay": 0.24431818181818182, + "Vietnamese,Chinese,Spanish": 0.2784090909090909, + "Vietnamese,Indonesian,Malay": 0.35795454545454547, + "Vietnamese,Indonesian,Spanish": 0.36363636363636365, + "Vietnamese,Malay,Spanish": 0.32954545454545453, + "Chinese,Indonesian,Malay": 0.3181818181818182, + "Chinese,Indonesian,Spanish": 0.3068181818181818, + "Chinese,Malay,Spanish": 0.2784090909090909, + "Indonesian,Malay,Spanish": 0.3522727272727273 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.2159090909090909, + "English,Filipino,Vietnamese,Indonesian": 0.2727272727272727, + "English,Filipino,Vietnamese,Malay": 0.2556818181818182, + "English,Filipino,Vietnamese,Spanish": 0.2897727272727273, + "English,Filipino,Chinese,Indonesian": 0.24431818181818182, + "English,Filipino,Chinese,Malay": 0.23295454545454544, + "English,Filipino,Chinese,Spanish": 0.26704545454545453, + "English,Filipino,Indonesian,Malay": 0.26136363636363635, + "English,Filipino,Indonesian,Spanish": 0.3181818181818182, + "English,Filipino,Malay,Spanish": 0.30113636363636365, + "English,Vietnamese,Chinese,Indonesian": 0.22727272727272727, + "English,Vietnamese,Chinese,Malay": 0.2215909090909091, + "English,Vietnamese,Chinese,Spanish": 0.2556818181818182, + "English,Vietnamese,Indonesian,Malay": 0.2897727272727273, + "English,Vietnamese,Indonesian,Spanish": 0.3352272727272727, + "English,Vietnamese,Malay,Spanish": 0.3068181818181818, + "English,Chinese,Indonesian,Malay": 0.26136363636363635, + "English,Chinese,Indonesian,Spanish": 0.2897727272727273, + "English,Chinese,Malay,Spanish": 0.26704545454545453, + "English,Indonesian,Malay,Spanish": 0.32954545454545453, + "Filipino,Vietnamese,Chinese,Indonesian": 0.19886363636363635, + "Filipino,Vietnamese,Chinese,Malay": 0.19318181818181818, + "Filipino,Vietnamese,Chinese,Spanish": 0.21022727272727273, + "Filipino,Vietnamese,Indonesian,Malay": 0.25, + "Filipino,Vietnamese,Indonesian,Spanish": 0.2784090909090909, + "Filipino,Vietnamese,Malay,Spanish": 0.23295454545454544, + "Filipino,Chinese,Indonesian,Malay": 0.2159090909090909, + "Filipino,Chinese,Indonesian,Spanish": 0.23295454545454544, + "Filipino,Chinese,Malay,Spanish": 0.2159090909090909, + "Filipino,Indonesian,Malay,Spanish": 0.25, + "Vietnamese,Chinese,Indonesian,Malay": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2159090909090909, + "Vietnamese,Chinese,Malay,Spanish": 0.19318181818181818, + "Vietnamese,Indonesian,Malay,Spanish": 0.26136363636363635, + "Chinese,Indonesian,Malay,Spanish": 0.22727272727272727 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.17613636363636365, + "English,Filipino,Vietnamese,Chinese,Malay": 0.18181818181818182, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.19886363636363635, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.2159090909090909, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.25, + "English,Filipino,Vietnamese,Malay,Spanish": 0.2215909090909091, + "English,Filipino,Chinese,Indonesian,Malay": 0.1875, + "English,Filipino,Chinese,Indonesian,Spanish": 0.2159090909090909, + "English,Filipino,Chinese,Malay,Spanish": 0.21022727272727273, + "English,Filipino,Indonesian,Malay,Spanish": 0.23295454545454544, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.1875, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.21022727272727273, + "English,Vietnamese,Chinese,Malay,Spanish": 0.19318181818181818, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.25, + "English,Chinese,Indonesian,Malay,Spanish": 0.2215909090909091, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.16477272727272727, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.17613636363636365, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.16477272727272727, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.21022727272727273, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.17045454545454544 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.17045454545454544, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.16477272727272727, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.19886363636363635, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.17613636363636365, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.17045454545454544, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.14772727272727273 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.14772727272727273 + } + }, + "AC3_2": 0.4749093223766325, + "AC3_3": 0.38352424850524586, + "AC3_4": 0.32031874297672414, + "AC3_5": 0.27579769127658627, + "AC3_6": 0.2438922892460309, + "AC3_7": 0.22108009552375105 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5728155339805825 + }, + "prompt_2": { + "accuracy": 0.5922330097087378 + }, + "prompt_3": { + "accuracy": 0.5922330097087378 + }, + "prompt_4": { + "accuracy": 0.6213592233009708 + }, + "prompt_5": { + "accuracy": 0.5922330097087378 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3333333333333333 + }, + "prompt_2": { + "accuracy": 0.2571428571428571 + }, + "prompt_3": { + "accuracy": 0.29523809523809524 + }, + "prompt_4": { + "accuracy": 0.3142857142857143 + }, + "prompt_5": { + "accuracy": 0.2857142857142857 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6728971962616822 + }, + "prompt_2": { + "accuracy": 0.6542056074766355 + }, + "prompt_3": { + "accuracy": 0.6728971962616822 + }, + "prompt_4": { + "accuracy": 0.6915887850467289 + }, + "prompt_5": { + "accuracy": 0.6728971962616822 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.41, + "category_acc": { + "brand": 0.6, + "demographics": 0.0, + "biology": 0.4, + "history": 0.26666666666666666, + "literature": 0.1, + "politics": 0.8, + "culture": 0.4, + "film": 0.5, + "law": 0.3, + "geography": 0.6 + } + }, + "prompt_2": { + "accuracy": 0.45, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.6, + "history": 0.13333333333333333, + "literature": 0.2, + "politics": 0.9, + "culture": 0.5, + "film": 0.5, + "law": 0.4, + "geography": 0.6 + } + }, + "prompt_3": { + "accuracy": 0.42, + "category_acc": { + "brand": 0.4, + "demographics": 0.0, + "biology": 0.5, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.7, + "culture": 0.6, + "film": 0.4, + "law": 0.4, + "geography": 0.6 + } + }, + "prompt_4": { + "accuracy": 0.47, + "category_acc": { + "brand": 0.5, + "demographics": 0.6, + "biology": 0.4, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.9, + "culture": 0.4, + "film": 0.5, + "law": 0.4, + "geography": 0.6 + } + }, + "prompt_5": { + "accuracy": 0.44, + "category_acc": { + "brand": 0.5, + "demographics": 0.0, + "biology": 0.5, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.7, + "culture": 0.5, + "film": 0.5, + "law": 0.4, + "geography": 0.7 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.25259519002554215 + }, + "prompt_2": { + "bleu_score": 0.2330912217496277 + }, + "prompt_3": { + "bleu_score": 0.23155185313667795 + }, + "prompt_4": { + "bleu_score": 0.22534838939314503 + }, + "prompt_5": { + "bleu_score": 0.2124562824864275 + } }, "indommlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4166499766339542, + "category_acc": { + "History": 0.3654618473895582, + "Geography": 0.33877551020408164, + "Lampungic": 0.3741496598639456, + "Social science": 0.656093489148581, + "Balinese": 0.30148619957537154, + "Makassarese": 0.34408602150537637, + "Banjarese": 0.3125, + "Chemistry": 0.24671532846715327, + "Biology": 0.3668639053254438, + "Science": 0.5799793601651186, + "Christian religion": 0.46766169154228854, + "Art": 0.5108153078202995, + "Islam religion": 0.4850640113798009, + "Hindu religion": 0.4, + "Madurese": 0.3423728813559322, + "Sport": 0.3716216216216216, + "Indonesian language": 0.4735367372353674, + "Physics": 0.3939393939393939, + "Minangkabau culture": 0.36683417085427134, + "Dayak language": 0.27522935779816515, + "Sociology": 0.3709677419354839, + "Economy": 0.35655737704918034, + "Sundanese": 0.337942955920484, + "Javanese": 0.3034274193548387, + "Civic education": 0.4663805436337625 + } + }, + "prompt_2": { + "accuracy": 0.4250617531210361, + "category_acc": { + "History": 0.3714859437751004, + "Geography": 0.3816326530612245, + "Lampungic": 0.3877551020408163, + "Social science": 0.67779632721202, + "Balinese": 0.29723991507430997, + "Makassarese": 0.34946236559139787, + "Banjarese": 0.3819444444444444, + "Chemistry": 0.25985401459854013, + "Biology": 0.34911242603550297, + "Science": 0.5593395252837977, + "Christian religion": 0.4975124378109453, + "Art": 0.4908485856905158, + "Islam religion": 0.49359886201991465, + "Hindu religion": 0.46, + "Madurese": 0.29152542372881357, + "Sport": 0.38513513513513514, + "Indonesian language": 0.488480697384807, + "Physics": 0.40606060606060607, + "Minangkabau culture": 0.32160804020100503, + "Dayak language": 0.27522935779816515, + "Sociology": 0.3951612903225806, + "Economy": 0.36885245901639346, + "Sundanese": 0.34572169403630076, + "Javanese": 0.32056451612903225, + "Civic education": 0.49356223175965663 + } + }, + "prompt_3": { + "accuracy": 0.41778489885840175, + "category_acc": { + "History": 0.3654618473895582, + "Geography": 0.35714285714285715, + "Lampungic": 0.3741496598639456, + "Social science": 0.679465776293823, + "Balinese": 0.27388535031847133, + "Makassarese": 0.3655913978494624, + "Banjarese": 0.3194444444444444, + "Chemistry": 0.27153284671532846, + "Biology": 0.35384615384615387, + "Science": 0.5851393188854489, + "Christian religion": 0.48756218905472637, + "Art": 0.46921797004991683, + "Islam religion": 0.5021337126600285, + "Hindu religion": 0.38, + "Madurese": 0.2576271186440678, + "Sport": 0.41216216216216217, + "Indonesian language": 0.476027397260274, + "Physics": 0.40606060606060607, + "Minangkabau culture": 0.33668341708542715, + "Dayak language": 0.29357798165137616, + "Sociology": 0.3588709677419355, + "Economy": 0.35450819672131145, + "Sundanese": 0.36041486603284356, + "Javanese": 0.2963709677419355, + "Civic education": 0.4663805436337625 + } + }, + "prompt_4": { + "accuracy": 0.4221243073636424, + "category_acc": { + "History": 0.35542168674698793, + "Geography": 0.3816326530612245, + "Lampungic": 0.3197278911564626, + "Social science": 0.6978297161936561, + "Balinese": 0.28874734607218683, + "Makassarese": 0.3333333333333333, + "Banjarese": 0.3472222222222222, + "Chemistry": 0.25985401459854013, + "Biology": 0.357396449704142, + "Science": 0.5789473684210527, + "Christian religion": 0.48258706467661694, + "Art": 0.47920133111480867, + "Islam religion": 0.5092460881934566, + "Hindu religion": 0.4266666666666667, + "Madurese": 0.3016949152542373, + "Sport": 0.42567567567567566, + "Indonesian language": 0.4828767123287671, + "Physics": 0.4202020202020202, + "Minangkabau culture": 0.3165829145728643, + "Dayak language": 0.30275229357798167, + "Sociology": 0.3649193548387097, + "Economy": 0.3668032786885246, + "Sundanese": 0.334485738980121, + "Javanese": 0.31048387096774194, + "Civic education": 0.48068669527896996 + } + }, + "prompt_5": { + "accuracy": 0.4134454903531611, + "category_acc": { + "History": 0.35542168674698793, + "Geography": 0.3510204081632653, + "Lampungic": 0.35374149659863946, + "Social science": 0.67779632721202, + "Balinese": 0.27176220806794055, + "Makassarese": 0.3172043010752688, + "Banjarese": 0.3541666666666667, + "Chemistry": 0.25255474452554744, + "Biology": 0.3349112426035503, + "Science": 0.5593395252837977, + "Christian religion": 0.47761194029850745, + "Art": 0.4875207986688852, + "Islam religion": 0.4950213371266003, + "Hindu religion": 0.44666666666666666, + "Madurese": 0.31186440677966104, + "Sport": 0.3716216216216216, + "Indonesian language": 0.4735367372353674, + "Physics": 0.4101010101010101, + "Minangkabau culture": 0.32160804020100503, + "Dayak language": 0.3119266055045872, + "Sociology": 0.3528225806451613, + "Economy": 0.35860655737704916, + "Sundanese": 0.3353500432152118, + "Javanese": 0.3094758064516129, + "Civic education": 0.474964234620887 + } + } }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.33300390246575023 + }, + "prompt_2": { + "bleu_score": 0.34674112370474053 + }, + "prompt_3": { + "bleu_score": 0.34387993201923095 + }, + "prompt_4": { + "bleu_score": 0.34698977924130403 + }, + "prompt_5": { + "bleu_score": 0.3369188272083297 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.274444315024161 + }, + "prompt_2": { + "bleu_score": 0.2889501163371845 + }, + "prompt_3": { + "bleu_score": 0.2846942084178902 + }, + "prompt_4": { + "bleu_score": 0.2872952831071052 + }, + "prompt_5": { + "bleu_score": 0.2763493733862075 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.20740875967603606 + }, + "prompt_2": { + "bleu_score": 0.21254922040711977 + }, + "prompt_3": { + "bleu_score": 0.2115340825408013 + }, + "prompt_4": { + "bleu_score": 0.21459259570199105 + }, + "prompt_5": { + "bleu_score": 0.2052531321409591 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.34818077124088853 + }, + "prompt_2": { + "bleu_score": 0.3571418474707277 + }, + "prompt_3": { + "bleu_score": 0.35689746343996687 + }, + "prompt_4": { + "bleu_score": 0.35632465039273786 + }, + "prompt_5": { + "bleu_score": 0.3433176562680806 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6126021003500584 + }, + "prompt_2": { + "accuracy": 0.5892648774795799 + }, + "prompt_3": { + "accuracy": 0.6126021003500584 + }, + "prompt_4": { + "accuracy": 0.6056009334889149 + }, + "prompt_5": { + "accuracy": 0.6079346557759626 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5809081158383983, + "category_acc": { + "high_school_european_history": 0.6707317073170732, + "business_ethics": 0.5353535353535354, + "clinical_knowledge": 0.6477272727272727, + "medical_genetics": 0.6262626262626263, + "high_school_us_history": 0.7684729064039408, + "high_school_physics": 0.35333333333333333, + "high_school_world_history": 0.7076271186440678, + "virology": 0.49696969696969695, + "high_school_microeconomics": 0.6329113924050633, + "econometrics": 0.46017699115044247, + "college_computer_science": 0.48484848484848486, + "high_school_biology": 0.7540453074433657, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.41637010676156583, + "philosophy": 0.6225806451612903, + "professional_medicine": 0.5940959409594095, + "nutrition": 0.6131147540983607, + "global_facts": 0.23232323232323232, + "machine_learning": 0.36936936936936937, + "security_studies": 0.6926229508196722, + "public_relations": 0.6146788990825688, + "professional_psychology": 0.5826513911620295, + "prehistory": 0.6594427244582043, + "anatomy": 0.5373134328358209, + "human_sexuality": 0.6384615384615384, + "college_medicine": 0.5523255813953488, + "high_school_government_and_politics": 0.8385416666666666, + "college_chemistry": 0.3838383838383838, + "logical_fallacies": 0.691358024691358, + "high_school_geography": 0.7614213197969543, + "elementary_mathematics": 0.5437665782493368, + "human_aging": 0.6621621621621622, + "college_mathematics": 0.24242424242424243, + "high_school_psychology": 0.8014705882352942, + "formal_logic": 0.376, + "high_school_statistics": 0.4186046511627907, + "international_law": 0.7833333333333333, + "high_school_mathematics": 0.24535315985130113, + "high_school_computer_science": 0.6060606060606061, + "conceptual_physics": 0.5512820512820513, + "miscellaneous": 0.7915601023017903, + "high_school_chemistry": 0.4306930693069307, + "marketing": 0.8412017167381974, + "professional_law": 0.4233529028049576, + "management": 0.7647058823529411, + "college_physics": 0.4158415841584158, + "jurisprudence": 0.719626168224299, + "world_religions": 0.8352941176470589, + "sociology": 0.8, + "us_foreign_policy": 0.797979797979798, + "high_school_macroeconomics": 0.6246786632390745, + "computer_security": 0.6767676767676768, + "moral_scenarios": 0.32102908277404923, + "moral_disputes": 0.6550724637681159, + "electrical_engineering": 0.5555555555555556, + "astronomy": 0.6291390728476821, + "college_biology": 0.6853146853146853 + } + }, + "prompt_2": { + "accuracy": 0.5845548802288166, + "category_acc": { + "high_school_european_history": 0.6707317073170732, + "business_ethics": 0.5252525252525253, + "clinical_knowledge": 0.6818181818181818, + "medical_genetics": 0.6161616161616161, + "high_school_us_history": 0.7438423645320197, + "high_school_physics": 0.3466666666666667, + "high_school_world_history": 0.7245762711864406, + "virology": 0.48484848484848486, + "high_school_microeconomics": 0.6329113924050633, + "econometrics": 0.415929203539823, + "college_computer_science": 0.45454545454545453, + "high_school_biology": 0.7216828478964401, + "abstract_algebra": 0.25252525252525254, + "professional_accounting": 0.43416370106761565, + "philosophy": 0.6225806451612903, + "professional_medicine": 0.6088560885608856, + "nutrition": 0.6131147540983607, + "global_facts": 0.31313131313131315, + "machine_learning": 0.40540540540540543, + "security_studies": 0.6721311475409836, + "public_relations": 0.5963302752293578, + "professional_psychology": 0.5842880523731587, + "prehistory": 0.6656346749226006, + "anatomy": 0.5597014925373134, + "human_sexuality": 0.6307692307692307, + "college_medicine": 0.5465116279069767, + "high_school_government_and_politics": 0.8333333333333334, + "college_chemistry": 0.3939393939393939, + "logical_fallacies": 0.7160493827160493, + "high_school_geography": 0.7461928934010152, + "elementary_mathematics": 0.583554376657825, + "human_aging": 0.6711711711711712, + "college_mathematics": 0.3434343434343434, + "high_school_psychology": 0.8033088235294118, + "formal_logic": 0.408, + "high_school_statistics": 0.42790697674418604, + "international_law": 0.7833333333333333, + "high_school_mathematics": 0.2788104089219331, + "high_school_computer_science": 0.6161616161616161, + "conceptual_physics": 0.5299145299145299, + "miscellaneous": 0.7979539641943734, + "high_school_chemistry": 0.4158415841584158, + "marketing": 0.8497854077253219, + "professional_law": 0.4155251141552511, + "management": 0.7843137254901961, + "college_physics": 0.39603960396039606, + "jurisprudence": 0.7289719626168224, + "world_religions": 0.8117647058823529, + "sociology": 0.82, + "us_foreign_policy": 0.8080808080808081, + "high_school_macroeconomics": 0.6298200514138818, + "computer_security": 0.7070707070707071, + "moral_scenarios": 0.34451901565995524, + "moral_disputes": 0.6376811594202898, + "electrical_engineering": 0.5625, + "astronomy": 0.5960264900662252, + "college_biology": 0.6783216783216783 + } + }, + "prompt_3": { + "accuracy": 0.5863425098319628, + "category_acc": { + "high_school_european_history": 0.6707317073170732, + "business_ethics": 0.5555555555555556, + "clinical_knowledge": 0.6742424242424242, + "medical_genetics": 0.6565656565656566, + "high_school_us_history": 0.7635467980295566, + "high_school_physics": 0.31333333333333335, + "high_school_world_history": 0.7161016949152542, + "virology": 0.4909090909090909, + "high_school_microeconomics": 0.6244725738396625, + "econometrics": 0.4336283185840708, + "college_computer_science": 0.494949494949495, + "high_school_biology": 0.7281553398058253, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.41637010676156583, + "philosophy": 0.6483870967741936, + "professional_medicine": 0.6051660516605166, + "nutrition": 0.6131147540983607, + "global_facts": 0.36363636363636365, + "machine_learning": 0.42342342342342343, + "security_studies": 0.680327868852459, + "public_relations": 0.6330275229357798, + "professional_psychology": 0.5875613747954174, + "prehistory": 0.6687306501547987, + "anatomy": 0.5597014925373134, + "human_sexuality": 0.6076923076923076, + "college_medicine": 0.5872093023255814, + "high_school_government_and_politics": 0.8385416666666666, + "college_chemistry": 0.40404040404040403, + "logical_fallacies": 0.7098765432098766, + "high_school_geography": 0.7614213197969543, + "elementary_mathematics": 0.5755968169761273, + "human_aging": 0.6801801801801802, + "college_mathematics": 0.3939393939393939, + "high_school_psychology": 0.8106617647058824, + "formal_logic": 0.384, + "high_school_statistics": 0.4372093023255814, + "international_law": 0.775, + "high_school_mathematics": 0.26022304832713755, + "high_school_computer_science": 0.5858585858585859, + "conceptual_physics": 0.5341880341880342, + "miscellaneous": 0.7979539641943734, + "high_school_chemistry": 0.4207920792079208, + "marketing": 0.8454935622317596, + "professional_law": 0.4187866927592955, + "management": 0.7941176470588235, + "college_physics": 0.42574257425742573, + "jurisprudence": 0.7383177570093458, + "world_religions": 0.8294117647058824, + "sociology": 0.805, + "us_foreign_policy": 0.8181818181818182, + "high_school_macroeconomics": 0.609254498714653, + "computer_security": 0.6767676767676768, + "moral_scenarios": 0.3288590604026846, + "moral_disputes": 0.6521739130434783, + "electrical_engineering": 0.5486111111111112, + "astronomy": 0.6026490066225165, + "college_biology": 0.6783216783216783 + } + }, + "prompt_4": { + "accuracy": 0.5881301394351091, + "category_acc": { + "high_school_european_history": 0.676829268292683, + "business_ethics": 0.5555555555555556, + "clinical_knowledge": 0.678030303030303, + "medical_genetics": 0.6565656565656566, + "high_school_us_history": 0.7684729064039408, + "high_school_physics": 0.35333333333333333, + "high_school_world_history": 0.7033898305084746, + "virology": 0.4909090909090909, + "high_school_microeconomics": 0.6371308016877637, + "econometrics": 0.415929203539823, + "college_computer_science": 0.494949494949495, + "high_school_biology": 0.7281553398058253, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.4092526690391459, + "philosophy": 0.6451612903225806, + "professional_medicine": 0.6162361623616236, + "nutrition": 0.6491803278688525, + "global_facts": 0.32323232323232326, + "machine_learning": 0.35135135135135137, + "security_studies": 0.6967213114754098, + "public_relations": 0.6422018348623854, + "professional_psychology": 0.5957446808510638, + "prehistory": 0.6780185758513931, + "anatomy": 0.5373134328358209, + "human_sexuality": 0.6692307692307692, + "college_medicine": 0.5872093023255814, + "high_school_government_and_politics": 0.8333333333333334, + "college_chemistry": 0.43434343434343436, + "logical_fallacies": 0.6851851851851852, + "high_school_geography": 0.7614213197969543, + "elementary_mathematics": 0.5809018567639257, + "human_aging": 0.6846846846846847, + "college_mathematics": 0.2727272727272727, + "high_school_psychology": 0.8051470588235294, + "formal_logic": 0.392, + "high_school_statistics": 0.4511627906976744, + "international_law": 0.7833333333333333, + "high_school_mathematics": 0.21561338289962825, + "high_school_computer_science": 0.6060606060606061, + "conceptual_physics": 0.5427350427350427, + "miscellaneous": 0.7979539641943734, + "high_school_chemistry": 0.43564356435643564, + "marketing": 0.8540772532188842, + "professional_law": 0.42400521852576645, + "management": 0.7745098039215687, + "college_physics": 0.39603960396039606, + "jurisprudence": 0.7102803738317757, + "world_religions": 0.8235294117647058, + "sociology": 0.815, + "us_foreign_policy": 0.797979797979798, + "high_school_macroeconomics": 0.6272493573264781, + "computer_security": 0.696969696969697, + "moral_scenarios": 0.3221476510067114, + "moral_disputes": 0.6695652173913044, + "electrical_engineering": 0.5416666666666666, + "astronomy": 0.6291390728476821, + "college_biology": 0.6783216783216783 + } + }, + "prompt_5": { + "accuracy": 0.586271004647837, + "category_acc": { + "high_school_european_history": 0.6646341463414634, + "business_ethics": 0.5151515151515151, + "clinical_knowledge": 0.6515151515151515, + "medical_genetics": 0.6262626262626263, + "high_school_us_history": 0.7438423645320197, + "high_school_physics": 0.32, + "high_school_world_history": 0.7076271186440678, + "virology": 0.4909090909090909, + "high_school_microeconomics": 0.6413502109704642, + "econometrics": 0.4336283185840708, + "college_computer_science": 0.5050505050505051, + "high_school_biology": 0.7249190938511327, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.42704626334519574, + "philosophy": 0.6451612903225806, + "professional_medicine": 0.5867158671586716, + "nutrition": 0.6229508196721312, + "global_facts": 0.35353535353535354, + "machine_learning": 0.38738738738738737, + "security_studies": 0.7049180327868853, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.5859247135842881, + "prehistory": 0.6780185758513931, + "anatomy": 0.5447761194029851, + "human_sexuality": 0.6384615384615384, + "college_medicine": 0.5872093023255814, + "high_school_government_and_politics": 0.8333333333333334, + "college_chemistry": 0.43434343434343436, + "logical_fallacies": 0.7037037037037037, + "high_school_geography": 0.7614213197969543, + "elementary_mathematics": 0.6047745358090185, + "human_aging": 0.6621621621621622, + "college_mathematics": 0.36363636363636365, + "high_school_psychology": 0.8033088235294118, + "formal_logic": 0.408, + "high_school_statistics": 0.4511627906976744, + "international_law": 0.7666666666666667, + "high_school_mathematics": 0.26022304832713755, + "high_school_computer_science": 0.5959595959595959, + "conceptual_physics": 0.5555555555555556, + "miscellaneous": 0.7992327365728901, + "high_school_chemistry": 0.43564356435643564, + "marketing": 0.8412017167381974, + "professional_law": 0.42400521852576645, + "management": 0.7647058823529411, + "college_physics": 0.37623762376237624, + "jurisprudence": 0.7009345794392523, + "world_religions": 0.8352941176470589, + "sociology": 0.81, + "us_foreign_policy": 0.8282828282828283, + "high_school_macroeconomics": 0.6195372750642674, + "computer_security": 0.696969696969697, + "moral_scenarios": 0.3243847874720358, + "moral_disputes": 0.6463768115942029, + "electrical_engineering": 0.5555555555555556, + "astronomy": 0.6225165562913907, + "college_biology": 0.6503496503496503 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3789004457652303 + }, + "prompt_2": { + "accuracy": 0.36775631500742945 + }, + "prompt_3": { + "accuracy": 0.3848439821693908 + }, + "prompt_4": { + "accuracy": 0.38410104011887075 + }, + "prompt_5": { + "accuracy": 0.39450222882615155 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.41469489414694893, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.5416666666666666, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.42857142857142855, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.47619047619047616, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.5416666666666666, + "middle_school_biology": 0.6538461538461539, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.4, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.4166666666666667, + "business_administration": 0.2894736842105263, + "marxism": 0.5416666666666666, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.4411764705882353, + "teacher_qualification": 0.6938775510204082, + "high_school_politics": 0.5833333333333334, + "high_school_geography": 0.5, + "middle_school_politics": 0.6153846153846154, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.2857142857142857, + "ideological_and_moral_cultivation": 0.625, + "logic": 0.5185185185185185, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.39473684210526316, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.52, + "middle_school_history": 0.48148148148148145, + "civil_servant": 0.36538461538461536, + "sports_science": 0.3333333333333333, + "plant_protection": 0.5555555555555556, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.48148148148148145, + "urban_and_rural_planner": 0.37254901960784315, + "accountant": 0.3333333333333333, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.5, + "tax_accountant": 0.37037037037037035, + "physician": 0.42592592592592593 + } + }, + "prompt_2": { + "accuracy": 0.41594022415940224, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.5, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.5, + "college_physics": 0.25, + "college_chemistry": 0.1724137931034483, + "advanced_mathematics": 0.4166666666666667, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.5238095238095238, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.375, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.7307692307692307, + "middle_school_physics": 0.4583333333333333, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.4166666666666667, + "business_administration": 0.34210526315789475, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.4482758620689655, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.6122448979591837, + "high_school_politics": 0.5416666666666666, + "high_school_geography": 0.5833333333333334, + "middle_school_politics": 0.6538461538461539, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.39285714285714285, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.5185185185185185, + "law": 0.41379310344827586, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.47368421052631576, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.375, + "high_school_history": 0.4, + "middle_school_history": 0.48148148148148145, + "civil_servant": 0.34615384615384615, + "sports_science": 0.3333333333333333, + "plant_protection": 0.5555555555555556, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.4444444444444444, + "urban_and_rural_planner": 0.5098039215686274, + "accountant": 0.3148148148148148, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.4722222222222222, + "tax_accountant": 0.4074074074074074, + "physician": 0.4444444444444444 + } + }, + "prompt_3": { + "accuracy": 0.41344956413449563, + "category_acc": { + "computer_network": 0.2916666666666667, + "operating_system": 0.5833333333333334, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.5238095238095238, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.5238095238095238, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.43478260869565216, + "high_school_physics": 0.25, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.4583333333333333, + "middle_school_biology": 0.6923076923076923, + "middle_school_physics": 0.375, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.36666666666666664, + "business_administration": 0.4473684210526316, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.5172413793103449, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.673469387755102, + "high_school_politics": 0.5, + "high_school_geography": 0.5416666666666666, + "middle_school_politics": 0.6538461538461539, + "middle_school_geography": 0.5882352941176471, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.37037037037037035, + "law": 0.4827586206896552, + "chinese_language_and_literature": 0.32142857142857145, + "art_studies": 0.47368421052631576, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.48, + "middle_school_history": 0.4074074074074074, + "civil_servant": 0.36538461538461536, + "sports_science": 0.2916666666666667, + "plant_protection": 0.5555555555555556, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.43137254901960786, + "accountant": 0.3148148148148148, + "fire_engineer": 0.2222222222222222, + "environmental_impact_assessment_engineer": 0.5277777777777778, + "tax_accountant": 0.37037037037037035, + "physician": 0.4074074074074074 + } + }, + "prompt_4": { + "accuracy": 0.40597758405977585, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.5833333333333334, + "computer_architecture": 0.38461538461538464, + "college_programming": 0.5714285714285714, + "college_physics": 0.25, + "college_chemistry": 0.27586206896551724, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.42857142857142855, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.6923076923076923, + "middle_school_physics": 0.4583333333333333, + "middle_school_chemistry": 0.48, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.3, + "business_administration": 0.39473684210526316, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.5, + "teacher_qualification": 0.5918367346938775, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.6153846153846154, + "middle_school_geography": 0.5294117647058824, + "modern_chinese_history": 0.39285714285714285, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.4444444444444444, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.39285714285714285, + "art_studies": 0.42105263157894735, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.32142857142857145, + "high_school_chinese": 0.25, + "high_school_history": 0.56, + "middle_school_history": 0.4074074074074074, + "civil_servant": 0.36538461538461536, + "sports_science": 0.375, + "plant_protection": 0.4444444444444444, + "basic_medicine": 0.5, + "clinical_medicine": 0.5185185185185185, + "urban_and_rural_planner": 0.4117647058823529, + "accountant": 0.37037037037037035, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.4444444444444444, + "tax_accountant": 0.3888888888888889, + "physician": 0.3888888888888889 + } + }, + "prompt_5": { + "accuracy": 0.3904109589041096, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.5416666666666666, + "computer_architecture": 0.3076923076923077, + "college_programming": 0.5238095238095238, + "college_physics": 0.25, + "college_chemistry": 0.20689655172413793, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.6206896551724138, + "high_school_mathematics": 0.43478260869565216, + "high_school_physics": 0.16666666666666666, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.5769230769230769, + "middle_school_physics": 0.4583333333333333, + "middle_school_chemistry": 0.44, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.35, + "business_administration": 0.34210526315789475, + "marxism": 0.4166666666666667, + "mao_zedong_thought": 0.41379310344827586, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.6530612244897959, + "high_school_politics": 0.5, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.6538461538461539, + "middle_school_geography": 0.17647058823529413, + "modern_chinese_history": 0.2857142857142857, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.5555555555555556, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.3684210526315789, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.25, + "high_school_chinese": 0.3333333333333333, + "high_school_history": 0.52, + "middle_school_history": 0.48148148148148145, + "civil_servant": 0.3269230769230769, + "sports_science": 0.375, + "plant_protection": 0.5185185185185185, + "basic_medicine": 0.375, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.37254901960784315, + "accountant": 0.3333333333333333, + "fire_engineer": 0.2777777777777778, + "environmental_impact_assessment_engineer": 0.4444444444444444, + "tax_accountant": 0.24074074074074073, + "physician": 0.3888888888888889 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4551971326164875 + }, + "prompt_2": { + "accuracy": 0.4767025089605735 + }, + "prompt_3": { + "accuracy": 0.4910394265232975 + }, + "prompt_4": { + "accuracy": 0.44086021505376344 + }, + "prompt_5": { + "accuracy": 0.46236559139784944 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4194439647729235, + "category_acc": { + "agronomy": 0.4437869822485207, + "anatomy": 0.30405405405405406, + "ancient_chinese": 0.27439024390243905, + "arts": 0.46875, + "astronomy": 0.2606060606060606, + "business_ethics": 0.5023923444976076, + "chinese_civil_service_exam": 0.35625, + "chinese_driving_rule": 0.5801526717557252, + "chinese_food_culture": 0.34558823529411764, + "chinese_foreign_policy": 0.3925233644859813, + "chinese_history": 0.4458204334365325, + "chinese_literature": 0.3235294117647059, + "chinese_teacher_qualification": 0.49162011173184356, + "clinical_knowledge": 0.29535864978902954, + "college_actuarial_science": 0.19811320754716982, + "college_education": 0.40186915887850466, + "college_engineering_hydrology": 0.46226415094339623, + "college_law": 0.32407407407407407, + "college_mathematics": 0.2, + "college_medical_statistics": 0.4339622641509434, + "college_medicine": 0.326007326007326, + "computer_science": 0.49019607843137253, + "computer_security": 0.49707602339181284, + "conceptual_physics": 0.4013605442176871, + "construction_project_management": 0.4244604316546763, + "economics": 0.5408805031446541, + "education": 0.49693251533742333, + "electrical_engineering": 0.45930232558139533, + "elementary_chinese": 0.28174603174603174, + "elementary_commonsense": 0.4595959595959596, + "elementary_information_and_technology": 0.6890756302521008, + "elementary_mathematics": 0.33043478260869563, + "ethnology": 0.3925925925925926, + "food_science": 0.46153846153846156, + "genetics": 0.3977272727272727, + "global_facts": 0.4899328859060403, + "high_school_biology": 0.34911242603550297, + "high_school_chemistry": 0.2878787878787879, + "high_school_geography": 0.3305084745762712, + "high_school_mathematics": 0.3048780487804878, + "high_school_physics": 0.32727272727272727, + "high_school_politics": 0.4125874125874126, + "human_sexuality": 0.5317460317460317, + "international_law": 0.4, + "journalism": 0.5116279069767442, + "jurisprudence": 0.41849148418491483, + "legal_and_moral_basis": 0.6915887850467289, + "logical": 0.4065040650406504, + "machine_learning": 0.36885245901639346, + "management": 0.5285714285714286, + "marketing": 0.5888888888888889, + "marxist_theory": 0.49206349206349204, + "modern_chinese": 0.25862068965517243, + "nutrition": 0.47586206896551725, + "philosophy": 0.3619047619047619, + "professional_accounting": 0.46285714285714286, + "professional_law": 0.33649289099526064, + "professional_medicine": 0.2925531914893617, + "professional_psychology": 0.4224137931034483, + "public_relations": 0.47701149425287354, + "security_study": 0.4888888888888889, + "sociology": 0.49557522123893805, + "sports_science": 0.49696969696969695, + "traditional_chinese_medicine": 0.2972972972972973, + "virology": 0.46153846153846156, + "world_history": 0.4720496894409938, + "world_religions": 0.49375 + } + }, + "prompt_2": { + "accuracy": 0.42298394059747885, + "category_acc": { + "agronomy": 0.5029585798816568, + "anatomy": 0.2972972972972973, + "ancient_chinese": 0.25609756097560976, + "arts": 0.45625, + "astronomy": 0.36363636363636365, + "business_ethics": 0.5167464114832536, + "chinese_civil_service_exam": 0.3625, + "chinese_driving_rule": 0.5725190839694656, + "chinese_food_culture": 0.34558823529411764, + "chinese_foreign_policy": 0.37383177570093457, + "chinese_history": 0.4241486068111455, + "chinese_literature": 0.3137254901960784, + "chinese_teacher_qualification": 0.4860335195530726, + "clinical_knowledge": 0.31223628691983124, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.4672897196261682, + "college_engineering_hydrology": 0.4716981132075472, + "college_law": 0.28703703703703703, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.3490566037735849, + "college_medicine": 0.34798534798534797, + "computer_science": 0.5196078431372549, + "computer_security": 0.5029239766081871, + "conceptual_physics": 0.3877551020408163, + "construction_project_management": 0.45323741007194246, + "economics": 0.5660377358490566, + "education": 0.5276073619631901, + "electrical_engineering": 0.4941860465116279, + "elementary_chinese": 0.23412698412698413, + "elementary_commonsense": 0.3939393939393939, + "elementary_information_and_technology": 0.7016806722689075, + "elementary_mathematics": 0.3782608695652174, + "ethnology": 0.4074074074074074, + "food_science": 0.4405594405594406, + "genetics": 0.4034090909090909, + "global_facts": 0.46308724832214765, + "high_school_biology": 0.33727810650887574, + "high_school_chemistry": 0.25, + "high_school_geography": 0.3813559322033898, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.3, + "high_school_politics": 0.44755244755244755, + "human_sexuality": 0.5634920634920635, + "international_law": 0.3621621621621622, + "journalism": 0.4418604651162791, + "jurisprudence": 0.4257907542579075, + "legal_and_moral_basis": 0.6822429906542056, + "logical": 0.43902439024390244, + "machine_learning": 0.4180327868852459, + "management": 0.49523809523809526, + "marketing": 0.5888888888888889, + "marxist_theory": 0.48677248677248675, + "modern_chinese": 0.21551724137931033, + "nutrition": 0.4689655172413793, + "philosophy": 0.4, + "professional_accounting": 0.49142857142857144, + "professional_law": 0.3080568720379147, + "professional_medicine": 0.31648936170212766, + "professional_psychology": 0.44396551724137934, + "public_relations": 0.5057471264367817, + "security_study": 0.5185185185185185, + "sociology": 0.48672566371681414, + "sports_science": 0.4909090909090909, + "traditional_chinese_medicine": 0.3675675675675676, + "virology": 0.4970414201183432, + "world_history": 0.4472049689440994, + "world_religions": 0.4625 + } + }, + "prompt_3": { + "accuracy": 0.42557416681056814, + "category_acc": { + "agronomy": 0.46153846153846156, + "anatomy": 0.3108108108108108, + "ancient_chinese": 0.23780487804878048, + "arts": 0.425, + "astronomy": 0.3333333333333333, + "business_ethics": 0.49282296650717705, + "chinese_civil_service_exam": 0.39375, + "chinese_driving_rule": 0.5801526717557252, + "chinese_food_culture": 0.2867647058823529, + "chinese_foreign_policy": 0.4392523364485981, + "chinese_history": 0.42105263157894735, + "chinese_literature": 0.3382352941176471, + "chinese_teacher_qualification": 0.5195530726256983, + "clinical_knowledge": 0.32489451476793246, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.411214953271028, + "college_engineering_hydrology": 0.5, + "college_law": 0.3055555555555556, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.3018867924528302, + "college_medicine": 0.32967032967032966, + "computer_science": 0.5147058823529411, + "computer_security": 0.5263157894736842, + "conceptual_physics": 0.3673469387755102, + "construction_project_management": 0.4316546762589928, + "economics": 0.5723270440251572, + "education": 0.5030674846625767, + "electrical_engineering": 0.45930232558139533, + "elementary_chinese": 0.2698412698412698, + "elementary_commonsense": 0.4494949494949495, + "elementary_information_and_technology": 0.6680672268907563, + "elementary_mathematics": 0.3565217391304348, + "ethnology": 0.45185185185185184, + "food_science": 0.4405594405594406, + "genetics": 0.4034090909090909, + "global_facts": 0.5234899328859061, + "high_school_biology": 0.3431952662721893, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.3559322033898305, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.3, + "high_school_politics": 0.40559440559440557, + "human_sexuality": 0.5317460317460317, + "international_law": 0.3783783783783784, + "journalism": 0.5116279069767442, + "jurisprudence": 0.44038929440389296, + "legal_and_moral_basis": 0.6635514018691588, + "logical": 0.43902439024390244, + "machine_learning": 0.39344262295081966, + "management": 0.5, + "marketing": 0.5944444444444444, + "marxist_theory": 0.5079365079365079, + "modern_chinese": 0.2672413793103448, + "nutrition": 0.4689655172413793, + "philosophy": 0.4666666666666667, + "professional_accounting": 0.49142857142857144, + "professional_law": 0.3412322274881517, + "professional_medicine": 0.300531914893617, + "professional_psychology": 0.46551724137931033, + "public_relations": 0.5172413793103449, + "security_study": 0.4962962962962963, + "sociology": 0.5132743362831859, + "sports_science": 0.45454545454545453, + "traditional_chinese_medicine": 0.35135135135135137, + "virology": 0.44970414201183434, + "world_history": 0.484472049689441, + "world_religions": 0.525 + } + }, + "prompt_4": { + "accuracy": 0.41754446554999136, + "category_acc": { + "agronomy": 0.40236686390532544, + "anatomy": 0.2972972972972973, + "ancient_chinese": 0.27439024390243905, + "arts": 0.45625, + "astronomy": 0.28484848484848485, + "business_ethics": 0.47368421052631576, + "chinese_civil_service_exam": 0.3625, + "chinese_driving_rule": 0.5572519083969466, + "chinese_food_culture": 0.375, + "chinese_foreign_policy": 0.411214953271028, + "chinese_history": 0.4241486068111455, + "chinese_literature": 0.3480392156862745, + "chinese_teacher_qualification": 0.5027932960893855, + "clinical_knowledge": 0.270042194092827, + "college_actuarial_science": 0.29245283018867924, + "college_education": 0.45794392523364486, + "college_engineering_hydrology": 0.4528301886792453, + "college_law": 0.35185185185185186, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.37735849056603776, + "college_medicine": 0.3333333333333333, + "computer_science": 0.5, + "computer_security": 0.5146198830409356, + "conceptual_physics": 0.35374149659863946, + "construction_project_management": 0.48201438848920863, + "economics": 0.5220125786163522, + "education": 0.4723926380368098, + "electrical_engineering": 0.4011627906976744, + "elementary_chinese": 0.2619047619047619, + "elementary_commonsense": 0.3939393939393939, + "elementary_information_and_technology": 0.6722689075630253, + "elementary_mathematics": 0.34347826086956523, + "ethnology": 0.37777777777777777, + "food_science": 0.43356643356643354, + "genetics": 0.44886363636363635, + "global_facts": 0.46308724832214765, + "high_school_biology": 0.3136094674556213, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.3644067796610169, + "high_school_mathematics": 0.3048780487804878, + "high_school_physics": 0.32727272727272727, + "high_school_politics": 0.43356643356643354, + "human_sexuality": 0.5555555555555556, + "international_law": 0.3783783783783784, + "journalism": 0.5, + "jurisprudence": 0.45498783454987834, + "legal_and_moral_basis": 0.7102803738317757, + "logical": 0.4065040650406504, + "machine_learning": 0.4262295081967213, + "management": 0.5333333333333333, + "marketing": 0.5888888888888889, + "marxist_theory": 0.4444444444444444, + "modern_chinese": 0.21551724137931033, + "nutrition": 0.503448275862069, + "philosophy": 0.34285714285714286, + "professional_accounting": 0.44571428571428573, + "professional_law": 0.3459715639810427, + "professional_medicine": 0.31648936170212766, + "professional_psychology": 0.4051724137931034, + "public_relations": 0.5, + "security_study": 0.43703703703703706, + "sociology": 0.4646017699115044, + "sports_science": 0.47878787878787876, + "traditional_chinese_medicine": 0.3027027027027027, + "virology": 0.4911242603550296, + "world_history": 0.4658385093167702, + "world_religions": 0.49375 + } + }, + "prompt_5": { + "accuracy": 0.41996201001554134, + "category_acc": { + "agronomy": 0.44970414201183434, + "anatomy": 0.2972972972972973, + "ancient_chinese": 0.23780487804878048, + "arts": 0.4625, + "astronomy": 0.2787878787878788, + "business_ethics": 0.5311004784688995, + "chinese_civil_service_exam": 0.33125, + "chinese_driving_rule": 0.6030534351145038, + "chinese_food_culture": 0.34558823529411764, + "chinese_foreign_policy": 0.411214953271028, + "chinese_history": 0.4674922600619195, + "chinese_literature": 0.3333333333333333, + "chinese_teacher_qualification": 0.4972067039106145, + "clinical_knowledge": 0.3459915611814346, + "college_actuarial_science": 0.20754716981132076, + "college_education": 0.4485981308411215, + "college_engineering_hydrology": 0.4811320754716981, + "college_law": 0.35185185185185186, + "college_mathematics": 0.3523809523809524, + "college_medical_statistics": 0.3490566037735849, + "college_medicine": 0.32967032967032966, + "computer_science": 0.47549019607843135, + "computer_security": 0.5087719298245614, + "conceptual_physics": 0.38095238095238093, + "construction_project_management": 0.41007194244604317, + "economics": 0.5283018867924528, + "education": 0.5030674846625767, + "electrical_engineering": 0.46511627906976744, + "elementary_chinese": 0.25793650793650796, + "elementary_commonsense": 0.41414141414141414, + "elementary_information_and_technology": 0.7016806722689075, + "elementary_mathematics": 0.34782608695652173, + "ethnology": 0.3925925925925926, + "food_science": 0.45454545454545453, + "genetics": 0.42613636363636365, + "global_facts": 0.4697986577181208, + "high_school_biology": 0.34911242603550297, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.3474576271186441, + "high_school_mathematics": 0.20121951219512196, + "high_school_physics": 0.32727272727272727, + "high_school_politics": 0.4125874125874126, + "human_sexuality": 0.5158730158730159, + "international_law": 0.3837837837837838, + "journalism": 0.4883720930232558, + "jurisprudence": 0.40389294403892945, + "legal_and_moral_basis": 0.6962616822429907, + "logical": 0.3902439024390244, + "machine_learning": 0.38524590163934425, + "management": 0.5333333333333333, + "marketing": 0.5888888888888889, + "marxist_theory": 0.47619047619047616, + "modern_chinese": 0.25862068965517243, + "nutrition": 0.496551724137931, + "philosophy": 0.37142857142857144, + "professional_accounting": 0.4685714285714286, + "professional_law": 0.3033175355450237, + "professional_medicine": 0.27925531914893614, + "professional_psychology": 0.47413793103448276, + "public_relations": 0.47701149425287354, + "security_study": 0.4888888888888889, + "sociology": 0.5132743362831859, + "sports_science": 0.46060606060606063, + "traditional_chinese_medicine": 0.3081081081081081, + "virology": 0.4556213017751479, + "world_history": 0.4968944099378882, + "world_religions": 0.49375 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.48484848484848486 + }, + "prompt_2": { + "accuracy": 0.42424242424242425 + }, + "prompt_3": { + "accuracy": 0.42424242424242425 + }, + "prompt_4": { + "accuracy": 0.48484848484848486 + }, + "prompt_5": { + "accuracy": 0.42424242424242425 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6272727272727273 + }, + "prompt_2": { + "accuracy": 0.6477272727272727 + }, + "prompt_3": { + "accuracy": 0.65 + }, + "prompt_4": { + "accuracy": 0.5863636363636363 + }, + "prompt_5": { + "accuracy": 0.6204545454545455 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5169491525423728 + }, + "prompt_2": { + "accuracy": 0.4901694915254237 + }, + "prompt_3": { + "accuracy": 0.4976271186440678 + }, + "prompt_4": { + "accuracy": 0.5271186440677966 + }, + "prompt_5": { + "accuracy": 0.543728813559322 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7004487658937921 + }, + "prompt_2": { + "accuracy": 0.6967090501121914 + }, + "prompt_3": { + "accuracy": 0.6824981301421092 + }, + "prompt_4": { + "accuracy": 0.6985789080029918 + }, + "prompt_5": { + "accuracy": 0.6989528795811518 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8309652131308183 + }, + "prompt_2": { + "accuracy": 0.8358647721705047 + }, + "prompt_3": { + "accuracy": 0.8476237138657521 + }, + "prompt_4": { + "accuracy": 0.8241058304752572 + }, + "prompt_5": { + "accuracy": 0.8515433610975012 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.3455937638723135, + "rouge2": 0.1367141527415547, + "rougeL": 0.26613206428097186, + "avg_rouge": 0.24947999363161336 + }, + "prompt_2": { + "rouge1": 0.3667400429531901, + "rouge2": 0.14410179756159538, + "rougeL": 0.282366600688299, + "avg_rouge": 0.26440281373436153 + }, + "prompt_3": { + "rouge1": 0.35446893551068426, + "rouge2": 0.13568480092306537, + "rougeL": 0.27211522434961444, + "avg_rouge": 0.2540896535944547 + }, + "prompt_4": { + "rouge1": 0.36007589706546567, + "rouge2": 0.13933483937420435, + "rougeL": 0.27681507851527654, + "avg_rouge": 0.25874193831831555 + }, + "prompt_5": { + "rouge1": 0.37135420990671286, + "rouge2": 0.14284337070537023, + "rougeL": 0.2875633854119042, + "avg_rouge": 0.2672536553413291 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.23034539485268415, + "rouge2": 0.06202021750119662, + "rougeL": 0.16944752768058982, + "avg_rouge": 0.15393771334482353 + }, + "prompt_2": { + "rouge1": 0.2288671664941608, + "rouge2": 0.06134916054981929, + "rougeL": 0.16823698009150542, + "avg_rouge": 0.15281776904516184 + }, + "prompt_3": { + "rouge1": 0.22901254893773276, + "rouge2": 0.06121842982381441, + "rougeL": 0.1684449641848823, + "avg_rouge": 0.15289198098214316 + }, + "prompt_4": { + "rouge1": 0.23004222056916593, + "rouge2": 0.06172121115761395, + "rougeL": 0.16932696202140132, + "avg_rouge": 0.1536967979160604 + }, + "prompt_5": { + "rouge1": 0.2267561130964018, + "rouge2": 0.0586519572314283, + "rougeL": 0.1687863988309379, + "avg_rouge": 0.151398156386256 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.9380733944954128 + }, + "prompt_2": { + "accuracy": 0.9357798165137615 + }, + "prompt_3": { + "accuracy": 0.9438073394495413 + }, + "prompt_4": { + "accuracy": 0.9323394495412844 + }, + "prompt_5": { + "accuracy": 0.9036697247706422 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7612655800575263 + }, + "prompt_2": { + "accuracy": 0.75071907957814 + }, + "prompt_3": { + "accuracy": 0.7833173537871524 + }, + "prompt_4": { + "accuracy": 0.8024928092042186 + }, + "prompt_5": { + "accuracy": 0.7737296260786194 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8065 + }, + "prompt_2": { + "accuracy": 0.7915 + }, + "prompt_3": { + "accuracy": 0.789 + }, + "prompt_4": { + "accuracy": 0.78 + }, + "prompt_5": { + "accuracy": 0.7845 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.695 + }, + "prompt_2": { + "accuracy": 0.696 + }, + "prompt_3": { + "accuracy": 0.6985 + }, + "prompt_4": { + "accuracy": 0.6825 + }, + "prompt_5": { + "accuracy": 0.7045 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8665 + }, + "prompt_2": { + "accuracy": 0.874 + }, + "prompt_3": { + "accuracy": 0.8715 + }, + "prompt_4": { + "accuracy": 0.8725 + }, + "prompt_5": { + "accuracy": 0.8625 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7183098591549296 + }, + "prompt_2": { + "accuracy": 0.7323943661971831 + }, + "prompt_3": { + "accuracy": 0.7464788732394366 + }, + "prompt_4": { + "accuracy": 0.8028169014084507 + }, + "prompt_5": { + "accuracy": 0.7323943661971831 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7833935018050542 + }, + "prompt_2": { + "accuracy": 0.7472924187725631 + }, + "prompt_3": { + "accuracy": 0.8231046931407943 + }, + "prompt_4": { + "accuracy": 0.7725631768953068 + }, + "prompt_5": { + "accuracy": 0.776173285198556 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7647058823529411 + }, + "prompt_2": { + "accuracy": 0.7941176470588235 + }, + "prompt_3": { + "accuracy": 0.7573529411764706 + }, + "prompt_4": { + "accuracy": 0.7916666666666666 + }, + "prompt_5": { + "accuracy": 0.8063725490196079 + } } }, "five_shot": { @@ -14876,235 +131738,3250 @@ "model_link": "https://huggingface.co/Qwen/Qwen1.5-7B", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.5352380952380952, + "language_acc": { + "Vietnamese": 0.5133333333333333, + "Malay": 0.4666666666666667, + "Filipino": 0.41333333333333333, + "Indonesian": 0.52, + "Chinese": 0.6, + "Spanish": 0.5866666666666667, + "English": 0.6466666666666666 + }, + "consistency_score_2": 0.5746031746031748, + "consistency_score_3": 0.40933333333333327, + "consistency_score_4": 0.31561904761904763, + "consistency_score_5": 0.2549206349206349, + "consistency_score_6": 0.21238095238095236, + "consistency_score_7": 0.18, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.6133333333333333, + "Vietnamese,Filipino": 0.5133333333333333, + "Vietnamese,Indonesian": 0.6266666666666667, + "Vietnamese,Chinese": 0.54, + "Vietnamese,Spanish": 0.58, + "Vietnamese,English": 0.62, + "Malay,Filipino": 0.5533333333333333, + "Malay,Indonesian": 0.62, + "Malay,Chinese": 0.5, + "Malay,Spanish": 0.5266666666666666, + "Malay,English": 0.5466666666666666, + "Filipino,Indonesian": 0.5866666666666667, + "Filipino,Chinese": 0.5266666666666666, + "Filipino,Spanish": 0.44666666666666666, + "Filipino,English": 0.52, + "Indonesian,Chinese": 0.6133333333333333, + "Indonesian,Spanish": 0.5733333333333334, + "Indonesian,English": 0.64, + "Chinese,Spanish": 0.64, + "Chinese,English": 0.64, + "Spanish,English": 0.64 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.4, + "Vietnamese,Malay,Indonesian": 0.4866666666666667, + "Vietnamese,Malay,Chinese": 0.38666666666666666, + "Vietnamese,Malay,Spanish": 0.4, + "Vietnamese,Malay,English": 0.43333333333333335, + "Vietnamese,Filipino,Indonesian": 0.42, + "Vietnamese,Filipino,Chinese": 0.36666666666666664, + "Vietnamese,Filipino,Spanish": 0.32666666666666666, + "Vietnamese,Filipino,English": 0.38666666666666666, + "Vietnamese,Indonesian,Chinese": 0.4266666666666667, + "Vietnamese,Indonesian,Spanish": 0.43333333333333335, + "Vietnamese,Indonesian,English": 0.47333333333333333, + "Vietnamese,Chinese,Spanish": 0.41333333333333333, + "Vietnamese,Chinese,English": 0.42, + "Vietnamese,Spanish,English": 0.44666666666666666, + "Malay,Filipino,Indonesian": 0.43333333333333335, + "Malay,Filipino,Chinese": 0.36, + "Malay,Filipino,Spanish": 0.32, + "Malay,Filipino,English": 0.36666666666666664, + "Malay,Indonesian,Chinese": 0.43333333333333335, + "Malay,Indonesian,Spanish": 0.43333333333333335, + "Malay,Indonesian,English": 0.4533333333333333, + "Malay,Chinese,Spanish": 0.36666666666666664, + "Malay,Chinese,English": 0.38, + "Malay,Spanish,English": 0.3933333333333333, + "Filipino,Indonesian,Chinese": 0.4266666666666667, + "Filipino,Indonesian,Spanish": 0.35333333333333333, + "Filipino,Indonesian,English": 0.4266666666666667, + "Filipino,Chinese,Spanish": 0.36, + "Filipino,Chinese,English": 0.3933333333333333, + "Filipino,Spanish,English": 0.3333333333333333, + "Indonesian,Chinese,Spanish": 0.4533333333333333, + "Indonesian,Chinese,English": 0.47333333333333333, + "Indonesian,Spanish,English": 0.4533333333333333, + "Chinese,Spanish,English": 0.49333333333333335 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.35333333333333333, + "Vietnamese,Malay,Filipino,Chinese": 0.3, + "Vietnamese,Malay,Filipino,Spanish": 0.26666666666666666, + "Vietnamese,Malay,Filipino,English": 0.3, + "Vietnamese,Malay,Indonesian,Chinese": 0.35333333333333333, + "Vietnamese,Malay,Indonesian,Spanish": 0.36, + "Vietnamese,Malay,Indonesian,English": 0.37333333333333335, + "Vietnamese,Malay,Chinese,Spanish": 0.28, + "Vietnamese,Malay,Chinese,English": 0.2866666666666667, + "Vietnamese,Malay,Spanish,English": 0.30666666666666664, + "Vietnamese,Filipino,Indonesian,Chinese": 0.30666666666666664, + "Vietnamese,Filipino,Indonesian,Spanish": 0.28, + "Vietnamese,Filipino,Indonesian,English": 0.3333333333333333, + "Vietnamese,Filipino,Chinese,Spanish": 0.28, + "Vietnamese,Filipino,Chinese,English": 0.29333333333333333, + "Vietnamese,Filipino,Spanish,English": 0.2733333333333333, + "Vietnamese,Indonesian,Chinese,Spanish": 0.3333333333333333, + "Vietnamese,Indonesian,Chinese,English": 0.3466666666666667, + "Vietnamese,Indonesian,Spanish,English": 0.36, + "Vietnamese,Chinese,Spanish,English": 0.35333333333333333, + "Malay,Filipino,Indonesian,Chinese": 0.32666666666666666, + "Malay,Filipino,Indonesian,Spanish": 0.3, + "Malay,Filipino,Indonesian,English": 0.34, + "Malay,Filipino,Chinese,Spanish": 0.26, + "Malay,Filipino,Chinese,English": 0.29333333333333333, + "Malay,Filipino,Spanish,English": 0.25333333333333335, + "Malay,Indonesian,Chinese,Spanish": 0.34, + "Malay,Indonesian,Chinese,English": 0.35333333333333333, + "Malay,Indonesian,Spanish,English": 0.35333333333333333, + "Malay,Chinese,Spanish,English": 0.3, + "Filipino,Indonesian,Chinese,Spanish": 0.30666666666666664, + "Filipino,Indonesian,Chinese,English": 0.3333333333333333, + "Filipino,Indonesian,Spanish,English": 0.2866666666666667, + "Filipino,Chinese,Spanish,English": 0.29333333333333333, + "Indonesian,Chinese,Spanish,English": 0.36666666666666664 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.2733333333333333, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.25333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.28, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.22, + "Vietnamese,Malay,Filipino,Chinese,English": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Spanish,English": 0.21333333333333335, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.2733333333333333, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.28, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.29333333333333333, + "Vietnamese,Malay,Chinese,Spanish,English": 0.22666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.24, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.25333333333333335, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.24, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.24, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.2866666666666667, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.25333333333333335, + "Malay,Filipino,Indonesian,Chinese,English": 0.28, + "Malay,Filipino,Indonesian,Spanish,English": 0.25333333333333335, + "Malay,Filipino,Chinese,Spanish,English": 0.22, + "Malay,Indonesian,Chinese,Spanish,English": 0.2866666666666667, + "Filipino,Indonesian,Chinese,Spanish,English": 0.25333333333333335 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.21333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.22666666666666666, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.21333333333333335, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.18, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.22666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.20666666666666667, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.22 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.18 + } + }, + "AC3_2": 0.5542225127538114, + "AC3_3": 0.46389460235728747, + "AC3_4": 0.3970850788920032, + "AC3_5": 0.3453565210299693, + "AC3_6": 0.3040970578907774, + "AC3_7": 0.26940079889708773 + }, + "prompt_2": { + "overall_acc": 0.5295238095238095, + "language_acc": { + "Vietnamese": 0.47333333333333333, + "Malay": 0.48, + "Filipino": 0.38666666666666666, + "Indonesian": 0.52, + "Chinese": 0.6266666666666667, + "Spanish": 0.5533333333333333, + "English": 0.6666666666666666 + }, + "consistency_score_2": 0.507936507936508, + "consistency_score_3": 0.325142857142857, + "consistency_score_4": 0.2323809523809524, + "consistency_score_5": 0.18031746031746035, + "consistency_score_6": 0.15238095238095234, + "consistency_score_7": 0.14, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.5, + "Vietnamese,Filipino": 0.3933333333333333, + "Vietnamese,Indonesian": 0.5466666666666666, + "Vietnamese,Chinese": 0.52, + "Vietnamese,Spanish": 0.54, + "Vietnamese,English": 0.5333333333333333, + "Malay,Filipino": 0.4, + "Malay,Indonesian": 0.6066666666666667, + "Malay,Chinese": 0.5, + "Malay,Spanish": 0.48, + "Malay,English": 0.4666666666666667, + "Filipino,Indonesian": 0.36666666666666664, + "Filipino,Chinese": 0.43333333333333335, + "Filipino,Spanish": 0.38666666666666666, + "Filipino,English": 0.46, + "Indonesian,Chinese": 0.56, + "Indonesian,Spanish": 0.5733333333333334, + "Indonesian,English": 0.5466666666666666, + "Chinese,Spanish": 0.5866666666666667, + "Chinese,English": 0.64, + "Spanish,English": 0.6266666666666667 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.24666666666666667, + "Vietnamese,Malay,Indonesian": 0.37333333333333335, + "Vietnamese,Malay,Chinese": 0.31333333333333335, + "Vietnamese,Malay,Spanish": 0.31333333333333335, + "Vietnamese,Malay,English": 0.32, + "Vietnamese,Filipino,Indonesian": 0.23333333333333334, + "Vietnamese,Filipino,Chinese": 0.26666666666666666, + "Vietnamese,Filipino,Spanish": 0.25333333333333335, + "Vietnamese,Filipino,English": 0.29333333333333333, + "Vietnamese,Indonesian,Chinese": 0.36666666666666664, + "Vietnamese,Indonesian,Spanish": 0.38, + "Vietnamese,Indonesian,English": 0.36, + "Vietnamese,Chinese,Spanish": 0.36666666666666664, + "Vietnamese,Chinese,English": 0.3933333333333333, + "Vietnamese,Spanish,English": 0.38666666666666666, + "Malay,Filipino,Indonesian": 0.26666666666666666, + "Malay,Filipino,Chinese": 0.26, + "Malay,Filipino,Spanish": 0.22, + "Malay,Filipino,English": 0.24, + "Malay,Indonesian,Chinese": 0.38, + "Malay,Indonesian,Spanish": 0.38, + "Malay,Indonesian,English": 0.37333333333333335, + "Malay,Chinese,Spanish": 0.3333333333333333, + "Malay,Chinese,English": 0.36666666666666664, + "Malay,Spanish,English": 0.32666666666666666, + "Filipino,Indonesian,Chinese": 0.24666666666666667, + "Filipino,Indonesian,Spanish": 0.24666666666666667, + "Filipino,Indonesian,English": 0.24666666666666667, + "Filipino,Chinese,Spanish": 0.28, + "Filipino,Chinese,English": 0.34, + "Filipino,Spanish,English": 0.29333333333333333, + "Indonesian,Chinese,Spanish": 0.41333333333333333, + "Indonesian,Chinese,English": 0.42, + "Indonesian,Spanish,English": 0.4066666666666667, + "Chinese,Spanish,English": 0.47333333333333333 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.19333333333333333, + "Vietnamese,Malay,Filipino,Chinese": 0.19333333333333333, + "Vietnamese,Malay,Filipino,Spanish": 0.16666666666666666, + "Vietnamese,Malay,Filipino,English": 0.19333333333333333, + "Vietnamese,Malay,Indonesian,Chinese": 0.26666666666666666, + "Vietnamese,Malay,Indonesian,Spanish": 0.26666666666666666, + "Vietnamese,Malay,Indonesian,English": 0.26, + "Vietnamese,Malay,Chinese,Spanish": 0.24, + "Vietnamese,Malay,Chinese,English": 0.24666666666666667, + "Vietnamese,Malay,Spanish,English": 0.24, + "Vietnamese,Filipino,Indonesian,Chinese": 0.18, + "Vietnamese,Filipino,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,Filipino,Indonesian,English": 0.19333333333333333, + "Vietnamese,Filipino,Chinese,Spanish": 0.2, + "Vietnamese,Filipino,Chinese,English": 0.24, + "Vietnamese,Filipino,Spanish,English": 0.22, + "Vietnamese,Indonesian,Chinese,Spanish": 0.29333333333333333, + "Vietnamese,Indonesian,Chinese,English": 0.2866666666666667, + "Vietnamese,Indonesian,Spanish,English": 0.28, + "Vietnamese,Chinese,Spanish,English": 0.3, + "Malay,Filipino,Indonesian,Chinese": 0.2, + "Malay,Filipino,Indonesian,Spanish": 0.2, + "Malay,Filipino,Indonesian,English": 0.19333333333333333, + "Malay,Filipino,Chinese,Spanish": 0.18, + "Malay,Filipino,Chinese,English": 0.22, + "Malay,Filipino,Spanish,English": 0.17333333333333334, + "Malay,Indonesian,Chinese,Spanish": 0.3, + "Malay,Indonesian,Chinese,English": 0.3, + "Malay,Indonesian,Spanish,English": 0.28, + "Malay,Chinese,Spanish,English": 0.28, + "Filipino,Indonesian,Chinese,Spanish": 0.18666666666666668, + "Filipino,Indonesian,Chinese,English": 0.21333333333333335, + "Filipino,Indonesian,Spanish,English": 0.18666666666666668, + "Filipino,Chinese,Spanish,English": 0.24, + "Indonesian,Chinese,Spanish,English": 0.3333333333333333 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.16, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.16, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.16666666666666666, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.15333333333333332, + "Vietnamese,Malay,Filipino,Chinese,English": 0.17333333333333334, + "Vietnamese,Malay,Filipino,Spanish,English": 0.15333333333333332, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.22, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.21333333333333335, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.20666666666666667, + "Vietnamese,Malay,Chinese,Spanish,English": 0.2, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.15333333333333332, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.16666666666666666, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.16, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.18666666666666668, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.23333333333333334, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.16666666666666666, + "Malay,Filipino,Indonesian,Chinese,English": 0.18, + "Malay,Filipino,Indonesian,Spanish,English": 0.16, + "Malay,Filipino,Chinese,Spanish,English": 0.16666666666666666, + "Malay,Indonesian,Chinese,Spanish,English": 0.24666666666666667, + "Filipino,Indonesian,Chinese,Spanish,English": 0.16 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.14666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.15333333333333332, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.14666666666666667, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.14666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.18, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.14, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.15333333333333332 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.14 + } + }, + "AC3_2": 0.5185055661912061, + "AC3_3": 0.4028959851800234, + "AC3_4": 0.3230095237671289, + "AC3_5": 0.26902461875416145, + "AC3_6": 0.23665868578599142, + "AC3_7": 0.22145092457574347 + }, + "prompt_3": { + "overall_acc": 0.5019047619047619, + "language_acc": { + "Vietnamese": 0.47333333333333333, + "Malay": 0.42, + "Filipino": 0.36666666666666664, + "Indonesian": 0.48, + "Chinese": 0.58, + "Spanish": 0.52, + "English": 0.6733333333333333 + }, + "consistency_score_2": 0.46126984126984133, + "consistency_score_3": 0.2733333333333333, + "consistency_score_4": 0.1857142857142857, + "consistency_score_5": 0.13555555555555557, + "consistency_score_6": 0.10095238095238097, + "consistency_score_7": 0.07333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.41333333333333333, + "Vietnamese,Filipino": 0.4533333333333333, + "Vietnamese,Indonesian": 0.5333333333333333, + "Vietnamese,Chinese": 0.4533333333333333, + "Vietnamese,Spanish": 0.4666666666666667, + "Vietnamese,English": 0.4533333333333333, + "Malay,Filipino": 0.38666666666666666, + "Malay,Indonesian": 0.54, + "Malay,Chinese": 0.4266666666666667, + "Malay,Spanish": 0.44666666666666666, + "Malay,English": 0.4, + "Filipino,Indonesian": 0.4266666666666667, + "Filipino,Chinese": 0.42, + "Filipino,Spanish": 0.4266666666666667, + "Filipino,English": 0.4, + "Indonesian,Chinese": 0.46, + "Indonesian,Spanish": 0.5466666666666666, + "Indonesian,English": 0.5266666666666666, + "Chinese,Spanish": 0.4533333333333333, + "Chinese,English": 0.5133333333333333, + "Spanish,English": 0.54 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.22, + "Vietnamese,Malay,Indonesian": 0.31333333333333335, + "Vietnamese,Malay,Chinese": 0.26, + "Vietnamese,Malay,Spanish": 0.28, + "Vietnamese,Malay,English": 0.24, + "Vietnamese,Filipino,Indonesian": 0.2866666666666667, + "Vietnamese,Filipino,Chinese": 0.25333333333333335, + "Vietnamese,Filipino,Spanish": 0.26, + "Vietnamese,Filipino,English": 0.25333333333333335, + "Vietnamese,Indonesian,Chinese": 0.3, + "Vietnamese,Indonesian,Spanish": 0.3333333333333333, + "Vietnamese,Indonesian,English": 0.3333333333333333, + "Vietnamese,Chinese,Spanish": 0.26666666666666666, + "Vietnamese,Chinese,English": 0.2866666666666667, + "Vietnamese,Spanish,English": 0.31333333333333335, + "Malay,Filipino,Indonesian": 0.24, + "Malay,Filipino,Chinese": 0.22, + "Malay,Filipino,Spanish": 0.22666666666666666, + "Malay,Filipino,English": 0.19333333333333333, + "Malay,Indonesian,Chinese": 0.2866666666666667, + "Malay,Indonesian,Spanish": 0.32, + "Malay,Indonesian,English": 0.29333333333333333, + "Malay,Chinese,Spanish": 0.25333333333333335, + "Malay,Chinese,English": 0.24666666666666667, + "Malay,Spanish,English": 0.26666666666666666, + "Filipino,Indonesian,Chinese": 0.24, + "Filipino,Indonesian,Spanish": 0.28, + "Filipino,Indonesian,English": 0.26666666666666666, + "Filipino,Chinese,Spanish": 0.23333333333333334, + "Filipino,Chinese,English": 0.24666666666666667, + "Filipino,Spanish,English": 0.24666666666666667, + "Indonesian,Chinese,Spanish": 0.30666666666666664, + "Indonesian,Chinese,English": 0.32, + "Indonesian,Spanish,English": 0.35333333333333333, + "Chinese,Spanish,English": 0.32666666666666666 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.17333333333333334, + "Vietnamese,Malay,Filipino,Chinese": 0.14, + "Vietnamese,Malay,Filipino,Spanish": 0.16, + "Vietnamese,Malay,Filipino,English": 0.14666666666666667, + "Vietnamese,Malay,Indonesian,Chinese": 0.21333333333333335, + "Vietnamese,Malay,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Indonesian,English": 0.21333333333333335, + "Vietnamese,Malay,Chinese,Spanish": 0.18, + "Vietnamese,Malay,Chinese,English": 0.17333333333333334, + "Vietnamese,Malay,Spanish,English": 0.2, + "Vietnamese,Filipino,Indonesian,Chinese": 0.17333333333333334, + "Vietnamese,Filipino,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Filipino,Indonesian,English": 0.21333333333333335, + "Vietnamese,Filipino,Chinese,Spanish": 0.16666666666666666, + "Vietnamese,Filipino,Chinese,English": 0.16, + "Vietnamese,Filipino,Spanish,English": 0.18666666666666668, + "Vietnamese,Indonesian,Chinese,Spanish": 0.21333333333333335, + "Vietnamese,Indonesian,Chinese,English": 0.22, + "Vietnamese,Indonesian,Spanish,English": 0.24666666666666667, + "Vietnamese,Chinese,Spanish,English": 0.20666666666666667, + "Malay,Filipino,Indonesian,Chinese": 0.16, + "Malay,Filipino,Indonesian,Spanish": 0.18, + "Malay,Filipino,Indonesian,English": 0.16, + "Malay,Filipino,Chinese,Spanish": 0.14, + "Malay,Filipino,Chinese,English": 0.13333333333333333, + "Malay,Filipino,Spanish,English": 0.15333333333333332, + "Malay,Indonesian,Chinese,Spanish": 0.2, + "Malay,Indonesian,Chinese,English": 0.18666666666666668, + "Malay,Indonesian,Spanish,English": 0.21333333333333335, + "Malay,Chinese,Spanish,English": 0.19333333333333333, + "Filipino,Indonesian,Chinese,Spanish": 0.18, + "Filipino,Indonesian,Chinese,English": 0.18, + "Filipino,Indonesian,Spanish,English": 0.19333333333333333, + "Filipino,Chinese,Spanish,English": 0.17333333333333334, + "Indonesian,Chinese,Spanish,English": 0.23333333333333334 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.11333333333333333, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.14, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.13333333333333333, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.09333333333333334, + "Vietnamese,Malay,Filipino,Chinese,English": 0.09333333333333334, + "Vietnamese,Malay,Filipino,Spanish,English": 0.12666666666666668, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.15333333333333332, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.15333333333333332, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.17333333333333334, + "Vietnamese,Malay,Chinese,Spanish,English": 0.14666666666666667, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.14, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.14, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.16666666666666666, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.13333333333333333, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.17333333333333334, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.12, + "Malay,Filipino,Indonesian,Chinese,English": 0.11333333333333333, + "Malay,Filipino,Indonesian,Spanish,English": 0.13333333333333333, + "Malay,Filipino,Chinese,Spanish,English": 0.10666666666666667, + "Malay,Indonesian,Chinese,Spanish,English": 0.15333333333333332, + "Filipino,Indonesian,Chinese,Spanish,English": 0.14 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.08666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.08666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.11333333333333333, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.08, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.12666666666666668, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.12, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.09333333333333334 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.07333333333333333 + } + }, + "AC3_2": 0.48073013775339757, + "AC3_3": 0.3539230138773604, + "AC3_4": 0.27111199046314455, + "AC3_5": 0.21345949531843972, + "AC3_6": 0.16809448579172134, + "AC3_7": 0.12796909490049102 + }, + "prompt_4": { + "overall_acc": 0.52, + "language_acc": { + "Vietnamese": 0.44666666666666666, + "Malay": 0.4533333333333333, + "Filipino": 0.4066666666666667, + "Indonesian": 0.47333333333333333, + "Chinese": 0.62, + "Spanish": 0.58, + "English": 0.66 + }, + "consistency_score_2": 0.5241269841269842, + "consistency_score_3": 0.3464761904761905, + "consistency_score_4": 0.24990476190476188, + "consistency_score_5": 0.18444444444444447, + "consistency_score_6": 0.13428571428571429, + "consistency_score_7": 0.09333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.5266666666666666, + "Vietnamese,Filipino": 0.41333333333333333, + "Vietnamese,Indonesian": 0.5466666666666666, + "Vietnamese,Chinese": 0.47333333333333333, + "Vietnamese,Spanish": 0.52, + "Vietnamese,English": 0.54, + "Malay,Filipino": 0.4866666666666667, + "Malay,Indonesian": 0.5933333333333334, + "Malay,Chinese": 0.5266666666666666, + "Malay,Spanish": 0.5133333333333333, + "Malay,English": 0.5133333333333333, + "Filipino,Indonesian": 0.4866666666666667, + "Filipino,Chinese": 0.48, + "Filipino,Spanish": 0.44, + "Filipino,English": 0.43333333333333335, + "Indonesian,Chinese": 0.5333333333333333, + "Indonesian,Spanish": 0.5266666666666666, + "Indonesian,English": 0.5466666666666666, + "Chinese,Spanish": 0.5866666666666667, + "Chinese,English": 0.6533333333333333, + "Spanish,English": 0.6666666666666666 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.2866666666666667, + "Vietnamese,Malay,Indonesian": 0.38, + "Vietnamese,Malay,Chinese": 0.32666666666666666, + "Vietnamese,Malay,Spanish": 0.35333333333333333, + "Vietnamese,Malay,English": 0.36666666666666664, + "Vietnamese,Filipino,Indonesian": 0.29333333333333333, + "Vietnamese,Filipino,Chinese": 0.26, + "Vietnamese,Filipino,Spanish": 0.26, + "Vietnamese,Filipino,English": 0.26666666666666666, + "Vietnamese,Indonesian,Chinese": 0.3466666666666667, + "Vietnamese,Indonesian,Spanish": 0.36, + "Vietnamese,Indonesian,English": 0.38666666666666666, + "Vietnamese,Chinese,Spanish": 0.3466666666666667, + "Vietnamese,Chinese,English": 0.36666666666666664, + "Vietnamese,Spanish,English": 0.41333333333333333, + "Malay,Filipino,Indonesian": 0.3333333333333333, + "Malay,Filipino,Chinese": 0.31333333333333335, + "Malay,Filipino,Spanish": 0.29333333333333333, + "Malay,Filipino,English": 0.29333333333333333, + "Malay,Indonesian,Chinese": 0.38666666666666666, + "Malay,Indonesian,Spanish": 0.36, + "Malay,Indonesian,English": 0.38666666666666666, + "Malay,Chinese,Spanish": 0.36666666666666664, + "Malay,Chinese,English": 0.38666666666666666, + "Malay,Spanish,English": 0.38, + "Filipino,Indonesian,Chinese": 0.3333333333333333, + "Filipino,Indonesian,Spanish": 0.3, + "Filipino,Indonesian,English": 0.30666666666666664, + "Filipino,Chinese,Spanish": 0.32666666666666666, + "Filipino,Chinese,English": 0.3333333333333333, + "Filipino,Spanish,English": 0.3333333333333333, + "Indonesian,Chinese,Spanish": 0.38666666666666666, + "Indonesian,Chinese,English": 0.4, + "Indonesian,Spanish,English": 0.4066666666666667, + "Chinese,Spanish,English": 0.4866666666666667 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.22, + "Vietnamese,Malay,Filipino,Chinese": 0.2, + "Vietnamese,Malay,Filipino,Spanish": 0.20666666666666667, + "Vietnamese,Malay,Filipino,English": 0.22, + "Vietnamese,Malay,Indonesian,Chinese": 0.26, + "Vietnamese,Malay,Indonesian,Spanish": 0.26, + "Vietnamese,Malay,Indonesian,English": 0.29333333333333333, + "Vietnamese,Malay,Chinese,Spanish": 0.25333333333333335, + "Vietnamese,Malay,Chinese,English": 0.26666666666666666, + "Vietnamese,Malay,Spanish,English": 0.28, + "Vietnamese,Filipino,Indonesian,Chinese": 0.2, + "Vietnamese,Filipino,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Filipino,Indonesian,English": 0.20666666666666667, + "Vietnamese,Filipino,Chinese,Spanish": 0.2, + "Vietnamese,Filipino,Chinese,English": 0.2, + "Vietnamese,Filipino,Spanish,English": 0.21333333333333335, + "Vietnamese,Indonesian,Chinese,Spanish": 0.2733333333333333, + "Vietnamese,Indonesian,Chinese,English": 0.2866666666666667, + "Vietnamese,Indonesian,Spanish,English": 0.29333333333333333, + "Vietnamese,Chinese,Spanish,English": 0.3, + "Malay,Filipino,Indonesian,Chinese": 0.24666666666666667, + "Malay,Filipino,Indonesian,Spanish": 0.22666666666666666, + "Malay,Filipino,Indonesian,English": 0.23333333333333334, + "Malay,Filipino,Chinese,Spanish": 0.23333333333333334, + "Malay,Filipino,Chinese,English": 0.22666666666666666, + "Malay,Filipino,Spanish,English": 0.22666666666666666, + "Malay,Indonesian,Chinese,Spanish": 0.29333333333333333, + "Malay,Indonesian,Chinese,English": 0.29333333333333333, + "Malay,Indonesian,Spanish,English": 0.2866666666666667, + "Malay,Chinese,Spanish,English": 0.30666666666666664, + "Filipino,Indonesian,Chinese,Spanish": 0.24666666666666667, + "Filipino,Indonesian,Chinese,English": 0.24666666666666667, + "Filipino,Indonesian,Spanish,English": 0.24, + "Filipino,Chinese,Spanish,English": 0.2733333333333333, + "Indonesian,Chinese,Spanish,English": 0.32666666666666666 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.15333333333333332, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.16, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.16666666666666666, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.16, + "Vietnamese,Malay,Filipino,Chinese,English": 0.16, + "Vietnamese,Malay,Filipino,Spanish,English": 0.17333333333333334, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.2, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.21333333333333335, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.21333333333333335, + "Vietnamese,Malay,Chinese,Spanish,English": 0.22, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.16, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.16, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.16, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.16666666666666666, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.23333333333333334, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.18666666666666668, + "Malay,Filipino,Indonesian,Chinese,English": 0.18, + "Malay,Filipino,Indonesian,Spanish,English": 0.18, + "Malay,Filipino,Chinese,Spanish,English": 0.18666666666666668, + "Malay,Indonesian,Chinese,Spanish,English": 0.24, + "Filipino,Indonesian,Chinese,Spanish,English": 0.2 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.12, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.12, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.12666666666666668, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.13333333333333333, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.16666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.12666666666666668, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.14666666666666667 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.09333333333333334 + } + }, + "AC3_2": 0.5220553359183803, + "AC3_3": 0.41586282694694926, + "AC3_4": 0.33757545765036434, + "AC3_5": 0.2723028390780643, + "AC3_6": 0.2134497816267653, + "AC3_7": 0.15826086953941398 + }, + "prompt_5": { + "overall_acc": 0.5104761904761905, + "language_acc": { + "Vietnamese": 0.48, + "Malay": 0.4533333333333333, + "Filipino": 0.38, + "Indonesian": 0.4866666666666667, + "Chinese": 0.6066666666666667, + "Spanish": 0.5266666666666666, + "English": 0.64 + }, + "consistency_score_2": 0.5580952380952381, + "consistency_score_3": 0.38990476190476187, + "consistency_score_4": 0.2984761904761905, + "consistency_score_5": 0.2384126984126984, + "consistency_score_6": 0.19428571428571434, + "consistency_score_7": 0.16, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.6266666666666667, + "Vietnamese,Filipino": 0.44, + "Vietnamese,Indonesian": 0.6333333333333333, + "Vietnamese,Chinese": 0.5133333333333333, + "Vietnamese,Spanish": 0.5066666666666667, + "Vietnamese,English": 0.5666666666666667, + "Malay,Filipino": 0.5066666666666667, + "Malay,Indonesian": 0.72, + "Malay,Chinese": 0.58, + "Malay,Spanish": 0.5266666666666666, + "Malay,English": 0.54, + "Filipino,Indonesian": 0.5133333333333333, + "Filipino,Chinese": 0.46, + "Filipino,Spanish": 0.42, + "Filipino,English": 0.5133333333333333, + "Indonesian,Chinese": 0.5666666666666667, + "Indonesian,Spanish": 0.56, + "Indonesian,English": 0.5733333333333334, + "Chinese,Spanish": 0.5933333333333334, + "Chinese,English": 0.7, + "Spanish,English": 0.66 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.3466666666666667, + "Vietnamese,Malay,Indonesian": 0.5133333333333333, + "Vietnamese,Malay,Chinese": 0.42, + "Vietnamese,Malay,Spanish": 0.4, + "Vietnamese,Malay,English": 0.43333333333333335, + "Vietnamese,Filipino,Indonesian": 0.36, + "Vietnamese,Filipino,Chinese": 0.2866666666666667, + "Vietnamese,Filipino,Spanish": 0.26, + "Vietnamese,Filipino,English": 0.3333333333333333, + "Vietnamese,Indonesian,Chinese": 0.42, + "Vietnamese,Indonesian,Spanish": 0.4, + "Vietnamese,Indonesian,English": 0.44, + "Vietnamese,Chinese,Spanish": 0.36666666666666664, + "Vietnamese,Chinese,English": 0.42, + "Vietnamese,Spanish,English": 0.41333333333333333, + "Malay,Filipino,Indonesian": 0.41333333333333333, + "Malay,Filipino,Chinese": 0.3333333333333333, + "Malay,Filipino,Spanish": 0.29333333333333333, + "Malay,Filipino,English": 0.34, + "Malay,Indonesian,Chinese": 0.4866666666666667, + "Malay,Indonesian,Spanish": 0.44666666666666666, + "Malay,Indonesian,English": 0.4666666666666667, + "Malay,Chinese,Spanish": 0.38666666666666666, + "Malay,Chinese,English": 0.44, + "Malay,Spanish,English": 0.4066666666666667, + "Filipino,Indonesian,Chinese": 0.3466666666666667, + "Filipino,Indonesian,Spanish": 0.32, + "Filipino,Indonesian,English": 0.37333333333333335, + "Filipino,Chinese,Spanish": 0.28, + "Filipino,Chinese,English": 0.37333333333333335, + "Filipino,Spanish,English": 0.34, + "Indonesian,Chinese,Spanish": 0.3933333333333333, + "Indonesian,Chinese,English": 0.4533333333333333, + "Indonesian,Spanish,English": 0.44666666666666666, + "Chinese,Spanish,English": 0.49333333333333335 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.3, + "Vietnamese,Malay,Filipino,Chinese": 0.25333333333333335, + "Vietnamese,Malay,Filipino,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Filipino,English": 0.2733333333333333, + "Vietnamese,Malay,Indonesian,Chinese": 0.38, + "Vietnamese,Malay,Indonesian,Spanish": 0.36, + "Vietnamese,Malay,Indonesian,English": 0.38666666666666666, + "Vietnamese,Malay,Chinese,Spanish": 0.31333333333333335, + "Vietnamese,Malay,Chinese,English": 0.35333333333333333, + "Vietnamese,Malay,Spanish,English": 0.3333333333333333, + "Vietnamese,Filipino,Indonesian,Chinese": 0.24666666666666667, + "Vietnamese,Filipino,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Filipino,Indonesian,English": 0.2866666666666667, + "Vietnamese,Filipino,Chinese,Spanish": 0.20666666666666667, + "Vietnamese,Filipino,Chinese,English": 0.24666666666666667, + "Vietnamese,Filipino,Spanish,English": 0.24, + "Vietnamese,Indonesian,Chinese,Spanish": 0.32, + "Vietnamese,Indonesian,Chinese,English": 0.35333333333333333, + "Vietnamese,Indonesian,Spanish,English": 0.3466666666666667, + "Vietnamese,Chinese,Spanish,English": 0.3333333333333333, + "Malay,Filipino,Indonesian,Chinese": 0.29333333333333333, + "Malay,Filipino,Indonesian,Spanish": 0.2733333333333333, + "Malay,Filipino,Indonesian,English": 0.30666666666666664, + "Malay,Filipino,Chinese,Spanish": 0.22, + "Malay,Filipino,Chinese,English": 0.2866666666666667, + "Malay,Filipino,Spanish,English": 0.24666666666666667, + "Malay,Indonesian,Chinese,Spanish": 0.34, + "Malay,Indonesian,Chinese,English": 0.3933333333333333, + "Malay,Indonesian,Spanish,English": 0.36, + "Malay,Chinese,Spanish,English": 0.3333333333333333, + "Filipino,Indonesian,Chinese,Spanish": 0.22666666666666666, + "Filipino,Indonesian,Chinese,English": 0.2866666666666667, + "Filipino,Indonesian,Spanish,English": 0.2733333333333333, + "Filipino,Chinese,Spanish,English": 0.26, + "Indonesian,Chinese,Spanish,English": 0.35333333333333333 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.22666666666666666, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.25333333333333335, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.18666666666666668, + "Vietnamese,Malay,Filipino,Chinese,English": 0.22666666666666666, + "Vietnamese,Malay,Filipino,Spanish,English": 0.20666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.29333333333333333, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.32666666666666666, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.30666666666666664, + "Vietnamese,Malay,Chinese,Spanish,English": 0.28, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.18, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.21333333333333335, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.21333333333333335, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.19333333333333333, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.2866666666666667, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.2, + "Malay,Filipino,Indonesian,Chinese,English": 0.25333333333333335, + "Malay,Filipino,Indonesian,Spanish,English": 0.22666666666666666, + "Malay,Filipino,Chinese,Spanish,English": 0.20666666666666667, + "Malay,Indonesian,Chinese,Spanish,English": 0.3, + "Filipino,Indonesian,Chinese,Spanish,English": 0.21333333333333335 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.17333333333333334, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.20666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.19333333333333333, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.17333333333333334, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.26, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.16666666666666666, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.18666666666666668 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.16 + } + }, + "AC3_2": 0.5332246837630185, + "AC3_3": 0.4421175213829939, + "AC3_4": 0.37669705221441374, + "AC3_5": 0.32502553539332557, + "AC3_6": 0.2814517374118018, + "AC3_7": 0.24363636360002583 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.44318181818181823, + "language_acc": { + "English": 0.4431818181818182, + "Filipino": 0.3181818181818182, + "Vietnamese": 0.4090909090909091, + "Chinese": 0.5625, + "Indonesian": 0.4715909090909091, + "Malay": 0.4090909090909091, + "Spanish": 0.48863636363636365 + }, + "consistency_score_2": 0.4629329004329003, + "consistency_score_3": 0.2748376623376623, + "consistency_score_4": 0.18392857142857144, + "consistency_score_5": 0.13095238095238096, + "consistency_score_6": 0.09496753246753245, + "consistency_score_7": 0.06818181818181818, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.2897727272727273, + "English,Vietnamese": 0.4659090909090909, + "English,Chinese": 0.5, + "English,Indonesian": 0.48295454545454547, + "English,Malay": 0.39204545454545453, + "English,Spanish": 0.5284090909090909, + "Filipino,Vietnamese": 0.42613636363636365, + "Filipino,Chinese": 0.4318181818181818, + "Filipino,Indonesian": 0.38636363636363635, + "Filipino,Malay": 0.38636363636363635, + "Filipino,Spanish": 0.4147727272727273, + "Vietnamese,Chinese": 0.4659090909090909, + "Vietnamese,Indonesian": 0.5454545454545454, + "Vietnamese,Malay": 0.45454545454545453, + "Vietnamese,Spanish": 0.4659090909090909, + "Chinese,Indonesian": 0.5, + "Chinese,Malay": 0.4375, + "Chinese,Spanish": 0.5681818181818182, + "Indonesian,Malay": 0.5397727272727273, + "Indonesian,Spanish": 0.5625, + "Malay,Spanish": 0.4772727272727273 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.19318181818181818, + "English,Filipino,Chinese": 0.2159090909090909, + "English,Filipino,Indonesian": 0.17613636363636365, + "English,Filipino,Malay": 0.14772727272727273, + "English,Filipino,Spanish": 0.19318181818181818, + "English,Vietnamese,Chinese": 0.2784090909090909, + "English,Vietnamese,Indonesian": 0.32954545454545453, + "English,Vietnamese,Malay": 0.24431818181818182, + "English,Vietnamese,Spanish": 0.3068181818181818, + "English,Chinese,Indonesian": 0.3125, + "English,Chinese,Malay": 0.26136363636363635, + "English,Chinese,Spanish": 0.3465909090909091, + "English,Indonesian,Malay": 0.2727272727272727, + "English,Indonesian,Spanish": 0.3522727272727273, + "English,Malay,Spanish": 0.2556818181818182, + "Filipino,Vietnamese,Chinese": 0.26704545454545453, + "Filipino,Vietnamese,Indonesian": 0.29545454545454547, + "Filipino,Vietnamese,Malay": 0.23295454545454544, + "Filipino,Vietnamese,Spanish": 0.2556818181818182, + "Filipino,Chinese,Indonesian": 0.24431818181818182, + "Filipino,Chinese,Malay": 0.2159090909090909, + "Filipino,Chinese,Spanish": 0.2840909090909091, + "Filipino,Indonesian,Malay": 0.24431818181818182, + "Filipino,Indonesian,Spanish": 0.2556818181818182, + "Filipino,Malay,Spanish": 0.22727272727272727, + "Vietnamese,Chinese,Indonesian": 0.3465909090909091, + "Vietnamese,Chinese,Malay": 0.2784090909090909, + "Vietnamese,Chinese,Spanish": 0.32386363636363635, + "Vietnamese,Indonesian,Malay": 0.3181818181818182, + "Vietnamese,Indonesian,Spanish": 0.3465909090909091, + "Vietnamese,Malay,Spanish": 0.26704545454545453, + "Chinese,Indonesian,Malay": 0.3125, + "Chinese,Indonesian,Spanish": 0.3693181818181818, + "Chinese,Malay,Spanish": 0.29545454545454547, + "Indonesian,Malay,Spanish": 0.3522727272727273 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.14772727272727273, + "English,Filipino,Vietnamese,Indonesian": 0.1534090909090909, + "English,Filipino,Vietnamese,Malay": 0.125, + "English,Filipino,Vietnamese,Spanish": 0.1534090909090909, + "English,Filipino,Chinese,Indonesian": 0.14772727272727273, + "English,Filipino,Chinese,Malay": 0.11931818181818182, + "English,Filipino,Chinese,Spanish": 0.14772727272727273, + "English,Filipino,Indonesian,Malay": 0.125, + "English,Filipino,Indonesian,Spanish": 0.14772727272727273, + "English,Filipino,Malay,Spanish": 0.11931818181818182, + "English,Vietnamese,Chinese,Indonesian": 0.23295454545454544, + "English,Vietnamese,Chinese,Malay": 0.18181818181818182, + "English,Vietnamese,Chinese,Spanish": 0.22727272727272727, + "English,Vietnamese,Indonesian,Malay": 0.19318181818181818, + "English,Vietnamese,Indonesian,Spanish": 0.24431818181818182, + "English,Vietnamese,Malay,Spanish": 0.17613636363636365, + "English,Chinese,Indonesian,Malay": 0.21022727272727273, + "English,Chinese,Indonesian,Spanish": 0.26136363636363635, + "English,Chinese,Malay,Spanish": 0.19318181818181818, + "English,Indonesian,Malay,Spanish": 0.21022727272727273, + "Filipino,Vietnamese,Chinese,Indonesian": 0.21022727272727273, + "Filipino,Vietnamese,Chinese,Malay": 0.1534090909090909, + "Filipino,Vietnamese,Chinese,Spanish": 0.19318181818181818, + "Filipino,Vietnamese,Indonesian,Malay": 0.19318181818181818, + "Filipino,Vietnamese,Indonesian,Spanish": 0.21022727272727273, + "Filipino,Vietnamese,Malay,Spanish": 0.14772727272727273, + "Filipino,Chinese,Indonesian,Malay": 0.1590909090909091, + "Filipino,Chinese,Indonesian,Spanish": 0.19318181818181818, + "Filipino,Chinese,Malay,Spanish": 0.1590909090909091, + "Filipino,Indonesian,Malay,Spanish": 0.16477272727272727, + "Vietnamese,Chinese,Indonesian,Malay": 0.23295454545454544, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2784090909090909, + "Vietnamese,Chinese,Malay,Spanish": 0.1875, + "Vietnamese,Indonesian,Malay,Spanish": 0.2159090909090909, + "Chinese,Indonesian,Malay,Spanish": 0.2215909090909091 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.13068181818181818, + "English,Filipino,Vietnamese,Chinese,Malay": 0.10795454545454546, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.11931818181818182, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.10795454545454546, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.125, + "English,Filipino,Vietnamese,Malay,Spanish": 0.09659090909090909, + "English,Filipino,Chinese,Indonesian,Malay": 0.10227272727272728, + "English,Filipino,Chinese,Indonesian,Spanish": 0.125, + "English,Filipino,Chinese,Malay,Spanish": 0.09090909090909091, + "English,Filipino,Indonesian,Malay,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.1590909090909091, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.19886363636363635, + "English,Vietnamese,Chinese,Malay,Spanish": 0.13636363636363635, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.1534090909090909, + "English,Chinese,Indonesian,Malay,Spanish": 0.16477272727272727, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.13636363636363635, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.17045454545454544, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.10795454545454546, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.13068181818181818, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.17045454545454544 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.09090909090909091, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.10795454545454546, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.07954545454545454, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.08522727272727272, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.125, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.09659090909090909 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.06818181818181818 + } + }, + "AC3_2": 0.45284209664103053, + "AC3_3": 0.33927507034922966, + "AC3_4": 0.2599663473569404, + "AC3_5": 0.2021677662230343, + "AC3_6": 0.15641711227039948, + "AC3_7": 0.11818181815870704 + }, + "prompt_2": { + "overall_acc": 0.4448051948051948, + "language_acc": { + "English": 0.5227272727272727, + "Filipino": 0.2840909090909091, + "Vietnamese": 0.4147727272727273, + "Chinese": 0.5397727272727273, + "Indonesian": 0.4772727272727273, + "Malay": 0.39204545454545453, + "Spanish": 0.48295454545454547 + }, + "consistency_score_2": 0.48187229437229434, + "consistency_score_3": 0.29269480519480523, + "consistency_score_4": 0.1987012987012987, + "consistency_score_5": 0.14258658008658007, + "consistency_score_6": 0.10389610389610389, + "consistency_score_7": 0.07386363636363637, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.39204545454545453, + "English,Vietnamese": 0.5170454545454546, + "English,Chinese": 0.4943181818181818, + "English,Indonesian": 0.5, + "English,Malay": 0.4375, + "English,Spanish": 0.5965909090909091, + "Filipino,Vietnamese": 0.38636363636363635, + "Filipino,Chinese": 0.3977272727272727, + "Filipino,Indonesian": 0.3977272727272727, + "Filipino,Malay": 0.4318181818181818, + "Filipino,Spanish": 0.4147727272727273, + "Vietnamese,Chinese": 0.4431818181818182, + "Vietnamese,Indonesian": 0.5681818181818182, + "Vietnamese,Malay": 0.5, + "Vietnamese,Spanish": 0.5625, + "Chinese,Indonesian": 0.4602272727272727, + "Chinese,Malay": 0.45454545454545453, + "Chinese,Spanish": 0.4772727272727273, + "Indonesian,Malay": 0.5568181818181818, + "Indonesian,Spanish": 0.5852272727272727, + "Malay,Spanish": 0.5454545454545454 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.22727272727272727, + "English,Filipino,Chinese": 0.2159090909090909, + "English,Filipino,Indonesian": 0.23295454545454544, + "English,Filipino,Malay": 0.19886363636363635, + "English,Filipino,Spanish": 0.26704545454545453, + "English,Vietnamese,Chinese": 0.29545454545454547, + "English,Vietnamese,Indonesian": 0.3522727272727273, + "English,Vietnamese,Malay": 0.29545454545454547, + "English,Vietnamese,Spanish": 0.38636363636363635, + "English,Chinese,Indonesian": 0.29545454545454547, + "English,Chinese,Malay": 0.2556818181818182, + "English,Chinese,Spanish": 0.32386363636363635, + "English,Indonesian,Malay": 0.29545454545454547, + "English,Indonesian,Spanish": 0.39204545454545453, + "English,Malay,Spanish": 0.3409090909090909, + "Filipino,Vietnamese,Chinese": 0.21022727272727273, + "Filipino,Vietnamese,Indonesian": 0.26704545454545453, + "Filipino,Vietnamese,Malay": 0.23863636363636365, + "Filipino,Vietnamese,Spanish": 0.2727272727272727, + "Filipino,Chinese,Indonesian": 0.21022727272727273, + "Filipino,Chinese,Malay": 0.2159090909090909, + "Filipino,Chinese,Spanish": 0.21022727272727273, + "Filipino,Indonesian,Malay": 0.2784090909090909, + "Filipino,Indonesian,Spanish": 0.26136363636363635, + "Filipino,Malay,Spanish": 0.25, + "Vietnamese,Chinese,Indonesian": 0.32386363636363635, + "Vietnamese,Chinese,Malay": 0.30113636363636365, + "Vietnamese,Chinese,Spanish": 0.3181818181818182, + "Vietnamese,Indonesian,Malay": 0.3693181818181818, + "Vietnamese,Indonesian,Spanish": 0.4034090909090909, + "Vietnamese,Malay,Spanish": 0.375, + "Chinese,Indonesian,Malay": 0.3181818181818182, + "Chinese,Indonesian,Spanish": 0.32386363636363635, + "Chinese,Malay,Spanish": 0.32386363636363635, + "Indonesian,Malay,Spanish": 0.3977272727272727 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.14772727272727273, + "English,Filipino,Vietnamese,Indonesian": 0.1875, + "English,Filipino,Vietnamese,Malay": 0.1534090909090909, + "English,Filipino,Vietnamese,Spanish": 0.19318181818181818, + "English,Filipino,Chinese,Indonesian": 0.13636363636363635, + "English,Filipino,Chinese,Malay": 0.11931818181818182, + "English,Filipino,Chinese,Spanish": 0.14204545454545456, + "English,Filipino,Indonesian,Malay": 0.1590909090909091, + "English,Filipino,Indonesian,Spanish": 0.1875, + "English,Filipino,Malay,Spanish": 0.16477272727272727, + "English,Vietnamese,Chinese,Indonesian": 0.2215909090909091, + "English,Vietnamese,Chinese,Malay": 0.19886363636363635, + "English,Vietnamese,Chinese,Spanish": 0.23295454545454544, + "English,Vietnamese,Indonesian,Malay": 0.23863636363636365, + "English,Vietnamese,Indonesian,Spanish": 0.2897727272727273, + "English,Vietnamese,Malay,Spanish": 0.24431818181818182, + "English,Chinese,Indonesian,Malay": 0.19886363636363635, + "English,Chinese,Indonesian,Spanish": 0.23295454545454544, + "English,Chinese,Malay,Spanish": 0.2159090909090909, + "English,Indonesian,Malay,Spanish": 0.26136363636363635, + "Filipino,Vietnamese,Chinese,Indonesian": 0.17045454545454544, + "Filipino,Vietnamese,Chinese,Malay": 0.14204545454545456, + "Filipino,Vietnamese,Chinese,Spanish": 0.17045454545454544, + "Filipino,Vietnamese,Indonesian,Malay": 0.19886363636363635, + "Filipino,Vietnamese,Indonesian,Spanish": 0.21022727272727273, + "Filipino,Vietnamese,Malay,Spanish": 0.18181818181818182, + "Filipino,Chinese,Indonesian,Malay": 0.1534090909090909, + "Filipino,Chinese,Indonesian,Spanish": 0.1534090909090909, + "Filipino,Chinese,Malay,Spanish": 0.14772727272727273, + "Filipino,Indonesian,Malay,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Malay": 0.25, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2556818181818182, + "Vietnamese,Chinese,Malay,Spanish": 0.24431818181818182, + "Vietnamese,Indonesian,Malay,Spanish": 0.29545454545454547, + "Chinese,Indonesian,Malay,Spanish": 0.26136363636363635 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.125, + "English,Filipino,Vietnamese,Chinese,Malay": 0.09659090909090909, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.125, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.14204545454545456, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.1534090909090909, + "English,Filipino,Vietnamese,Malay,Spanish": 0.125, + "English,Filipino,Chinese,Indonesian,Malay": 0.09659090909090909, + "English,Filipino,Chinese,Indonesian,Spanish": 0.10795454545454546, + "English,Filipino,Chinese,Malay,Spanish": 0.09659090909090909, + "English,Filipino,Indonesian,Malay,Spanish": 0.13068181818181818, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.17045454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.1875, + "English,Vietnamese,Chinese,Malay,Spanish": 0.17045454545454544, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.20454545454545456, + "English,Chinese,Indonesian,Malay,Spanish": 0.18181818181818182, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.13068181818181818, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.14204545454545456, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.11931818181818182, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.1590909090909091, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.21022727272727273 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.09090909090909091, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.10227272727272728, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.07954545454545454, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.11363636363636363, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.1534090909090909, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.10795454545454546 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.07386363636363637 + } + }, + "AC3_2": 0.46259740254748255, + "AC3_3": 0.3530635113911612, + "AC3_4": 0.27469301632540516, + "AC3_5": 0.21594872189998188, + "AC3_6": 0.1684469376470077, + "AC3_7": 0.126689429481905 + }, + "prompt_3": { + "overall_acc": 0.452922077922078, + "language_acc": { + "English": 0.4375, + "Filipino": 0.3181818181818182, + "Vietnamese": 0.4715909090909091, + "Chinese": 0.5397727272727273, + "Indonesian": 0.48863636363636365, + "Malay": 0.4772727272727273, + "Spanish": 0.4375 + }, + "consistency_score_2": 0.4523809523809524, + "consistency_score_3": 0.2610389610389611, + "consistency_score_4": 0.17045454545454541, + "consistency_score_5": 0.12094155844155843, + "consistency_score_6": 0.09334415584415583, + "consistency_score_7": 0.07954545454545454, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.3352272727272727, + "English,Vietnamese": 0.45454545454545453, + "English,Chinese": 0.4772727272727273, + "English,Indonesian": 0.5, + "English,Malay": 0.4375, + "English,Spanish": 0.5284090909090909, + "Filipino,Vietnamese": 0.3806818181818182, + "Filipino,Chinese": 0.38636363636363635, + "Filipino,Indonesian": 0.3693181818181818, + "Filipino,Malay": 0.3977272727272727, + "Filipino,Spanish": 0.4090909090909091, + "Vietnamese,Chinese": 0.4431818181818182, + "Vietnamese,Indonesian": 0.4602272727272727, + "Vietnamese,Malay": 0.5511363636363636, + "Vietnamese,Spanish": 0.5113636363636364, + "Chinese,Indonesian": 0.42045454545454547, + "Chinese,Malay": 0.44886363636363635, + "Chinese,Spanish": 0.4715909090909091, + "Indonesian,Malay": 0.5625, + "Indonesian,Spanish": 0.4715909090909091, + "Malay,Spanish": 0.48295454545454547 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.20454545454545456, + "English,Filipino,Chinese": 0.18181818181818182, + "English,Filipino,Indonesian": 0.21022727272727273, + "English,Filipino,Malay": 0.19886363636363635, + "English,Filipino,Spanish": 0.2215909090909091, + "English,Vietnamese,Chinese": 0.26704545454545453, + "English,Vietnamese,Indonesian": 0.26704545454545453, + "English,Vietnamese,Malay": 0.29545454545454547, + "English,Vietnamese,Spanish": 0.3352272727272727, + "English,Chinese,Indonesian": 0.26704545454545453, + "English,Chinese,Malay": 0.24431818181818182, + "English,Chinese,Spanish": 0.30113636363636365, + "English,Indonesian,Malay": 0.3181818181818182, + "English,Indonesian,Spanish": 0.3068181818181818, + "English,Malay,Spanish": 0.3068181818181818, + "Filipino,Vietnamese,Chinese": 0.22727272727272727, + "Filipino,Vietnamese,Indonesian": 0.21022727272727273, + "Filipino,Vietnamese,Malay": 0.23863636363636365, + "Filipino,Vietnamese,Spanish": 0.24431818181818182, + "Filipino,Chinese,Indonesian": 0.18181818181818182, + "Filipino,Chinese,Malay": 0.22727272727272727, + "Filipino,Chinese,Spanish": 0.21022727272727273, + "Filipino,Indonesian,Malay": 0.26136363636363635, + "Filipino,Indonesian,Spanish": 0.2159090909090909, + "Filipino,Malay,Spanish": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian": 0.24431818181818182, + "Vietnamese,Chinese,Malay": 0.3068181818181818, + "Vietnamese,Chinese,Spanish": 0.2897727272727273, + "Vietnamese,Indonesian,Malay": 0.3693181818181818, + "Vietnamese,Indonesian,Spanish": 0.29545454545454547, + "Vietnamese,Malay,Spanish": 0.3409090909090909, + "Chinese,Indonesian,Malay": 0.2784090909090909, + "Chinese,Indonesian,Spanish": 0.25, + "Chinese,Malay,Spanish": 0.2784090909090909, + "Indonesian,Malay,Spanish": 0.32386363636363635 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.14772727272727273, + "English,Filipino,Vietnamese,Indonesian": 0.13636363636363635, + "English,Filipino,Vietnamese,Malay": 0.1534090909090909, + "English,Filipino,Vietnamese,Spanish": 0.17045454545454544, + "English,Filipino,Chinese,Indonesian": 0.11363636363636363, + "English,Filipino,Chinese,Malay": 0.11931818181818182, + "English,Filipino,Chinese,Spanish": 0.13636363636363635, + "English,Filipino,Indonesian,Malay": 0.17045454545454544, + "English,Filipino,Indonesian,Spanish": 0.14772727272727273, + "English,Filipino,Malay,Spanish": 0.14204545454545456, + "English,Vietnamese,Chinese,Indonesian": 0.1534090909090909, + "English,Vietnamese,Chinese,Malay": 0.17045454545454544, + "English,Vietnamese,Chinese,Spanish": 0.21022727272727273, + "English,Vietnamese,Indonesian,Malay": 0.2215909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.19886363636363635, + "English,Vietnamese,Malay,Spanish": 0.23295454545454544, + "English,Chinese,Indonesian,Malay": 0.18181818181818182, + "English,Chinese,Indonesian,Spanish": 0.17613636363636365, + "English,Chinese,Malay,Spanish": 0.16477272727272727, + "English,Indonesian,Malay,Spanish": 0.22727272727272727, + "Filipino,Vietnamese,Chinese,Indonesian": 0.13068181818181818, + "Filipino,Vietnamese,Chinese,Malay": 0.17045454545454544, + "Filipino,Vietnamese,Chinese,Spanish": 0.1534090909090909, + "Filipino,Vietnamese,Indonesian,Malay": 0.1875, + "Filipino,Vietnamese,Indonesian,Spanish": 0.1534090909090909, + "Filipino,Vietnamese,Malay,Spanish": 0.1590909090909091, + "Filipino,Chinese,Indonesian,Malay": 0.14204545454545456, + "Filipino,Chinese,Indonesian,Spanish": 0.11363636363636363, + "Filipino,Chinese,Malay,Spanish": 0.14204545454545456, + "Filipino,Indonesian,Malay,Spanish": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Malay": 0.2215909090909091, + "Vietnamese,Chinese,Indonesian,Spanish": 0.18181818181818182, + "Vietnamese,Chinese,Malay,Spanish": 0.2159090909090909, + "Vietnamese,Indonesian,Malay,Spanish": 0.25, + "Chinese,Indonesian,Malay,Spanish": 0.19886363636363635 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.09090909090909091, + "English,Filipino,Vietnamese,Chinese,Malay": 0.10795454545454546, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.125, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.13068181818181818, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.11363636363636363, + "English,Filipino,Vietnamese,Malay,Spanish": 0.11931818181818182, + "English,Filipino,Chinese,Indonesian,Malay": 0.09659090909090909, + "English,Filipino,Chinese,Indonesian,Spanish": 0.08522727272727272, + "English,Filipino,Chinese,Malay,Spanish": 0.08522727272727272, + "English,Filipino,Indonesian,Malay,Spanish": 0.13068181818181818, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.13636363636363635, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.125, + "English,Vietnamese,Chinese,Malay,Spanish": 0.13636363636363635, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.17613636363636365, + "English,Chinese,Indonesian,Malay,Spanish": 0.13068181818181818, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.125, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.09659090909090909, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.11363636363636363, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.13636363636363635, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.10795454545454546, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.17045454545454544 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.09090909090909091, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.07954545454545454, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.08522727272727272, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.10795454545454546, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11363636363636363, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.09659090909090909 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.07954545454545454 + } + }, + "AC3_2": 0.45265135337840073, + "AC3_3": 0.33119540754738214, + "AC3_4": 0.24769176132390247, + "AC3_5": 0.19090633549824548, + "AC3_6": 0.15478763432839238, + "AC3_7": 0.1353242793537429 + }, + "prompt_4": { + "overall_acc": 0.44642857142857145, + "language_acc": { + "English": 0.4772727272727273, + "Filipino": 0.3181818181818182, + "Vietnamese": 0.42613636363636365, + "Chinese": 0.5511363636363636, + "Indonesian": 0.44886363636363635, + "Malay": 0.44886363636363635, + "Spanish": 0.45454545454545453 + }, + "consistency_score_2": 0.4594155844155844, + "consistency_score_3": 0.2686688311688312, + "consistency_score_4": 0.17873376623376624, + "consistency_score_5": 0.1274350649350649, + "consistency_score_6": 0.09496753246753246, + "consistency_score_7": 0.07386363636363637, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.39204545454545453, + "English,Vietnamese": 0.42045454545454547, + "English,Chinese": 0.5397727272727273, + "English,Indonesian": 0.4772727272727273, + "English,Malay": 0.4147727272727273, + "English,Spanish": 0.5170454545454546, + "Filipino,Vietnamese": 0.44886363636363635, + "Filipino,Chinese": 0.4318181818181818, + "Filipino,Indonesian": 0.4602272727272727, + "Filipino,Malay": 0.375, + "Filipino,Spanish": 0.36363636363636365, + "Vietnamese,Chinese": 0.4659090909090909, + "Vietnamese,Indonesian": 0.5227272727272727, + "Vietnamese,Malay": 0.45454545454545453, + "Vietnamese,Spanish": 0.5056818181818182, + "Chinese,Indonesian": 0.4659090909090909, + "Chinese,Malay": 0.42613636363636365, + "Chinese,Spanish": 0.4318181818181818, + "Indonesian,Malay": 0.5170454545454546, + "Indonesian,Spanish": 0.5056818181818182, + "Malay,Spanish": 0.5113636363636364 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.2159090909090909, + "English,Filipino,Chinese": 0.2556818181818182, + "English,Filipino,Indonesian": 0.24431818181818182, + "English,Filipino,Malay": 0.17613636363636365, + "English,Filipino,Spanish": 0.2215909090909091, + "English,Vietnamese,Chinese": 0.2897727272727273, + "English,Vietnamese,Indonesian": 0.30113636363636365, + "English,Vietnamese,Malay": 0.24431818181818182, + "English,Vietnamese,Spanish": 0.2897727272727273, + "English,Chinese,Indonesian": 0.3125, + "English,Chinese,Malay": 0.2727272727272727, + "English,Chinese,Spanish": 0.3068181818181818, + "English,Indonesian,Malay": 0.26136363636363635, + "English,Indonesian,Spanish": 0.3068181818181818, + "English,Malay,Spanish": 0.2840909090909091, + "Filipino,Vietnamese,Chinese": 0.2556818181818182, + "Filipino,Vietnamese,Indonesian": 0.30113636363636365, + "Filipino,Vietnamese,Malay": 0.23295454545454544, + "Filipino,Vietnamese,Spanish": 0.24431818181818182, + "Filipino,Chinese,Indonesian": 0.26136363636363635, + "Filipino,Chinese,Malay": 0.2215909090909091, + "Filipino,Chinese,Spanish": 0.21022727272727273, + "Filipino,Indonesian,Malay": 0.2556818181818182, + "Filipino,Indonesian,Spanish": 0.26136363636363635, + "Filipino,Malay,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian": 0.3125, + "Vietnamese,Chinese,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Spanish": 0.2784090909090909, + "Vietnamese,Indonesian,Malay": 0.3352272727272727, + "Vietnamese,Indonesian,Spanish": 0.3352272727272727, + "Vietnamese,Malay,Spanish": 0.3125, + "Chinese,Indonesian,Malay": 0.2727272727272727, + "Chinese,Indonesian,Spanish": 0.2727272727272727, + "Chinese,Malay,Spanish": 0.26704545454545453, + "Indonesian,Malay,Spanish": 0.3352272727272727 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.16477272727272727, + "English,Filipino,Vietnamese,Indonesian": 0.18181818181818182, + "English,Filipino,Vietnamese,Malay": 0.125, + "English,Filipino,Vietnamese,Spanish": 0.17045454545454544, + "English,Filipino,Chinese,Indonesian": 0.17045454545454544, + "English,Filipino,Chinese,Malay": 0.13068181818181818, + "English,Filipino,Chinese,Spanish": 0.1590909090909091, + "English,Filipino,Indonesian,Malay": 0.13636363636363635, + "English,Filipino,Indonesian,Spanish": 0.17045454545454544, + "English,Filipino,Malay,Spanish": 0.11931818181818182, + "English,Vietnamese,Chinese,Indonesian": 0.22727272727272727, + "English,Vietnamese,Chinese,Malay": 0.18181818181818182, + "English,Vietnamese,Chinese,Spanish": 0.21022727272727273, + "English,Vietnamese,Indonesian,Malay": 0.19886363636363635, + "English,Vietnamese,Indonesian,Spanish": 0.22727272727272727, + "English,Vietnamese,Malay,Spanish": 0.19318181818181818, + "English,Chinese,Indonesian,Malay": 0.18181818181818182, + "English,Chinese,Indonesian,Spanish": 0.20454545454545456, + "English,Chinese,Malay,Spanish": 0.19886363636363635, + "English,Indonesian,Malay,Spanish": 0.19318181818181818, + "Filipino,Vietnamese,Chinese,Indonesian": 0.19886363636363635, + "Filipino,Vietnamese,Chinese,Malay": 0.1590909090909091, + "Filipino,Vietnamese,Chinese,Spanish": 0.14772727272727273, + "Filipino,Vietnamese,Indonesian,Malay": 0.19886363636363635, + "Filipino,Vietnamese,Indonesian,Spanish": 0.19318181818181818, + "Filipino,Vietnamese,Malay,Spanish": 0.14772727272727273, + "Filipino,Chinese,Indonesian,Malay": 0.1590909090909091, + "Filipino,Chinese,Indonesian,Spanish": 0.1590909090909091, + "Filipino,Chinese,Malay,Spanish": 0.13068181818181818, + "Filipino,Indonesian,Malay,Spanish": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Malay": 0.21022727272727273, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2159090909090909, + "Vietnamese,Chinese,Malay,Spanish": 0.19318181818181818, + "Vietnamese,Indonesian,Malay,Spanish": 0.25, + "Chinese,Indonesian,Malay,Spanish": 0.19318181818181818 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.14204545454545456, + "English,Filipino,Vietnamese,Chinese,Malay": 0.10227272727272728, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.125, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.11363636363636363, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.14772727272727273, + "English,Filipino,Vietnamese,Malay,Spanish": 0.10227272727272728, + "English,Filipino,Chinese,Indonesian,Malay": 0.10227272727272728, + "English,Filipino,Chinese,Indonesian,Spanish": 0.11931818181818182, + "English,Filipino,Chinese,Malay,Spanish": 0.09090909090909091, + "English,Filipino,Indonesian,Malay,Spanish": 0.09659090909090909, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.14772727272727273, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.17045454545454544, + "English,Vietnamese,Chinese,Malay,Spanish": 0.14772727272727273, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.16477272727272727, + "English,Chinese,Indonesian,Malay,Spanish": 0.14204545454545456, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.13068181818181818, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.125, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.10227272727272728, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.13636363636363635, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.10227272727272728, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.16477272727272727 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.09090909090909091, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.10795454545454546, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.07954545454545454, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.09659090909090909, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.07386363636363637, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.125, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.09090909090909091 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.07386363636363637 + } + }, + "AC3_2": 0.45282898100479774, + "AC3_3": 0.33545484023199357, + "AC3_4": 0.2552676484363723, + "AC3_5": 0.19827237822368352, + "AC3_6": 0.15661811948274415, + "AC3_7": 0.1267550701784458 + }, + "prompt_5": { + "overall_acc": 0.41964285714285715, + "language_acc": { + "English": 0.4375, + "Filipino": 0.3068181818181818, + "Vietnamese": 0.42045454545454547, + "Chinese": 0.5227272727272727, + "Indonesian": 0.44886363636363635, + "Malay": 0.4090909090909091, + "Spanish": 0.39204545454545453 + }, + "consistency_score_2": 0.4418290043290043, + "consistency_score_3": 0.2488636363636363, + "consistency_score_4": 0.15762987012987006, + "consistency_score_5": 0.1074134199134199, + "consistency_score_6": 0.07711038961038962, + "consistency_score_7": 0.056818181818181816, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.32386363636363635, + "English,Vietnamese": 0.45454545454545453, + "English,Chinese": 0.5227272727272727, + "English,Indonesian": 0.4659090909090909, + "English,Malay": 0.39204545454545453, + "English,Spanish": 0.5, + "Filipino,Vietnamese": 0.38636363636363635, + "Filipino,Chinese": 0.3806818181818182, + "Filipino,Indonesian": 0.4147727272727273, + "Filipino,Malay": 0.39204545454545453, + "Filipino,Spanish": 0.3522727272727273, + "Vietnamese,Chinese": 0.42613636363636365, + "Vietnamese,Indonesian": 0.4659090909090909, + "Vietnamese,Malay": 0.45454545454545453, + "Vietnamese,Spanish": 0.4431818181818182, + "Chinese,Indonesian": 0.4602272727272727, + "Chinese,Malay": 0.45454545454545453, + "Chinese,Spanish": 0.5170454545454546, + "Indonesian,Malay": 0.48295454545454547, + "Indonesian,Spanish": 0.5284090909090909, + "Malay,Spanish": 0.4602272727272727 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.20454545454545456, + "English,Filipino,Chinese": 0.19886363636363635, + "English,Filipino,Indonesian": 0.19886363636363635, + "English,Filipino,Malay": 0.17045454545454544, + "English,Filipino,Spanish": 0.21022727272727273, + "English,Vietnamese,Chinese": 0.2727272727272727, + "English,Vietnamese,Indonesian": 0.2727272727272727, + "English,Vietnamese,Malay": 0.23295454545454544, + "English,Vietnamese,Spanish": 0.2784090909090909, + "English,Chinese,Indonesian": 0.2840909090909091, + "English,Chinese,Malay": 0.23863636363636365, + "English,Chinese,Spanish": 0.32386363636363635, + "English,Indonesian,Malay": 0.26136363636363635, + "English,Indonesian,Spanish": 0.29545454545454547, + "English,Malay,Spanish": 0.25, + "Filipino,Vietnamese,Chinese": 0.2159090909090909, + "Filipino,Vietnamese,Indonesian": 0.24431818181818182, + "Filipino,Vietnamese,Malay": 0.2159090909090909, + "Filipino,Vietnamese,Spanish": 0.21022727272727273, + "Filipino,Chinese,Indonesian": 0.2159090909090909, + "Filipino,Chinese,Malay": 0.2159090909090909, + "Filipino,Chinese,Spanish": 0.2159090909090909, + "Filipino,Indonesian,Malay": 0.24431818181818182, + "Filipino,Indonesian,Spanish": 0.23863636363636365, + "Filipino,Malay,Spanish": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian": 0.2727272727272727, + "Vietnamese,Chinese,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Spanish": 0.2840909090909091, + "Vietnamese,Indonesian,Malay": 0.26704545454545453, + "Vietnamese,Indonesian,Spanish": 0.2840909090909091, + "Vietnamese,Malay,Spanish": 0.2556818181818182, + "Chinese,Indonesian,Malay": 0.2727272727272727, + "Chinese,Indonesian,Spanish": 0.32386363636363635, + "Chinese,Malay,Spanish": 0.29545454545454547, + "Indonesian,Malay,Spanish": 0.30113636363636365 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.13636363636363635, + "English,Filipino,Vietnamese,Indonesian": 0.1534090909090909, + "English,Filipino,Vietnamese,Malay": 0.13068181818181818, + "English,Filipino,Vietnamese,Spanish": 0.14772727272727273, + "English,Filipino,Chinese,Indonesian": 0.13068181818181818, + "English,Filipino,Chinese,Malay": 0.11363636363636363, + "English,Filipino,Chinese,Spanish": 0.14204545454545456, + "English,Filipino,Indonesian,Malay": 0.13636363636363635, + "English,Filipino,Indonesian,Spanish": 0.14204545454545456, + "English,Filipino,Malay,Spanish": 0.10795454545454546, + "English,Vietnamese,Chinese,Indonesian": 0.18181818181818182, + "English,Vietnamese,Chinese,Malay": 0.1534090909090909, + "English,Vietnamese,Chinese,Spanish": 0.19886363636363635, + "English,Vietnamese,Indonesian,Malay": 0.17045454545454544, + "English,Vietnamese,Indonesian,Spanish": 0.18181818181818182, + "English,Vietnamese,Malay,Spanish": 0.1590909090909091, + "English,Chinese,Indonesian,Malay": 0.17613636363636365, + "English,Chinese,Indonesian,Spanish": 0.20454545454545456, + "English,Chinese,Malay,Spanish": 0.17045454545454544, + "English,Indonesian,Malay,Spanish": 0.17613636363636365, + "Filipino,Vietnamese,Chinese,Indonesian": 0.1590909090909091, + "Filipino,Vietnamese,Chinese,Malay": 0.1534090909090909, + "Filipino,Vietnamese,Chinese,Spanish": 0.14204545454545456, + "Filipino,Vietnamese,Indonesian,Malay": 0.16477272727272727, + "Filipino,Vietnamese,Indonesian,Spanish": 0.1534090909090909, + "Filipino,Vietnamese,Malay,Spanish": 0.11363636363636363, + "Filipino,Chinese,Indonesian,Malay": 0.14204545454545456, + "Filipino,Chinese,Indonesian,Spanish": 0.1534090909090909, + "Filipino,Chinese,Malay,Spanish": 0.125, + "Filipino,Indonesian,Malay,Spanish": 0.14772727272727273, + "Vietnamese,Chinese,Indonesian,Malay": 0.1875, + "Vietnamese,Chinese,Indonesian,Spanish": 0.20454545454545456, + "Vietnamese,Chinese,Malay,Spanish": 0.17613636363636365, + "Vietnamese,Indonesian,Malay,Spanish": 0.17613636363636365, + "Chinese,Indonesian,Malay,Spanish": 0.20454545454545456 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.10795454545454546, + "English,Filipino,Vietnamese,Chinese,Malay": 0.09659090909090909, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.10795454545454546, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.11363636363636363, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.10795454545454546, + "English,Filipino,Vietnamese,Malay,Spanish": 0.08522727272727272, + "English,Filipino,Chinese,Indonesian,Malay": 0.09090909090909091, + "English,Filipino,Chinese,Indonesian,Spanish": 0.09659090909090909, + "English,Filipino,Chinese,Malay,Spanish": 0.07954545454545454, + "English,Filipino,Indonesian,Malay,Spanish": 0.08522727272727272, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.13068181818181818, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Malay,Spanish": 0.11363636363636363, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.11363636363636363, + "English,Chinese,Indonesian,Malay,Spanish": 0.13068181818181818, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.11931818181818182, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.11363636363636363, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.09090909090909091, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.09659090909090909, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.09659090909090909, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.14204545454545456 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.08522727272727272, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.07954545454545454, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.06818181818181818, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.06818181818181818, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.0625, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.09659090909090909, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.07954545454545454 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.056818181818181816 + } + }, + "AC3_2": 0.43045024223287587, + "AC3_3": 0.3124392908711843, + "AC3_4": 0.2291750361164951, + "AC3_5": 0.17104539451140155, + "AC3_6": 0.13028127915211146, + "AC3_7": 0.10008517885463289 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5048543689320388 + }, + "prompt_2": { + "accuracy": 0.5533980582524272 + }, + "prompt_3": { + "accuracy": 0.5339805825242718 + }, + "prompt_4": { + "accuracy": 0.42718446601941745 + }, + "prompt_5": { + "accuracy": 0.5242718446601942 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7428571428571429 + }, + "prompt_2": { + "accuracy": 0.638095238095238 + }, + "prompt_3": { + "accuracy": 0.6571428571428571 + }, + "prompt_4": { + "accuracy": 0.6666666666666666 + }, + "prompt_5": { + "accuracy": 0.7333333333333333 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5700934579439252 + }, + "prompt_2": { + "accuracy": 0.6635514018691588 + }, + "prompt_3": { + "accuracy": 0.6074766355140186 + }, + "prompt_4": { + "accuracy": 0.411214953271028 + }, + "prompt_5": { + "accuracy": 0.6261682242990654 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.36, + "category_acc": { + "brand": 0.4, + "demographics": 0.0, + "biology": 0.2, + "history": 0.26666666666666666, + "literature": 0.3, + "politics": 0.4, + "culture": 0.4, + "film": 0.2, + "law": 0.7, + "geography": 0.6 + } + }, + "prompt_2": { + "accuracy": 0.4, + "category_acc": { + "brand": 0.5, + "demographics": 0.0, + "biology": 0.3, + "history": 0.3333333333333333, + "literature": 0.3, + "politics": 0.3, + "culture": 0.4, + "film": 0.3, + "law": 0.7, + "geography": 0.7 + } + }, + "prompt_3": { + "accuracy": 0.4, + "category_acc": { + "brand": 0.5, + "demographics": 0.2, + "biology": 0.3, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.5, + "culture": 0.4, + "film": 0.4, + "law": 0.5, + "geography": 0.6 + } + }, + "prompt_4": { + "accuracy": 0.35, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.1, + "history": 0.26666666666666666, + "literature": 0.4, + "politics": 0.4, + "culture": 0.4, + "film": 0.4, + "law": 0.4, + "geography": 0.5 + } + }, + "prompt_5": { + "accuracy": 0.44, + "category_acc": { + "brand": 0.6, + "demographics": 0.0, + "biology": 0.2, + "history": 0.26666666666666666, + "literature": 0.3, + "politics": 0.6, + "culture": 0.4, + "film": 0.3, + "law": 0.7, + "geography": 0.9 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.1509795858348083 + }, + "prompt_2": { + "bleu_score": 0.2234703462103554 + }, + "prompt_3": { + "bleu_score": 0.20026747280075327 + }, + "prompt_4": { + "bleu_score": 0.08715809029070645 + }, + "prompt_5": { + "bleu_score": 0.09410178371682006 + } }, "indommlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3468856398958542, + "category_acc": { + "History": 0.3072289156626506, + "Geography": 0.3163265306122449, + "Lampungic": 0.24489795918367346, + "Social science": 0.4741235392320534, + "Balinese": 0.29723991507430997, + "Makassarese": 0.3225806451612903, + "Banjarese": 0.3680555555555556, + "Chemistry": 0.28905109489051095, + "Biology": 0.27928994082840236, + "Science": 0.4241486068111455, + "Christian religion": 0.417910447761194, + "Art": 0.39767054908485855, + "Islam religion": 0.38264580369843526, + "Hindu religion": 0.3466666666666667, + "Madurese": 0.2711864406779661, + "Sport": 0.3716216216216216, + "Indonesian language": 0.38387297633872974, + "Physics": 0.32323232323232326, + "Minangkabau culture": 0.2964824120603015, + "Dayak language": 0.3119266055045872, + "Sociology": 0.3165322580645161, + "Economy": 0.2725409836065574, + "Sundanese": 0.3267070008643042, + "Javanese": 0.28225806451612906, + "Civic education": 0.3676680972818312 + } + }, + "prompt_2": { + "accuracy": 0.34661859937245476, + "category_acc": { + "History": 0.2891566265060241, + "Geography": 0.3183673469387755, + "Lampungic": 0.2789115646258503, + "Social science": 0.48414023372287146, + "Balinese": 0.27388535031847133, + "Makassarese": 0.2956989247311828, + "Banjarese": 0.2847222222222222, + "Chemistry": 0.26277372262773724, + "Biology": 0.27928994082840236, + "Science": 0.4107327141382869, + "Christian religion": 0.36318407960199006, + "Art": 0.3860232945091514, + "Islam religion": 0.406827880512091, + "Hindu religion": 0.32666666666666666, + "Madurese": 0.28135593220338984, + "Sport": 0.3918918918918919, + "Indonesian language": 0.3972602739726027, + "Physics": 0.3151515151515151, + "Minangkabau culture": 0.3316582914572864, + "Dayak language": 0.25688073394495414, + "Sociology": 0.3125, + "Economy": 0.28688524590163933, + "Sundanese": 0.31547104580812446, + "Javanese": 0.3165322580645161, + "Civic education": 0.3447782546494993 + } + }, + "prompt_3": { + "accuracy": 0.37826290139528673, + "category_acc": { + "History": 0.3293172690763052, + "Geography": 0.3448979591836735, + "Lampungic": 0.3401360544217687, + "Social science": 0.5559265442404007, + "Balinese": 0.2951167728237792, + "Makassarese": 0.3387096774193548, + "Banjarese": 0.3541666666666667, + "Chemistry": 0.2656934306569343, + "Biology": 0.3230769230769231, + "Science": 0.46542827657378744, + "Christian religion": 0.4577114427860697, + "Art": 0.4242928452579035, + "Islam religion": 0.4366998577524893, + "Hindu religion": 0.3333333333333333, + "Madurese": 0.2847457627118644, + "Sport": 0.40540540540540543, + "Indonesian language": 0.424346201743462, + "Physics": 0.3656565656565657, + "Minangkabau culture": 0.3316582914572864, + "Dayak language": 0.42201834862385323, + "Sociology": 0.3588709677419355, + "Economy": 0.32581967213114754, + "Sundanese": 0.320656871218669, + "Javanese": 0.3125, + "Civic education": 0.38483547925608014 + } + }, + "prompt_4": { + "accuracy": 0.38807664063021563, + "category_acc": { + "History": 0.36947791164658633, + "Geography": 0.37551020408163266, + "Lampungic": 0.21768707482993196, + "Social science": 0.5659432387312187, + "Balinese": 0.28662420382165604, + "Makassarese": 0.3655913978494624, + "Banjarese": 0.2777777777777778, + "Chemistry": 0.2773722627737226, + "Biology": 0.35384615384615387, + "Science": 0.48194014447884415, + "Christian religion": 0.44776119402985076, + "Art": 0.4209650582362729, + "Islam religion": 0.4310099573257468, + "Hindu religion": 0.36, + "Madurese": 0.29152542372881357, + "Sport": 0.41216216216216217, + "Indonesian language": 0.43773349937733497, + "Physics": 0.34949494949494947, + "Minangkabau culture": 0.34673366834170855, + "Dayak language": 0.3211009174311927, + "Sociology": 0.36693548387096775, + "Economy": 0.3463114754098361, + "Sundanese": 0.3465859982713915, + "Javanese": 0.30443548387096775, + "Civic education": 0.41630901287553645 + } + }, + "prompt_5": { + "accuracy": 0.3819347085920288, + "category_acc": { + "History": 0.3393574297188755, + "Geography": 0.35306122448979593, + "Lampungic": 0.25170068027210885, + "Social science": 0.5392320534223706, + "Balinese": 0.3205944798301486, + "Makassarese": 0.34408602150537637, + "Banjarese": 0.3819444444444444, + "Chemistry": 0.2759124087591241, + "Biology": 0.33964497041420116, + "Science": 0.478844169246646, + "Christian religion": 0.4577114427860697, + "Art": 0.43261231281198004, + "Islam religion": 0.44523470839260315, + "Hindu religion": 0.41333333333333333, + "Madurese": 0.3016949152542373, + "Sport": 0.36486486486486486, + "Indonesian language": 0.41594022415940224, + "Physics": 0.3151515151515151, + "Minangkabau culture": 0.36180904522613067, + "Dayak language": 0.3761467889908257, + "Sociology": 0.36088709677419356, + "Economy": 0.33811475409836067, + "Sundanese": 0.331028522039758, + "Javanese": 0.3014112903225806, + "Civic education": 0.4406294706723891 + } + } }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.20144912312118174 + }, + "prompt_2": { + "bleu_score": 0.3236749740387827 + }, + "prompt_3": { + "bleu_score": 0.2825239676008666 + }, + "prompt_4": { + "bleu_score": 0.10745492516426794 + }, + "prompt_5": { + "bleu_score": 0.09255945159679849 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.1403671322407855 + }, + "prompt_2": { + "bleu_score": 0.26941164557279623 + }, + "prompt_3": { + "bleu_score": 0.22040386977955975 + }, + "prompt_4": { + "bleu_score": 0.07014608963221754 + }, + "prompt_5": { + "bleu_score": 0.0682898011931918 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.08676849127344202 + }, + "prompt_2": { + "bleu_score": 0.22739596860360647 + }, + "prompt_3": { + "bleu_score": 0.23344993450444032 + }, + "prompt_4": { + "bleu_score": 0.08996197157720255 + }, + "prompt_5": { + "bleu_score": 0.08173731202492444 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.26235277005288704 + }, + "prompt_2": { + "bleu_score": 0.3127233287163815 + }, + "prompt_3": { + "bleu_score": 0.32294249796955077 + }, + "prompt_4": { + "bleu_score": 0.09631041045475984 + }, + "prompt_5": { + "bleu_score": 0.08102231125824294 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5670945157526255 + }, + "prompt_2": { + "accuracy": 0.4224037339556593 + }, + "prompt_3": { + "accuracy": 0.574095682613769 + }, + "prompt_4": { + "accuracy": 0.5682613768961493 + }, + "prompt_5": { + "accuracy": 0.5670945157526255 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5247050411154809, + "category_acc": { + "high_school_european_history": 0.6890243902439024, + "business_ethics": 0.6161616161616161, + "clinical_knowledge": 0.6325757575757576, + "medical_genetics": 0.5959595959595959, + "high_school_us_history": 0.7487684729064039, + "high_school_physics": 0.2733333333333333, + "high_school_world_history": 0.7415254237288136, + "virology": 0.45454545454545453, + "high_school_microeconomics": 0.5063291139240507, + "econometrics": 0.36283185840707965, + "college_computer_science": 0.4444444444444444, + "high_school_biology": 0.6310679611650486, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.398576512455516, + "philosophy": 0.6, + "professional_medicine": 0.5793357933579336, + "nutrition": 0.5573770491803278, + "global_facts": 0.3434343434343434, + "machine_learning": 0.42342342342342343, + "security_studies": 0.5737704918032787, + "public_relations": 0.5137614678899083, + "professional_psychology": 0.5220949263502455, + "prehistory": 0.5944272445820433, + "anatomy": 0.41044776119402987, + "human_sexuality": 0.6384615384615384, + "college_medicine": 0.5406976744186046, + "high_school_government_and_politics": 0.7135416666666666, + "college_chemistry": 0.3838383838383838, + "logical_fallacies": 0.6111111111111112, + "high_school_geography": 0.6446700507614214, + "elementary_mathematics": 0.4880636604774536, + "human_aging": 0.6081081081081081, + "college_mathematics": 0.3838383838383838, + "high_school_psychology": 0.7040441176470589, + "formal_logic": 0.328, + "high_school_statistics": 0.4046511627906977, + "international_law": 0.6916666666666667, + "high_school_mathematics": 0.35315985130111527, + "high_school_computer_science": 0.5454545454545454, + "conceptual_physics": 0.49145299145299143, + "miscellaneous": 0.6265984654731458, + "high_school_chemistry": 0.5148514851485149, + "marketing": 0.7811158798283262, + "professional_law": 0.4116112198303979, + "management": 0.6372549019607843, + "college_physics": 0.36633663366336633, + "jurisprudence": 0.6355140186915887, + "world_religions": 0.6058823529411764, + "sociology": 0.695, + "us_foreign_policy": 0.7373737373737373, + "high_school_macroeconomics": 0.4910025706940874, + "computer_security": 0.6767676767676768, + "moral_scenarios": 0.24384787472035793, + "moral_disputes": 0.5768115942028985, + "electrical_engineering": 0.4652777777777778, + "astronomy": 0.5165562913907285, + "college_biology": 0.6363636363636364 + } + }, + "prompt_2": { + "accuracy": 0.430818734358241, + "category_acc": { + "high_school_european_history": 0.676829268292683, + "business_ethics": 0.4444444444444444, + "clinical_knowledge": 0.5606060606060606, + "medical_genetics": 0.4444444444444444, + "high_school_us_history": 0.7586206896551724, + "high_school_physics": 0.32666666666666666, + "high_school_world_history": 0.711864406779661, + "virology": 0.32727272727272727, + "high_school_microeconomics": 0.48523206751054854, + "econometrics": 0.3008849557522124, + "college_computer_science": 0.35353535353535354, + "high_school_biology": 0.47249190938511326, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.41637010676156583, + "philosophy": 0.4935483870967742, + "professional_medicine": 0.5940959409594095, + "nutrition": 0.4819672131147541, + "global_facts": 0.23232323232323232, + "machine_learning": 0.32432432432432434, + "security_studies": 0.610655737704918, + "public_relations": 0.23853211009174313, + "professional_psychology": 0.3911620294599018, + "prehistory": 0.4674922600619195, + "anatomy": 0.26865671641791045, + "human_sexuality": 0.4307692307692308, + "college_medicine": 0.47674418604651164, + "high_school_government_and_politics": 0.4791666666666667, + "college_chemistry": 0.35353535353535354, + "logical_fallacies": 0.47530864197530864, + "high_school_geography": 0.48223350253807107, + "elementary_mathematics": 0.4854111405835544, + "human_aging": 0.30180180180180183, + "college_mathematics": 0.3939393939393939, + "high_school_psychology": 0.36764705882352944, + "formal_logic": 0.328, + "high_school_statistics": 0.4, + "international_law": 0.6333333333333333, + "high_school_mathematics": 0.3382899628252788, + "high_school_computer_science": 0.5151515151515151, + "conceptual_physics": 0.3888888888888889, + "miscellaneous": 0.3248081841432225, + "high_school_chemistry": 0.3613861386138614, + "marketing": 0.6995708154506438, + "professional_law": 0.4096542726679713, + "management": 0.4117647058823529, + "college_physics": 0.37623762376237624, + "jurisprudence": 0.5794392523364486, + "world_religions": 0.36470588235294116, + "sociology": 0.395, + "us_foreign_policy": 0.6262626262626263, + "high_school_macroeconomics": 0.43958868894601544, + "computer_security": 0.3838383838383838, + "moral_scenarios": 0.26174496644295303, + "moral_disputes": 0.5333333333333333, + "electrical_engineering": 0.4861111111111111, + "astronomy": 0.45695364238410596, + "college_biology": 0.46153846153846156 + } + }, + "prompt_3": { + "accuracy": 0.5510904540579192, + "category_acc": { + "high_school_european_history": 0.6951219512195121, + "business_ethics": 0.5959595959595959, + "clinical_knowledge": 0.6628787878787878, + "medical_genetics": 0.6161616161616161, + "high_school_us_history": 0.7339901477832512, + "high_school_physics": 0.3466666666666667, + "high_school_world_history": 0.7415254237288136, + "virology": 0.41818181818181815, + "high_school_microeconomics": 0.5654008438818565, + "econometrics": 0.3805309734513274, + "college_computer_science": 0.5252525252525253, + "high_school_biology": 0.6440129449838188, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.4234875444839858, + "philosophy": 0.5806451612903226, + "professional_medicine": 0.6199261992619927, + "nutrition": 0.6426229508196721, + "global_facts": 0.40404040404040403, + "machine_learning": 0.46846846846846846, + "security_studies": 0.6065573770491803, + "public_relations": 0.5412844036697247, + "professional_psychology": 0.55810147299509, + "prehistory": 0.631578947368421, + "anatomy": 0.5223880597014925, + "human_sexuality": 0.6538461538461539, + "college_medicine": 0.5872093023255814, + "high_school_government_and_politics": 0.7447916666666666, + "college_chemistry": 0.3838383838383838, + "logical_fallacies": 0.6234567901234568, + "high_school_geography": 0.6802030456852792, + "elementary_mathematics": 0.493368700265252, + "human_aging": 0.5855855855855856, + "college_mathematics": 0.3939393939393939, + "high_school_psychology": 0.7297794117647058, + "formal_logic": 0.392, + "high_school_statistics": 0.4418604651162791, + "international_law": 0.7416666666666667, + "high_school_mathematics": 0.3308550185873606, + "high_school_computer_science": 0.5959595959595959, + "conceptual_physics": 0.5299145299145299, + "miscellaneous": 0.7404092071611253, + "high_school_chemistry": 0.46534653465346537, + "marketing": 0.7854077253218884, + "professional_law": 0.40574037834311805, + "management": 0.7254901960784313, + "college_physics": 0.36633663366336633, + "jurisprudence": 0.7102803738317757, + "world_religions": 0.711764705882353, + "sociology": 0.75, + "us_foreign_policy": 0.797979797979798, + "high_school_macroeconomics": 0.5526992287917738, + "computer_security": 0.6363636363636364, + "moral_scenarios": 0.24272930648769575, + "moral_disputes": 0.6028985507246377, + "electrical_engineering": 0.4583333333333333, + "astronomy": 0.5629139072847682, + "college_biology": 0.6153846153846154 + } + }, + "prompt_4": { + "accuracy": 0.5411512334644262, + "category_acc": { + "high_school_european_history": 0.6585365853658537, + "business_ethics": 0.6464646464646465, + "clinical_knowledge": 0.6136363636363636, + "medical_genetics": 0.6464646464646465, + "high_school_us_history": 0.7339901477832512, + "high_school_physics": 0.4066666666666667, + "high_school_world_history": 0.7161016949152542, + "virology": 0.43636363636363634, + "high_school_microeconomics": 0.6075949367088608, + "econometrics": 0.39823008849557523, + "college_computer_science": 0.48484848484848486, + "high_school_biology": 0.6343042071197411, + "abstract_algebra": 0.3333333333333333, + "professional_accounting": 0.40569395017793597, + "philosophy": 0.6129032258064516, + "professional_medicine": 0.5830258302583026, + "nutrition": 0.5868852459016394, + "global_facts": 0.3838383838383838, + "machine_learning": 0.45045045045045046, + "security_studies": 0.6311475409836066, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.5237315875613748, + "prehistory": 0.5913312693498453, + "anatomy": 0.4626865671641791, + "human_sexuality": 0.676923076923077, + "college_medicine": 0.5406976744186046, + "high_school_government_and_politics": 0.7239583333333334, + "college_chemistry": 0.43434343434343436, + "logical_fallacies": 0.6111111111111112, + "high_school_geography": 0.6802030456852792, + "elementary_mathematics": 0.5411140583554377, + "human_aging": 0.6126126126126126, + "college_mathematics": 0.3434343434343434, + "high_school_psychology": 0.7261029411764706, + "formal_logic": 0.352, + "high_school_statistics": 0.5023255813953489, + "international_law": 0.6916666666666667, + "high_school_mathematics": 0.34572490706319703, + "high_school_computer_science": 0.5353535353535354, + "conceptual_physics": 0.5299145299145299, + "miscellaneous": 0.7007672634271099, + "high_school_chemistry": 0.48514851485148514, + "marketing": 0.776824034334764, + "professional_law": 0.3770384866275277, + "management": 0.7450980392156863, + "college_physics": 0.46534653465346537, + "jurisprudence": 0.5981308411214953, + "world_religions": 0.7294117647058823, + "sociology": 0.77, + "us_foreign_policy": 0.7777777777777778, + "high_school_macroeconomics": 0.5038560411311054, + "computer_security": 0.5959595959595959, + "moral_scenarios": 0.2483221476510067, + "moral_disputes": 0.5826086956521739, + "electrical_engineering": 0.4791666666666667, + "astronomy": 0.5695364238410596, + "college_biology": 0.5944055944055944 + } + }, + "prompt_5": { + "accuracy": 0.5409367179120487, + "category_acc": { + "high_school_european_history": 0.6585365853658537, + "business_ethics": 0.6161616161616161, + "clinical_knowledge": 0.6022727272727273, + "medical_genetics": 0.6363636363636364, + "high_school_us_history": 0.7536945812807881, + "high_school_physics": 0.4, + "high_school_world_history": 0.7330508474576272, + "virology": 0.4666666666666667, + "high_school_microeconomics": 0.5443037974683544, + "econometrics": 0.37168141592920356, + "college_computer_science": 0.48484848484848486, + "high_school_biology": 0.6343042071197411, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.40213523131672596, + "philosophy": 0.6, + "professional_medicine": 0.5867158671586716, + "nutrition": 0.6032786885245902, + "global_facts": 0.3333333333333333, + "machine_learning": 0.45045045045045046, + "security_studies": 0.5983606557377049, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.5155482815057283, + "prehistory": 0.6191950464396285, + "anatomy": 0.5149253731343284, + "human_sexuality": 0.6461538461538462, + "college_medicine": 0.5290697674418605, + "high_school_government_and_politics": 0.7239583333333334, + "college_chemistry": 0.35353535353535354, + "logical_fallacies": 0.6419753086419753, + "high_school_geography": 0.7208121827411168, + "elementary_mathematics": 0.46684350132625996, + "human_aging": 0.6666666666666666, + "college_mathematics": 0.37373737373737376, + "high_school_psychology": 0.7279411764705882, + "formal_logic": 0.312, + "high_school_statistics": 0.40930232558139534, + "international_law": 0.6833333333333333, + "high_school_mathematics": 0.30111524163568776, + "high_school_computer_science": 0.5454545454545454, + "conceptual_physics": 0.5, + "miscellaneous": 0.7404092071611253, + "high_school_chemistry": 0.46534653465346537, + "marketing": 0.7939914163090128, + "professional_law": 0.395955642530985, + "management": 0.6666666666666666, + "college_physics": 0.39603960396039606, + "jurisprudence": 0.6728971962616822, + "world_religions": 0.7294117647058823, + "sociology": 0.71, + "us_foreign_policy": 0.7474747474747475, + "high_school_macroeconomics": 0.48586118251928023, + "computer_security": 0.6767676767676768, + "moral_scenarios": 0.2651006711409396, + "moral_disputes": 0.6202898550724638, + "electrical_engineering": 0.4930555555555556, + "astronomy": 0.5562913907284768, + "college_biology": 0.6083916083916084 + } + } }, - "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "c_eval": { + "prompt_1": { + "accuracy": 0.6708766716196136 + }, + "prompt_2": { + "accuracy": 0.6158989598811293 + }, + "prompt_3": { + "accuracy": 0.5950965824665676 + }, + "prompt_4": { + "accuracy": 0.62481426448737 + }, + "prompt_5": { + "accuracy": 0.5861812778603269 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6805728518057285, + "category_acc": { + "computer_network": 0.5833333333333334, + "operating_system": 0.375, + "computer_architecture": 0.5769230769230769, + "college_programming": 0.6190476190476191, + "college_physics": 0.4166666666666667, + "college_chemistry": 0.5862068965517241, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.40476190476190477, + "metrology_engineer": 0.6896551724137931, + "high_school_mathematics": 0.4782608695652174, + "high_school_physics": 0.75, + "high_school_chemistry": 0.7083333333333334, + "high_school_biology": 0.8333333333333334, + "middle_school_mathematics": 0.8333333333333334, + "middle_school_biology": 0.9615384615384616, + "middle_school_physics": 0.9166666666666666, + "middle_school_chemistry": 1.0, + "veterinary_medicine": 0.7142857142857143, + "college_economics": 0.5666666666666667, + "business_administration": 0.6052631578947368, + "marxism": 0.875, + "mao_zedong_thought": 0.8275862068965517, + "education_science": 0.8235294117647058, + "teacher_qualification": 0.7755102040816326, + "high_school_politics": 0.9583333333333334, + "high_school_geography": 0.7916666666666666, + "middle_school_politics": 0.9615384615384616, + "middle_school_geography": 0.9411764705882353, + "modern_chinese_history": 0.8571428571428571, + "ideological_and_moral_cultivation": 0.9583333333333334, + "logic": 0.4444444444444444, + "law": 0.6896551724137931, + "chinese_language_and_literature": 0.6785714285714286, + "art_studies": 0.6578947368421053, + "professional_tour_guide": 0.7647058823529411, + "legal_professional": 0.6428571428571429, + "high_school_chinese": 0.7916666666666666, + "high_school_history": 0.88, + "middle_school_history": 0.9629629629629629, + "civil_servant": 0.5769230769230769, + "sports_science": 0.75, + "plant_protection": 0.8148148148148148, + "basic_medicine": 0.75, + "clinical_medicine": 0.5925925925925926, + "urban_and_rural_planner": 0.6078431372549019, + "accountant": 0.6666666666666666, + "fire_engineer": 0.6111111111111112, + "environmental_impact_assessment_engineer": 0.6666666666666666, + "tax_accountant": 0.5925925925925926, + "physician": 0.6481481481481481 + } + }, + "prompt_2": { + "accuracy": 0.6170610211706102, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.5416666666666666, + "computer_architecture": 0.5, + "college_programming": 0.6190476190476191, + "college_physics": 0.25, + "college_chemistry": 0.4827586206896552, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.4523809523809524, + "metrology_engineer": 0.5862068965517241, + "high_school_mathematics": 0.43478260869565216, + "high_school_physics": 0.7083333333333334, + "high_school_chemistry": 0.625, + "high_school_biology": 0.7916666666666666, + "middle_school_mathematics": 0.6666666666666666, + "middle_school_biology": 0.9615384615384616, + "middle_school_physics": 0.7916666666666666, + "middle_school_chemistry": 0.72, + "veterinary_medicine": 0.7142857142857143, + "college_economics": 0.6166666666666667, + "business_administration": 0.6052631578947368, + "marxism": 0.8333333333333334, + "mao_zedong_thought": 0.8275862068965517, + "education_science": 0.7352941176470589, + "teacher_qualification": 0.7142857142857143, + "high_school_politics": 0.9583333333333334, + "high_school_geography": 0.75, + "middle_school_politics": 0.8846153846153846, + "middle_school_geography": 0.8823529411764706, + "modern_chinese_history": 0.7142857142857143, + "ideological_and_moral_cultivation": 0.8333333333333334, + "logic": 0.4444444444444444, + "law": 0.5172413793103449, + "chinese_language_and_literature": 0.6071428571428571, + "art_studies": 0.6578947368421053, + "professional_tour_guide": 0.7647058823529411, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.7916666666666666, + "high_school_history": 0.92, + "middle_school_history": 0.9629629629629629, + "civil_servant": 0.5576923076923077, + "sports_science": 0.5833333333333334, + "plant_protection": 0.6296296296296297, + "basic_medicine": 0.75, + "clinical_medicine": 0.5185185185185185, + "urban_and_rural_planner": 0.5490196078431373, + "accountant": 0.5, + "fire_engineer": 0.4722222222222222, + "environmental_impact_assessment_engineer": 0.4722222222222222, + "tax_accountant": 0.42592592592592593, + "physician": 0.6851851851851852 + } + }, + "prompt_3": { + "accuracy": 0.5828144458281445, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.5769230769230769, + "college_programming": 0.6428571428571429, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.4482758620689655, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.047619047619047616, + "electrical_engineer": 0.38095238095238093, + "metrology_engineer": 0.5862068965517241, + "high_school_mathematics": 0.43478260869565216, + "high_school_physics": 0.625, + "high_school_chemistry": 0.5416666666666666, + "high_school_biology": 0.625, + "middle_school_mathematics": 0.5416666666666666, + "middle_school_biology": 0.8846153846153846, + "middle_school_physics": 0.7083333333333334, + "middle_school_chemistry": 0.72, + "veterinary_medicine": 0.6071428571428571, + "college_economics": 0.5833333333333334, + "business_administration": 0.5263157894736842, + "marxism": 0.7916666666666666, + "mao_zedong_thought": 0.7931034482758621, + "education_science": 0.7647058823529411, + "teacher_qualification": 0.7142857142857143, + "high_school_politics": 0.8333333333333334, + "high_school_geography": 0.8333333333333334, + "middle_school_politics": 0.8461538461538461, + "middle_school_geography": 0.8235294117647058, + "modern_chinese_history": 0.6428571428571429, + "ideological_and_moral_cultivation": 0.875, + "logic": 0.4074074074074074, + "law": 0.4482758620689655, + "chinese_language_and_literature": 0.5357142857142857, + "art_studies": 0.6052631578947368, + "professional_tour_guide": 0.7058823529411765, + "legal_professional": 0.5, + "high_school_chinese": 0.5, + "high_school_history": 0.76, + "middle_school_history": 0.7407407407407407, + "civil_servant": 0.5384615384615384, + "sports_science": 0.5833333333333334, + "plant_protection": 0.6296296296296297, + "basic_medicine": 0.75, + "clinical_medicine": 0.5925925925925926, + "urban_and_rural_planner": 0.5490196078431373, + "accountant": 0.46296296296296297, + "fire_engineer": 0.5, + "environmental_impact_assessment_engineer": 0.5277777777777778, + "tax_accountant": 0.46296296296296297, + "physician": 0.5740740740740741 + } + }, + "prompt_4": { + "accuracy": 0.6444582814445828, + "category_acc": { + "computer_network": 0.5416666666666666, + "operating_system": 0.5, + "computer_architecture": 0.6153846153846154, + "college_programming": 0.5952380952380952, + "college_physics": 0.4166666666666667, + "college_chemistry": 0.5172413793103449, + "advanced_mathematics": 0.4166666666666667, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.4523809523809524, + "metrology_engineer": 0.5862068965517241, + "high_school_mathematics": 0.4782608695652174, + "high_school_physics": 0.5416666666666666, + "high_school_chemistry": 0.625, + "high_school_biology": 0.7083333333333334, + "middle_school_mathematics": 0.75, + "middle_school_biology": 0.9615384615384616, + "middle_school_physics": 0.6666666666666666, + "middle_school_chemistry": 0.84, + "veterinary_medicine": 0.6428571428571429, + "college_economics": 0.5166666666666667, + "business_administration": 0.631578947368421, + "marxism": 0.7083333333333334, + "mao_zedong_thought": 0.8275862068965517, + "education_science": 0.8235294117647058, + "teacher_qualification": 0.7551020408163265, + "high_school_politics": 0.9583333333333334, + "high_school_geography": 0.7916666666666666, + "middle_school_politics": 0.9230769230769231, + "middle_school_geography": 0.8823529411764706, + "modern_chinese_history": 0.6785714285714286, + "ideological_and_moral_cultivation": 0.875, + "logic": 0.5185185185185185, + "law": 0.5862068965517241, + "chinese_language_and_literature": 0.6428571428571429, + "art_studies": 0.631578947368421, + "professional_tour_guide": 0.7647058823529411, + "legal_professional": 0.5357142857142857, + "high_school_chinese": 0.7083333333333334, + "high_school_history": 0.84, + "middle_school_history": 0.7777777777777778, + "civil_servant": 0.5769230769230769, + "sports_science": 0.7083333333333334, + "plant_protection": 0.7777777777777778, + "basic_medicine": 0.7916666666666666, + "clinical_medicine": 0.6666666666666666, + "urban_and_rural_planner": 0.6078431372549019, + "accountant": 0.5925925925925926, + "fire_engineer": 0.5833333333333334, + "environmental_impact_assessment_engineer": 0.5833333333333334, + "tax_accountant": 0.5555555555555556, + "physician": 0.6851851851851852 + } + }, + "prompt_5": { + "accuracy": 0.5927770859277709, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.5769230769230769, + "college_programming": 0.5238095238095238, + "college_physics": 0.25, + "college_chemistry": 0.41379310344827586, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.19047619047619047, + "metrology_engineer": 0.4827586206896552, + "high_school_mathematics": 0.391304347826087, + "high_school_physics": 0.6666666666666666, + "high_school_chemistry": 0.7083333333333334, + "high_school_biology": 0.875, + "middle_school_mathematics": 0.625, + "middle_school_biology": 0.9230769230769231, + "middle_school_physics": 0.8333333333333334, + "middle_school_chemistry": 0.92, + "veterinary_medicine": 0.6071428571428571, + "college_economics": 0.4666666666666667, + "business_administration": 0.5, + "marxism": 0.8333333333333334, + "mao_zedong_thought": 0.7241379310344828, + "education_science": 0.6764705882352942, + "teacher_qualification": 0.6326530612244898, + "high_school_politics": 0.9166666666666666, + "high_school_geography": 0.7916666666666666, + "middle_school_politics": 0.8461538461538461, + "middle_school_geography": 0.9411764705882353, + "modern_chinese_history": 0.75, + "ideological_and_moral_cultivation": 0.7083333333333334, + "logic": 0.37037037037037035, + "law": 0.6551724137931034, + "chinese_language_and_literature": 0.5, + "art_studies": 0.631578947368421, + "professional_tour_guide": 0.7352941176470589, + "legal_professional": 0.5357142857142857, + "high_school_chinese": 0.7083333333333334, + "high_school_history": 0.84, + "middle_school_history": 0.9259259259259259, + "civil_servant": 0.5576923076923077, + "sports_science": 0.625, + "plant_protection": 0.5925925925925926, + "basic_medicine": 0.625, + "clinical_medicine": 0.5555555555555556, + "urban_and_rural_planner": 0.6274509803921569, + "accountant": 0.6481481481481481, + "fire_engineer": 0.5, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.5, + "physician": 0.5555555555555556 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7132616487455197 + }, + "prompt_2": { + "accuracy": 0.6774193548387096 + }, + "prompt_3": { + "accuracy": 0.6129032258064516 + }, + "prompt_4": { + "accuracy": 0.6881720430107527 + }, + "prompt_5": { + "accuracy": 0.6630824372759857 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6895182179243654, + "category_acc": { + "agronomy": 0.6035502958579881, + "anatomy": 0.6959459459459459, + "ancient_chinese": 0.3719512195121951, + "arts": 0.875, + "astronomy": 0.42424242424242425, + "business_ethics": 0.6507177033492823, + "chinese_civil_service_exam": 0.64375, + "chinese_driving_rule": 0.9007633587786259, + "chinese_food_culture": 0.6544117647058824, + "chinese_foreign_policy": 0.794392523364486, + "chinese_history": 0.9071207430340558, + "chinese_literature": 0.5686274509803921, + "chinese_teacher_qualification": 0.8491620111731844, + "clinical_knowledge": 0.6582278481012658, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.8130841121495327, + "college_engineering_hydrology": 0.5849056603773585, + "college_law": 0.6666666666666666, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.5943396226415094, + "college_medicine": 0.7435897435897436, + "computer_science": 0.7009803921568627, + "computer_security": 0.8128654970760234, + "conceptual_physics": 0.8299319727891157, + "construction_project_management": 0.5467625899280576, + "economics": 0.6415094339622641, + "education": 0.7177914110429447, + "electrical_engineering": 0.7093023255813954, + "elementary_chinese": 0.7063492063492064, + "elementary_commonsense": 0.6868686868686869, + "elementary_information_and_technology": 0.8529411764705882, + "elementary_mathematics": 0.508695652173913, + "ethnology": 0.6814814814814815, + "food_science": 0.5804195804195804, + "genetics": 0.5397727272727273, + "global_facts": 0.6912751677852349, + "high_school_biology": 0.8698224852071006, + "high_school_chemistry": 0.7803030303030303, + "high_school_geography": 0.7457627118644068, + "high_school_mathematics": 0.4878048780487805, + "high_school_physics": 0.6909090909090909, + "high_school_politics": 0.7412587412587412, + "human_sexuality": 0.6349206349206349, + "international_law": 0.5567567567567567, + "journalism": 0.6046511627906976, + "jurisprudence": 0.7007299270072993, + "legal_and_moral_basis": 0.9485981308411215, + "logical": 0.5609756097560976, + "machine_learning": 0.5409836065573771, + "management": 0.7619047619047619, + "marketing": 0.7222222222222222, + "marxist_theory": 0.8888888888888888, + "modern_chinese": 0.5172413793103449, + "nutrition": 0.6482758620689655, + "philosophy": 0.7142857142857143, + "professional_accounting": 0.8057142857142857, + "professional_law": 0.6113744075829384, + "professional_medicine": 0.6196808510638298, + "professional_psychology": 0.7887931034482759, + "public_relations": 0.6666666666666666, + "security_study": 0.7851851851851852, + "sociology": 0.6769911504424779, + "sports_science": 0.6666666666666666, + "traditional_chinese_medicine": 0.745945945945946, + "virology": 0.7159763313609467, + "world_history": 0.8757763975155279, + "world_religions": 0.7125 + } + }, + "prompt_2": { + "accuracy": 0.6346054222068728, + "category_acc": { + "agronomy": 0.5384615384615384, + "anatomy": 0.6486486486486487, + "ancient_chinese": 0.3048780487804878, + "arts": 0.86875, + "astronomy": 0.40606060606060607, + "business_ethics": 0.5980861244019139, + "chinese_civil_service_exam": 0.56875, + "chinese_driving_rule": 0.8625954198473282, + "chinese_food_culture": 0.6029411764705882, + "chinese_foreign_policy": 0.7850467289719626, + "chinese_history": 0.8637770897832817, + "chinese_literature": 0.5588235294117647, + "chinese_teacher_qualification": 0.8156424581005587, + "clinical_knowledge": 0.4978902953586498, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.7850467289719626, + "college_engineering_hydrology": 0.5943396226415094, + "college_law": 0.5555555555555556, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.5471698113207547, + "college_medicine": 0.6410256410256411, + "computer_science": 0.6421568627450981, + "computer_security": 0.7660818713450293, + "conceptual_physics": 0.7346938775510204, + "construction_project_management": 0.5035971223021583, + "economics": 0.6477987421383647, + "education": 0.6993865030674846, + "electrical_engineering": 0.6744186046511628, + "elementary_chinese": 0.6349206349206349, + "elementary_commonsense": 0.6616161616161617, + "elementary_information_and_technology": 0.8109243697478992, + "elementary_mathematics": 0.43043478260869567, + "ethnology": 0.6370370370370371, + "food_science": 0.5734265734265734, + "genetics": 0.5056818181818182, + "global_facts": 0.6644295302013423, + "high_school_biology": 0.7159763313609467, + "high_school_chemistry": 0.5378787878787878, + "high_school_geography": 0.6779661016949152, + "high_school_mathematics": 0.4329268292682927, + "high_school_physics": 0.5363636363636364, + "high_school_politics": 0.7482517482517482, + "human_sexuality": 0.6031746031746031, + "international_law": 0.5027027027027027, + "journalism": 0.5755813953488372, + "jurisprudence": 0.635036496350365, + "legal_and_moral_basis": 0.9299065420560748, + "logical": 0.45528455284552843, + "machine_learning": 0.5081967213114754, + "management": 0.7714285714285715, + "marketing": 0.6944444444444444, + "marxist_theory": 0.8571428571428571, + "modern_chinese": 0.45689655172413796, + "nutrition": 0.593103448275862, + "philosophy": 0.6857142857142857, + "professional_accounting": 0.68, + "professional_law": 0.5023696682464455, + "professional_medicine": 0.5, + "professional_psychology": 0.7758620689655172, + "public_relations": 0.6264367816091954, + "security_study": 0.725925925925926, + "sociology": 0.6415929203539823, + "sports_science": 0.6424242424242425, + "traditional_chinese_medicine": 0.654054054054054, + "virology": 0.6390532544378699, + "world_history": 0.8633540372670807, + "world_religions": 0.7 + } + }, + "prompt_3": { + "accuracy": 0.6191504058021067, + "category_acc": { + "agronomy": 0.514792899408284, + "anatomy": 0.5945945945945946, + "ancient_chinese": 0.3475609756097561, + "arts": 0.84375, + "astronomy": 0.40606060606060607, + "business_ethics": 0.5741626794258373, + "chinese_civil_service_exam": 0.475, + "chinese_driving_rule": 0.8931297709923665, + "chinese_food_culture": 0.6176470588235294, + "chinese_foreign_policy": 0.7663551401869159, + "chinese_history": 0.8328173374613003, + "chinese_literature": 0.47058823529411764, + "chinese_teacher_qualification": 0.8100558659217877, + "clinical_knowledge": 0.6118143459915611, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.8130841121495327, + "college_engineering_hydrology": 0.5377358490566038, + "college_law": 0.5925925925925926, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.5188679245283019, + "college_medicine": 0.6886446886446886, + "computer_science": 0.6764705882352942, + "computer_security": 0.7192982456140351, + "conceptual_physics": 0.6802721088435374, + "construction_project_management": 0.4748201438848921, + "economics": 0.610062893081761, + "education": 0.6441717791411042, + "electrical_engineering": 0.6569767441860465, + "elementary_chinese": 0.5912698412698413, + "elementary_commonsense": 0.6363636363636364, + "elementary_information_and_technology": 0.7899159663865546, + "elementary_mathematics": 0.4782608695652174, + "ethnology": 0.5851851851851851, + "food_science": 0.5734265734265734, + "genetics": 0.5284090909090909, + "global_facts": 0.6510067114093959, + "high_school_biology": 0.5798816568047337, + "high_school_chemistry": 0.4696969696969697, + "high_school_geography": 0.6779661016949152, + "high_school_mathematics": 0.40853658536585363, + "high_school_physics": 0.509090909090909, + "high_school_politics": 0.6923076923076923, + "human_sexuality": 0.5793650793650794, + "international_law": 0.5297297297297298, + "journalism": 0.563953488372093, + "jurisprudence": 0.5985401459854015, + "legal_and_moral_basis": 0.9018691588785047, + "logical": 0.4796747967479675, + "machine_learning": 0.4672131147540984, + "management": 0.7523809523809524, + "marketing": 0.65, + "marxist_theory": 0.798941798941799, + "modern_chinese": 0.4051724137931034, + "nutrition": 0.6068965517241379, + "philosophy": 0.6666666666666666, + "professional_accounting": 0.68, + "professional_law": 0.5118483412322274, + "professional_medicine": 0.550531914893617, + "professional_psychology": 0.7629310344827587, + "public_relations": 0.6149425287356322, + "security_study": 0.6962962962962963, + "sociology": 0.6106194690265486, + "sports_science": 0.6060606060606061, + "traditional_chinese_medicine": 0.654054054054054, + "virology": 0.6745562130177515, + "world_history": 0.7763975155279503, + "world_religions": 0.65625 + } + }, + "prompt_4": { + "accuracy": 0.6460024175444655, + "category_acc": { + "agronomy": 0.5266272189349113, + "anatomy": 0.6148648648648649, + "ancient_chinese": 0.36585365853658536, + "arts": 0.83125, + "astronomy": 0.42424242424242425, + "business_ethics": 0.6267942583732058, + "chinese_civil_service_exam": 0.5375, + "chinese_driving_rule": 0.8625954198473282, + "chinese_food_culture": 0.6176470588235294, + "chinese_foreign_policy": 0.7663551401869159, + "chinese_history": 0.8452012383900929, + "chinese_literature": 0.4852941176470588, + "chinese_teacher_qualification": 0.8491620111731844, + "clinical_knowledge": 0.6286919831223629, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.7850467289719626, + "college_engineering_hydrology": 0.5377358490566038, + "college_law": 0.6481481481481481, + "college_mathematics": 0.29523809523809524, + "college_medical_statistics": 0.5377358490566038, + "college_medicine": 0.7032967032967034, + "computer_science": 0.6323529411764706, + "computer_security": 0.7719298245614035, + "conceptual_physics": 0.7278911564625851, + "construction_project_management": 0.5323741007194245, + "economics": 0.5786163522012578, + "education": 0.6625766871165644, + "electrical_engineering": 0.6395348837209303, + "elementary_chinese": 0.6626984126984127, + "elementary_commonsense": 0.6666666666666666, + "elementary_information_and_technology": 0.8025210084033614, + "elementary_mathematics": 0.4826086956521739, + "ethnology": 0.6296296296296297, + "food_science": 0.5944055944055944, + "genetics": 0.5, + "global_facts": 0.6644295302013423, + "high_school_biology": 0.7751479289940828, + "high_school_chemistry": 0.6363636363636364, + "high_school_geography": 0.7033898305084746, + "high_school_mathematics": 0.4451219512195122, + "high_school_physics": 0.6181818181818182, + "high_school_politics": 0.7132867132867133, + "human_sexuality": 0.5634920634920635, + "international_law": 0.5243243243243243, + "journalism": 0.563953488372093, + "jurisprudence": 0.6739659367396593, + "legal_and_moral_basis": 0.9158878504672897, + "logical": 0.5121951219512195, + "machine_learning": 0.5327868852459017, + "management": 0.7571428571428571, + "marketing": 0.6666666666666666, + "marxist_theory": 0.8306878306878307, + "modern_chinese": 0.5258620689655172, + "nutrition": 0.6344827586206897, + "philosophy": 0.6571428571428571, + "professional_accounting": 0.7828571428571428, + "professional_law": 0.5545023696682464, + "professional_medicine": 0.5824468085106383, + "professional_psychology": 0.7586206896551724, + "public_relations": 0.6494252873563219, + "security_study": 0.7111111111111111, + "sociology": 0.6106194690265486, + "sports_science": 0.6424242424242425, + "traditional_chinese_medicine": 0.6756756756756757, + "virology": 0.6863905325443787, + "world_history": 0.8136645962732919, + "world_religions": 0.64375 + } + }, + "prompt_5": { + "accuracy": 0.6078397513382835, + "category_acc": { + "agronomy": 0.5443786982248521, + "anatomy": 0.49324324324324326, + "ancient_chinese": 0.38414634146341464, + "arts": 0.79375, + "astronomy": 0.37575757575757573, + "business_ethics": 0.5311004784688995, + "chinese_civil_service_exam": 0.575, + "chinese_driving_rule": 0.7480916030534351, + "chinese_food_culture": 0.5735294117647058, + "chinese_foreign_policy": 0.7570093457943925, + "chinese_history": 0.8018575851393189, + "chinese_literature": 0.5, + "chinese_teacher_qualification": 0.7988826815642458, + "clinical_knowledge": 0.5443037974683544, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.719626168224299, + "college_engineering_hydrology": 0.44339622641509435, + "college_law": 0.4722222222222222, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.5754716981132075, + "college_medicine": 0.5567765567765568, + "computer_science": 0.6225490196078431, + "computer_security": 0.7602339181286549, + "conceptual_physics": 0.8503401360544217, + "construction_project_management": 0.4460431654676259, + "economics": 0.610062893081761, + "education": 0.6196319018404908, + "electrical_engineering": 0.6162790697674418, + "elementary_chinese": 0.6984126984126984, + "elementary_commonsense": 0.5959595959595959, + "elementary_information_and_technology": 0.8067226890756303, + "elementary_mathematics": 0.4826086956521739, + "ethnology": 0.6074074074074074, + "food_science": 0.5034965034965035, + "genetics": 0.48863636363636365, + "global_facts": 0.6442953020134228, + "high_school_biology": 0.7810650887573964, + "high_school_chemistry": 0.6590909090909091, + "high_school_geography": 0.6610169491525424, + "high_school_mathematics": 0.38414634146341464, + "high_school_physics": 0.5727272727272728, + "high_school_politics": 0.7272727272727273, + "human_sexuality": 0.5396825396825397, + "international_law": 0.4918918918918919, + "journalism": 0.5058139534883721, + "jurisprudence": 0.6399026763990268, + "legal_and_moral_basis": 0.9018691588785047, + "logical": 0.5203252032520326, + "machine_learning": 0.5491803278688525, + "management": 0.7095238095238096, + "marketing": 0.6277777777777778, + "marxist_theory": 0.8148148148148148, + "modern_chinese": 0.5689655172413793, + "nutrition": 0.5793103448275863, + "philosophy": 0.6095238095238096, + "professional_accounting": 0.6914285714285714, + "professional_law": 0.5450236966824644, + "professional_medicine": 0.4946808510638298, + "professional_psychology": 0.6896551724137931, + "public_relations": 0.5287356321839081, + "security_study": 0.6888888888888889, + "sociology": 0.588495575221239, + "sports_science": 0.5454545454545454, + "traditional_chinese_medicine": 0.6108108108108108, + "virology": 0.5621301775147929, + "world_history": 0.7391304347826086, + "world_religions": 0.61875 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.3333333333333333 + }, + "prompt_2": { + "accuracy": 0.3939393939393939 + }, + "prompt_3": { + "accuracy": 0.36363636363636365 + }, + "prompt_4": { + "accuracy": 0.36363636363636365 + }, + "prompt_5": { + "accuracy": 0.24242424242424243 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.39090909090909093 + }, + "prompt_2": { + "accuracy": 0.42954545454545456 + }, + "prompt_3": { + "accuracy": 0.4090909090909091 + }, + "prompt_4": { + "accuracy": 0.32045454545454544 + }, + "prompt_5": { + "accuracy": 0.33181818181818185 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.4976271186440678 + }, + "prompt_2": { + "accuracy": 0.49491525423728816 + }, + "prompt_3": { + "accuracy": 0.5145762711864407 + }, + "prompt_4": { + "accuracy": 0.5 + }, + "prompt_5": { + "accuracy": 0.4888135593220339 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8904263275991025 + }, + "prompt_2": { + "accuracy": 0.8799551234106208 + }, + "prompt_3": { + "accuracy": 0.8881824981301422 + }, + "prompt_4": { + "accuracy": 0.8818249813014211 + }, + "prompt_5": { + "accuracy": 0.8406881077038145 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8579127878490935 + }, + "prompt_2": { + "accuracy": 0.8755512003919648 + }, + "prompt_3": { + "accuracy": 0.8873101420872122 + }, + "prompt_4": { + "accuracy": 0.8603625673689368 + }, + "prompt_5": { + "accuracy": 0.8510534051935326 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.3173244890558025, + "rouge2": 0.12276701375409116, + "rougeL": 0.23933387535450873, + "avg_rouge": 0.2264751260548008 + }, + "prompt_2": { + "rouge1": 0.3733921267158601, + "rouge2": 0.14873897061348443, + "rougeL": 0.28973747198256183, + "avg_rouge": 0.2706228564373021 + }, + "prompt_3": { + "rouge1": 0.3369060638939358, + "rouge2": 0.13047468190390468, + "rougeL": 0.25785781169798827, + "avg_rouge": 0.24174618583194293 + }, + "prompt_4": { + "rouge1": 0.36057607993213053, + "rouge2": 0.14133748351769226, + "rougeL": 0.279001046468824, + "avg_rouge": 0.26030486997288227 + }, + "prompt_5": { + "rouge1": 0.3646896376375478, + "rouge2": 0.14159313833442183, + "rougeL": 0.2802971112423251, + "avg_rouge": 0.2621932957380983 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.2034139875913786, + "rouge2": 0.05931771136166532, + "rougeL": 0.1516602223561059, + "avg_rouge": 0.13813064043638326 + }, + "prompt_2": { + "rouge1": 0.20872060885353214, + "rouge2": 0.06099702619280877, + "rougeL": 0.1547822063677178, + "avg_rouge": 0.14149994713801958 + }, + "prompt_3": { + "rouge1": 0.21635782408481302, + "rouge2": 0.06231398905593447, + "rougeL": 0.1596578498817604, + "avg_rouge": 0.1461098876741693 + }, + "prompt_4": { + "rouge1": 0.20745526883072773, + "rouge2": 0.05956384398797949, + "rougeL": 0.15265879380970593, + "avg_rouge": 0.13989263554280437 + }, + "prompt_5": { + "rouge1": 0.20009960677707003, + "rouge2": 0.054725430473660366, + "rougeL": 0.15077180792870118, + "avg_rouge": 0.13519894839314386 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7821100917431193 + }, + "prompt_2": { + "accuracy": 0.9059633027522935 + }, + "prompt_3": { + "accuracy": 0.9162844036697247 + }, + "prompt_4": { + "accuracy": 0.7121559633027523 + }, + "prompt_5": { + "accuracy": 0.9323394495412844 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7823585810162992 + }, + "prompt_2": { + "accuracy": 0.7593480345158198 + }, + "prompt_3": { + "accuracy": 0.7785234899328859 + }, + "prompt_4": { + "accuracy": 0.7833173537871524 + }, + "prompt_5": { + "accuracy": 0.7986577181208053 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.697 + }, + "prompt_2": { + "accuracy": 0.758 + }, + "prompt_3": { + "accuracy": 0.745 + }, + "prompt_4": { + "accuracy": 0.76 + }, + "prompt_5": { + "accuracy": 0.7065 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6285 + }, + "prompt_2": { + "accuracy": 0.6485 + }, + "prompt_3": { + "accuracy": 0.6255 + }, + "prompt_4": { + "accuracy": 0.636 + }, + "prompt_5": { + "accuracy": 0.618 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.588 + }, + "prompt_2": { + "accuracy": 0.6255 + }, + "prompt_3": { + "accuracy": 0.6275 + }, + "prompt_4": { + "accuracy": 0.7035 + }, + "prompt_5": { + "accuracy": 0.5555 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5492957746478874 + }, + "prompt_2": { + "accuracy": 0.5915492957746479 + }, + "prompt_3": { + "accuracy": 0.5211267605633803 + }, + "prompt_4": { + "accuracy": 0.4788732394366197 + }, + "prompt_5": { + "accuracy": 0.5492957746478874 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7509025270758123 + }, + "prompt_2": { + "accuracy": 0.8303249097472925 + }, + "prompt_3": { + "accuracy": 0.7689530685920578 + }, + "prompt_4": { + "accuracy": 0.6498194945848376 + }, + "prompt_5": { + "accuracy": 0.6787003610108303 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7230392156862745 + }, + "prompt_2": { + "accuracy": 0.7475490196078431 + }, + "prompt_3": { + "accuracy": 0.7107843137254902 + }, + "prompt_4": { + "accuracy": 0.6911764705882353 + }, + "prompt_5": { + "accuracy": 0.6936274509803921 + } } }, "five_shot": { @@ -15214,235 +135091,3250 @@ "model_link": "https://huggingface.co/Qwen/Qwen1.5-7B-Chat", "zero_shot": { "cross_mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.5495238095238095, + "language_acc": { + "Vietnamese": 0.54, + "Malay": 0.38666666666666666, + "Filipino": 0.48, + "Indonesian": 0.5666666666666667, + "Chinese": 0.64, + "Spanish": 0.56, + "English": 0.6733333333333333 + }, + "consistency_score_2": 0.5247619047619048, + "consistency_score_3": 0.3497142857142857, + "consistency_score_4": 0.2558095238095238, + "consistency_score_5": 0.19619047619047617, + "consistency_score_6": 0.15523809523809523, + "consistency_score_7": 0.12666666666666668, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.5066666666666667, + "Vietnamese,Filipino": 0.44, + "Vietnamese,Indonesian": 0.54, + "Vietnamese,Chinese": 0.5533333333333333, + "Vietnamese,Spanish": 0.5266666666666666, + "Vietnamese,English": 0.5666666666666667, + "Malay,Filipino": 0.49333333333333335, + "Malay,Indonesian": 0.5666666666666667, + "Malay,Chinese": 0.44666666666666666, + "Malay,Spanish": 0.4666666666666667, + "Malay,English": 0.49333333333333335, + "Filipino,Indonesian": 0.49333333333333335, + "Filipino,Chinese": 0.47333333333333333, + "Filipino,Spanish": 0.46, + "Filipino,English": 0.5066666666666667, + "Indonesian,Chinese": 0.5933333333333334, + "Indonesian,Spanish": 0.5133333333333333, + "Indonesian,English": 0.58, + "Chinese,Spanish": 0.5333333333333333, + "Chinese,English": 0.6133333333333333, + "Spanish,English": 0.6533333333333333 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.3, + "Vietnamese,Malay,Indonesian": 0.38666666666666666, + "Vietnamese,Malay,Chinese": 0.32, + "Vietnamese,Malay,Spanish": 0.34, + "Vietnamese,Malay,English": 0.35333333333333333, + "Vietnamese,Filipino,Indonesian": 0.30666666666666664, + "Vietnamese,Filipino,Chinese": 0.2866666666666667, + "Vietnamese,Filipino,Spanish": 0.29333333333333333, + "Vietnamese,Filipino,English": 0.32, + "Vietnamese,Indonesian,Chinese": 0.38666666666666666, + "Vietnamese,Indonesian,Spanish": 0.34, + "Vietnamese,Indonesian,English": 0.4, + "Vietnamese,Chinese,Spanish": 0.35333333333333333, + "Vietnamese,Chinese,English": 0.4066666666666667, + "Vietnamese,Spanish,English": 0.41333333333333333, + "Malay,Filipino,Indonesian": 0.34, + "Malay,Filipino,Chinese": 0.3, + "Malay,Filipino,Spanish": 0.30666666666666664, + "Malay,Filipino,English": 0.32666666666666666, + "Malay,Indonesian,Chinese": 0.36666666666666664, + "Malay,Indonesian,Spanish": 0.32666666666666666, + "Malay,Indonesian,English": 0.38, + "Malay,Chinese,Spanish": 0.31333333333333335, + "Malay,Chinese,English": 0.32666666666666666, + "Malay,Spanish,English": 0.36666666666666664, + "Filipino,Indonesian,Chinese": 0.32666666666666666, + "Filipino,Indonesian,Spanish": 0.30666666666666664, + "Filipino,Indonesian,English": 0.34, + "Filipino,Chinese,Spanish": 0.32, + "Filipino,Chinese,English": 0.3466666666666667, + "Filipino,Spanish,English": 0.37333333333333335, + "Indonesian,Chinese,Spanish": 0.38, + "Indonesian,Chinese,English": 0.42, + "Indonesian,Spanish,English": 0.41333333333333333, + "Chinese,Spanish,English": 0.4533333333333333 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.24666666666666667, + "Vietnamese,Malay,Filipino,Chinese": 0.20666666666666667, + "Vietnamese,Malay,Filipino,Spanish": 0.22, + "Vietnamese,Malay,Filipino,English": 0.24, + "Vietnamese,Malay,Indonesian,Chinese": 0.2866666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.2733333333333333, + "Vietnamese,Malay,Indonesian,English": 0.31333333333333335, + "Vietnamese,Malay,Chinese,Spanish": 0.24, + "Vietnamese,Malay,Chinese,English": 0.26666666666666666, + "Vietnamese,Malay,Spanish,English": 0.28, + "Vietnamese,Filipino,Indonesian,Chinese": 0.22, + "Vietnamese,Filipino,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Filipino,Indonesian,English": 0.24, + "Vietnamese,Filipino,Chinese,Spanish": 0.21333333333333335, + "Vietnamese,Filipino,Chinese,English": 0.23333333333333334, + "Vietnamese,Filipino,Spanish,English": 0.24, + "Vietnamese,Indonesian,Chinese,Spanish": 0.2733333333333333, + "Vietnamese,Indonesian,Chinese,English": 0.30666666666666664, + "Vietnamese,Indonesian,Spanish,English": 0.29333333333333333, + "Vietnamese,Chinese,Spanish,English": 0.31333333333333335, + "Malay,Filipino,Indonesian,Chinese": 0.24666666666666667, + "Malay,Filipino,Indonesian,Spanish": 0.22666666666666666, + "Malay,Filipino,Indonesian,English": 0.25333333333333335, + "Malay,Filipino,Chinese,Spanish": 0.21333333333333335, + "Malay,Filipino,Chinese,English": 0.22, + "Malay,Filipino,Spanish,English": 0.25333333333333335, + "Malay,Indonesian,Chinese,Spanish": 0.26, + "Malay,Indonesian,Chinese,English": 0.28, + "Malay,Indonesian,Spanish,English": 0.28, + "Malay,Chinese,Spanish,English": 0.2733333333333333, + "Filipino,Indonesian,Chinese,Spanish": 0.24, + "Filipino,Indonesian,Chinese,English": 0.25333333333333335, + "Filipino,Indonesian,Spanish,English": 0.24666666666666667, + "Filipino,Chinese,Spanish,English": 0.2733333333333333, + "Indonesian,Chinese,Spanish,English": 0.32 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.18, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.18, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.20666666666666667, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.16, + "Vietnamese,Malay,Filipino,Chinese,English": 0.17333333333333334, + "Vietnamese,Malay,Filipino,Spanish,English": 0.18666666666666668, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.21333333333333335, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.24, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.24, + "Vietnamese,Malay,Chinese,Spanish,English": 0.22, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.16666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.18666666666666668, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.17333333333333334, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.18666666666666668, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.24, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.18, + "Malay,Filipino,Indonesian,Chinese,English": 0.18666666666666668, + "Malay,Filipino,Indonesian,Spanish,English": 0.18666666666666668, + "Malay,Filipino,Chinese,Spanish,English": 0.18666666666666668, + "Malay,Indonesian,Chinese,Spanish,English": 0.22666666666666666, + "Filipino,Indonesian,Chinese,Spanish,English": 0.2 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.14, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.15333333333333332, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.15333333333333332, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.14666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.19333333333333333, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.14666666666666667, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.15333333333333332 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.12666666666666668 + } + }, + "AC3_2": 0.5368574805309113, + "AC3_3": 0.42742034067931217, + "AC3_4": 0.34910618547721656, + "AC3_5": 0.28914918198397044, + "AC3_6": 0.24208751605316584, + "AC3_7": 0.20587793424185374 + }, + "prompt_2": { + "overall_acc": 0.5076190476190476, + "language_acc": { + "Vietnamese": 0.4533333333333333, + "Malay": 0.4066666666666667, + "Filipino": 0.3933333333333333, + "Indonesian": 0.46, + "Chinese": 0.5933333333333334, + "Spanish": 0.5733333333333334, + "English": 0.6733333333333333 + }, + "consistency_score_2": 0.4723809523809524, + "consistency_score_3": 0.289904761904762, + "consistency_score_4": 0.19790476190476183, + "consistency_score_5": 0.14285714285714288, + "consistency_score_6": 0.10666666666666666, + "consistency_score_7": 0.08, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.41333333333333333, + "Vietnamese,Filipino": 0.36, + "Vietnamese,Indonesian": 0.4533333333333333, + "Vietnamese,Chinese": 0.43333333333333335, + "Vietnamese,Spanish": 0.5133333333333333, + "Vietnamese,English": 0.5333333333333333, + "Malay,Filipino": 0.36666666666666664, + "Malay,Indonesian": 0.5, + "Malay,Chinese": 0.41333333333333333, + "Malay,Spanish": 0.4666666666666667, + "Malay,English": 0.44, + "Filipino,Indonesian": 0.4266666666666667, + "Filipino,Chinese": 0.42, + "Filipino,Spanish": 0.4, + "Filipino,English": 0.41333333333333333, + "Indonesian,Chinese": 0.5266666666666666, + "Indonesian,Spanish": 0.5266666666666666, + "Indonesian,English": 0.5066666666666667, + "Chinese,Spanish": 0.5733333333333334, + "Chinese,English": 0.62, + "Spanish,English": 0.6133333333333333 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.16, + "Vietnamese,Malay,Indonesian": 0.2733333333333333, + "Vietnamese,Malay,Chinese": 0.26, + "Vietnamese,Malay,Spanish": 0.31333333333333335, + "Vietnamese,Malay,English": 0.30666666666666664, + "Vietnamese,Filipino,Indonesian": 0.2, + "Vietnamese,Filipino,Chinese": 0.22666666666666666, + "Vietnamese,Filipino,Spanish": 0.24666666666666667, + "Vietnamese,Filipino,English": 0.24666666666666667, + "Vietnamese,Indonesian,Chinese": 0.28, + "Vietnamese,Indonesian,Spanish": 0.32, + "Vietnamese,Indonesian,English": 0.34, + "Vietnamese,Chinese,Spanish": 0.3333333333333333, + "Vietnamese,Chinese,English": 0.36666666666666664, + "Vietnamese,Spanish,English": 0.38, + "Malay,Filipino,Indonesian": 0.24666666666666667, + "Malay,Filipino,Chinese": 0.21333333333333335, + "Malay,Filipino,Spanish": 0.19333333333333333, + "Malay,Filipino,English": 0.18666666666666668, + "Malay,Indonesian,Chinese": 0.31333333333333335, + "Malay,Indonesian,Spanish": 0.31333333333333335, + "Malay,Indonesian,English": 0.3, + "Malay,Chinese,Spanish": 0.31333333333333335, + "Malay,Chinese,English": 0.2866666666666667, + "Malay,Spanish,English": 0.3466666666666667, + "Filipino,Indonesian,Chinese": 0.26, + "Filipino,Indonesian,Spanish": 0.24, + "Filipino,Indonesian,English": 0.22666666666666666, + "Filipino,Chinese,Spanish": 0.28, + "Filipino,Chinese,English": 0.3, + "Filipino,Spanish,English": 0.2866666666666667, + "Indonesian,Chinese,Spanish": 0.37333333333333335, + "Indonesian,Chinese,English": 0.38, + "Indonesian,Spanish,English": 0.38666666666666666, + "Chinese,Spanish,English": 0.44666666666666666 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.11333333333333333, + "Vietnamese,Malay,Filipino,Chinese": 0.13333333333333333, + "Vietnamese,Malay,Filipino,Spanish": 0.12666666666666668, + "Vietnamese,Malay,Filipino,English": 0.13333333333333333, + "Vietnamese,Malay,Indonesian,Chinese": 0.20666666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Indonesian,English": 0.23333333333333334, + "Vietnamese,Malay,Chinese,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Chinese,English": 0.22666666666666666, + "Vietnamese,Malay,Spanish,English": 0.25333333333333335, + "Vietnamese,Filipino,Indonesian,Chinese": 0.15333333333333332, + "Vietnamese,Filipino,Indonesian,Spanish": 0.16666666666666666, + "Vietnamese,Filipino,Indonesian,English": 0.15333333333333332, + "Vietnamese,Filipino,Chinese,Spanish": 0.18666666666666668, + "Vietnamese,Filipino,Chinese,English": 0.20666666666666667, + "Vietnamese,Filipino,Spanish,English": 0.18666666666666668, + "Vietnamese,Indonesian,Chinese,Spanish": 0.22, + "Vietnamese,Indonesian,Chinese,English": 0.24666666666666667, + "Vietnamese,Indonesian,Spanish,English": 0.26, + "Vietnamese,Chinese,Spanish,English": 0.29333333333333333, + "Malay,Filipino,Indonesian,Chinese": 0.16666666666666666, + "Malay,Filipino,Indonesian,Spanish": 0.14666666666666667, + "Malay,Filipino,Indonesian,English": 0.14, + "Malay,Filipino,Chinese,Spanish": 0.15333333333333332, + "Malay,Filipino,Chinese,English": 0.14, + "Malay,Filipino,Spanish,English": 0.15333333333333332, + "Malay,Indonesian,Chinese,Spanish": 0.24666666666666667, + "Malay,Indonesian,Chinese,English": 0.22666666666666666, + "Malay,Indonesian,Spanish,English": 0.25333333333333335, + "Malay,Chinese,Spanish,English": 0.25333333333333335, + "Filipino,Indonesian,Chinese,Spanish": 0.18666666666666668, + "Filipino,Indonesian,Chinese,English": 0.19333333333333333, + "Filipino,Indonesian,Spanish,English": 0.18, + "Filipino,Chinese,Spanish,English": 0.22666666666666666, + "Indonesian,Chinese,Spanish,English": 0.30666666666666664 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.10666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.1, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.1, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.11333333333333333, + "Vietnamese,Malay,Filipino,Chinese,English": 0.12, + "Vietnamese,Malay,Filipino,Spanish,English": 0.10666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.18, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.18, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.2, + "Vietnamese,Malay,Chinese,Spanish,English": 0.2, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.12666666666666668, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.14, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.12666666666666668, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.16666666666666666, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.2, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.12666666666666668, + "Malay,Filipino,Indonesian,Chinese,English": 0.11333333333333333, + "Malay,Filipino,Indonesian,Spanish,English": 0.12, + "Malay,Filipino,Chinese,Spanish,English": 0.12, + "Malay,Indonesian,Chinese,Spanish,English": 0.2, + "Filipino,Indonesian,Chinese,Spanish,English": 0.15333333333333332 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.09333333333333334, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.09333333333333334, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.08666666666666667, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.1, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.16, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.11333333333333333, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.1 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.08 + } + }, + "AC3_2": 0.4893664676255703, + "AC3_3": 0.36904523065646827, + "AC3_4": 0.28478196025995417, + "AC3_5": 0.22296590667979924, + "AC3_6": 0.17628940565605625, + "AC3_7": 0.1382171798792337 + }, + "prompt_3": { + "overall_acc": 0.5047619047619049, + "language_acc": { + "Vietnamese": 0.5, + "Malay": 0.44666666666666666, + "Filipino": 0.4266666666666667, + "Indonesian": 0.49333333333333335, + "Chinese": 0.6066666666666667, + "Spanish": 0.46, + "English": 0.6 + }, + "consistency_score_2": 0.4803174603174602, + "consistency_score_3": 0.30095238095238097, + "consistency_score_4": 0.21447619047619051, + "consistency_score_5": 0.16317460317460317, + "consistency_score_6": 0.1276190476190476, + "consistency_score_7": 0.1, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.44666666666666666, + "Vietnamese,Filipino": 0.4266666666666667, + "Vietnamese,Indonesian": 0.4866666666666667, + "Vietnamese,Chinese": 0.5133333333333333, + "Vietnamese,Spanish": 0.4866666666666667, + "Vietnamese,English": 0.5466666666666666, + "Malay,Filipino": 0.4, + "Malay,Indonesian": 0.5666666666666667, + "Malay,Chinese": 0.4866666666666667, + "Malay,Spanish": 0.42, + "Malay,English": 0.4533333333333333, + "Filipino,Indonesian": 0.36666666666666664, + "Filipino,Chinese": 0.42, + "Filipino,Spanish": 0.4266666666666667, + "Filipino,English": 0.44666666666666666, + "Indonesian,Chinese": 0.56, + "Indonesian,Spanish": 0.46, + "Indonesian,English": 0.49333333333333335, + "Chinese,Spanish": 0.52, + "Chinese,English": 0.6, + "Spanish,English": 0.56 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.23333333333333334, + "Vietnamese,Malay,Indonesian": 0.31333333333333335, + "Vietnamese,Malay,Chinese": 0.30666666666666664, + "Vietnamese,Malay,Spanish": 0.28, + "Vietnamese,Malay,English": 0.3, + "Vietnamese,Filipino,Indonesian": 0.23333333333333334, + "Vietnamese,Filipino,Chinese": 0.26, + "Vietnamese,Filipino,Spanish": 0.25333333333333335, + "Vietnamese,Filipino,English": 0.29333333333333333, + "Vietnamese,Indonesian,Chinese": 0.36666666666666664, + "Vietnamese,Indonesian,Spanish": 0.31333333333333335, + "Vietnamese,Indonesian,English": 0.34, + "Vietnamese,Chinese,Spanish": 0.34, + "Vietnamese,Chinese,English": 0.37333333333333335, + "Vietnamese,Spanish,English": 0.35333333333333333, + "Malay,Filipino,Indonesian": 0.26, + "Malay,Filipino,Chinese": 0.26, + "Malay,Filipino,Spanish": 0.21333333333333335, + "Malay,Filipino,English": 0.25333333333333335, + "Malay,Indonesian,Chinese": 0.37333333333333335, + "Malay,Indonesian,Spanish": 0.30666666666666664, + "Malay,Indonesian,English": 0.32, + "Malay,Chinese,Spanish": 0.2866666666666667, + "Malay,Chinese,English": 0.31333333333333335, + "Malay,Spanish,English": 0.3, + "Filipino,Indonesian,Chinese": 0.2733333333333333, + "Filipino,Indonesian,Spanish": 0.24, + "Filipino,Indonesian,English": 0.24, + "Filipino,Chinese,Spanish": 0.2733333333333333, + "Filipino,Chinese,English": 0.3, + "Filipino,Spanish,English": 0.29333333333333333, + "Indonesian,Chinese,Spanish": 0.35333333333333333, + "Indonesian,Chinese,English": 0.37333333333333335, + "Indonesian,Spanish,English": 0.3466666666666667, + "Chinese,Spanish,English": 0.3933333333333333 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,Malay,Filipino,Chinese": 0.18666666666666668, + "Vietnamese,Malay,Filipino,Spanish": 0.16666666666666666, + "Vietnamese,Malay,Filipino,English": 0.18, + "Vietnamese,Malay,Indonesian,Chinese": 0.25333333333333335, + "Vietnamese,Malay,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Indonesian,English": 0.22666666666666666, + "Vietnamese,Malay,Chinese,Spanish": 0.21333333333333335, + "Vietnamese,Malay,Chinese,English": 0.22666666666666666, + "Vietnamese,Malay,Spanish,English": 0.22666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese": 0.2, + "Vietnamese,Filipino,Indonesian,Spanish": 0.17333333333333334, + "Vietnamese,Filipino,Indonesian,English": 0.18, + "Vietnamese,Filipino,Chinese,Spanish": 0.19333333333333333, + "Vietnamese,Filipino,Chinese,English": 0.20666666666666667, + "Vietnamese,Filipino,Spanish,English": 0.20666666666666667, + "Vietnamese,Indonesian,Chinese,Spanish": 0.2733333333333333, + "Vietnamese,Indonesian,Chinese,English": 0.2866666666666667, + "Vietnamese,Indonesian,Spanish,English": 0.2733333333333333, + "Vietnamese,Chinese,Spanish,English": 0.2733333333333333, + "Malay,Filipino,Indonesian,Chinese": 0.21333333333333335, + "Malay,Filipino,Indonesian,Spanish": 0.16666666666666666, + "Malay,Filipino,Indonesian,English": 0.18666666666666668, + "Malay,Filipino,Chinese,Spanish": 0.17333333333333334, + "Malay,Filipino,Chinese,English": 0.19333333333333333, + "Malay,Filipino,Spanish,English": 0.17333333333333334, + "Malay,Indonesian,Chinese,Spanish": 0.24666666666666667, + "Malay,Indonesian,Chinese,English": 0.24666666666666667, + "Malay,Indonesian,Spanish,English": 0.23333333333333334, + "Malay,Chinese,Spanish,English": 0.22, + "Filipino,Indonesian,Chinese,Spanish": 0.20666666666666667, + "Filipino,Indonesian,Chinese,English": 0.20666666666666667, + "Filipino,Indonesian,Spanish,English": 0.18, + "Filipino,Chinese,Spanish,English": 0.21333333333333335, + "Indonesian,Chinese,Spanish,English": 0.2866666666666667 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.16666666666666666, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.14, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.14666666666666667, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.14, + "Vietnamese,Malay,Filipino,Chinese,English": 0.14, + "Vietnamese,Malay,Filipino,Spanish,English": 0.14, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.19333333333333333, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.19333333333333333, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.18666666666666668, + "Vietnamese,Malay,Chinese,Spanish,English": 0.17333333333333334, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.16, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.16, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.14666666666666667, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.16, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.24, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.15333333333333332, + "Malay,Filipino,Indonesian,Chinese,English": 0.16, + "Malay,Filipino,Indonesian,Spanish,English": 0.13333333333333333, + "Malay,Filipino,Chinese,Spanish,English": 0.14, + "Malay,Indonesian,Chinese,Spanish,English": 0.19333333333333333, + "Filipino,Indonesian,Chinese,Spanish,English": 0.16 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.12666666666666668, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.12666666666666668, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.11333333333333333, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.11333333333333333, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.16, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.13333333333333333, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.12 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.1 + } + }, + "AC3_2": 0.49223639176747325, + "AC3_3": 0.377079815330887, + "AC3_4": 0.30103914442949264, + "AC3_5": 0.24662321198552842, + "AC3_6": 0.20372920249216703, + "AC3_7": 0.16692913383066527 + }, + "prompt_4": { + "overall_acc": 0.5342857142857144, + "language_acc": { + "Vietnamese": 0.5266666666666666, + "Malay": 0.3933333333333333, + "Filipino": 0.3933333333333333, + "Indonesian": 0.5333333333333333, + "Chinese": 0.62, + "Spanish": 0.58, + "English": 0.6933333333333334 + }, + "consistency_score_2": 0.508888888888889, + "consistency_score_3": 0.33409523809523817, + "consistency_score_4": 0.24342857142857147, + "consistency_score_5": 0.1873015873015873, + "consistency_score_6": 0.14857142857142858, + "consistency_score_7": 0.12, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.4666666666666667, + "Vietnamese,Filipino": 0.4266666666666667, + "Vietnamese,Indonesian": 0.54, + "Vietnamese,Chinese": 0.49333333333333335, + "Vietnamese,Spanish": 0.56, + "Vietnamese,English": 0.5733333333333334, + "Malay,Filipino": 0.46, + "Malay,Indonesian": 0.5133333333333333, + "Malay,Chinese": 0.4266666666666667, + "Malay,Spanish": 0.4266666666666667, + "Malay,English": 0.44666666666666666, + "Filipino,Indonesian": 0.47333333333333333, + "Filipino,Chinese": 0.44, + "Filipino,Spanish": 0.4666666666666667, + "Filipino,English": 0.4533333333333333, + "Indonesian,Chinese": 0.5666666666666667, + "Indonesian,Spanish": 0.52, + "Indonesian,English": 0.54, + "Chinese,Spanish": 0.56, + "Chinese,English": 0.6733333333333333, + "Spanish,English": 0.66 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.26666666666666666, + "Vietnamese,Malay,Indonesian": 0.35333333333333333, + "Vietnamese,Malay,Chinese": 0.29333333333333333, + "Vietnamese,Malay,Spanish": 0.32666666666666666, + "Vietnamese,Malay,English": 0.3333333333333333, + "Vietnamese,Filipino,Indonesian": 0.3, + "Vietnamese,Filipino,Chinese": 0.25333333333333335, + "Vietnamese,Filipino,Spanish": 0.32, + "Vietnamese,Filipino,English": 0.3, + "Vietnamese,Indonesian,Chinese": 0.35333333333333333, + "Vietnamese,Indonesian,Spanish": 0.37333333333333335, + "Vietnamese,Indonesian,English": 0.38666666666666666, + "Vietnamese,Chinese,Spanish": 0.36666666666666664, + "Vietnamese,Chinese,English": 0.4066666666666667, + "Vietnamese,Spanish,English": 0.44, + "Malay,Filipino,Indonesian": 0.3, + "Malay,Filipino,Chinese": 0.24, + "Malay,Filipino,Spanish": 0.2866666666666667, + "Malay,Filipino,English": 0.25333333333333335, + "Malay,Indonesian,Chinese": 0.32666666666666666, + "Malay,Indonesian,Spanish": 0.3, + "Malay,Indonesian,English": 0.32666666666666666, + "Malay,Chinese,Spanish": 0.28, + "Malay,Chinese,English": 0.32666666666666666, + "Malay,Spanish,English": 0.3333333333333333, + "Filipino,Indonesian,Chinese": 0.31333333333333335, + "Filipino,Indonesian,Spanish": 0.30666666666666664, + "Filipino,Indonesian,English": 0.30666666666666664, + "Filipino,Chinese,Spanish": 0.3333333333333333, + "Filipino,Chinese,English": 0.32666666666666666, + "Filipino,Spanish,English": 0.36, + "Indonesian,Chinese,Spanish": 0.38666666666666666, + "Indonesian,Chinese,English": 0.42, + "Indonesian,Spanish,English": 0.41333333333333333, + "Chinese,Spanish,English": 0.48 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.22, + "Vietnamese,Malay,Filipino,Chinese": 0.17333333333333334, + "Vietnamese,Malay,Filipino,Spanish": 0.22, + "Vietnamese,Malay,Filipino,English": 0.2, + "Vietnamese,Malay,Indonesian,Chinese": 0.24666666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.26666666666666666, + "Vietnamese,Malay,Indonesian,English": 0.2733333333333333, + "Vietnamese,Malay,Chinese,Spanish": 0.23333333333333334, + "Vietnamese,Malay,Chinese,English": 0.25333333333333335, + "Vietnamese,Malay,Spanish,English": 0.26666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese": 0.20666666666666667, + "Vietnamese,Filipino,Indonesian,Spanish": 0.24666666666666667, + "Vietnamese,Filipino,Indonesian,English": 0.24, + "Vietnamese,Filipino,Chinese,Spanish": 0.22666666666666666, + "Vietnamese,Filipino,Chinese,English": 0.21333333333333335, + "Vietnamese,Filipino,Spanish,English": 0.26, + "Vietnamese,Indonesian,Chinese,Spanish": 0.28, + "Vietnamese,Indonesian,Chinese,English": 0.3, + "Vietnamese,Indonesian,Spanish,English": 0.31333333333333335, + "Vietnamese,Chinese,Spanish,English": 0.32666666666666666, + "Malay,Filipino,Indonesian,Chinese": 0.20666666666666667, + "Malay,Filipino,Indonesian,Spanish": 0.20666666666666667, + "Malay,Filipino,Indonesian,English": 0.2, + "Malay,Filipino,Chinese,Spanish": 0.18666666666666668, + "Malay,Filipino,Chinese,English": 0.17333333333333334, + "Malay,Filipino,Spanish,English": 0.22666666666666666, + "Malay,Indonesian,Chinese,Spanish": 0.23333333333333334, + "Malay,Indonesian,Chinese,English": 0.25333333333333335, + "Malay,Indonesian,Spanish,English": 0.26, + "Malay,Chinese,Spanish,English": 0.24666666666666667, + "Filipino,Indonesian,Chinese,Spanish": 0.24666666666666667, + "Filipino,Indonesian,Chinese,English": 0.24666666666666667, + "Filipino,Indonesian,Spanish,English": 0.25333333333333335, + "Filipino,Chinese,Spanish,English": 0.2733333333333333, + "Indonesian,Chinese,Spanish,English": 0.34 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.15333333333333332, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.17333333333333334, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.15333333333333332, + "Vietnamese,Malay,Filipino,Chinese,English": 0.14, + "Vietnamese,Malay,Filipino,Spanish,English": 0.18, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.20666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.21333333333333335, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.22666666666666666, + "Vietnamese,Malay,Chinese,Spanish,English": 0.20666666666666667, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.18666666666666668, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.18, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.20666666666666667, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.2, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.25333333333333335, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.16, + "Malay,Filipino,Indonesian,Chinese,English": 0.15333333333333332, + "Malay,Filipino,Indonesian,Spanish,English": 0.17333333333333334, + "Malay,Filipino,Chinese,Spanish,English": 0.16, + "Malay,Indonesian,Chinese,Spanish,English": 0.20666666666666667, + "Filipino,Indonesian,Chinese,Spanish,English": 0.21333333333333335 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.14, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.12666666666666668, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.15333333333333332, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.13333333333333333, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.18, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.16666666666666666, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.14 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.12 + } + }, + "AC3_2": 0.5212781496761405, + "AC3_3": 0.4111152194556812, + "AC3_4": 0.33446835305870437, + "AC3_5": 0.2773678586756586, + "AC3_6": 0.23249252835806297, + "AC3_7": 0.19598253272113803 + }, + "prompt_5": { + "overall_acc": 0.5438095238095239, + "language_acc": { + "Vietnamese": 0.5266666666666666, + "Malay": 0.41333333333333333, + "Filipino": 0.4, + "Indonesian": 0.5333333333333333, + "Chinese": 0.66, + "Spanish": 0.5866666666666667, + "English": 0.6866666666666666 + }, + "consistency_score_2": 0.5053968253968255, + "consistency_score_3": 0.3293333333333333, + "consistency_score_4": 0.23999999999999996, + "consistency_score_5": 0.18634920634920635, + "consistency_score_6": 0.1514285714285714, + "consistency_score_7": 0.12666666666666668, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.47333333333333333, + "Vietnamese,Filipino": 0.44666666666666666, + "Vietnamese,Indonesian": 0.44666666666666666, + "Vietnamese,Chinese": 0.52, + "Vietnamese,Spanish": 0.54, + "Vietnamese,English": 0.58, + "Malay,Filipino": 0.4533333333333333, + "Malay,Indonesian": 0.5066666666666667, + "Malay,Chinese": 0.4266666666666667, + "Malay,Spanish": 0.5, + "Malay,English": 0.4866666666666667, + "Filipino,Indonesian": 0.47333333333333333, + "Filipino,Chinese": 0.4266666666666667, + "Filipino,Spanish": 0.47333333333333333, + "Filipino,English": 0.44666666666666666, + "Indonesian,Chinese": 0.5133333333333333, + "Indonesian,Spanish": 0.52, + "Indonesian,English": 0.5333333333333333, + "Chinese,Spanish": 0.5733333333333334, + "Chinese,English": 0.6666666666666666, + "Spanish,English": 0.6066666666666667 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.26, + "Vietnamese,Malay,Indonesian": 0.32, + "Vietnamese,Malay,Chinese": 0.31333333333333335, + "Vietnamese,Malay,Spanish": 0.3333333333333333, + "Vietnamese,Malay,English": 0.3466666666666667, + "Vietnamese,Filipino,Indonesian": 0.2733333333333333, + "Vietnamese,Filipino,Chinese": 0.26666666666666666, + "Vietnamese,Filipino,Spanish": 0.31333333333333335, + "Vietnamese,Filipino,English": 0.30666666666666664, + "Vietnamese,Indonesian,Chinese": 0.3, + "Vietnamese,Indonesian,Spanish": 0.3333333333333333, + "Vietnamese,Indonesian,English": 0.3333333333333333, + "Vietnamese,Chinese,Spanish": 0.37333333333333335, + "Vietnamese,Chinese,English": 0.43333333333333335, + "Vietnamese,Spanish,English": 0.42, + "Malay,Filipino,Indonesian": 0.30666666666666664, + "Malay,Filipino,Chinese": 0.22666666666666666, + "Malay,Filipino,Spanish": 0.3, + "Malay,Filipino,English": 0.28, + "Malay,Indonesian,Chinese": 0.30666666666666664, + "Malay,Indonesian,Spanish": 0.3333333333333333, + "Malay,Indonesian,English": 0.3333333333333333, + "Malay,Chinese,Spanish": 0.32666666666666666, + "Malay,Chinese,English": 0.36, + "Malay,Spanish,English": 0.36, + "Filipino,Indonesian,Chinese": 0.28, + "Filipino,Indonesian,Spanish": 0.29333333333333333, + "Filipino,Indonesian,English": 0.30666666666666664, + "Filipino,Chinese,Spanish": 0.3, + "Filipino,Chinese,English": 0.3466666666666667, + "Filipino,Spanish,English": 0.32, + "Indonesian,Chinese,Spanish": 0.36, + "Indonesian,Chinese,English": 0.4066666666666667, + "Indonesian,Spanish,English": 0.38666666666666666, + "Chinese,Spanish,English": 0.4666666666666667 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.20666666666666667, + "Vietnamese,Malay,Filipino,Chinese": 0.18, + "Vietnamese,Malay,Filipino,Spanish": 0.21333333333333335, + "Vietnamese,Malay,Filipino,English": 0.2, + "Vietnamese,Malay,Indonesian,Chinese": 0.23333333333333334, + "Vietnamese,Malay,Indonesian,Spanish": 0.26, + "Vietnamese,Malay,Indonesian,English": 0.25333333333333335, + "Vietnamese,Malay,Chinese,Spanish": 0.26, + "Vietnamese,Malay,Chinese,English": 0.2733333333333333, + "Vietnamese,Malay,Spanish,English": 0.2733333333333333, + "Vietnamese,Filipino,Indonesian,Chinese": 0.18666666666666668, + "Vietnamese,Filipino,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Filipino,Indonesian,English": 0.2, + "Vietnamese,Filipino,Chinese,Spanish": 0.22, + "Vietnamese,Filipino,Chinese,English": 0.26, + "Vietnamese,Filipino,Spanish,English": 0.24666666666666667, + "Vietnamese,Indonesian,Chinese,Spanish": 0.23333333333333334, + "Vietnamese,Indonesian,Chinese,English": 0.26, + "Vietnamese,Indonesian,Spanish,English": 0.2733333333333333, + "Vietnamese,Chinese,Spanish,English": 0.3333333333333333, + "Malay,Filipino,Indonesian,Chinese": 0.18666666666666668, + "Malay,Filipino,Indonesian,Spanish": 0.23333333333333334, + "Malay,Filipino,Indonesian,English": 0.21333333333333335, + "Malay,Filipino,Chinese,Spanish": 0.2, + "Malay,Filipino,Chinese,English": 0.22, + "Malay,Filipino,Spanish,English": 0.22666666666666666, + "Malay,Indonesian,Chinese,Spanish": 0.24666666666666667, + "Malay,Indonesian,Chinese,English": 0.26666666666666666, + "Malay,Indonesian,Spanish,English": 0.26, + "Malay,Chinese,Spanish,English": 0.3, + "Filipino,Indonesian,Chinese,Spanish": 0.20666666666666667, + "Filipino,Indonesian,Chinese,English": 0.26, + "Filipino,Indonesian,Spanish,English": 0.22, + "Filipino,Chinese,Spanish,English": 0.26666666666666666, + "Indonesian,Chinese,Spanish,English": 0.31333333333333335 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.14666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.18, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.16, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.16, + "Vietnamese,Malay,Filipino,Chinese,English": 0.17333333333333334, + "Vietnamese,Malay,Filipino,Spanish,English": 0.17333333333333334, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.2, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.20666666666666667, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.21333333333333335, + "Vietnamese,Malay,Chinese,Spanish,English": 0.23333333333333334, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.15333333333333332, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.18, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.16666666666666666, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.21333333333333335, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.21333333333333335, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.16666666666666666, + "Malay,Filipino,Indonesian,Chinese,English": 0.18, + "Malay,Filipino,Indonesian,Spanish,English": 0.18, + "Malay,Filipino,Chinese,Spanish,English": 0.19333333333333333, + "Malay,Indonesian,Chinese,Spanish,English": 0.22666666666666666, + "Filipino,Indonesian,Chinese,Spanish,English": 0.19333333333333333 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.13333333333333333, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.14, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.14666666666666667, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.15333333333333332, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.18, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.14666666666666667, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.16 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.12666666666666668 + } + }, + "AC3_2": 0.523900007154159, + "AC3_3": 0.41022978471159743, + "AC3_4": 0.33302551636091404, + "AC3_5": 0.2775792960282363, + "AC3_6": 0.23689236787199297, + "AC3_7": 0.20547348481783898 + } }, "cross_logiqa": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.46022727272727265, + "language_acc": { + "English": 0.5738636363636364, + "Filipino": 0.32954545454545453, + "Vietnamese": 0.4318181818181818, + "Chinese": 0.5852272727272727, + "Indonesian": 0.4659090909090909, + "Malay": 0.38636363636363635, + "Spanish": 0.44886363636363635 + }, + "consistency_score_2": 0.5373376623376623, + "consistency_score_3": 0.36655844155844164, + "consistency_score_4": 0.2741883116883116, + "consistency_score_5": 0.2156385281385281, + "consistency_score_6": 0.17451298701298704, + "consistency_score_7": 0.14204545454545456, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.42613636363636365, + "English,Vietnamese": 0.5852272727272727, + "English,Chinese": 0.6363636363636364, + "English,Indonesian": 0.5568181818181818, + "English,Malay": 0.48295454545454547, + "English,Spanish": 0.6136363636363636, + "Filipino,Vietnamese": 0.5227272727272727, + "Filipino,Chinese": 0.4772727272727273, + "Filipino,Indonesian": 0.45454545454545453, + "Filipino,Malay": 0.5170454545454546, + "Filipino,Spanish": 0.5170454545454546, + "Vietnamese,Chinese": 0.5284090909090909, + "Vietnamese,Indonesian": 0.5795454545454546, + "Vietnamese,Malay": 0.5284090909090909, + "Vietnamese,Spanish": 0.5738636363636364, + "Chinese,Indonesian": 0.5511363636363636, + "Chinese,Malay": 0.5170454545454546, + "Chinese,Spanish": 0.5681818181818182, + "Indonesian,Malay": 0.5795454545454546, + "Indonesian,Spanish": 0.5681818181818182, + "Malay,Spanish": 0.5 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.3409090909090909, + "English,Filipino,Chinese": 0.3352272727272727, + "English,Filipino,Indonesian": 0.30113636363636365, + "English,Filipino,Malay": 0.2840909090909091, + "English,Filipino,Spanish": 0.32954545454545453, + "English,Vietnamese,Chinese": 0.4375, + "English,Vietnamese,Indonesian": 0.4147727272727273, + "English,Vietnamese,Malay": 0.3465909090909091, + "English,Vietnamese,Spanish": 0.42613636363636365, + "English,Chinese,Indonesian": 0.4147727272727273, + "English,Chinese,Malay": 0.375, + "English,Chinese,Spanish": 0.45454545454545453, + "English,Indonesian,Malay": 0.3693181818181818, + "English,Indonesian,Spanish": 0.4034090909090909, + "English,Malay,Spanish": 0.3522727272727273, + "Filipino,Vietnamese,Chinese": 0.3409090909090909, + "Filipino,Vietnamese,Indonesian": 0.35795454545454547, + "Filipino,Vietnamese,Malay": 0.3522727272727273, + "Filipino,Vietnamese,Spanish": 0.36363636363636365, + "Filipino,Chinese,Indonesian": 0.3181818181818182, + "Filipino,Chinese,Malay": 0.32954545454545453, + "Filipino,Chinese,Spanish": 0.3352272727272727, + "Filipino,Indonesian,Malay": 0.3409090909090909, + "Filipino,Indonesian,Spanish": 0.32954545454545453, + "Filipino,Malay,Spanish": 0.3409090909090909, + "Vietnamese,Chinese,Indonesian": 0.3977272727272727, + "Vietnamese,Chinese,Malay": 0.3465909090909091, + "Vietnamese,Chinese,Spanish": 0.4034090909090909, + "Vietnamese,Indonesian,Malay": 0.3977272727272727, + "Vietnamese,Indonesian,Spanish": 0.4147727272727273, + "Vietnamese,Malay,Spanish": 0.36363636363636365, + "Chinese,Indonesian,Malay": 0.3806818181818182, + "Chinese,Indonesian,Spanish": 0.4034090909090909, + "Chinese,Malay,Spanish": 0.35795454545454547, + "Indonesian,Malay,Spanish": 0.3693181818181818 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.2784090909090909, + "English,Filipino,Vietnamese,Indonesian": 0.26704545454545453, + "English,Filipino,Vietnamese,Malay": 0.23295454545454544, + "English,Filipino,Vietnamese,Spanish": 0.26704545454545453, + "English,Filipino,Chinese,Indonesian": 0.25, + "English,Filipino,Chinese,Malay": 0.23863636363636365, + "English,Filipino,Chinese,Spanish": 0.26704545454545453, + "English,Filipino,Indonesian,Malay": 0.22727272727272727, + "English,Filipino,Indonesian,Spanish": 0.23863636363636365, + "English,Filipino,Malay,Spanish": 0.23863636363636365, + "English,Vietnamese,Chinese,Indonesian": 0.32386363636363635, + "English,Vietnamese,Chinese,Malay": 0.2840909090909091, + "English,Vietnamese,Chinese,Spanish": 0.32954545454545453, + "English,Vietnamese,Indonesian,Malay": 0.2840909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.32386363636363635, + "English,Vietnamese,Malay,Spanish": 0.2784090909090909, + "English,Chinese,Indonesian,Malay": 0.30113636363636365, + "English,Chinese,Indonesian,Spanish": 0.3181818181818182, + "English,Chinese,Malay,Spanish": 0.29545454545454547, + "English,Indonesian,Malay,Spanish": 0.2727272727272727, + "Filipino,Vietnamese,Chinese,Indonesian": 0.2727272727272727, + "Filipino,Vietnamese,Chinese,Malay": 0.24431818181818182, + "Filipino,Vietnamese,Chinese,Spanish": 0.2727272727272727, + "Filipino,Vietnamese,Indonesian,Malay": 0.2727272727272727, + "Filipino,Vietnamese,Indonesian,Spanish": 0.2727272727272727, + "Filipino,Vietnamese,Malay,Spanish": 0.26136363636363635, + "Filipino,Chinese,Indonesian,Malay": 0.25, + "Filipino,Chinese,Indonesian,Spanish": 0.25, + "Filipino,Chinese,Malay,Spanish": 0.23863636363636365, + "Filipino,Indonesian,Malay,Spanish": 0.25, + "Vietnamese,Chinese,Indonesian,Malay": 0.30113636363636365, + "Vietnamese,Chinese,Indonesian,Spanish": 0.32386363636363635, + "Vietnamese,Chinese,Malay,Spanish": 0.2840909090909091, + "Vietnamese,Indonesian,Malay,Spanish": 0.2897727272727273, + "Chinese,Indonesian,Malay,Spanish": 0.29545454545454547 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.2215909090909091, + "English,Filipino,Vietnamese,Chinese,Malay": 0.19886363636363635, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.2215909090909091, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.19886363636363635, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.21022727272727273, + "English,Filipino,Vietnamese,Malay,Spanish": 0.19318181818181818, + "English,Filipino,Chinese,Indonesian,Malay": 0.19886363636363635, + "English,Filipino,Chinese,Indonesian,Spanish": 0.19886363636363635, + "English,Filipino,Chinese,Malay,Spanish": 0.19886363636363635, + "English,Filipino,Indonesian,Malay,Spanish": 0.1875, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.24431818181818182, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.26136363636363635, + "English,Vietnamese,Chinese,Malay,Spanish": 0.23863636363636365, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.22727272727272727, + "English,Chinese,Indonesian,Malay,Spanish": 0.23863636363636365, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.2159090909090909, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.2215909090909091, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.19886363636363635, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.20454545454545456, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.25 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.17613636363636365, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.17613636363636365, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.16477272727272727, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.1590909090909091, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.16477272727272727, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.20454545454545456, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.17613636363636365 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.14204545454545456 + } + }, + "AC3_2": 0.4958022042553509, + "AC3_3": 0.40808685713429854, + "AC3_4": 0.3436445020028828, + "AC3_5": 0.29367584972727384, + "AC3_6": 0.2530661473677104, + "AC3_7": 0.2170883361560649 + }, + "prompt_2": { + "overall_acc": 0.46022727272727276, + "language_acc": { + "English": 0.5454545454545454, + "Filipino": 0.36363636363636365, + "Vietnamese": 0.44886363636363635, + "Chinese": 0.5795454545454546, + "Indonesian": 0.4375, + "Malay": 0.3693181818181818, + "Spanish": 0.4772727272727273 + }, + "consistency_score_2": 0.5078463203463203, + "consistency_score_3": 0.33116883116883117, + "consistency_score_4": 0.24155844155844158, + "consistency_score_5": 0.1872294372294372, + "consistency_score_6": 0.15097402597402598, + "consistency_score_7": 0.125, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.4318181818181818, + "English,Vietnamese": 0.5852272727272727, + "English,Chinese": 0.6022727272727273, + "English,Indonesian": 0.5170454545454546, + "English,Malay": 0.44886363636363635, + "English,Spanish": 0.6136363636363636, + "Filipino,Vietnamese": 0.4772727272727273, + "Filipino,Chinese": 0.4772727272727273, + "Filipino,Indonesian": 0.44886363636363635, + "Filipino,Malay": 0.4318181818181818, + "Filipino,Spanish": 0.48295454545454547, + "Vietnamese,Chinese": 0.4659090909090909, + "Vietnamese,Indonesian": 0.5909090909090909, + "Vietnamese,Malay": 0.4715909090909091, + "Vietnamese,Spanish": 0.5568181818181818, + "Chinese,Indonesian": 0.5397727272727273, + "Chinese,Malay": 0.4375, + "Chinese,Spanish": 0.5340909090909091, + "Indonesian,Malay": 0.4772727272727273, + "Indonesian,Spanish": 0.5568181818181818, + "Malay,Spanish": 0.5170454545454546 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.3181818181818182, + "English,Filipino,Chinese": 0.3068181818181818, + "English,Filipino,Indonesian": 0.2784090909090909, + "English,Filipino,Malay": 0.2215909090909091, + "English,Filipino,Spanish": 0.3181818181818182, + "English,Vietnamese,Chinese": 0.38636363636363635, + "English,Vietnamese,Indonesian": 0.3977272727272727, + "English,Vietnamese,Malay": 0.3125, + "English,Vietnamese,Spanish": 0.42045454545454547, + "English,Chinese,Indonesian": 0.39204545454545453, + "English,Chinese,Malay": 0.3125, + "English,Chinese,Spanish": 0.4147727272727273, + "English,Indonesian,Malay": 0.30113636363636365, + "English,Indonesian,Spanish": 0.3977272727272727, + "English,Malay,Spanish": 0.3409090909090909, + "Filipino,Vietnamese,Chinese": 0.2897727272727273, + "Filipino,Vietnamese,Indonesian": 0.32386363636363635, + "Filipino,Vietnamese,Malay": 0.2727272727272727, + "Filipino,Vietnamese,Spanish": 0.32386363636363635, + "Filipino,Chinese,Indonesian": 0.3125, + "Filipino,Chinese,Malay": 0.26136363636363635, + "Filipino,Chinese,Spanish": 0.32954545454545453, + "Filipino,Indonesian,Malay": 0.2840909090909091, + "Filipino,Indonesian,Spanish": 0.32386363636363635, + "Filipino,Malay,Spanish": 0.2897727272727273, + "Vietnamese,Chinese,Indonesian": 0.3693181818181818, + "Vietnamese,Chinese,Malay": 0.2727272727272727, + "Vietnamese,Chinese,Spanish": 0.3465909090909091, + "Vietnamese,Indonesian,Malay": 0.3352272727272727, + "Vietnamese,Indonesian,Spanish": 0.3977272727272727, + "Vietnamese,Malay,Spanish": 0.3465909090909091, + "Chinese,Indonesian,Malay": 0.3181818181818182, + "Chinese,Indonesian,Spanish": 0.38636363636363635, + "Chinese,Malay,Spanish": 0.32386363636363635, + "Indonesian,Malay,Spanish": 0.36363636363636365 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.23863636363636365, + "English,Filipino,Vietnamese,Indonesian": 0.24431818181818182, + "English,Filipino,Vietnamese,Malay": 0.19318181818181818, + "English,Filipino,Vietnamese,Spanish": 0.26704545454545453, + "English,Filipino,Chinese,Indonesian": 0.2159090909090909, + "English,Filipino,Chinese,Malay": 0.17613636363636365, + "English,Filipino,Chinese,Spanish": 0.23863636363636365, + "English,Filipino,Indonesian,Malay": 0.18181818181818182, + "English,Filipino,Indonesian,Spanish": 0.23863636363636365, + "English,Filipino,Malay,Spanish": 0.19318181818181818, + "English,Vietnamese,Chinese,Indonesian": 0.3125, + "English,Vietnamese,Chinese,Malay": 0.2215909090909091, + "English,Vietnamese,Chinese,Spanish": 0.29545454545454547, + "English,Vietnamese,Indonesian,Malay": 0.24431818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.32386363636363635, + "English,Vietnamese,Malay,Spanish": 0.26704545454545453, + "English,Chinese,Indonesian,Malay": 0.23863636363636365, + "English,Chinese,Indonesian,Spanish": 0.3181818181818182, + "English,Chinese,Malay,Spanish": 0.25, + "English,Indonesian,Malay,Spanish": 0.26136363636363635, + "Filipino,Vietnamese,Chinese,Indonesian": 0.22727272727272727, + "Filipino,Vietnamese,Chinese,Malay": 0.1875, + "Filipino,Vietnamese,Chinese,Spanish": 0.23863636363636365, + "Filipino,Vietnamese,Indonesian,Malay": 0.2215909090909091, + "Filipino,Vietnamese,Indonesian,Spanish": 0.25, + "Filipino,Vietnamese,Malay,Spanish": 0.20454545454545456, + "Filipino,Chinese,Indonesian,Malay": 0.20454545454545456, + "Filipino,Chinese,Indonesian,Spanish": 0.25, + "Filipino,Chinese,Malay,Spanish": 0.21022727272727273, + "Filipino,Indonesian,Malay,Spanish": 0.23863636363636365, + "Vietnamese,Chinese,Indonesian,Malay": 0.23295454545454544, + "Vietnamese,Chinese,Indonesian,Spanish": 0.29545454545454547, + "Vietnamese,Chinese,Malay,Spanish": 0.22727272727272727, + "Vietnamese,Indonesian,Malay,Spanish": 0.2784090909090909, + "Chinese,Indonesian,Malay,Spanish": 0.26704545454545453 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.1875, + "English,Filipino,Vietnamese,Chinese,Malay": 0.14772727272727273, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.20454545454545456, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.17045454545454544, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.2159090909090909, + "English,Filipino,Vietnamese,Malay,Spanish": 0.17045454545454544, + "English,Filipino,Chinese,Indonesian,Malay": 0.14204545454545456, + "English,Filipino,Chinese,Indonesian,Spanish": 0.1875, + "English,Filipino,Chinese,Malay,Spanish": 0.1534090909090909, + "English,Filipino,Indonesian,Malay,Spanish": 0.17045454545454544, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.19886363636363635, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.26136363636363635, + "English,Vietnamese,Chinese,Malay,Spanish": 0.19318181818181818, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.22727272727272727, + "English,Chinese,Indonesian,Malay,Spanish": 0.2159090909090909, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.1590909090909091, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.19886363636363635, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.1534090909090909, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.18181818181818182, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.18181818181818182, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.21022727272727273 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.13068181818181818, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.17045454545454544, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.13068181818181818, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.1590909090909091, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.13636363636363635, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.1875, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.14204545454545456 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.125 + } + }, + "AC3_2": 0.48286561812177603, + "AC3_3": 0.38517482512615486, + "AC3_4": 0.3168254368358083, + "AC3_5": 0.2661740682641736, + "AC3_6": 0.2273632741388238, + "AC3_7": 0.1966019417139787 + }, + "prompt_3": { + "overall_acc": 0.4107142857142857, + "language_acc": { + "English": 0.5, + "Filipino": 0.2897727272727273, + "Vietnamese": 0.39204545454545453, + "Chinese": 0.5, + "Indonesian": 0.42613636363636365, + "Malay": 0.36363636363636365, + "Spanish": 0.4034090909090909 + }, + "consistency_score_2": 0.4266774891774891, + "consistency_score_3": 0.23733766233766232, + "consistency_score_4": 0.15487012987012988, + "consistency_score_5": 0.11093073593073594, + "consistency_score_6": 0.0844155844155844, + "consistency_score_7": 0.06818181818181818, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.3693181818181818, + "English,Vietnamese": 0.4431818181818182, + "English,Chinese": 0.5056818181818182, + "English,Indonesian": 0.4431818181818182, + "English,Malay": 0.4034090909090909, + "English,Spanish": 0.4318181818181818, + "Filipino,Vietnamese": 0.38636363636363635, + "Filipino,Chinese": 0.4147727272727273, + "Filipino,Indonesian": 0.3977272727272727, + "Filipino,Malay": 0.3806818181818182, + "Filipino,Spanish": 0.3806818181818182, + "Vietnamese,Chinese": 0.44886363636363635, + "Vietnamese,Indonesian": 0.42613636363636365, + "Vietnamese,Malay": 0.3977272727272727, + "Vietnamese,Spanish": 0.4318181818181818, + "Chinese,Indonesian": 0.4772727272727273, + "Chinese,Malay": 0.4375, + "Chinese,Spanish": 0.4431818181818182, + "Indonesian,Malay": 0.4431818181818182, + "Indonesian,Spanish": 0.4772727272727273, + "Malay,Spanish": 0.42045454545454547 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.19886363636363635, + "English,Filipino,Chinese": 0.2215909090909091, + "English,Filipino,Indonesian": 0.21022727272727273, + "English,Filipino,Malay": 0.18181818181818182, + "English,Filipino,Spanish": 0.21022727272727273, + "English,Vietnamese,Chinese": 0.26704545454545453, + "English,Vietnamese,Indonesian": 0.26704545454545453, + "English,Vietnamese,Malay": 0.22727272727272727, + "English,Vietnamese,Spanish": 0.23863636363636365, + "English,Chinese,Indonesian": 0.3068181818181818, + "English,Chinese,Malay": 0.26704545454545453, + "English,Chinese,Spanish": 0.26704545454545453, + "English,Indonesian,Malay": 0.26136363636363635, + "English,Indonesian,Spanish": 0.2727272727272727, + "English,Malay,Spanish": 0.22727272727272727, + "Filipino,Vietnamese,Chinese": 0.2159090909090909, + "Filipino,Vietnamese,Indonesian": 0.21022727272727273, + "Filipino,Vietnamese,Malay": 0.18181818181818182, + "Filipino,Vietnamese,Spanish": 0.1875, + "Filipino,Chinese,Indonesian": 0.24431818181818182, + "Filipino,Chinese,Malay": 0.22727272727272727, + "Filipino,Chinese,Spanish": 0.22727272727272727, + "Filipino,Indonesian,Malay": 0.20454545454545456, + "Filipino,Indonesian,Spanish": 0.22727272727272727, + "Filipino,Malay,Spanish": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian": 0.2897727272727273, + "Vietnamese,Chinese,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Spanish": 0.25, + "Vietnamese,Indonesian,Malay": 0.23863636363636365, + "Vietnamese,Indonesian,Spanish": 0.2556818181818182, + "Vietnamese,Malay,Spanish": 0.2215909090909091, + "Chinese,Indonesian,Malay": 0.26704545454545453, + "Chinese,Indonesian,Spanish": 0.2784090909090909, + "Chinese,Malay,Spanish": 0.24431818181818182, + "Indonesian,Malay,Spanish": 0.2784090909090909 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.13068181818181818, + "English,Filipino,Vietnamese,Indonesian": 0.14204545454545456, + "English,Filipino,Vietnamese,Malay": 0.11363636363636363, + "English,Filipino,Vietnamese,Spanish": 0.11931818181818182, + "English,Filipino,Chinese,Indonesian": 0.1590909090909091, + "English,Filipino,Chinese,Malay": 0.13636363636363635, + "English,Filipino,Chinese,Spanish": 0.1534090909090909, + "English,Filipino,Indonesian,Malay": 0.13068181818181818, + "English,Filipino,Indonesian,Spanish": 0.14772727272727273, + "English,Filipino,Malay,Spanish": 0.13068181818181818, + "English,Vietnamese,Chinese,Indonesian": 0.20454545454545456, + "English,Vietnamese,Chinese,Malay": 0.16477272727272727, + "English,Vietnamese,Chinese,Spanish": 0.17045454545454544, + "English,Vietnamese,Indonesian,Malay": 0.17613636363636365, + "English,Vietnamese,Indonesian,Spanish": 0.18181818181818182, + "English,Vietnamese,Malay,Spanish": 0.16477272727272727, + "English,Chinese,Indonesian,Malay": 0.19318181818181818, + "English,Chinese,Indonesian,Spanish": 0.19318181818181818, + "English,Chinese,Malay,Spanish": 0.16477272727272727, + "English,Indonesian,Malay,Spanish": 0.17613636363636365, + "Filipino,Vietnamese,Chinese,Indonesian": 0.1590909090909091, + "Filipino,Vietnamese,Chinese,Malay": 0.13068181818181818, + "Filipino,Vietnamese,Chinese,Spanish": 0.14204545454545456, + "Filipino,Vietnamese,Indonesian,Malay": 0.13636363636363635, + "Filipino,Vietnamese,Indonesian,Spanish": 0.13068181818181818, + "Filipino,Vietnamese,Malay,Spanish": 0.10795454545454546, + "Filipino,Chinese,Indonesian,Malay": 0.14772727272727273, + "Filipino,Chinese,Indonesian,Spanish": 0.1590909090909091, + "Filipino,Chinese,Malay,Spanish": 0.14204545454545456, + "Filipino,Indonesian,Malay,Spanish": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Spanish": 0.1875, + "Vietnamese,Chinese,Malay,Spanish": 0.14772727272727273, + "Vietnamese,Indonesian,Malay,Spanish": 0.16477272727272727, + "Chinese,Indonesian,Malay,Spanish": 0.1875 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.11363636363636363, + "English,Filipino,Vietnamese,Chinese,Malay": 0.09090909090909091, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.10227272727272728, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.10227272727272728, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.09659090909090909, + "English,Filipino,Vietnamese,Malay,Spanish": 0.08522727272727272, + "English,Filipino,Chinese,Indonesian,Malay": 0.10227272727272728, + "English,Filipino,Chinese,Indonesian,Spanish": 0.11363636363636363, + "English,Filipino,Chinese,Malay,Spanish": 0.10227272727272728, + "English,Filipino,Indonesian,Malay,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.13636363636363635, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.14204545454545456, + "English,Vietnamese,Chinese,Malay,Spanish": 0.11931818181818182, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.13636363636363635, + "English,Chinese,Indonesian,Malay,Spanish": 0.13068181818181818, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.10795454545454546, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.11363636363636363, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.09090909090909091, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.10227272727272728, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.125 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.08522727272727272, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.08522727272727272, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.07386363636363637, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.07954545454545454, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.07954545454545454, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.10227272727272728, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.08522727272727272 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.06818181818181818 + } + }, + "AC3_2": 0.41854373408341305, + "AC3_3": 0.3008338104316778, + "AC3_4": 0.22492619317000648, + "AC3_5": 0.17468138704416597, + "AC3_6": 0.14004683837920925, + "AC3_7": 0.11694915251795229 + }, + "prompt_4": { + "overall_acc": 0.46915584415584416, + "language_acc": { + "English": 0.5795454545454546, + "Filipino": 0.3693181818181818, + "Vietnamese": 0.42045454545454547, + "Chinese": 0.5511363636363636, + "Indonesian": 0.4602272727272727, + "Malay": 0.42045454545454547, + "Spanish": 0.48295454545454547 + }, + "consistency_score_2": 0.5148809523809523, + "consistency_score_3": 0.3433441558441558, + "consistency_score_4": 0.2579545454545455, + "consistency_score_5": 0.20562770562770563, + "consistency_score_6": 0.16964285714285715, + "consistency_score_7": 0.14204545454545456, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.42045454545454547, + "English,Vietnamese": 0.5056818181818182, + "English,Chinese": 0.6022727272727273, + "English,Indonesian": 0.5340909090909091, + "English,Malay": 0.4715909090909091, + "English,Spanish": 0.6193181818181818, + "Filipino,Vietnamese": 0.4715909090909091, + "Filipino,Chinese": 0.4943181818181818, + "Filipino,Indonesian": 0.44886363636363635, + "Filipino,Malay": 0.5056818181818182, + "Filipino,Spanish": 0.4602272727272727, + "Vietnamese,Chinese": 0.5056818181818182, + "Vietnamese,Indonesian": 0.5113636363636364, + "Vietnamese,Malay": 0.5170454545454546, + "Vietnamese,Spanish": 0.4943181818181818, + "Chinese,Indonesian": 0.5284090909090909, + "Chinese,Malay": 0.5, + "Chinese,Spanish": 0.5511363636363636, + "Indonesian,Malay": 0.5340909090909091, + "Indonesian,Spanish": 0.5909090909090909, + "Malay,Spanish": 0.5454545454545454 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.2897727272727273, + "English,Filipino,Chinese": 0.3352272727272727, + "English,Filipino,Indonesian": 0.29545454545454547, + "English,Filipino,Malay": 0.2897727272727273, + "English,Filipino,Spanish": 0.3181818181818182, + "English,Vietnamese,Chinese": 0.375, + "English,Vietnamese,Indonesian": 0.3352272727272727, + "English,Vietnamese,Malay": 0.32386363636363635, + "English,Vietnamese,Spanish": 0.3693181818181818, + "English,Chinese,Indonesian": 0.3806818181818182, + "English,Chinese,Malay": 0.375, + "English,Chinese,Spanish": 0.42613636363636365, + "English,Indonesian,Malay": 0.3522727272727273, + "English,Indonesian,Spanish": 0.4090909090909091, + "English,Malay,Spanish": 0.3806818181818182, + "Filipino,Vietnamese,Chinese": 0.3125, + "Filipino,Vietnamese,Indonesian": 0.29545454545454547, + "Filipino,Vietnamese,Malay": 0.3181818181818182, + "Filipino,Vietnamese,Spanish": 0.30113636363636365, + "Filipino,Chinese,Indonesian": 0.3181818181818182, + "Filipino,Chinese,Malay": 0.32386363636363635, + "Filipino,Chinese,Spanish": 0.3465909090909091, + "Filipino,Indonesian,Malay": 0.32386363636363635, + "Filipino,Indonesian,Spanish": 0.32954545454545453, + "Filipino,Malay,Spanish": 0.3181818181818182, + "Vietnamese,Chinese,Indonesian": 0.3465909090909091, + "Vietnamese,Chinese,Malay": 0.32954545454545453, + "Vietnamese,Chinese,Spanish": 0.3465909090909091, + "Vietnamese,Indonesian,Malay": 0.3352272727272727, + "Vietnamese,Indonesian,Spanish": 0.36363636363636365, + "Vietnamese,Malay,Spanish": 0.3465909090909091, + "Chinese,Indonesian,Malay": 0.3522727272727273, + "Chinese,Indonesian,Spanish": 0.4090909090909091, + "Chinese,Malay,Spanish": 0.36363636363636365, + "Indonesian,Malay,Spanish": 0.3806818181818182 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.24431818181818182, + "English,Filipino,Vietnamese,Indonesian": 0.22727272727272727, + "English,Filipino,Vietnamese,Malay": 0.21022727272727273, + "English,Filipino,Vietnamese,Spanish": 0.23863636363636365, + "English,Filipino,Chinese,Indonesian": 0.25, + "English,Filipino,Chinese,Malay": 0.25, + "English,Filipino,Chinese,Spanish": 0.2784090909090909, + "English,Filipino,Indonesian,Malay": 0.23295454545454544, + "English,Filipino,Indonesian,Spanish": 0.2556818181818182, + "English,Filipino,Malay,Spanish": 0.24431818181818182, + "English,Vietnamese,Chinese,Indonesian": 0.2727272727272727, + "English,Vietnamese,Chinese,Malay": 0.2556818181818182, + "English,Vietnamese,Chinese,Spanish": 0.2840909090909091, + "English,Vietnamese,Indonesian,Malay": 0.2556818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.2897727272727273, + "English,Vietnamese,Malay,Spanish": 0.2784090909090909, + "English,Chinese,Indonesian,Malay": 0.2897727272727273, + "English,Chinese,Indonesian,Spanish": 0.3181818181818182, + "English,Chinese,Malay,Spanish": 0.29545454545454547, + "English,Indonesian,Malay,Spanish": 0.30113636363636365, + "Filipino,Vietnamese,Chinese,Indonesian": 0.23295454545454544, + "Filipino,Vietnamese,Chinese,Malay": 0.23295454545454544, + "Filipino,Vietnamese,Chinese,Spanish": 0.25, + "Filipino,Vietnamese,Indonesian,Malay": 0.22727272727272727, + "Filipino,Vietnamese,Indonesian,Spanish": 0.24431818181818182, + "Filipino,Vietnamese,Malay,Spanish": 0.22727272727272727, + "Filipino,Chinese,Indonesian,Malay": 0.23295454545454544, + "Filipino,Chinese,Indonesian,Spanish": 0.26704545454545453, + "Filipino,Chinese,Malay,Spanish": 0.24431818181818182, + "Filipino,Indonesian,Malay,Spanish": 0.24431818181818182, + "Vietnamese,Chinese,Indonesian,Malay": 0.25, + "Vietnamese,Chinese,Indonesian,Spanish": 0.29545454545454547, + "Vietnamese,Chinese,Malay,Spanish": 0.25, + "Vietnamese,Indonesian,Malay,Spanish": 0.2727272727272727, + "Chinese,Indonesian,Malay,Spanish": 0.2840909090909091 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.19886363636363635, + "English,Filipino,Vietnamese,Chinese,Malay": 0.18181818181818182, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.21022727272727273, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.17613636363636365, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.21022727272727273, + "English,Filipino,Vietnamese,Malay,Spanish": 0.18181818181818182, + "English,Filipino,Chinese,Indonesian,Malay": 0.19886363636363635, + "English,Filipino,Chinese,Indonesian,Spanish": 0.2215909090909091, + "English,Filipino,Chinese,Malay,Spanish": 0.21022727272727273, + "English,Filipino,Indonesian,Malay,Spanish": 0.21022727272727273, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.21022727272727273, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.24431818181818182, + "English,Vietnamese,Chinese,Malay,Spanish": 0.2159090909090909, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.23295454545454544, + "English,Chinese,Indonesian,Malay,Spanish": 0.23863636363636365, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.17613636363636365, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.2159090909090909, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.18181818181818182, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.1875, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.19886363636363635, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.2159090909090909 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.1875, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.1590909090909091, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.16477272727272727, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.17613636363636365, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.1875, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.1590909090909091 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.14204545454545456 + } + }, + "AC3_2": 0.4909560469276012, + "AC3_3": 0.39650933477222194, + "AC3_4": 0.332881730860062, + "AC3_5": 0.28593299238620784, + "AC3_6": 0.2491831547985305, + "AC3_7": 0.21806712540196493 + }, + "prompt_5": { + "overall_acc": 0.4545454545454546, + "language_acc": { + "English": 0.5454545454545454, + "Filipino": 0.3409090909090909, + "Vietnamese": 0.45454545454545453, + "Chinese": 0.5340909090909091, + "Indonesian": 0.42613636363636365, + "Malay": 0.4090909090909091, + "Spanish": 0.4715909090909091 + }, + "consistency_score_2": 0.4848484848484847, + "consistency_score_3": 0.3043831168831169, + "consistency_score_4": 0.21509740259740262, + "consistency_score_5": 0.16125541125541126, + "consistency_score_6": 0.12581168831168832, + "consistency_score_7": 0.10227272727272728, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.42613636363636365, + "English,Vietnamese": 0.4772727272727273, + "English,Chinese": 0.5511363636363636, + "English,Indonesian": 0.4943181818181818, + "English,Malay": 0.4659090909090909, + "English,Spanish": 0.5852272727272727, + "Filipino,Vietnamese": 0.38636363636363635, + "Filipino,Chinese": 0.39204545454545453, + "Filipino,Indonesian": 0.3977272727272727, + "Filipino,Malay": 0.42613636363636365, + "Filipino,Spanish": 0.4147727272727273, + "Vietnamese,Chinese": 0.5227272727272727, + "Vietnamese,Indonesian": 0.5397727272727273, + "Vietnamese,Malay": 0.5227272727272727, + "Vietnamese,Spanish": 0.5227272727272727, + "Chinese,Indonesian": 0.5227272727272727, + "Chinese,Malay": 0.4715909090909091, + "Chinese,Spanish": 0.4772727272727273, + "Indonesian,Malay": 0.5397727272727273, + "Indonesian,Spanish": 0.5340909090909091, + "Malay,Spanish": 0.5113636363636364 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.25, + "English,Filipino,Chinese": 0.2727272727272727, + "English,Filipino,Indonesian": 0.2556818181818182, + "English,Filipino,Malay": 0.26136363636363635, + "English,Filipino,Spanish": 0.2840909090909091, + "English,Vietnamese,Chinese": 0.3522727272727273, + "English,Vietnamese,Indonesian": 0.32386363636363635, + "English,Vietnamese,Malay": 0.3125, + "English,Vietnamese,Spanish": 0.3522727272727273, + "English,Chinese,Indonesian": 0.32954545454545453, + "English,Chinese,Malay": 0.3181818181818182, + "English,Chinese,Spanish": 0.375, + "English,Indonesian,Malay": 0.32386363636363635, + "English,Indonesian,Spanish": 0.3806818181818182, + "English,Malay,Spanish": 0.32386363636363635, + "Filipino,Vietnamese,Chinese": 0.26704545454545453, + "Filipino,Vietnamese,Indonesian": 0.25, + "Filipino,Vietnamese,Malay": 0.26136363636363635, + "Filipino,Vietnamese,Spanish": 0.25, + "Filipino,Chinese,Indonesian": 0.24431818181818182, + "Filipino,Chinese,Malay": 0.2556818181818182, + "Filipino,Chinese,Spanish": 0.25, + "Filipino,Indonesian,Malay": 0.2727272727272727, + "Filipino,Indonesian,Spanish": 0.26704545454545453, + "Filipino,Malay,Spanish": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian": 0.3693181818181818, + "Vietnamese,Chinese,Malay": 0.3352272727272727, + "Vietnamese,Chinese,Spanish": 0.3409090909090909, + "Vietnamese,Indonesian,Malay": 0.35795454545454547, + "Vietnamese,Indonesian,Spanish": 0.3409090909090909, + "Vietnamese,Malay,Spanish": 0.3181818181818182, + "Chinese,Indonesian,Malay": 0.3409090909090909, + "Chinese,Indonesian,Spanish": 0.3181818181818182, + "Chinese,Malay,Spanish": 0.29545454545454547, + "Indonesian,Malay,Spanish": 0.3465909090909091 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.21022727272727273, + "English,Filipino,Vietnamese,Indonesian": 0.18181818181818182, + "English,Filipino,Vietnamese,Malay": 0.19318181818181818, + "English,Filipino,Vietnamese,Spanish": 0.19886363636363635, + "English,Filipino,Chinese,Indonesian": 0.17613636363636365, + "English,Filipino,Chinese,Malay": 0.19886363636363635, + "English,Filipino,Chinese,Spanish": 0.21022727272727273, + "English,Filipino,Indonesian,Malay": 0.19318181818181818, + "English,Filipino,Indonesian,Spanish": 0.2159090909090909, + "English,Filipino,Malay,Spanish": 0.19886363636363635, + "English,Vietnamese,Chinese,Indonesian": 0.25, + "English,Vietnamese,Chinese,Malay": 0.2556818181818182, + "English,Vietnamese,Chinese,Spanish": 0.2556818181818182, + "English,Vietnamese,Indonesian,Malay": 0.24431818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.25, + "English,Vietnamese,Malay,Spanish": 0.23295454545454544, + "English,Chinese,Indonesian,Malay": 0.23863636363636365, + "English,Chinese,Indonesian,Spanish": 0.25, + "English,Chinese,Malay,Spanish": 0.23863636363636365, + "English,Indonesian,Malay,Spanish": 0.25, + "Filipino,Vietnamese,Chinese,Indonesian": 0.19886363636363635, + "Filipino,Vietnamese,Chinese,Malay": 0.19318181818181818, + "Filipino,Vietnamese,Chinese,Spanish": 0.19318181818181818, + "Filipino,Vietnamese,Indonesian,Malay": 0.19886363636363635, + "Filipino,Vietnamese,Indonesian,Spanish": 0.18181818181818182, + "Filipino,Vietnamese,Malay,Spanish": 0.17613636363636365, + "Filipino,Chinese,Indonesian,Malay": 0.18181818181818182, + "Filipino,Chinese,Indonesian,Spanish": 0.17045454545454544, + "Filipino,Chinese,Malay,Spanish": 0.17045454545454544, + "Filipino,Indonesian,Malay,Spanish": 0.19318181818181818, + "Vietnamese,Chinese,Indonesian,Malay": 0.26704545454545453, + "Vietnamese,Chinese,Indonesian,Spanish": 0.25, + "Vietnamese,Chinese,Malay,Spanish": 0.23863636363636365, + "Vietnamese,Indonesian,Malay,Spanish": 0.24431818181818182, + "Chinese,Indonesian,Malay,Spanish": 0.22727272727272727 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.14772727272727273, + "English,Filipino,Vietnamese,Chinese,Malay": 0.16477272727272727, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.16477272727272727, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.1534090909090909, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.14772727272727273, + "English,Filipino,Vietnamese,Malay,Spanish": 0.14772727272727273, + "English,Filipino,Chinese,Indonesian,Malay": 0.14204545454545456, + "English,Filipino,Chinese,Indonesian,Spanish": 0.14772727272727273, + "English,Filipino,Chinese,Malay,Spanish": 0.1534090909090909, + "English,Filipino,Indonesian,Malay,Spanish": 0.16477272727272727, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.19886363636363635, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.1875, + "English,Vietnamese,Chinese,Malay,Spanish": 0.19318181818181818, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.1875, + "English,Chinese,Indonesian,Malay,Spanish": 0.18181818181818182, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.14204545454545456, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.13636363636363635, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.14772727272727273, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.13068181818181818, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.19318181818181818 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.125, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.11931818181818182, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.125, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.125, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.11931818181818182, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.1534090909090909, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11363636363636363 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.10227272727272728 + } + }, + "AC3_2": 0.469208211093747, + "AC3_3": 0.36460865333066433, + "AC3_4": 0.2920110192401396, + "AC3_5": 0.2380571975967469, + "AC3_6": 0.19707565158714851, + "AC3_7": 0.16697588123160803 + } }, "sg_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5922330097087378 + }, + "prompt_2": { + "accuracy": 0.6213592233009708 + }, + "prompt_3": { + "accuracy": 0.6116504854368932 + }, + "prompt_4": { + "accuracy": 0.6019417475728155 + }, + "prompt_5": { + "accuracy": 0.5631067961165048 + } }, "cn_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6666666666666666 + }, + "prompt_2": { + "accuracy": 0.638095238095238 + }, + "prompt_3": { + "accuracy": 0.6666666666666666 + }, + "prompt_4": { + "accuracy": 0.6952380952380952 + }, + "prompt_5": { + "accuracy": 0.638095238095238 + } }, "us_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6635514018691588 + }, + "prompt_2": { + "accuracy": 0.6355140186915887 + }, + "prompt_3": { + "accuracy": 0.6448598130841121 + }, + "prompt_4": { + "accuracy": 0.6728971962616822 + }, + "prompt_5": { + "accuracy": 0.6728971962616822 + } }, "ph_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.42, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.4, + "history": 0.2, + "literature": 0.5, + "politics": 0.5, + "culture": 0.4, + "film": 0.3, + "law": 0.4, + "geography": 0.8 + } + }, + "prompt_2": { + "accuracy": 0.4, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.5, + "history": 0.2, + "literature": 0.4, + "politics": 0.5, + "culture": 0.4, + "film": 0.3, + "law": 0.4, + "geography": 0.7 + } + }, + "prompt_3": { + "accuracy": 0.44, + "category_acc": { + "brand": 0.4, + "demographics": 0.4, + "biology": 0.4, + "history": 0.2, + "literature": 0.6, + "politics": 0.6, + "culture": 0.5, + "film": 0.4, + "law": 0.4, + "geography": 0.6 + } + }, + "prompt_4": { + "accuracy": 0.42, + "category_acc": { + "brand": 0.4, + "demographics": 0.2, + "biology": 0.4, + "history": 0.2, + "literature": 0.4, + "politics": 0.6, + "culture": 0.5, + "film": 0.3, + "law": 0.6, + "geography": 0.6 + } + }, + "prompt_5": { + "accuracy": 0.46, + "category_acc": { + "brand": 0.5, + "demographics": 0.2, + "biology": 0.5, + "history": 0.2, + "literature": 0.4, + "politics": 0.6, + "culture": 0.5, + "film": 0.4, + "law": 0.6, + "geography": 0.7 + } + } }, "sing2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.1813209431909435 + }, + "prompt_2": { + "bleu_score": 0.1652468503160867 + }, + "prompt_3": { + "bleu_score": 0.18143653925165418 + }, + "prompt_4": { + "bleu_score": 0.19864418571047548 + }, + "prompt_5": { + "bleu_score": 0.12527657643969228 + } }, "indommlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.44816075839508646, + "category_acc": { + "History": 0.3815261044176707, + "Geography": 0.4122448979591837, + "Lampungic": 0.3469387755102041, + "Social science": 0.659432387312187, + "Balinese": 0.3078556263269639, + "Makassarese": 0.3548387096774194, + "Banjarese": 0.2708333333333333, + "Chemistry": 0.2948905109489051, + "Biology": 0.4059171597633136, + "Science": 0.5727554179566563, + "Christian religion": 0.5422885572139303, + "Art": 0.5307820299500832, + "Islam religion": 0.5078236130867709, + "Hindu religion": 0.35333333333333333, + "Madurese": 0.34576271186440677, + "Sport": 0.5608108108108109, + "Indonesian language": 0.5174346201743462, + "Physics": 0.37777777777777777, + "Minangkabau culture": 0.37185929648241206, + "Dayak language": 0.29357798165137616, + "Sociology": 0.4939516129032258, + "Economy": 0.4569672131147541, + "Sundanese": 0.3500432152117545, + "Javanese": 0.3165322580645161, + "Civic education": 0.5150214592274678 + } + }, + "prompt_2": { + "accuracy": 0.4552373322651712, + "category_acc": { + "History": 0.39959839357429716, + "Geography": 0.42857142857142855, + "Lampungic": 0.30612244897959184, + "Social science": 0.669449081803005, + "Balinese": 0.27176220806794055, + "Makassarese": 0.3333333333333333, + "Banjarese": 0.3125, + "Chemistry": 0.310948905109489, + "Biology": 0.4153846153846154, + "Science": 0.5789473684210527, + "Christian religion": 0.5223880597014925, + "Art": 0.5457570715474209, + "Islam religion": 0.5177809388335705, + "Hindu religion": 0.38, + "Madurese": 0.30847457627118646, + "Sport": 0.5472972972972973, + "Indonesian language": 0.538293897882939, + "Physics": 0.39595959595959596, + "Minangkabau culture": 0.36683417085427134, + "Dayak language": 0.3119266055045872, + "Sociology": 0.4798387096774194, + "Economy": 0.4262295081967213, + "Sundanese": 0.3656006914433881, + "Javanese": 0.3175403225806452, + "Civic education": 0.5178826895565093 + } + }, + "prompt_3": { + "accuracy": 0.4533012884705254, + "category_acc": { + "History": 0.39558232931726905, + "Geography": 0.41836734693877553, + "Lampungic": 0.3197278911564626, + "Social science": 0.657762938230384, + "Balinese": 0.27176220806794055, + "Makassarese": 0.3172043010752688, + "Banjarese": 0.3263888888888889, + "Chemistry": 0.3094890510948905, + "Biology": 0.39644970414201186, + "Science": 0.5768833849329206, + "Christian religion": 0.5422885572139303, + "Art": 0.5557404326123128, + "Islam religion": 0.5291607396870555, + "Hindu religion": 0.4, + "Madurese": 0.2983050847457627, + "Sport": 0.5067567567567568, + "Indonesian language": 0.5323785803237858, + "Physics": 0.40606060606060607, + "Minangkabau culture": 0.34673366834170855, + "Dayak language": 0.3302752293577982, + "Sociology": 0.49193548387096775, + "Economy": 0.45491803278688525, + "Sundanese": 0.3474503025064823, + "Javanese": 0.32056451612903225, + "Civic education": 0.525035765379113 + } + }, + "prompt_4": { + "accuracy": 0.4439548701515455, + "category_acc": { + "History": 0.39558232931726905, + "Geography": 0.4306122448979592, + "Lampungic": 0.29931972789115646, + "Social science": 0.659432387312187, + "Balinese": 0.2781316348195329, + "Makassarese": 0.3333333333333333, + "Banjarese": 0.3263888888888889, + "Chemistry": 0.3051094890510949, + "Biology": 0.40828402366863903, + "Science": 0.5552115583075335, + "Christian religion": 0.5323383084577115, + "Art": 0.5540765391014975, + "Islam religion": 0.519203413940256, + "Hindu religion": 0.3933333333333333, + "Madurese": 0.31186440677966104, + "Sport": 0.5135135135135135, + "Indonesian language": 0.5080946450809465, + "Physics": 0.3696969696969697, + "Minangkabau culture": 0.3417085427135678, + "Dayak language": 0.3119266055045872, + "Sociology": 0.48185483870967744, + "Economy": 0.4385245901639344, + "Sundanese": 0.34053586862575624, + "Javanese": 0.31048387096774194, + "Civic education": 0.525035765379113 + } + }, + "prompt_5": { + "accuracy": 0.4452900727685426, + "category_acc": { + "History": 0.3815261044176707, + "Geography": 0.4, + "Lampungic": 0.3469387755102041, + "Social science": 0.6527545909849749, + "Balinese": 0.25690021231422505, + "Makassarese": 0.34946236559139787, + "Banjarese": 0.3194444444444444, + "Chemistry": 0.2875912408759124, + "Biology": 0.40828402366863903, + "Science": 0.5510835913312694, + "Christian religion": 0.5422885572139303, + "Art": 0.5440931780366056, + "Islam religion": 0.519203413940256, + "Hindu religion": 0.38666666666666666, + "Madurese": 0.31864406779661014, + "Sport": 0.527027027027027, + "Indonesian language": 0.522104607721046, + "Physics": 0.37575757575757573, + "Minangkabau culture": 0.3316582914572864, + "Dayak language": 0.3394495412844037, + "Sociology": 0.4778225806451613, + "Economy": 0.4487704918032787, + "Sundanese": 0.3526361279170268, + "Javanese": 0.31149193548387094, + "Civic education": 0.5207439198855508 + } + } }, "flores_ind2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.3041235654636236 + }, + "prompt_2": { + "bleu_score": 0.29512716944728007 + }, + "prompt_3": { + "bleu_score": 0.29373780960981843 + }, + "prompt_4": { + "bleu_score": 0.31250065854498427 + }, + "prompt_5": { + "bleu_score": 0.2774937611806217 + } }, "flores_vie2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.24780407565296586 + }, + "prompt_2": { + "bleu_score": 0.23536264859138686 + }, + "prompt_3": { + "bleu_score": 0.24276746629856732 + }, + "prompt_4": { + "bleu_score": 0.2554545547253871 + }, + "prompt_5": { + "bleu_score": 0.23881161523747158 + } }, "flores_zho2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.15504897763144337 + }, + "prompt_2": { + "bleu_score": 0.14522522401931925 + }, + "prompt_3": { + "bleu_score": 0.14929344122488786 + }, + "prompt_4": { + "bleu_score": 0.1744212961606217 + }, + "prompt_5": { + "bleu_score": 0.20102267624778775 + } }, "flores_zsm2eng": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "bleu_score": 0.27681293456758976 + }, + "prompt_2": { + "bleu_score": 0.27829874052610976 + }, + "prompt_3": { + "bleu_score": 0.26783075773550924 + }, + "prompt_4": { + "bleu_score": 0.28645321655340955 + }, + "prompt_5": { + "bleu_score": 0.25162398658743396 + } }, "mmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5752625437572929 + }, + "prompt_2": { + "accuracy": 0.5635939323220537 + }, + "prompt_3": { + "accuracy": 0.5787631271878646 + }, + "prompt_4": { + "accuracy": 0.5659276546091015 + }, + "prompt_5": { + "accuracy": 0.5565927654609102 + } }, "mmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5616732213085449, + "category_acc": { + "high_school_european_history": 0.75, + "business_ethics": 0.5959595959595959, + "clinical_knowledge": 0.6401515151515151, + "medical_genetics": 0.6666666666666666, + "high_school_us_history": 0.7093596059113301, + "high_school_physics": 0.31333333333333335, + "high_school_world_history": 0.7669491525423728, + "virology": 0.4666666666666667, + "high_school_microeconomics": 0.5738396624472574, + "econometrics": 0.4247787610619469, + "college_computer_science": 0.48484848484848486, + "high_school_biology": 0.6440129449838188, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.3914590747330961, + "philosophy": 0.5709677419354838, + "professional_medicine": 0.5571955719557196, + "nutrition": 0.6, + "global_facts": 0.4444444444444444, + "machine_learning": 0.43243243243243246, + "security_studies": 0.5245901639344263, + "public_relations": 0.6146788990825688, + "professional_psychology": 0.5171849427168577, + "prehistory": 0.5727554179566563, + "anatomy": 0.5373134328358209, + "human_sexuality": 0.6615384615384615, + "college_medicine": 0.5930232558139535, + "high_school_government_and_politics": 0.7604166666666666, + "college_chemistry": 0.41414141414141414, + "logical_fallacies": 0.6172839506172839, + "high_school_geography": 0.7157360406091371, + "elementary_mathematics": 0.6074270557029178, + "human_aging": 0.6126126126126126, + "college_mathematics": 0.31313131313131315, + "high_school_psychology": 0.7665441176470589, + "formal_logic": 0.44, + "high_school_statistics": 0.4325581395348837, + "international_law": 0.6583333333333333, + "high_school_mathematics": 0.2788104089219331, + "high_school_computer_science": 0.6464646464646465, + "conceptual_physics": 0.5641025641025641, + "miscellaneous": 0.7506393861892583, + "high_school_chemistry": 0.44554455445544555, + "marketing": 0.8283261802575107, + "professional_law": 0.4142204827136334, + "management": 0.7058823529411765, + "college_physics": 0.37623762376237624, + "jurisprudence": 0.6822429906542056, + "world_religions": 0.7470588235294118, + "sociology": 0.72, + "us_foreign_policy": 0.7777777777777778, + "high_school_macroeconomics": 0.5552699228791774, + "computer_security": 0.6464646464646465, + "moral_scenarios": 0.40492170022371365, + "moral_disputes": 0.5768115942028985, + "electrical_engineering": 0.4583333333333333, + "astronomy": 0.5761589403973509, + "college_biology": 0.6153846153846154 + } + }, + "prompt_2": { + "accuracy": 0.5341437254200929, + "category_acc": { + "high_school_european_history": 0.7439024390243902, + "business_ethics": 0.6868686868686869, + "clinical_knowledge": 0.5795454545454546, + "medical_genetics": 0.5757575757575758, + "high_school_us_history": 0.7192118226600985, + "high_school_physics": 0.35333333333333333, + "high_school_world_history": 0.6822033898305084, + "virology": 0.4121212121212121, + "high_school_microeconomics": 0.4978902953586498, + "econometrics": 0.415929203539823, + "college_computer_science": 0.35353535353535354, + "high_school_biology": 0.6343042071197411, + "abstract_algebra": 0.31313131313131315, + "professional_accounting": 0.3879003558718861, + "philosophy": 0.5612903225806452, + "professional_medicine": 0.5313653136531366, + "nutrition": 0.5442622950819672, + "global_facts": 0.23232323232323232, + "machine_learning": 0.4144144144144144, + "security_studies": 0.5409836065573771, + "public_relations": 0.5504587155963303, + "professional_psychology": 0.48936170212765956, + "prehistory": 0.5386996904024768, + "anatomy": 0.4701492537313433, + "human_sexuality": 0.6384615384615384, + "college_medicine": 0.5581395348837209, + "high_school_government_and_politics": 0.6510416666666666, + "college_chemistry": 0.42424242424242425, + "logical_fallacies": 0.5493827160493827, + "high_school_geography": 0.6649746192893401, + "elementary_mathematics": 0.6127320954907162, + "human_aging": 0.536036036036036, + "college_mathematics": 0.26262626262626265, + "high_school_psychology": 0.7426470588235294, + "formal_logic": 0.392, + "high_school_statistics": 0.40930232558139534, + "international_law": 0.6583333333333333, + "high_school_mathematics": 0.25650557620817843, + "high_school_computer_science": 0.6262626262626263, + "conceptual_physics": 0.5085470085470085, + "miscellaneous": 0.7416879795396419, + "high_school_chemistry": 0.44554455445544555, + "marketing": 0.8111587982832618, + "professional_law": 0.4181343770384866, + "management": 0.7058823529411765, + "college_physics": 0.38613861386138615, + "jurisprudence": 0.6355140186915887, + "world_religions": 0.7235294117647059, + "sociology": 0.665, + "us_foreign_policy": 0.7272727272727273, + "high_school_macroeconomics": 0.493573264781491, + "computer_security": 0.6161616161616161, + "moral_scenarios": 0.3870246085011186, + "moral_disputes": 0.5101449275362319, + "electrical_engineering": 0.4305555555555556, + "astronomy": 0.5761589403973509, + "college_biology": 0.5734265734265734 + } + }, + "prompt_3": { + "accuracy": 0.5656775116195925, + "category_acc": { + "high_school_european_history": 0.7560975609756098, + "business_ethics": 0.6262626262626263, + "clinical_knowledge": 0.5984848484848485, + "medical_genetics": 0.6666666666666666, + "high_school_us_history": 0.7044334975369458, + "high_school_physics": 0.34, + "high_school_world_history": 0.7669491525423728, + "virology": 0.4303030303030303, + "high_school_microeconomics": 0.6118143459915611, + "econometrics": 0.39823008849557523, + "college_computer_science": 0.46464646464646464, + "high_school_biology": 0.6925566343042071, + "abstract_algebra": 0.30303030303030304, + "professional_accounting": 0.4128113879003559, + "philosophy": 0.5935483870967742, + "professional_medicine": 0.6125461254612546, + "nutrition": 0.639344262295082, + "global_facts": 0.2828282828282828, + "machine_learning": 0.43243243243243246, + "security_studies": 0.5409836065573771, + "public_relations": 0.5779816513761468, + "professional_psychology": 0.5204582651391162, + "prehistory": 0.56656346749226, + "anatomy": 0.4925373134328358, + "human_sexuality": 0.6692307692307692, + "college_medicine": 0.5697674418604651, + "high_school_government_and_politics": 0.78125, + "college_chemistry": 0.48484848484848486, + "logical_fallacies": 0.6728395061728395, + "high_school_geography": 0.751269035532995, + "elementary_mathematics": 0.5888594164456233, + "human_aging": 0.5855855855855856, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.7720588235294118, + "formal_logic": 0.408, + "high_school_statistics": 0.413953488372093, + "international_law": 0.6583333333333333, + "high_school_mathematics": 0.2379182156133829, + "high_school_computer_science": 0.6262626262626263, + "conceptual_physics": 0.5213675213675214, + "miscellaneous": 0.7659846547314578, + "high_school_chemistry": 0.4207920792079208, + "marketing": 0.8111587982832618, + "professional_law": 0.4207436399217221, + "management": 0.7156862745098039, + "college_physics": 0.3465346534653465, + "jurisprudence": 0.6915887850467289, + "world_religions": 0.7529411764705882, + "sociology": 0.7, + "us_foreign_policy": 0.8181818181818182, + "high_school_macroeconomics": 0.5449871465295629, + "computer_security": 0.6767676767676768, + "moral_scenarios": 0.4217002237136465, + "moral_disputes": 0.6434782608695652, + "electrical_engineering": 0.4930555555555556, + "astronomy": 0.5894039735099338, + "college_biology": 0.5734265734265734 + } + }, + "prompt_4": { + "accuracy": 0.5427243475151948, + "category_acc": { + "high_school_european_history": 0.75, + "business_ethics": 0.6363636363636364, + "clinical_knowledge": 0.5984848484848485, + "medical_genetics": 0.6363636363636364, + "high_school_us_history": 0.6650246305418719, + "high_school_physics": 0.38, + "high_school_world_history": 0.7245762711864406, + "virology": 0.47878787878787876, + "high_school_microeconomics": 0.5443037974683544, + "econometrics": 0.3805309734513274, + "college_computer_science": 0.35353535353535354, + "high_school_biology": 0.6407766990291263, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.3879003558718861, + "philosophy": 0.6, + "professional_medicine": 0.5350553505535055, + "nutrition": 0.5770491803278689, + "global_facts": 0.37373737373737376, + "machine_learning": 0.45045045045045046, + "security_studies": 0.47540983606557374, + "public_relations": 0.5596330275229358, + "professional_psychology": 0.5188216039279869, + "prehistory": 0.5572755417956656, + "anatomy": 0.4925373134328358, + "human_sexuality": 0.6230769230769231, + "college_medicine": 0.5697674418604651, + "high_school_government_and_politics": 0.7395833333333334, + "college_chemistry": 0.40404040404040403, + "logical_fallacies": 0.6111111111111112, + "high_school_geography": 0.6903553299492385, + "elementary_mathematics": 0.5994694960212201, + "human_aging": 0.5675675675675675, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.7150735294117647, + "formal_logic": 0.408, + "high_school_statistics": 0.4325581395348837, + "international_law": 0.6416666666666667, + "high_school_mathematics": 0.241635687732342, + "high_school_computer_science": 0.6262626262626263, + "conceptual_physics": 0.5042735042735043, + "miscellaneous": 0.7455242966751918, + "high_school_chemistry": 0.3613861386138614, + "marketing": 0.7896995708154506, + "professional_law": 0.42204827136333983, + "management": 0.7058823529411765, + "college_physics": 0.3564356435643564, + "jurisprudence": 0.6261682242990654, + "world_religions": 0.7647058823529411, + "sociology": 0.69, + "us_foreign_policy": 0.7373737373737373, + "high_school_macroeconomics": 0.4781491002570694, + "computer_security": 0.6666666666666666, + "moral_scenarios": 0.38926174496644295, + "moral_disputes": 0.5739130434782609, + "electrical_engineering": 0.5069444444444444, + "astronomy": 0.6423841059602649, + "college_biology": 0.4825174825174825 + } + }, + "prompt_5": { + "accuracy": 0.5440114408294602, + "category_acc": { + "high_school_european_history": 0.676829268292683, + "business_ethics": 0.6464646464646465, + "clinical_knowledge": 0.571969696969697, + "medical_genetics": 0.5858585858585859, + "high_school_us_history": 0.7192118226600985, + "high_school_physics": 0.26666666666666666, + "high_school_world_history": 0.6949152542372882, + "virology": 0.4727272727272727, + "high_school_microeconomics": 0.5738396624472574, + "econometrics": 0.4336283185840708, + "college_computer_science": 0.3939393939393939, + "high_school_biology": 0.6407766990291263, + "abstract_algebra": 0.26262626262626265, + "professional_accounting": 0.3665480427046263, + "philosophy": 0.5903225806451613, + "professional_medicine": 0.5055350553505535, + "nutrition": 0.5967213114754099, + "global_facts": 0.35353535353535354, + "machine_learning": 0.3963963963963964, + "security_studies": 0.5163934426229508, + "public_relations": 0.5779816513761468, + "professional_psychology": 0.5237315875613748, + "prehistory": 0.5944272445820433, + "anatomy": 0.5074626865671642, + "human_sexuality": 0.6230769230769231, + "college_medicine": 0.5290697674418605, + "high_school_government_and_politics": 0.765625, + "college_chemistry": 0.3939393939393939, + "logical_fallacies": 0.6234567901234568, + "high_school_geography": 0.7258883248730964, + "elementary_mathematics": 0.5702917771883289, + "human_aging": 0.6081081081081081, + "college_mathematics": 0.23232323232323232, + "high_school_psychology": 0.7316176470588235, + "formal_logic": 0.44, + "high_school_statistics": 0.4186046511627907, + "international_law": 0.625, + "high_school_mathematics": 0.241635687732342, + "high_school_computer_science": 0.5858585858585859, + "conceptual_physics": 0.5170940170940171, + "miscellaneous": 0.7595907928388747, + "high_school_chemistry": 0.41089108910891087, + "marketing": 0.8068669527896996, + "professional_law": 0.42661448140900193, + "management": 0.7156862745098039, + "college_physics": 0.2871287128712871, + "jurisprudence": 0.6261682242990654, + "world_religions": 0.7588235294117647, + "sociology": 0.685, + "us_foreign_policy": 0.7575757575757576, + "high_school_macroeconomics": 0.5218508997429306, + "computer_security": 0.6262626262626263, + "moral_scenarios": 0.36017897091722595, + "moral_disputes": 0.5478260869565217, + "electrical_engineering": 0.5, + "astronomy": 0.6556291390728477, + "college_biology": 0.5734265734265734 + } + } }, "c_eval": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.650074294205052 + }, + "prompt_2": { + "accuracy": 0.6463595839524517 + }, + "prompt_3": { + "accuracy": 0.6411589895988113 + }, + "prompt_4": { + "accuracy": 0.5950965824665676 + }, + "prompt_5": { + "accuracy": 0.5230312035661219 + } }, "c_eval_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6656288916562889, + "category_acc": { + "computer_network": 0.5, + "operating_system": 0.5833333333333334, + "computer_architecture": 0.6538461538461539, + "college_programming": 0.7380952380952381, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.4482758620689655, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.42857142857142855, + "metrology_engineer": 0.7586206896551724, + "high_school_mathematics": 0.5217391304347826, + "high_school_physics": 0.6666666666666666, + "high_school_chemistry": 0.7083333333333334, + "high_school_biology": 0.7916666666666666, + "middle_school_mathematics": 0.625, + "middle_school_biology": 0.9230769230769231, + "middle_school_physics": 0.9166666666666666, + "middle_school_chemistry": 0.92, + "veterinary_medicine": 0.5714285714285714, + "college_economics": 0.5333333333333333, + "business_administration": 0.5526315789473685, + "marxism": 0.875, + "mao_zedong_thought": 0.8275862068965517, + "education_science": 0.7058823529411765, + "teacher_qualification": 0.8367346938775511, + "high_school_politics": 0.875, + "high_school_geography": 0.8333333333333334, + "middle_school_politics": 1.0, + "middle_school_geography": 0.9411764705882353, + "modern_chinese_history": 0.8571428571428571, + "ideological_and_moral_cultivation": 0.9583333333333334, + "logic": 0.48148148148148145, + "law": 0.7586206896551724, + "chinese_language_and_literature": 0.6428571428571429, + "art_studies": 0.7105263157894737, + "professional_tour_guide": 0.7647058823529411, + "legal_professional": 0.5357142857142857, + "high_school_chinese": 0.5833333333333334, + "high_school_history": 0.84, + "middle_school_history": 0.9259259259259259, + "civil_servant": 0.5769230769230769, + "sports_science": 0.6666666666666666, + "plant_protection": 0.7037037037037037, + "basic_medicine": 0.8333333333333334, + "clinical_medicine": 0.6666666666666666, + "urban_and_rural_planner": 0.6078431372549019, + "accountant": 0.5925925925925926, + "fire_engineer": 0.5833333333333334, + "environmental_impact_assessment_engineer": 0.7222222222222222, + "tax_accountant": 0.5740740740740741, + "physician": 0.6666666666666666 + } + }, + "prompt_2": { + "accuracy": 0.651307596513076, + "category_acc": { + "computer_network": 0.5416666666666666, + "operating_system": 0.625, + "computer_architecture": 0.5769230769230769, + "college_programming": 0.5952380952380952, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.35714285714285715, + "metrology_engineer": 0.7586206896551724, + "high_school_mathematics": 0.391304347826087, + "high_school_physics": 0.625, + "high_school_chemistry": 0.6666666666666666, + "high_school_biology": 0.7916666666666666, + "middle_school_mathematics": 0.5, + "middle_school_biology": 0.9615384615384616, + "middle_school_physics": 0.875, + "middle_school_chemistry": 0.92, + "veterinary_medicine": 0.5714285714285714, + "college_economics": 0.5833333333333334, + "business_administration": 0.631578947368421, + "marxism": 0.9166666666666666, + "mao_zedong_thought": 0.8275862068965517, + "education_science": 0.7352941176470589, + "teacher_qualification": 0.8163265306122449, + "high_school_politics": 0.875, + "high_school_geography": 0.875, + "middle_school_politics": 0.9615384615384616, + "middle_school_geography": 0.9411764705882353, + "modern_chinese_history": 0.7857142857142857, + "ideological_and_moral_cultivation": 0.9583333333333334, + "logic": 0.6296296296296297, + "law": 0.7241379310344828, + "chinese_language_and_literature": 0.6428571428571429, + "art_studies": 0.6842105263157895, + "professional_tour_guide": 0.7058823529411765, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.5833333333333334, + "high_school_history": 0.8, + "middle_school_history": 0.8888888888888888, + "civil_servant": 0.6153846153846154, + "sports_science": 0.7083333333333334, + "plant_protection": 0.6296296296296297, + "basic_medicine": 0.8333333333333334, + "clinical_medicine": 0.5185185185185185, + "urban_and_rural_planner": 0.6470588235294118, + "accountant": 0.5925925925925926, + "fire_engineer": 0.4444444444444444, + "environmental_impact_assessment_engineer": 0.6944444444444444, + "tax_accountant": 0.5555555555555556, + "physician": 0.7037037037037037 + } + }, + "prompt_3": { + "accuracy": 0.6631382316313823, + "category_acc": { + "computer_network": 0.5, + "operating_system": 0.5416666666666666, + "computer_architecture": 0.5769230769230769, + "college_programming": 0.6666666666666666, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.4482758620689655, + "advanced_mathematics": 0.2916666666666667, + "probability_and_statistics": 0.34782608695652173, + "discrete_mathematics": 0.3333333333333333, + "electrical_engineer": 0.40476190476190477, + "metrology_engineer": 0.7586206896551724, + "high_school_mathematics": 0.30434782608695654, + "high_school_physics": 0.625, + "high_school_chemistry": 0.5833333333333334, + "high_school_biology": 0.7916666666666666, + "middle_school_mathematics": 0.6666666666666666, + "middle_school_biology": 0.9230769230769231, + "middle_school_physics": 0.9166666666666666, + "middle_school_chemistry": 0.92, + "veterinary_medicine": 0.6071428571428571, + "college_economics": 0.5666666666666667, + "business_administration": 0.7105263157894737, + "marxism": 0.875, + "mao_zedong_thought": 0.7931034482758621, + "education_science": 0.7058823529411765, + "teacher_qualification": 0.8367346938775511, + "high_school_politics": 0.875, + "high_school_geography": 0.8333333333333334, + "middle_school_politics": 0.9615384615384616, + "middle_school_geography": 0.9411764705882353, + "modern_chinese_history": 0.8571428571428571, + "ideological_and_moral_cultivation": 0.9583333333333334, + "logic": 0.48148148148148145, + "law": 0.6206896551724138, + "chinese_language_and_literature": 0.6785714285714286, + "art_studies": 0.6842105263157895, + "professional_tour_guide": 0.7647058823529411, + "legal_professional": 0.5357142857142857, + "high_school_chinese": 0.625, + "high_school_history": 0.88, + "middle_school_history": 0.9629629629629629, + "civil_servant": 0.5769230769230769, + "sports_science": 0.6666666666666666, + "plant_protection": 0.7037037037037037, + "basic_medicine": 0.8333333333333334, + "clinical_medicine": 0.6666666666666666, + "urban_and_rural_planner": 0.6078431372549019, + "accountant": 0.6296296296296297, + "fire_engineer": 0.5833333333333334, + "environmental_impact_assessment_engineer": 0.7222222222222222, + "tax_accountant": 0.5555555555555556, + "physician": 0.6481481481481481 + } + }, + "prompt_4": { + "accuracy": 0.5921544209215442, + "category_acc": { + "computer_network": 0.375, + "operating_system": 0.5, + "computer_architecture": 0.5, + "college_programming": 0.5476190476190477, + "college_physics": 0.20833333333333334, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.38095238095238093, + "electrical_engineer": 0.3333333333333333, + "metrology_engineer": 0.5517241379310345, + "high_school_mathematics": 0.5217391304347826, + "high_school_physics": 0.5, + "high_school_chemistry": 0.6666666666666666, + "high_school_biology": 0.5416666666666666, + "middle_school_mathematics": 0.4166666666666667, + "middle_school_biology": 0.9230769230769231, + "middle_school_physics": 0.8333333333333334, + "middle_school_chemistry": 0.76, + "veterinary_medicine": 0.6785714285714286, + "college_economics": 0.6333333333333333, + "business_administration": 0.6842105263157895, + "marxism": 0.7916666666666666, + "mao_zedong_thought": 0.7586206896551724, + "education_science": 0.7058823529411765, + "teacher_qualification": 0.7755102040816326, + "high_school_politics": 0.875, + "high_school_geography": 0.7083333333333334, + "middle_school_politics": 0.8846153846153846, + "middle_school_geography": 0.8235294117647058, + "modern_chinese_history": 0.7857142857142857, + "ideological_and_moral_cultivation": 0.9166666666666666, + "logic": 0.2962962962962963, + "law": 0.5517241379310345, + "chinese_language_and_literature": 0.5357142857142857, + "art_studies": 0.6842105263157895, + "professional_tour_guide": 0.7941176470588235, + "legal_professional": 0.42857142857142855, + "high_school_chinese": 0.4583333333333333, + "high_school_history": 0.8, + "middle_school_history": 0.8888888888888888, + "civil_servant": 0.46153846153846156, + "sports_science": 0.6666666666666666, + "plant_protection": 0.7407407407407407, + "basic_medicine": 0.8333333333333334, + "clinical_medicine": 0.5555555555555556, + "urban_and_rural_planner": 0.45098039215686275, + "accountant": 0.5185185185185185, + "fire_engineer": 0.4722222222222222, + "environmental_impact_assessment_engineer": 0.6111111111111112, + "tax_accountant": 0.46296296296296297, + "physician": 0.5185185185185185 + } + }, + "prompt_5": { + "accuracy": 0.5448318804483188, + "category_acc": { + "computer_network": 0.5416666666666666, + "operating_system": 0.5416666666666666, + "computer_architecture": 0.5769230769230769, + "college_programming": 0.47619047619047616, + "college_physics": 0.25, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.16666666666666666, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.40476190476190477, + "metrology_engineer": 0.4827586206896552, + "high_school_mathematics": 0.391304347826087, + "high_school_physics": 0.5, + "high_school_chemistry": 0.5, + "high_school_biology": 0.7083333333333334, + "middle_school_mathematics": 0.4166666666666667, + "middle_school_biology": 0.8076923076923077, + "middle_school_physics": 0.5833333333333334, + "middle_school_chemistry": 0.8, + "veterinary_medicine": 0.5714285714285714, + "college_economics": 0.4, + "business_administration": 0.5526315789473685, + "marxism": 0.7916666666666666, + "mao_zedong_thought": 0.5862068965517241, + "education_science": 0.7352941176470589, + "teacher_qualification": 0.6938775510204082, + "high_school_politics": 0.5416666666666666, + "high_school_geography": 0.7083333333333334, + "middle_school_politics": 0.9230769230769231, + "middle_school_geography": 0.7647058823529411, + "modern_chinese_history": 0.8214285714285714, + "ideological_and_moral_cultivation": 0.7916666666666666, + "logic": 0.14814814814814814, + "law": 0.5517241379310345, + "chinese_language_and_literature": 0.7142857142857143, + "art_studies": 0.6578947368421053, + "professional_tour_guide": 0.7941176470588235, + "legal_professional": 0.5714285714285714, + "high_school_chinese": 0.5, + "high_school_history": 0.56, + "middle_school_history": 0.8888888888888888, + "civil_servant": 0.4807692307692308, + "sports_science": 0.4583333333333333, + "plant_protection": 0.5925925925925926, + "basic_medicine": 0.7083333333333334, + "clinical_medicine": 0.48148148148148145, + "urban_and_rural_planner": 0.5686274509803921, + "accountant": 0.3888888888888889, + "fire_engineer": 0.4722222222222222, + "environmental_impact_assessment_engineer": 0.5555555555555556, + "tax_accountant": 0.35185185185185186, + "physician": 0.5555555555555556 + } + } }, "cmmlu": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6810035842293907 + }, + "prompt_2": { + "accuracy": 0.6917562724014337 + }, + "prompt_3": { + "accuracy": 0.6630824372759857 + }, + "prompt_4": { + "accuracy": 0.6594982078853047 + }, + "prompt_5": { + "accuracy": 0.5842293906810035 + } }, "cmmlu_full": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.647815575893628, + "category_acc": { + "agronomy": 0.5088757396449705, + "anatomy": 0.6013513513513513, + "ancient_chinese": 0.38414634146341464, + "arts": 0.8625, + "astronomy": 0.4121212121212121, + "business_ethics": 0.6172248803827751, + "chinese_civil_service_exam": 0.575, + "chinese_driving_rule": 0.916030534351145, + "chinese_food_culture": 0.6397058823529411, + "chinese_foreign_policy": 0.719626168224299, + "chinese_history": 0.7925696594427245, + "chinese_literature": 0.47549019607843135, + "chinese_teacher_qualification": 0.8100558659217877, + "clinical_knowledge": 0.5864978902953587, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.7663551401869159, + "college_engineering_hydrology": 0.5471698113207547, + "college_law": 0.5740740740740741, + "college_mathematics": 0.3142857142857143, + "college_medical_statistics": 0.4811320754716981, + "college_medicine": 0.663003663003663, + "computer_science": 0.6470588235294118, + "computer_security": 0.7602339181286549, + "conceptual_physics": 0.8299319727891157, + "construction_project_management": 0.5611510791366906, + "economics": 0.6729559748427673, + "education": 0.6625766871165644, + "electrical_engineering": 0.622093023255814, + "elementary_chinese": 0.6666666666666666, + "elementary_commonsense": 0.7070707070707071, + "elementary_information_and_technology": 0.8235294117647058, + "elementary_mathematics": 0.48695652173913045, + "ethnology": 0.6518518518518519, + "food_science": 0.5804195804195804, + "genetics": 0.5340909090909091, + "global_facts": 0.6510067114093959, + "high_school_biology": 0.7337278106508875, + "high_school_chemistry": 0.6590909090909091, + "high_school_geography": 0.7966101694915254, + "high_school_mathematics": 0.36585365853658536, + "high_school_physics": 0.6, + "high_school_politics": 0.6993006993006993, + "human_sexuality": 0.6428571428571429, + "international_law": 0.5459459459459459, + "journalism": 0.5930232558139535, + "jurisprudence": 0.6739659367396593, + "legal_and_moral_basis": 0.9485981308411215, + "logical": 0.5528455284552846, + "machine_learning": 0.5245901639344263, + "management": 0.7857142857142857, + "marketing": 0.6888888888888889, + "marxist_theory": 0.8201058201058201, + "modern_chinese": 0.5344827586206896, + "nutrition": 0.6413793103448275, + "philosophy": 0.6761904761904762, + "professional_accounting": 0.7485714285714286, + "professional_law": 0.5592417061611374, + "professional_medicine": 0.5292553191489362, + "professional_psychology": 0.7887931034482759, + "public_relations": 0.6436781609195402, + "security_study": 0.7407407407407407, + "sociology": 0.6283185840707964, + "sports_science": 0.6909090909090909, + "traditional_chinese_medicine": 0.6432432432432432, + "virology": 0.650887573964497, + "world_history": 0.7701863354037267, + "world_religions": 0.69375 + } + }, + "prompt_2": { + "accuracy": 0.6427214643412191, + "category_acc": { + "agronomy": 0.5266272189349113, + "anatomy": 0.5878378378378378, + "ancient_chinese": 0.3902439024390244, + "arts": 0.85625, + "astronomy": 0.46060606060606063, + "business_ethics": 0.5980861244019139, + "chinese_civil_service_exam": 0.5625, + "chinese_driving_rule": 0.9007633587786259, + "chinese_food_culture": 0.6470588235294118, + "chinese_foreign_policy": 0.7102803738317757, + "chinese_history": 0.7523219814241486, + "chinese_literature": 0.5245098039215687, + "chinese_teacher_qualification": 0.7877094972067039, + "clinical_knowledge": 0.620253164556962, + "college_actuarial_science": 0.29245283018867924, + "college_education": 0.7850467289719626, + "college_engineering_hydrology": 0.5471698113207547, + "college_law": 0.5925925925925926, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.5094339622641509, + "college_medicine": 0.6263736263736264, + "computer_science": 0.6911764705882353, + "computer_security": 0.7485380116959064, + "conceptual_physics": 0.8027210884353742, + "construction_project_management": 0.5035971223021583, + "economics": 0.6540880503144654, + "education": 0.6687116564417178, + "electrical_engineering": 0.627906976744186, + "elementary_chinese": 0.6626984126984127, + "elementary_commonsense": 0.6818181818181818, + "elementary_information_and_technology": 0.8361344537815126, + "elementary_mathematics": 0.49130434782608695, + "ethnology": 0.674074074074074, + "food_science": 0.5804195804195804, + "genetics": 0.4943181818181818, + "global_facts": 0.6711409395973155, + "high_school_biology": 0.7633136094674556, + "high_school_chemistry": 0.6818181818181818, + "high_school_geography": 0.7711864406779662, + "high_school_mathematics": 0.35365853658536583, + "high_school_physics": 0.6090909090909091, + "high_school_politics": 0.7062937062937062, + "human_sexuality": 0.5952380952380952, + "international_law": 0.5459459459459459, + "journalism": 0.5755813953488372, + "jurisprudence": 0.6666666666666666, + "legal_and_moral_basis": 0.9485981308411215, + "logical": 0.5365853658536586, + "machine_learning": 0.5409836065573771, + "management": 0.7428571428571429, + "marketing": 0.7111111111111111, + "marxist_theory": 0.8201058201058201, + "modern_chinese": 0.5344827586206896, + "nutrition": 0.6275862068965518, + "philosophy": 0.6666666666666666, + "professional_accounting": 0.7085714285714285, + "professional_law": 0.5687203791469194, + "professional_medicine": 0.5053191489361702, + "professional_psychology": 0.7672413793103449, + "public_relations": 0.6609195402298851, + "security_study": 0.6814814814814815, + "sociology": 0.6460176991150443, + "sports_science": 0.6424242424242425, + "traditional_chinese_medicine": 0.6486486486486487, + "virology": 0.6331360946745562, + "world_history": 0.7577639751552795, + "world_religions": 0.7 + } + }, + "prompt_3": { + "accuracy": 0.6448799861854602, + "category_acc": { + "agronomy": 0.5384615384615384, + "anatomy": 0.5945945945945946, + "ancient_chinese": 0.4024390243902439, + "arts": 0.8625, + "astronomy": 0.3515151515151515, + "business_ethics": 0.6124401913875598, + "chinese_civil_service_exam": 0.59375, + "chinese_driving_rule": 0.8778625954198473, + "chinese_food_culture": 0.6029411764705882, + "chinese_foreign_policy": 0.7102803738317757, + "chinese_history": 0.7801857585139319, + "chinese_literature": 0.4950980392156863, + "chinese_teacher_qualification": 0.8044692737430168, + "clinical_knowledge": 0.6075949367088608, + "college_actuarial_science": 0.3490566037735849, + "college_education": 0.7663551401869159, + "college_engineering_hydrology": 0.5660377358490566, + "college_law": 0.6018518518518519, + "college_mathematics": 0.29523809523809524, + "college_medical_statistics": 0.5283018867924528, + "college_medicine": 0.6703296703296703, + "computer_science": 0.6764705882352942, + "computer_security": 0.7485380116959064, + "conceptual_physics": 0.8299319727891157, + "construction_project_management": 0.5467625899280576, + "economics": 0.6666666666666666, + "education": 0.6503067484662577, + "electrical_engineering": 0.627906976744186, + "elementary_chinese": 0.6468253968253969, + "elementary_commonsense": 0.6919191919191919, + "elementary_information_and_technology": 0.8151260504201681, + "elementary_mathematics": 0.48695652173913045, + "ethnology": 0.6518518518518519, + "food_science": 0.5734265734265734, + "genetics": 0.5113636363636364, + "global_facts": 0.6778523489932886, + "high_school_biology": 0.7396449704142012, + "high_school_chemistry": 0.696969696969697, + "high_school_geography": 0.7627118644067796, + "high_school_mathematics": 0.35365853658536583, + "high_school_physics": 0.6272727272727273, + "high_school_politics": 0.7272727272727273, + "human_sexuality": 0.6031746031746031, + "international_law": 0.5135135135135135, + "journalism": 0.5813953488372093, + "jurisprudence": 0.6715328467153284, + "legal_and_moral_basis": 0.9485981308411215, + "logical": 0.5447154471544715, + "machine_learning": 0.5163934426229508, + "management": 0.7761904761904762, + "marketing": 0.7111111111111111, + "marxist_theory": 0.8042328042328042, + "modern_chinese": 0.5086206896551724, + "nutrition": 0.6344827586206897, + "philosophy": 0.6571428571428571, + "professional_accounting": 0.7485714285714286, + "professional_law": 0.5687203791469194, + "professional_medicine": 0.5053191489361702, + "professional_psychology": 0.7844827586206896, + "public_relations": 0.6551724137931034, + "security_study": 0.7185185185185186, + "sociology": 0.6371681415929203, + "sports_science": 0.6424242424242425, + "traditional_chinese_medicine": 0.654054054054054, + "virology": 0.621301775147929, + "world_history": 0.7701863354037267, + "world_religions": 0.69375 + } + }, + "prompt_4": { + "accuracy": 0.6033500259022622, + "category_acc": { + "agronomy": 0.47928994082840237, + "anatomy": 0.581081081081081, + "ancient_chinese": 0.34146341463414637, + "arts": 0.84375, + "astronomy": 0.42424242424242425, + "business_ethics": 0.583732057416268, + "chinese_civil_service_exam": 0.51875, + "chinese_driving_rule": 0.8473282442748091, + "chinese_food_culture": 0.6323529411764706, + "chinese_foreign_policy": 0.6448598130841121, + "chinese_history": 0.6749226006191951, + "chinese_literature": 0.5245098039215687, + "chinese_teacher_qualification": 0.7988826815642458, + "clinical_knowledge": 0.5189873417721519, + "college_actuarial_science": 0.3113207547169811, + "college_education": 0.7383177570093458, + "college_engineering_hydrology": 0.5660377358490566, + "college_law": 0.5370370370370371, + "college_mathematics": 0.2571428571428571, + "college_medical_statistics": 0.4716981132075472, + "college_medicine": 0.608058608058608, + "computer_science": 0.6470588235294118, + "computer_security": 0.6842105263157895, + "conceptual_physics": 0.6598639455782312, + "construction_project_management": 0.5251798561151079, + "economics": 0.5974842767295597, + "education": 0.6625766871165644, + "electrical_engineering": 0.6162790697674418, + "elementary_chinese": 0.6309523809523809, + "elementary_commonsense": 0.6313131313131313, + "elementary_information_and_technology": 0.7941176470588235, + "elementary_mathematics": 0.46956521739130436, + "ethnology": 0.6296296296296297, + "food_science": 0.6013986013986014, + "genetics": 0.48863636363636365, + "global_facts": 0.610738255033557, + "high_school_biology": 0.5680473372781065, + "high_school_chemistry": 0.4621212121212121, + "high_school_geography": 0.7203389830508474, + "high_school_mathematics": 0.34146341463414637, + "high_school_physics": 0.5272727272727272, + "high_school_politics": 0.5734265734265734, + "human_sexuality": 0.6111111111111112, + "international_law": 0.5297297297297298, + "journalism": 0.6046511627906976, + "jurisprudence": 0.6009732360097324, + "legal_and_moral_basis": 0.9112149532710281, + "logical": 0.4878048780487805, + "machine_learning": 0.45901639344262296, + "management": 0.7285714285714285, + "marketing": 0.6944444444444444, + "marxist_theory": 0.7142857142857143, + "modern_chinese": 0.5, + "nutrition": 0.5862068965517241, + "philosophy": 0.5904761904761905, + "professional_accounting": 0.6914285714285714, + "professional_law": 0.5023696682464455, + "professional_medicine": 0.5159574468085106, + "professional_psychology": 0.7931034482758621, + "public_relations": 0.6264367816091954, + "security_study": 0.7111111111111111, + "sociology": 0.6327433628318584, + "sports_science": 0.6060606060606061, + "traditional_chinese_medicine": 0.5783783783783784, + "virology": 0.6035502958579881, + "world_history": 0.639751552795031, + "world_religions": 0.6875 + } + }, + "prompt_5": { + "accuracy": 0.5440338456225177, + "category_acc": { + "agronomy": 0.4260355029585799, + "anatomy": 0.5743243243243243, + "ancient_chinese": 0.32926829268292684, + "arts": 0.75625, + "astronomy": 0.4303030303030303, + "business_ethics": 0.49282296650717705, + "chinese_civil_service_exam": 0.44375, + "chinese_driving_rule": 0.7480916030534351, + "chinese_food_culture": 0.6323529411764706, + "chinese_foreign_policy": 0.6261682242990654, + "chinese_history": 0.5851393188854489, + "chinese_literature": 0.5, + "chinese_teacher_qualification": 0.6871508379888268, + "clinical_knowledge": 0.5316455696202531, + "college_actuarial_science": 0.2641509433962264, + "college_education": 0.6448598130841121, + "college_engineering_hydrology": 0.6132075471698113, + "college_law": 0.48148148148148145, + "college_mathematics": 0.2761904761904762, + "college_medical_statistics": 0.4339622641509434, + "college_medicine": 0.5164835164835165, + "computer_science": 0.5931372549019608, + "computer_security": 0.5964912280701754, + "conceptual_physics": 0.6190476190476191, + "construction_project_management": 0.5251798561151079, + "economics": 0.5283018867924528, + "education": 0.6503067484662577, + "electrical_engineering": 0.5, + "elementary_chinese": 0.5793650793650794, + "elementary_commonsense": 0.6313131313131313, + "elementary_information_and_technology": 0.7184873949579832, + "elementary_mathematics": 0.4826086956521739, + "ethnology": 0.5481481481481482, + "food_science": 0.5314685314685315, + "genetics": 0.4318181818181818, + "global_facts": 0.5771812080536913, + "high_school_biology": 0.5443786982248521, + "high_school_chemistry": 0.3939393939393939, + "high_school_geography": 0.5847457627118644, + "high_school_mathematics": 0.2926829268292683, + "high_school_physics": 0.4636363636363636, + "high_school_politics": 0.5734265734265734, + "human_sexuality": 0.5476190476190477, + "international_law": 0.42702702702702705, + "journalism": 0.5406976744186046, + "jurisprudence": 0.5036496350364964, + "legal_and_moral_basis": 0.794392523364486, + "logical": 0.4634146341463415, + "machine_learning": 0.4918032786885246, + "management": 0.6523809523809524, + "marketing": 0.6222222222222222, + "marxist_theory": 0.6455026455026455, + "modern_chinese": 0.4827586206896552, + "nutrition": 0.5586206896551724, + "philosophy": 0.5523809523809524, + "professional_accounting": 0.6171428571428571, + "professional_law": 0.4265402843601896, + "professional_medicine": 0.4308510638297872, + "professional_psychology": 0.6896551724137931, + "public_relations": 0.4827586206896552, + "security_study": 0.6296296296296297, + "sociology": 0.5398230088495575, + "sports_science": 0.5575757575757576, + "traditional_chinese_medicine": 0.4972972972972973, + "virology": 0.5680473372781065, + "world_history": 0.5403726708074534, + "world_religions": 0.6375 + } + } }, "zbench": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.48484848484848486 + }, + "prompt_2": { + "accuracy": 0.5757575757575758 + }, + "prompt_3": { + "accuracy": 0.48484848484848486 + }, + "prompt_4": { + "accuracy": 0.45454545454545453 + }, + "prompt_5": { + "accuracy": 0.36363636363636365 + } }, "ind_emotion": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.5954545454545455 + }, + "prompt_2": { + "accuracy": 0.4727272727272727 + }, + "prompt_3": { + "accuracy": 0.5159090909090909 + }, + "prompt_4": { + "accuracy": 0.5409090909090909 + }, + "prompt_5": { + "accuracy": 0.5295454545454545 + } }, "ocnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.6115254237288136 + }, + "prompt_2": { + "accuracy": 0.5986440677966102 + }, + "prompt_3": { + "accuracy": 0.5915254237288136 + }, + "prompt_4": { + "accuracy": 0.5725423728813559 + }, + "prompt_5": { + "accuracy": 0.5728813559322034 + } }, "c3": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8945400149588631 + }, + "prompt_2": { + "accuracy": 0.8814510097232611 + }, + "prompt_3": { + "accuracy": 0.887434554973822 + }, + "prompt_4": { + "accuracy": 0.8915482423335827 + }, + "prompt_5": { + "accuracy": 0.8362004487658938 + } }, "dream": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8863302302792748 + }, + "prompt_2": { + "accuracy": 0.8868201861832435 + }, + "prompt_3": { + "accuracy": 0.8980891719745223 + }, + "prompt_4": { + "accuracy": 0.8819206271435571 + }, + "prompt_5": { + "accuracy": 0.895639392454679 + } }, "samsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.32190764084581686, + "rouge2": 0.11052792291958403, + "rougeL": 0.2372237292336712, + "avg_rouge": 0.22321976433302404 + }, + "prompt_2": { + "rouge1": 0.3506060196636468, + "rouge2": 0.12041414359020293, + "rougeL": 0.2596957721796308, + "avg_rouge": 0.24357197847782683 + }, + "prompt_3": { + "rouge1": 0.3603967040708173, + "rouge2": 0.12317549445914007, + "rougeL": 0.2676287654561986, + "avg_rouge": 0.25040032132871864 + }, + "prompt_4": { + "rouge1": 0.32711946391890945, + "rouge2": 0.10941591735500214, + "rougeL": 0.24072103653080126, + "avg_rouge": 0.2257521392682376 + }, + "prompt_5": { + "rouge1": 0.3537208025548611, + "rouge2": 0.11875532213088043, + "rougeL": 0.2604491567517137, + "avg_rouge": 0.24430842714581838 + } }, "dialogsum": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "rouge1": 0.21548181297762217, + "rouge2": 0.050901786414734106, + "rougeL": 0.15753119191046255, + "avg_rouge": 0.14130493043427295 + }, + "prompt_2": { + "rouge1": 0.21183542222693424, + "rouge2": 0.04852712687585889, + "rougeL": 0.15378109883145116, + "avg_rouge": 0.1380478826447481 + }, + "prompt_3": { + "rouge1": 0.21309901455858457, + "rouge2": 0.049684077676706864, + "rougeL": 0.15516278173438627, + "avg_rouge": 0.13931529132322593 + }, + "prompt_4": { + "rouge1": 0.21413638861634846, + "rouge2": 0.05167476824330981, + "rougeL": 0.15670012707318906, + "avg_rouge": 0.14083709464428243 + }, + "prompt_5": { + "rouge1": 0.2187089027150398, + "rouge2": 0.051881795506017, + "rougeL": 0.15999360313637387, + "avg_rouge": 0.14352810045247688 + } }, "sst2": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.930045871559633 + }, + "prompt_2": { + "accuracy": 0.9094036697247706 + }, + "prompt_3": { + "accuracy": 0.9139908256880734 + }, + "prompt_4": { + "accuracy": 0.930045871559633 + }, + "prompt_5": { + "accuracy": 0.8646788990825688 + } }, "cola": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.822627037392138 + }, + "prompt_2": { + "accuracy": 0.8120805369127517 + }, + "prompt_3": { + "accuracy": 0.8197507190795782 + }, + "prompt_4": { + "accuracy": 0.825503355704698 + }, + "prompt_5": { + "accuracy": 0.8159156279961649 + } }, "qqp": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.7855 + }, + "prompt_2": { + "accuracy": 0.782 + }, + "prompt_3": { + "accuracy": 0.781 + }, + "prompt_4": { + "accuracy": 0.7815 + }, + "prompt_5": { + "accuracy": 0.773 + } }, "mnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.696 + }, + "prompt_2": { + "accuracy": 0.702 + }, + "prompt_3": { + "accuracy": 0.711 + }, + "prompt_4": { + "accuracy": 0.695 + }, + "prompt_5": { + "accuracy": 0.6515 + } }, "qnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.8235 + }, + "prompt_2": { + "accuracy": 0.8375 + }, + "prompt_3": { + "accuracy": 0.8435 + }, + "prompt_4": { + "accuracy": 0.8255 + }, + "prompt_5": { + "accuracy": 0.793 + } }, "wnli": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.676056338028169 + }, + "prompt_2": { + "accuracy": 0.6338028169014085 + }, + "prompt_3": { + "accuracy": 0.5633802816901409 + }, + "prompt_4": { + "accuracy": 0.5070422535211268 + }, + "prompt_5": { + "accuracy": 0.5915492957746479 + } }, "rte": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.855595667870036 + }, + "prompt_2": { + "accuracy": 0.8592057761732852 + }, + "prompt_3": { + "accuracy": 0.8339350180505415 + }, + "prompt_4": { + "accuracy": 0.7509025270758123 + }, + "prompt_5": { + "accuracy": 0.8375451263537906 + } }, "mrpc": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "accuracy": 0.75 + }, + "prompt_2": { + "accuracy": 0.7622549019607843 + }, + "prompt_3": { + "accuracy": 0.7328431372549019 + }, + "prompt_4": { + "accuracy": 0.75 + }, + "prompt_5": { + "accuracy": 0.7647058823529411 + } } }, "five_shot": {