diff --git "a/all_results.json" "b/all_results.json" --- "a/all_results.json" +++ "b/all_results.json" @@ -1761,7 +1761,7 @@ }, "sg_eval": { "prompt_1": { - "accuracy": 0.21359223300970873 + "accuracy": 0.6504854368932039 }, "prompt_2": { "accuracy": 0.6213592233009708 @@ -2140,7 +2140,7 @@ "five_shot": { "cross_xquad": { "prompt_1": { - "overall_acc": 0.80819327731092440, + "overall_acc": 0.8081932773109244, "language_acc": { "Spanish": 0.8487394957983193, "English": 0.9084033613445378, @@ -125814,1594 +125814,1759 @@ "model_link": "https://seaeval.github.io/", "zero_shot": { "cross_xquad": { - "prompt_1": -1, - "prompt_2": -1, - "prompt_3": -1, - "prompt_4": -1, - "prompt_5": -1 + "prompt_1": { + "overall_acc": 0.8838235294117648, + "language_acc": { + "Spanish": 0.9, + "English": 0.9243697478991597, + "Chinese": 0.8705882352941177, + "Vietnamese": 0.8403361344537815 + }, + "consistency_score_2": 0.8591036414565826, + "consistency_score_3": 0.7989495798319327, + "consistency_score_4": 0.761344537815126, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.9210084033613445, + "Spanish,Chinese": 0.8596638655462185, + "Spanish,Vietnamese": 0.8436974789915966, + "English,Chinese": 0.8689075630252101, + "English,Vietnamese": 0.8420168067226891, + "Chinese,Vietnamese": 0.819327731092437 + }, + "3_combine": { + "Spanish,English,Chinese": 0.8336134453781513, + "Spanish,English,Vietnamese": 0.8117647058823529, + "Spanish,Chinese,Vietnamese": 0.7739495798319328, + "English,Chinese,Vietnamese": 0.7764705882352941 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.761344537815126 + } + }, + "AC3_2": 0.8712882846399156, + "AC3_3": 0.8392461628347567, + "AC3_4": 0.8180248934730036 + }, + "prompt_2": { + "overall_acc": 0.882983193277311, + "language_acc": { + "Spanish": 0.9058823529411765, + "English": 0.9201680672268907, + "Chinese": 0.8663865546218488, + "Vietnamese": 0.8394957983193277 + }, + "consistency_score_2": 0.8591036414565826, + "consistency_score_3": 0.7983193277310924, + "consistency_score_4": 0.7588235294117647, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.9176470588235294, + "Spanish,Chinese": 0.8638655462184874, + "Spanish,Vietnamese": 0.8403361344537815, + "English,Chinese": 0.8689075630252101, + "English,Vietnamese": 0.8403361344537815, + "Chinese,Vietnamese": 0.8235294117647058 + }, + "3_combine": { + "Spanish,English,Chinese": 0.8336134453781513, + "Spanish,English,Vietnamese": 0.807563025210084, + "Spanish,Chinese,Vietnamese": 0.7747899159663866, + "English,Chinese,Vietnamese": 0.7773109243697479 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.7588235294117647 + } + }, + "AC3_2": 0.8708797535477946, + "AC3_3": 0.8385195887177843, + "AC3_4": 0.8162086484768751 + }, + "prompt_3": { + "overall_acc": 0.8819327731092438, + "language_acc": { + "Spanish": 0.9008403361344538, + "English": 0.9168067226890756, + "Chinese": 0.8697478991596639, + "Vietnamese": 0.8403361344537815 + }, + "consistency_score_2": 0.8585434173669467, + "consistency_score_3": 0.7972689075630252, + "consistency_score_4": 0.7579831932773109, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.9092436974789916, + "Spanish,Chinese": 0.8647058823529412, + "Spanish,Vietnamese": 0.8411764705882353, + "English,Chinese": 0.8680672268907563, + "English,Vietnamese": 0.8378151260504202, + "Chinese,Vietnamese": 0.8302521008403362 + }, + "3_combine": { + "Spanish,English,Chinese": 0.8294117647058824, + "Spanish,English,Vietnamese": 0.8042016806722689, + "Spanish,Chinese,Vietnamese": 0.7781512605042017, + "English,Chinese,Vietnamese": 0.7773109243697479 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.7579831932773109 + } + }, + "AC3_2": 0.8700809364849192, + "AC3_3": 0.837466501626489, + "AC3_4": 0.8152737497269066 + }, + "prompt_4": { + "overall_acc": 0.8806722689075631, + "language_acc": { + "Spanish": 0.8983193277310925, + "English": 0.9117647058823529, + "Chinese": 0.8714285714285714, + "Vietnamese": 0.8411764705882353 + }, + "consistency_score_2": 0.857703081232493, + "consistency_score_3": 0.7953781512605042, + "consistency_score_4": 0.753781512605042, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.9100840336134454, + "Spanish,Chinese": 0.8722689075630252, + "Spanish,Vietnamese": 0.8436974789915966, + "English,Chinese": 0.8647058823529412, + "English,Vietnamese": 0.83109243697479, + "Chinese,Vietnamese": 0.8243697478991596 + }, + "3_combine": { + "Spanish,English,Chinese": 0.83109243697479, + "Spanish,English,Vietnamese": 0.8016806722689076, + "Spanish,Chinese,Vietnamese": 0.7798319327731092, + "English,Chinese,Vietnamese": 0.7689075630252101 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.753781512605042 + } + }, + "AC3_2": 0.8690359288558928, + "AC3_3": 0.8358549034557785, + "AC3_4": 0.8123013112203107 + }, + "prompt_5": { + "overall_acc": 0.8733193277310926, + "language_acc": { + "Spanish": 0.8949579831932774, + "English": 0.9, + "Chinese": 0.8689075630252101, + "Vietnamese": 0.8294117647058824 + }, + "consistency_score_2": 0.8476190476190476, + "consistency_score_3": 0.7819327731092437, + "consistency_score_4": 0.7369747899159664, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.8957983193277311, + "Spanish,Chinese": 0.873109243697479, + "Spanish,Vietnamese": 0.8294117647058824, + "English,Chinese": 0.8588235294117647, + "English,Vietnamese": 0.8134453781512605, + "Chinese,Vietnamese": 0.8151260504201681 + }, + "3_combine": { + "Spanish,English,Chinese": 0.8218487394957983, + "Spanish,English,Vietnamese": 0.780672268907563, + "Spanish,Chinese,Vietnamese": 0.7705882352941177, + "English,Chinese,Vietnamese": 0.7546218487394958 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.7369747899159664 + } + }, + "AC3_2": 0.8602772852283137, + "AC3_3": 0.8251033221522406, + "AC3_4": 0.7993748731873246 + } }, "cross_mmlu": { "prompt_1": { - "overall_acc": 0.5038095238095238, + "overall_acc": 0.5047619047619047, "language_acc": { - "Vietnamese": 0.38666666666666666, - "Malay": 0.4666666666666667, "Filipino": 0.42, - "Indonesian": 0.52, + "Vietnamese": 0.38666666666666666, "Chinese": 0.5133333333333333, "Spanish": 0.54, - "English": 0.68 + "Malay": 0.46, + "Indonesian": 0.54, + "English": 0.6733333333333333 }, - "consistency_score_2": 0.598095238095238, - "consistency_score_3": 0.44057142857142845, - "consistency_score_4": 0.35047619047619055, - "consistency_score_5": 0.2917460317460317, - "consistency_score_6": 0.25047619047619046, - "consistency_score_7": 0.22, + "consistency_score_2": 0.5987301587301587, + "consistency_score_3": 0.44209523809523815, + "consistency_score_4": 0.3537142857142857, + "consistency_score_5": 0.29714285714285715, + "consistency_score_6": 0.2590476190476191, + "consistency_score_7": 0.23333333333333334, "detailed_consistency_score": { "2_combine": { - "Vietnamese,Malay": 0.6, - "Vietnamese,Filipino": 0.5266666666666666, - "Vietnamese,Indonesian": 0.56, - "Vietnamese,Chinese": 0.5333333333333333, - "Vietnamese,Spanish": 0.49333333333333335, - "Vietnamese,English": 0.47333333333333333, - "Malay,Filipino": 0.5, - "Malay,Indonesian": 0.7466666666666667, - "Malay,Chinese": 0.6, - "Malay,Spanish": 0.68, - "Malay,English": 0.6, - "Filipino,Indonesian": 0.5466666666666666, - "Filipino,Chinese": 0.49333333333333335, - "Filipino,Spanish": 0.54, - "Filipino,English": 0.5133333333333333, - "Indonesian,Chinese": 0.68, - "Indonesian,Spanish": 0.7666666666666667, - "Indonesian,English": 0.7133333333333334, - "Chinese,Spanish": 0.6066666666666667, + "Filipino,Vietnamese": 0.5, + "Filipino,Chinese": 0.5333333333333333, + "Filipino,Spanish": 0.5266666666666666, + "Filipino,Malay": 0.4866666666666667, + "Filipino,Indonesian": 0.54, + "Filipino,English": 0.5066666666666667, + "Vietnamese,Chinese": 0.5533333333333333, + "Vietnamese,Spanish": 0.5066666666666667, + "Vietnamese,Malay": 0.58, + "Vietnamese,Indonesian": 0.5466666666666666, + "Vietnamese,English": 0.4666666666666667, + "Chinese,Spanish": 0.6133333333333333, + "Chinese,Malay": 0.6333333333333333, + "Chinese,Indonesian": 0.7066666666666667, "Chinese,English": 0.64, - "Spanish,English": 0.7466666666666667 - }, - "3_combine": { - "Vietnamese,Malay,Filipino": 0.36666666666666664, - "Vietnamese,Malay,Indonesian": 0.49333333333333335, - "Vietnamese,Malay,Chinese": 0.4266666666666667, - "Vietnamese,Malay,Spanish": 0.4266666666666667, - "Vietnamese,Malay,English": 0.4, - "Vietnamese,Filipino,Indonesian": 0.36666666666666664, - "Vietnamese,Filipino,Chinese": 0.36, - "Vietnamese,Filipino,Spanish": 0.34, - "Vietnamese,Filipino,English": 0.3333333333333333, - "Vietnamese,Indonesian,Chinese": 0.43333333333333335, - "Vietnamese,Indonesian,Spanish": 0.4533333333333333, - "Vietnamese,Indonesian,English": 0.41333333333333333, - "Vietnamese,Chinese,Spanish": 0.38, - "Vietnamese,Chinese,English": 0.38666666666666666, + "Spanish,Malay": 0.68, + "Spanish,Indonesian": 0.7466666666666667, + "Spanish,English": 0.7466666666666667, + "Malay,Indonesian": 0.7333333333333333, + "Malay,English": 0.62, + "Indonesian,English": 0.7066666666666667 + }, + "3_combine": { + "Filipino,Vietnamese,Chinese": 0.36666666666666664, + "Filipino,Vietnamese,Spanish": 0.3333333333333333, + "Filipino,Vietnamese,Malay": 0.3466666666666667, + "Filipino,Vietnamese,Indonesian": 0.34, + "Filipino,Vietnamese,English": 0.32, + "Filipino,Chinese,Spanish": 0.38666666666666666, + "Filipino,Chinese,Malay": 0.38, + "Filipino,Chinese,Indonesian": 0.4266666666666667, + "Filipino,Chinese,English": 0.3933333333333333, + "Filipino,Spanish,Malay": 0.3933333333333333, + "Filipino,Spanish,Indonesian": 0.43333333333333335, + "Filipino,Spanish,English": 0.43333333333333335, + "Filipino,Malay,Indonesian": 0.41333333333333333, + "Filipino,Malay,English": 0.37333333333333335, + "Filipino,Indonesian,English": 0.4, + "Vietnamese,Chinese,Spanish": 0.4, + "Vietnamese,Chinese,Malay": 0.43333333333333335, + "Vietnamese,Chinese,Indonesian": 0.44666666666666666, + "Vietnamese,Chinese,English": 0.3933333333333333, + "Vietnamese,Spanish,Malay": 0.4266666666666667, + "Vietnamese,Spanish,Indonesian": 0.44666666666666666, "Vietnamese,Spanish,English": 0.4066666666666667, - "Malay,Filipino,Indonesian": 0.43333333333333335, - "Malay,Filipino,Chinese": 0.35333333333333333, - "Malay,Filipino,Spanish": 0.4, - "Malay,Filipino,English": 0.36666666666666664, - "Malay,Indonesian,Chinese": 0.52, - "Malay,Indonesian,Spanish": 0.62, - "Malay,Indonesian,English": 0.56, - "Malay,Chinese,Spanish": 0.47333333333333333, - "Malay,Chinese,English": 0.4533333333333333, - "Malay,Spanish,English": 0.5333333333333333, - "Filipino,Indonesian,Chinese": 0.4066666666666667, - "Filipino,Indonesian,Spanish": 0.4533333333333333, - "Filipino,Indonesian,English": 0.4066666666666667, - "Filipino,Chinese,Spanish": 0.36666666666666664, - "Filipino,Chinese,English": 0.37333333333333335, - "Filipino,Spanish,English": 0.44, - "Indonesian,Chinese,Spanish": 0.5533333333333333, - "Indonesian,Chinese,English": 0.5466666666666666, - "Indonesian,Spanish,English": 0.6466666666666666, - "Chinese,Spanish,English": 0.5266666666666666 - }, - "4_combine": { - "Vietnamese,Malay,Filipino,Indonesian": 0.32666666666666666, - "Vietnamese,Malay,Filipino,Chinese": 0.29333333333333333, - "Vietnamese,Malay,Filipino,Spanish": 0.29333333333333333, - "Vietnamese,Malay,Filipino,English": 0.2733333333333333, - "Vietnamese,Malay,Indonesian,Chinese": 0.3933333333333333, - "Vietnamese,Malay,Indonesian,Spanish": 0.41333333333333333, - "Vietnamese,Malay,Indonesian,English": 0.37333333333333335, - "Vietnamese,Malay,Chinese,Spanish": 0.3466666666666667, - "Vietnamese,Malay,Chinese,English": 0.34, - "Vietnamese,Malay,Spanish,English": 0.35333333333333333, - "Vietnamese,Filipino,Indonesian,Chinese": 0.31333333333333335, - "Vietnamese,Filipino,Indonesian,Spanish": 0.32, - "Vietnamese,Filipino,Indonesian,English": 0.28, - "Vietnamese,Filipino,Chinese,Spanish": 0.2866666666666667, - "Vietnamese,Filipino,Chinese,English": 0.2866666666666667, - "Vietnamese,Filipino,Spanish,English": 0.2866666666666667, - "Vietnamese,Indonesian,Chinese,Spanish": 0.37333333333333335, - "Vietnamese,Indonesian,Chinese,English": 0.3466666666666667, - "Vietnamese,Indonesian,Spanish,English": 0.3933333333333333, - "Vietnamese,Chinese,Spanish,English": 0.34, - "Malay,Filipino,Indonesian,Chinese": 0.32666666666666666, - "Malay,Filipino,Indonesian,Spanish": 0.38666666666666666, - "Malay,Filipino,Indonesian,English": 0.34, - "Malay,Filipino,Chinese,Spanish": 0.3, - "Malay,Filipino,Chinese,English": 0.28, - "Malay,Filipino,Spanish,English": 0.3333333333333333, - "Malay,Indonesian,Chinese,Spanish": 0.4533333333333333, - "Malay,Indonesian,Chinese,English": 0.42, - "Malay,Indonesian,Spanish,English": 0.5266666666666666, - "Malay,Chinese,Spanish,English": 0.4066666666666667, - "Filipino,Indonesian,Chinese,Spanish": 0.34, - "Filipino,Indonesian,Chinese,English": 0.32, - "Filipino,Indonesian,Spanish,English": 0.38, - "Filipino,Chinese,Spanish,English": 0.32666666666666666, - "Indonesian,Chinese,Spanish,English": 0.49333333333333335 + "Vietnamese,Malay,Indonesian": 0.47333333333333333, + "Vietnamese,Malay,English": 0.4, + "Vietnamese,Indonesian,English": 0.4, + "Chinese,Spanish,Malay": 0.49333333333333335, + "Chinese,Spanish,Indonesian": 0.56, + "Chinese,Spanish,English": 0.5266666666666666, + "Chinese,Malay,Indonesian": 0.5533333333333333, + "Chinese,Malay,English": 0.48, + "Chinese,Indonesian,English": 0.5533333333333333, + "Spanish,Malay,Indonesian": 0.6066666666666667, + "Spanish,Malay,English": 0.54, + "Spanish,Indonesian,English": 0.6333333333333333, + "Malay,Indonesian,English": 0.56 + }, + "4_combine": { + "Filipino,Vietnamese,Chinese,Spanish": 0.29333333333333333, + "Filipino,Vietnamese,Chinese,Malay": 0.29333333333333333, + "Filipino,Vietnamese,Chinese,Indonesian": 0.30666666666666664, + "Filipino,Vietnamese,Chinese,English": 0.29333333333333333, + "Filipino,Vietnamese,Spanish,Malay": 0.2866666666666667, + "Filipino,Vietnamese,Spanish,Indonesian": 0.3, + "Filipino,Vietnamese,Spanish,English": 0.28, + "Filipino,Vietnamese,Malay,Indonesian": 0.3, + "Filipino,Vietnamese,Malay,English": 0.2733333333333333, + "Filipino,Vietnamese,Indonesian,English": 0.2733333333333333, + "Filipino,Chinese,Spanish,Malay": 0.32, + "Filipino,Chinese,Spanish,Indonesian": 0.35333333333333333, + "Filipino,Chinese,Spanish,English": 0.34, + "Filipino,Chinese,Malay,Indonesian": 0.3466666666666667, + "Filipino,Chinese,Malay,English": 0.31333333333333335, + "Filipino,Chinese,Indonesian,English": 0.34, + "Filipino,Spanish,Malay,Indonesian": 0.37333333333333335, + "Filipino,Spanish,Malay,English": 0.3333333333333333, + "Filipino,Spanish,Indonesian,English": 0.37333333333333335, + "Filipino,Malay,Indonesian,English": 0.34, + "Vietnamese,Chinese,Spanish,Malay": 0.36, + "Vietnamese,Chinese,Spanish,Indonesian": 0.38666666666666666, + "Vietnamese,Chinese,Spanish,English": 0.3466666666666667, + "Vietnamese,Chinese,Malay,Indonesian": 0.4, + "Vietnamese,Chinese,Malay,English": 0.3466666666666667, + "Vietnamese,Chinese,Indonesian,English": 0.35333333333333333, + "Vietnamese,Spanish,Malay,Indonesian": 0.4066666666666667, + "Vietnamese,Spanish,Malay,English": 0.35333333333333333, + "Vietnamese,Spanish,Indonesian,English": 0.38666666666666666, + "Vietnamese,Malay,Indonesian,English": 0.35333333333333333, + "Chinese,Spanish,Malay,Indonesian": 0.47333333333333333, + "Chinese,Spanish,Malay,English": 0.42, + "Chinese,Spanish,Indonesian,English": 0.49333333333333335, + "Chinese,Malay,Indonesian,English": 0.44, + "Spanish,Malay,Indonesian,English": 0.5266666666666666 }, "5_combine": { - "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.28, - "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.29333333333333333, - "Vietnamese,Malay,Filipino,Indonesian,English": 0.25333333333333335, - "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.25333333333333335, - "Vietnamese,Malay,Filipino,Chinese,English": 0.24, - "Vietnamese,Malay,Filipino,Spanish,English": 0.24666666666666667, - "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.3466666666666667, - "Vietnamese,Malay,Indonesian,Chinese,English": 0.32, - "Vietnamese,Malay,Indonesian,Spanish,English": 0.35333333333333333, - "Vietnamese,Malay,Chinese,Spanish,English": 0.30666666666666664, - "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.28, - "Vietnamese,Filipino,Indonesian,Chinese,English": 0.25333333333333335, - "Vietnamese,Filipino,Indonesian,Spanish,English": 0.2733333333333333, - "Vietnamese,Filipino,Chinese,Spanish,English": 0.25333333333333335, - "Vietnamese,Indonesian,Chinese,Spanish,English": 0.3333333333333333, - "Malay,Filipino,Indonesian,Chinese,Spanish": 0.29333333333333333, - "Malay,Filipino,Indonesian,Chinese,English": 0.26, - "Malay,Filipino,Indonesian,Spanish,English": 0.32666666666666666, - "Malay,Filipino,Chinese,Spanish,English": 0.26, - "Malay,Indonesian,Chinese,Spanish,English": 0.4, - "Filipino,Indonesian,Chinese,Spanish,English": 0.3 + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.26, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.28, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.26, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,Malay,English": 0.25333333333333335, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.26, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.28, + "Filipino,Vietnamese,Spanish,Malay,English": 0.24666666666666667, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.26666666666666666, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.24666666666666667, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.31333333333333335, + "Filipino,Chinese,Spanish,Malay,English": 0.28, + "Filipino,Chinese,Spanish,Indonesian,English": 0.31333333333333335, + "Filipino,Chinese,Malay,Indonesian,English": 0.2866666666666667, + "Filipino,Spanish,Malay,Indonesian,English": 0.32666666666666666, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.36, + "Vietnamese,Chinese,Spanish,Malay,English": 0.31333333333333335, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.34, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.32, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.3466666666666667, + "Chinese,Spanish,Malay,Indonesian,English": 0.41333333333333333 }, "6_combine": { - "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.25333333333333335, - "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.22666666666666666, - "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.24666666666666667, - "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.22, - "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.30666666666666664, - "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.24666666666666667, - "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.25333333333333335 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.26, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.23333333333333334, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.25333333333333335, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.23333333333333334, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.24666666666666667, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.2733333333333333, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.31333333333333335 }, "7_combine": { - "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.22 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.23333333333333334 } }, - "AC3_2": 0.546918549565545, - "AC3_3": 0.47007318614281324, - "AC3_4": 0.4133821733337843, - "AC3_5": 0.36951392842997116, - "AC3_6": 0.33460076955640966, - "AC3_7": 0.30626315785242414 + "AC3_2": 0.5477450818726597, + "AC3_3": 0.4713548102307988, + "AC3_4": 0.4159497923379944, + "AC3_5": 0.37407533079827865, + "AC3_6": 0.3423821398435487, + "AC3_7": 0.3191397849029982 }, "prompt_2": { - "overall_acc": 0.5114285714285715, + "overall_acc": 0.5066666666666666, "language_acc": { - "Vietnamese": 0.38666666666666666, - "Malay": 0.4866666666666667, "Filipino": 0.46, - "Indonesian": 0.5533333333333333, - "Chinese": 0.5133333333333333, - "Spanish": 0.56, - "English": 0.62 + "Vietnamese": 0.3933333333333333, + "Chinese": 0.52, + "Spanish": 0.54, + "Malay": 0.4666666666666667, + "Indonesian": 0.5333333333333333, + "English": 0.6333333333333333 }, - "consistency_score_2": 0.584126984126984, - "consistency_score_3": 0.4241904761904762, - "consistency_score_4": 0.33371428571428574, - "consistency_score_5": 0.2752380952380952, - "consistency_score_6": 0.23523809523809525, - "consistency_score_7": 0.20666666666666667, + "consistency_score_2": 0.5825396825396826, + "consistency_score_3": 0.42361904761904773, + "consistency_score_4": 0.33257142857142846, + "consistency_score_5": 0.2726984126984127, + "consistency_score_6": 0.23047619047619047, + "consistency_score_7": 0.2, "detailed_consistency_score": { "2_combine": { - "Vietnamese,Malay": 0.56, - "Vietnamese,Filipino": 0.47333333333333333, - "Vietnamese,Indonesian": 0.5466666666666666, - "Vietnamese,Chinese": 0.5133333333333333, - "Vietnamese,Spanish": 0.5066666666666667, - "Vietnamese,English": 0.46, - "Malay,Filipino": 0.4866666666666667, - "Malay,Indonesian": 0.76, - "Malay,Chinese": 0.6066666666666667, - "Malay,Spanish": 0.6733333333333333, - "Malay,English": 0.5866666666666667, + "Filipino,Vietnamese": 0.46, + "Filipino,Chinese": 0.5266666666666666, + "Filipino,Spanish": 0.5266666666666666, + "Filipino,Malay": 0.5, "Filipino,Indonesian": 0.5266666666666666, - "Filipino,Chinese": 0.52, - "Filipino,Spanish": 0.5333333333333333, - "Filipino,English": 0.49333333333333335, - "Indonesian,Chinese": 0.6666666666666666, - "Indonesian,Spanish": 0.7333333333333333, - "Indonesian,English": 0.6666666666666666, - "Chinese,Spanish": 0.6733333333333333, - "Chinese,English": 0.5933333333333334, - "Spanish,English": 0.6866666666666666 - }, - "3_combine": { - "Vietnamese,Malay,Filipino": 0.32666666666666666, - "Vietnamese,Malay,Indonesian": 0.4666666666666667, - "Vietnamese,Malay,Chinese": 0.3933333333333333, - "Vietnamese,Malay,Spanish": 0.4266666666666667, - "Vietnamese,Malay,English": 0.36, - "Vietnamese,Filipino,Indonesian": 0.3333333333333333, - "Vietnamese,Filipino,Chinese": 0.34, - "Vietnamese,Filipino,Spanish": 0.32666666666666666, - "Vietnamese,Filipino,English": 0.2866666666666667, - "Vietnamese,Indonesian,Chinese": 0.41333333333333333, - "Vietnamese,Indonesian,Spanish": 0.44666666666666666, - "Vietnamese,Indonesian,English": 0.38, - "Vietnamese,Chinese,Spanish": 0.4066666666666667, - "Vietnamese,Chinese,English": 0.36, - "Vietnamese,Spanish,English": 0.37333333333333335, - "Malay,Filipino,Indonesian": 0.42, - "Malay,Filipino,Chinese": 0.37333333333333335, - "Malay,Filipino,Spanish": 0.41333333333333333, - "Malay,Filipino,English": 0.3466666666666667, - "Malay,Indonesian,Chinese": 0.54, - "Malay,Indonesian,Spanish": 0.6066666666666667, - "Malay,Indonesian,English": 0.5333333333333333, - "Malay,Chinese,Spanish": 0.5066666666666667, - "Malay,Chinese,English": 0.4266666666666667, - "Malay,Spanish,English": 0.49333333333333335, - "Filipino,Indonesian,Chinese": 0.38666666666666666, - "Filipino,Indonesian,Spanish": 0.44, - "Filipino,Indonesian,English": 0.38, - "Filipino,Chinese,Spanish": 0.4, - "Filipino,Chinese,English": 0.36666666666666664, - "Filipino,Spanish,English": 0.42, - "Indonesian,Chinese,Spanish": 0.5733333333333334, - "Indonesian,Chinese,English": 0.5, - "Indonesian,Spanish,English": 0.5666666666666667, - "Chinese,Spanish,English": 0.5133333333333333 - }, - "4_combine": { - "Vietnamese,Malay,Filipino,Indonesian": 0.29333333333333333, - "Vietnamese,Malay,Filipino,Chinese": 0.2733333333333333, - "Vietnamese,Malay,Filipino,Spanish": 0.2866666666666667, - "Vietnamese,Malay,Filipino,English": 0.23333333333333334, - "Vietnamese,Malay,Indonesian,Chinese": 0.37333333333333335, - "Vietnamese,Malay,Indonesian,Spanish": 0.4066666666666667, - "Vietnamese,Malay,Indonesian,English": 0.3466666666666667, - "Vietnamese,Malay,Chinese,Spanish": 0.35333333333333333, - "Vietnamese,Malay,Chinese,English": 0.3, - "Vietnamese,Malay,Spanish,English": 0.32, - "Vietnamese,Filipino,Indonesian,Chinese": 0.2733333333333333, - "Vietnamese,Filipino,Indonesian,Spanish": 0.3, - "Vietnamese,Filipino,Indonesian,English": 0.24666666666666667, - "Vietnamese,Filipino,Chinese,Spanish": 0.26666666666666666, - "Vietnamese,Filipino,Chinese,English": 0.25333333333333335, - "Vietnamese,Filipino,Spanish,English": 0.25333333333333335, - "Vietnamese,Indonesian,Chinese,Spanish": 0.38666666666666666, - "Vietnamese,Indonesian,Chinese,English": 0.32666666666666666, - "Vietnamese,Indonesian,Spanish,English": 0.3333333333333333, - "Vietnamese,Chinese,Spanish,English": 0.32, - "Malay,Filipino,Indonesian,Chinese": 0.32666666666666666, - "Malay,Filipino,Indonesian,Spanish": 0.38, - "Malay,Filipino,Indonesian,English": 0.32666666666666666, - "Malay,Filipino,Chinese,Spanish": 0.3333333333333333, - "Malay,Filipino,Chinese,English": 0.2866666666666667, - "Malay,Filipino,Spanish,English": 0.32666666666666666, - "Malay,Indonesian,Chinese,Spanish": 0.48, - "Malay,Indonesian,Chinese,English": 0.4066666666666667, - "Malay,Indonesian,Spanish,English": 0.4666666666666667, - "Malay,Chinese,Spanish,English": 0.4066666666666667, - "Filipino,Indonesian,Chinese,Spanish": 0.3466666666666667, - "Filipino,Indonesian,Chinese,English": 0.30666666666666664, - "Filipino,Indonesian,Spanish,English": 0.35333333333333333, + "Filipino,English": 0.5466666666666666, + "Vietnamese,Chinese": 0.52, + "Vietnamese,Spanish": 0.5066666666666667, + "Vietnamese,Malay": 0.58, + "Vietnamese,Indonesian": 0.5533333333333333, + "Vietnamese,English": 0.48, + "Chinese,Spanish": 0.64, + "Chinese,Malay": 0.6066666666666667, + "Chinese,Indonesian": 0.6333333333333333, + "Chinese,English": 0.6133333333333333, + "Spanish,Malay": 0.66, + "Spanish,Indonesian": 0.72, + "Spanish,English": 0.68, + "Malay,Indonesian": 0.72, + "Malay,English": 0.5733333333333334, + "Indonesian,English": 0.66 + }, + "3_combine": { + "Filipino,Vietnamese,Chinese": 0.3333333333333333, + "Filipino,Vietnamese,Spanish": 0.31333333333333335, + "Filipino,Vietnamese,Malay": 0.34, + "Filipino,Vietnamese,Indonesian": 0.34, + "Filipino,Vietnamese,English": 0.31333333333333335, + "Filipino,Chinese,Spanish": 0.3933333333333333, + "Filipino,Chinese,Malay": 0.36666666666666664, + "Filipino,Chinese,Indonesian": 0.37333333333333335, + "Filipino,Chinese,English": 0.3933333333333333, + "Filipino,Spanish,Malay": 0.4, + "Filipino,Spanish,Indonesian": 0.43333333333333335, + "Filipino,Spanish,English": 0.43333333333333335, + "Filipino,Malay,Indonesian": 0.4266666666666667, + "Filipino,Malay,English": 0.38, + "Filipino,Indonesian,English": 0.4, + "Vietnamese,Chinese,Spanish": 0.4, + "Vietnamese,Chinese,Malay": 0.4066666666666667, + "Vietnamese,Chinese,Indonesian": 0.41333333333333333, + "Vietnamese,Chinese,English": 0.37333333333333335, + "Vietnamese,Spanish,Malay": 0.4266666666666667, + "Vietnamese,Spanish,Indonesian": 0.44666666666666666, + "Vietnamese,Spanish,English": 0.38666666666666666, + "Vietnamese,Malay,Indonesian": 0.46, + "Vietnamese,Malay,English": 0.37333333333333335, + "Vietnamese,Indonesian,English": 0.4066666666666667, + "Chinese,Spanish,Malay": 0.4866666666666667, + "Chinese,Spanish,Indonesian": 0.54, + "Chinese,Spanish,English": 0.49333333333333335, + "Chinese,Malay,Indonesian": 0.5066666666666667, + "Chinese,Malay,English": 0.4266666666666667, + "Chinese,Indonesian,English": 0.49333333333333335, + "Spanish,Malay,Indonesian": 0.5733333333333334, + "Spanish,Malay,English": 0.49333333333333335, + "Spanish,Indonesian,English": 0.5666666666666667, + "Malay,Indonesian,English": 0.5133333333333333 + }, + "4_combine": { + "Filipino,Vietnamese,Chinese,Spanish": 0.26666666666666666, + "Filipino,Vietnamese,Chinese,Malay": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,Indonesian": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,English": 0.26, + "Filipino,Vietnamese,Spanish,Malay": 0.28, + "Filipino,Vietnamese,Spanish,Indonesian": 0.3, + "Filipino,Vietnamese,Spanish,English": 0.26, + "Filipino,Vietnamese,Malay,Indonesian": 0.30666666666666664, + "Filipino,Vietnamese,Malay,English": 0.26, + "Filipino,Vietnamese,Indonesian,English": 0.26666666666666666, + "Filipino,Chinese,Spanish,Malay": 0.32666666666666666, + "Filipino,Chinese,Spanish,Indonesian": 0.3333333333333333, "Filipino,Chinese,Spanish,English": 0.32666666666666666, - "Indonesian,Chinese,Spanish,English": 0.46 + "Filipino,Chinese,Malay,Indonesian": 0.31333333333333335, + "Filipino,Chinese,Malay,English": 0.2866666666666667, + "Filipino,Chinese,Indonesian,English": 0.30666666666666664, + "Filipino,Spanish,Malay,Indonesian": 0.36666666666666664, + "Filipino,Spanish,Malay,English": 0.3333333333333333, + "Filipino,Spanish,Indonesian,English": 0.36, + "Filipino,Malay,Indonesian,English": 0.3333333333333333, + "Vietnamese,Chinese,Spanish,Malay": 0.3466666666666667, + "Vietnamese,Chinese,Spanish,Indonesian": 0.38, + "Vietnamese,Chinese,Spanish,English": 0.31333333333333335, + "Vietnamese,Chinese,Malay,Indonesian": 0.36666666666666664, + "Vietnamese,Chinese,Malay,English": 0.3, + "Vietnamese,Chinese,Indonesian,English": 0.3333333333333333, + "Vietnamese,Spanish,Malay,Indonesian": 0.3933333333333333, + "Vietnamese,Spanish,Malay,English": 0.32666666666666666, + "Vietnamese,Spanish,Indonesian,English": 0.36, + "Vietnamese,Malay,Indonesian,English": 0.35333333333333333, + "Chinese,Spanish,Malay,Indonesian": 0.44666666666666666, + "Chinese,Spanish,Malay,English": 0.3933333333333333, + "Chinese,Spanish,Indonesian,English": 0.44, + "Chinese,Malay,Indonesian,English": 0.3933333333333333, + "Spanish,Malay,Indonesian,English": 0.46 }, "5_combine": { - "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.25333333333333335, - "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.28, - "Vietnamese,Malay,Filipino,Indonesian,English": 0.23333333333333334, - "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.24666666666666667, - "Vietnamese,Malay,Filipino,Chinese,English": 0.21333333333333335, - "Vietnamese,Malay,Filipino,Spanish,English": 0.22666666666666666, - "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.35333333333333333, - "Vietnamese,Malay,Indonesian,Chinese,English": 0.3, - "Vietnamese,Malay,Indonesian,Spanish,English": 0.31333333333333335, - "Vietnamese,Malay,Chinese,Spanish,English": 0.2866666666666667, - "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.26, - "Vietnamese,Filipino,Indonesian,Chinese,English": 0.22666666666666666, - "Vietnamese,Filipino,Indonesian,Spanish,English": 0.23333333333333334, - "Vietnamese,Filipino,Chinese,Spanish,English": 0.22, - "Vietnamese,Indonesian,Chinese,Spanish,English": 0.30666666666666664, - "Malay,Filipino,Indonesian,Chinese,Spanish": 0.30666666666666664, - "Malay,Filipino,Indonesian,Chinese,English": 0.26666666666666666, - "Malay,Filipino,Indonesian,Spanish,English": 0.30666666666666664, - "Malay,Filipino,Chinese,Spanish,English": 0.2733333333333333, - "Malay,Indonesian,Chinese,Spanish,English": 0.38666666666666666, - "Filipino,Indonesian,Chinese,Spanish,English": 0.2866666666666667 + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.24666666666666667, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.26, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.22, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.25333333333333335, + "Filipino,Vietnamese,Chinese,Malay,English": 0.21333333333333335, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.22666666666666666, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.28, + "Filipino,Vietnamese,Spanish,Malay,English": 0.23333333333333334, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.24666666666666667, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.24666666666666667, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.29333333333333333, + "Filipino,Chinese,Spanish,Malay,English": 0.26666666666666666, + "Filipino,Chinese,Spanish,Indonesian,English": 0.28, + "Filipino,Chinese,Malay,Indonesian,English": 0.25333333333333335, + "Filipino,Spanish,Malay,Indonesian,English": 0.30666666666666664, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.34, + "Vietnamese,Chinese,Spanish,Malay,English": 0.2733333333333333, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.30666666666666664, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.29333333333333333, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.32, + "Chinese,Spanish,Malay,Indonesian,English": 0.36666666666666664 }, "6_combine": { - "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.24666666666666667, - "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.21333333333333335, - "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.22666666666666666, - "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.20666666666666667, - "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.2866666666666667, - "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.21333333333333335, - "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.25333333333333335 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.24666666666666667, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.2, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.21333333333333335, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.20666666666666667, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.23333333333333334, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.24, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.2733333333333333 }, "7_combine": { - "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.20666666666666667 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.2 } }, - "AC3_2": 0.5453657324501167, - "AC3_3": 0.46374243829385026, - "AC3_4": 0.4038867960499603, - "AC3_5": 0.3578761673706263, - "AC3_6": 0.32225218654576254, - "AC3_7": 0.2943766577839396 + "AC3_2": 0.5419605556677205, + "AC3_3": 0.4614359813863799, + "AC3_4": 0.40156150698801174, + "AC3_5": 0.354563475854031, + "AC3_6": 0.3168303186478029, + "AC3_7": 0.28679245278960486 }, "prompt_3": { - "overall_acc": 0.4961904761904762, + "overall_acc": 0.4923809523809524, "language_acc": { - "Vietnamese": 0.4, - "Malay": 0.47333333333333333, - "Filipino": 0.4, - "Indonesian": 0.5666666666666667, - "Chinese": 0.5, - "Spanish": 0.54, - "English": 0.5933333333333334 + "Filipino": 0.36666666666666664, + "Vietnamese": 0.4266666666666667, + "Chinese": 0.47333333333333333, + "Spanish": 0.56, + "Malay": 0.44, + "Indonesian": 0.56, + "English": 0.62 }, - "consistency_score_2": 0.5177777777777779, - "consistency_score_3": 0.33980952380952373, - "consistency_score_4": 0.24380952380952386, - "consistency_score_5": 0.18571428571428572, - "consistency_score_6": 0.14952380952380953, - "consistency_score_7": 0.12666666666666668, + "consistency_score_2": 0.513968253968254, + "consistency_score_3": 0.33790476190476193, + "consistency_score_4": 0.24666666666666665, + "consistency_score_5": 0.19206349206349205, + "consistency_score_6": 0.15714285714285717, + "consistency_score_7": 0.13333333333333333, "detailed_consistency_score": { "2_combine": { - "Vietnamese,Malay": 0.5466666666666666, - "Vietnamese,Filipino": 0.4266666666666667, + "Filipino,Vietnamese": 0.4266666666666667, + "Filipino,Chinese": 0.42, + "Filipino,Spanish": 0.4533333333333333, + "Filipino,Malay": 0.4666666666666667, + "Filipino,Indonesian": 0.46, + "Filipino,English": 0.44666666666666666, + "Vietnamese,Chinese": 0.49333333333333335, + "Vietnamese,Spanish": 0.4066666666666667, + "Vietnamese,Malay": 0.56, "Vietnamese,Indonesian": 0.5066666666666667, - "Vietnamese,Chinese": 0.5066666666666667, - "Vietnamese,Spanish": 0.37333333333333335, - "Vietnamese,English": 0.42, - "Malay,Filipino": 0.44, - "Malay,Indonesian": 0.68, - "Malay,Chinese": 0.5466666666666666, - "Malay,Spanish": 0.5733333333333334, - "Malay,English": 0.5533333333333333, - "Filipino,Indonesian": 0.48, - "Filipino,Chinese": 0.44, - "Filipino,Spanish": 0.44, - "Filipino,English": 0.46, - "Indonesian,Chinese": 0.6, - "Indonesian,Spanish": 0.6, - "Indonesian,English": 0.6, - "Chinese,Spanish": 0.5333333333333333, - "Chinese,English": 0.5666666666666667, - "Spanish,English": 0.58 + "Vietnamese,English": 0.41333333333333333, + "Chinese,Spanish": 0.5066666666666667, + "Chinese,Malay": 0.5466666666666666, + "Chinese,Indonesian": 0.5933333333333334, + "Chinese,English": 0.5466666666666666, + "Spanish,Malay": 0.56, + "Spanish,Indonesian": 0.6133333333333333, + "Spanish,English": 0.58, + "Malay,Indonesian": 0.64, + "Malay,English": 0.52, + "Indonesian,English": 0.6333333333333333 }, "3_combine": { - "Vietnamese,Malay,Filipino": 0.29333333333333333, - "Vietnamese,Malay,Indonesian": 0.41333333333333333, - "Vietnamese,Malay,Chinese": 0.36666666666666664, - "Vietnamese,Malay,Spanish": 0.31333333333333335, - "Vietnamese,Malay,English": 0.31333333333333335, - "Vietnamese,Filipino,Indonesian": 0.29333333333333333, - "Vietnamese,Filipino,Chinese": 0.2866666666666667, - "Vietnamese,Filipino,Spanish": 0.22666666666666666, - "Vietnamese,Filipino,English": 0.25333333333333335, - "Vietnamese,Indonesian,Chinese": 0.36666666666666664, - "Vietnamese,Indonesian,Spanish": 0.30666666666666664, - "Vietnamese,Indonesian,English": 0.31333333333333335, - "Vietnamese,Chinese,Spanish": 0.2866666666666667, - "Vietnamese,Chinese,English": 0.32666666666666666, - "Vietnamese,Spanish,English": 0.26, - "Malay,Filipino,Indonesian": 0.3333333333333333, - "Malay,Filipino,Chinese": 0.2866666666666667, - "Malay,Filipino,Spanish": 0.29333333333333333, - "Malay,Filipino,English": 0.3, - "Malay,Indonesian,Chinese": 0.4533333333333333, - "Malay,Indonesian,Spanish": 0.47333333333333333, - "Malay,Indonesian,English": 0.4533333333333333, - "Malay,Chinese,Spanish": 0.38, - "Malay,Chinese,English": 0.38, - "Malay,Spanish,English": 0.4, - "Filipino,Indonesian,Chinese": 0.31333333333333335, - "Filipino,Indonesian,Spanish": 0.31333333333333335, - "Filipino,Indonesian,English": 0.32666666666666666, + "Filipino,Vietnamese,Chinese": 0.28, + "Filipino,Vietnamese,Spanish": 0.24666666666666667, + "Filipino,Vietnamese,Malay": 0.2866666666666667, + "Filipino,Vietnamese,Indonesian": 0.28, + "Filipino,Vietnamese,English": 0.25333333333333335, "Filipino,Chinese,Spanish": 0.26, - "Filipino,Chinese,English": 0.30666666666666664, - "Filipino,Spanish,English": 0.32, - "Indonesian,Chinese,Spanish": 0.41333333333333333, - "Indonesian,Chinese,English": 0.4533333333333333, - "Indonesian,Spanish,English": 0.44, - "Chinese,Spanish,English": 0.37333333333333335 - }, - "4_combine": { - "Vietnamese,Malay,Filipino,Indonesian": 0.24, - "Vietnamese,Malay,Filipino,Chinese": 0.21333333333333335, - "Vietnamese,Malay,Filipino,Spanish": 0.2, - "Vietnamese,Malay,Filipino,English": 0.2, - "Vietnamese,Malay,Indonesian,Chinese": 0.32, - "Vietnamese,Malay,Indonesian,Spanish": 0.2866666666666667, + "Filipino,Chinese,Malay": 0.29333333333333333, + "Filipino,Chinese,Indonesian": 0.2866666666666667, + "Filipino,Chinese,English": 0.2866666666666667, + "Filipino,Spanish,Malay": 0.31333333333333335, + "Filipino,Spanish,Indonesian": 0.3333333333333333, + "Filipino,Spanish,English": 0.31333333333333335, + "Filipino,Malay,Indonesian": 0.34, + "Filipino,Malay,English": 0.3, + "Filipino,Indonesian,English": 0.31333333333333335, + "Vietnamese,Chinese,Spanish": 0.2866666666666667, + "Vietnamese,Chinese,Malay": 0.35333333333333333, + "Vietnamese,Chinese,Indonesian": 0.36, + "Vietnamese,Chinese,English": 0.31333333333333335, + "Vietnamese,Spanish,Malay": 0.3333333333333333, + "Vietnamese,Spanish,Indonesian": 0.34, + "Vietnamese,Spanish,English": 0.28, + "Vietnamese,Malay,Indonesian": 0.4066666666666667, + "Vietnamese,Malay,English": 0.30666666666666664, + "Vietnamese,Indonesian,English": 0.3333333333333333, + "Chinese,Spanish,Malay": 0.38, + "Chinese,Spanish,Indonesian": 0.4066666666666667, + "Chinese,Spanish,English": 0.36, + "Chinese,Malay,Indonesian": 0.43333333333333335, + "Chinese,Malay,English": 0.36, + "Chinese,Indonesian,English": 0.44666666666666666, + "Spanish,Malay,Indonesian": 0.4666666666666667, + "Spanish,Malay,English": 0.38666666666666666, + "Spanish,Indonesian,English": 0.4533333333333333, + "Malay,Indonesian,English": 0.43333333333333335 + }, + "4_combine": { + "Filipino,Vietnamese,Chinese,Spanish": 0.18, + "Filipino,Vietnamese,Chinese,Malay": 0.21333333333333335, + "Filipino,Vietnamese,Chinese,Indonesian": 0.22, + "Filipino,Vietnamese,Chinese,English": 0.21333333333333335, + "Filipino,Vietnamese,Spanish,Malay": 0.20666666666666667, + "Filipino,Vietnamese,Spanish,Indonesian": 0.22, + "Filipino,Vietnamese,Spanish,English": 0.18, + "Filipino,Vietnamese,Malay,Indonesian": 0.24, + "Filipino,Vietnamese,Malay,English": 0.20666666666666667, + "Filipino,Vietnamese,Indonesian,English": 0.20666666666666667, + "Filipino,Chinese,Spanish,Malay": 0.22666666666666666, + "Filipino,Chinese,Spanish,Indonesian": 0.22, + "Filipino,Chinese,Spanish,English": 0.2, + "Filipino,Chinese,Malay,Indonesian": 0.24666666666666667, + "Filipino,Chinese,Malay,English": 0.22666666666666666, + "Filipino,Chinese,Indonesian,English": 0.23333333333333334, + "Filipino,Spanish,Malay,Indonesian": 0.28, + "Filipino,Spanish,Malay,English": 0.22, + "Filipino,Spanish,Indonesian,English": 0.24666666666666667, + "Filipino,Malay,Indonesian,English": 0.25333333333333335, + "Vietnamese,Chinese,Spanish,Malay": 0.25333333333333335, + "Vietnamese,Chinese,Spanish,Indonesian": 0.24666666666666667, + "Vietnamese,Chinese,Spanish,English": 0.21333333333333335, + "Vietnamese,Chinese,Malay,Indonesian": 0.31333333333333335, + "Vietnamese,Chinese,Malay,English": 0.24666666666666667, + "Vietnamese,Chinese,Indonesian,English": 0.2733333333333333, + "Vietnamese,Spanish,Malay,Indonesian": 0.2866666666666667, + "Vietnamese,Spanish,Malay,English": 0.22666666666666666, + "Vietnamese,Spanish,Indonesian,English": 0.24666666666666667, "Vietnamese,Malay,Indonesian,English": 0.26666666666666666, - "Vietnamese,Malay,Chinese,Spanish": 0.25333333333333335, - "Vietnamese,Malay,Chinese,English": 0.24666666666666667, - "Vietnamese,Malay,Spanish,English": 0.22, - "Vietnamese,Filipino,Indonesian,Chinese": 0.22, - "Vietnamese,Filipino,Indonesian,Spanish": 0.2, - "Vietnamese,Filipino,Indonesian,English": 0.2, - "Vietnamese,Filipino,Chinese,Spanish": 0.17333333333333334, - "Vietnamese,Filipino,Chinese,English": 0.20666666666666667, - "Vietnamese,Filipino,Spanish,English": 0.16666666666666666, - "Vietnamese,Indonesian,Chinese,Spanish": 0.26, - "Vietnamese,Indonesian,Chinese,English": 0.2733333333333333, - "Vietnamese,Indonesian,Spanish,English": 0.22, - "Vietnamese,Chinese,Spanish,English": 0.20666666666666667, - "Malay,Filipino,Indonesian,Chinese": 0.23333333333333334, - "Malay,Filipino,Indonesian,Spanish": 0.24666666666666667, - "Malay,Filipino,Indonesian,English": 0.24666666666666667, - "Malay,Filipino,Chinese,Spanish": 0.21333333333333335, - "Malay,Filipino,Chinese,English": 0.21333333333333335, - "Malay,Filipino,Spanish,English": 0.22666666666666666, - "Malay,Indonesian,Chinese,Spanish": 0.34, - "Malay,Indonesian,Chinese,English": 0.3466666666666667, - "Malay,Indonesian,Spanish,English": 0.36, - "Malay,Chinese,Spanish,English": 0.2866666666666667, - "Filipino,Indonesian,Chinese,Spanish": 0.22, - "Filipino,Indonesian,Chinese,English": 0.25333333333333335, - "Filipino,Indonesian,Spanish,English": 0.24666666666666667, - "Filipino,Chinese,Spanish,English": 0.20666666666666667, - "Indonesian,Chinese,Spanish,English": 0.32 + "Chinese,Spanish,Malay,Indonesian": 0.3333333333333333, + "Chinese,Spanish,Malay,English": 0.28, + "Chinese,Spanish,Indonesian,English": 0.32666666666666666, + "Chinese,Malay,Indonesian,English": 0.32666666666666666, + "Spanish,Malay,Indonesian,English": 0.35333333333333333 }, "5_combine": { - "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.18666666666666668, - "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.18666666666666668, - "Vietnamese,Malay,Filipino,Indonesian,English": 0.17333333333333334, - "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.16, - "Vietnamese,Malay,Filipino,Chinese,English": 0.16, - "Vietnamese,Malay,Filipino,Spanish,English": 0.15333333333333332, - "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.24, - "Vietnamese,Malay,Indonesian,Chinese,English": 0.23333333333333334, - "Vietnamese,Malay,Indonesian,Spanish,English": 0.20666666666666667, - "Vietnamese,Malay,Chinese,Spanish,English": 0.18, - "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.16666666666666666, - "Vietnamese,Filipino,Indonesian,Chinese,English": 0.18, - "Vietnamese,Filipino,Indonesian,Spanish,English": 0.15333333333333332, - "Vietnamese,Filipino,Chinese,Spanish,English": 0.13333333333333333, - "Vietnamese,Indonesian,Chinese,Spanish,English": 0.19333333333333333, - "Malay,Filipino,Indonesian,Chinese,Spanish": 0.18666666666666668, - "Malay,Filipino,Indonesian,Chinese,English": 0.19333333333333333, - "Malay,Filipino,Indonesian,Spanish,English": 0.2, - "Malay,Filipino,Chinese,Spanish,English": 0.16666666666666666, - "Malay,Indonesian,Chinese,Spanish,English": 0.26666666666666666, - "Filipino,Indonesian,Chinese,Spanish,English": 0.18 + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.16666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.16666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.14666666666666667, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.2, + "Filipino,Vietnamese,Chinese,Malay,English": 0.18666666666666668, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.18666666666666668, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.19333333333333333, + "Filipino,Vietnamese,Spanish,Malay,English": 0.15333333333333332, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.16, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.18, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.20666666666666667, + "Filipino,Chinese,Spanish,Malay,English": 0.17333333333333334, + "Filipino,Chinese,Spanish,Indonesian,English": 0.18, + "Filipino,Chinese,Malay,Indonesian,English": 0.20666666666666667, + "Filipino,Spanish,Malay,Indonesian,English": 0.20666666666666667, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.22666666666666666, + "Vietnamese,Chinese,Spanish,Malay,English": 0.18666666666666668, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.2, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.23333333333333334, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.20666666666666667, + "Chinese,Spanish,Malay,Indonesian,English": 0.26666666666666666 }, "6_combine": { - "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.15333333333333332, - "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.15333333333333332, - "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.14666666666666667, - "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.12666666666666668, - "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.18, - "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.13333333333333333, - "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.15333333333333332 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.16, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.14, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.14, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.17333333333333334, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.14, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.16666666666666666, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.18 }, "7_combine": { - "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.12666666666666668 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.13333333333333333 } }, - "AC3_2": 0.5067543309873843, - "AC3_3": 0.40337380242329635, - "AC3_4": 0.32696206406073725, - "AC3_5": 0.2702713487233341, - "AC3_6": 0.22979912905557615, - "AC3_7": 0.2018144749930828 + "AC3_2": 0.5029430673978224, + "AC3_3": 0.40077256683579116, + "AC3_4": 0.3286769759005441, + "AC3_5": 0.2763362487448546, + "AC3_6": 0.23824884788958675, + "AC3_7": 0.209842719398224 }, "prompt_4": { - "overall_acc": 0.5123809523809524, + "overall_acc": 0.5066666666666666, "language_acc": { - "Vietnamese": 0.3933333333333333, + "Filipino": 0.44, + "Vietnamese": 0.38666666666666666, + "Chinese": 0.5133333333333333, + "Spanish": 0.5466666666666666, "Malay": 0.49333333333333335, - "Filipino": 0.43333333333333335, - "Indonesian": 0.5466666666666666, - "Chinese": 0.5266666666666666, - "Spanish": 0.5533333333333333, - "English": 0.64 + "Indonesian": 0.5333333333333333, + "English": 0.6333333333333333 }, - "consistency_score_2": 0.5777777777777778, - "consistency_score_3": 0.4144761904761905, - "consistency_score_4": 0.3230476190476191, - "consistency_score_5": 0.2663492063492064, - "consistency_score_6": 0.22857142857142856, - "consistency_score_7": 0.2, + "consistency_score_2": 0.5622222222222222, + "consistency_score_3": 0.3952380952380952, + "consistency_score_4": 0.3064761904761905, + "consistency_score_5": 0.253015873015873, + "consistency_score_6": 0.21809523809523812, + "consistency_score_7": 0.19333333333333333, "detailed_consistency_score": { "2_combine": { - "Vietnamese,Malay": 0.5866666666666667, - "Vietnamese,Filipino": 0.47333333333333333, - "Vietnamese,Indonesian": 0.56, + "Filipino,Vietnamese": 0.44666666666666666, + "Filipino,Chinese": 0.49333333333333335, + "Filipino,Spanish": 0.5, + "Filipino,Malay": 0.4533333333333333, + "Filipino,Indonesian": 0.5, + "Filipino,English": 0.47333333333333333, "Vietnamese,Chinese": 0.5333333333333333, - "Vietnamese,Spanish": 0.5133333333333333, - "Vietnamese,English": 0.44, - "Malay,Filipino": 0.4866666666666667, - "Malay,Indonesian": 0.7333333333333333, - "Malay,Chinese": 0.6, - "Malay,Spanish": 0.6933333333333334, + "Vietnamese,Spanish": 0.5, + "Vietnamese,Malay": 0.56, + "Vietnamese,Indonesian": 0.5266666666666666, + "Vietnamese,English": 0.4533333333333333, + "Chinese,Spanish": 0.5933333333333334, + "Chinese,Malay": 0.5733333333333334, + "Chinese,Indonesian": 0.64, + "Chinese,English": 0.6, + "Spanish,Malay": 0.6466666666666666, + "Spanish,Indonesian": 0.74, + "Spanish,English": 0.66, + "Malay,Indonesian": 0.72, "Malay,English": 0.54, - "Filipino,Indonesian": 0.5066666666666667, - "Filipino,Chinese": 0.5, - "Filipino,Spanish": 0.49333333333333335, - "Filipino,English": 0.48, - "Indonesian,Chinese": 0.6866666666666666, - "Indonesian,Spanish": 0.7933333333333333, - "Indonesian,English": 0.6533333333333333, - "Chinese,Spanish": 0.64, - "Chinese,English": 0.56, - "Spanish,English": 0.66 + "Indonesian,English": 0.6533333333333333 }, "3_combine": { - "Vietnamese,Malay,Filipino": 0.3333333333333333, - "Vietnamese,Malay,Indonesian": 0.4866666666666667, - "Vietnamese,Malay,Chinese": 0.42, - "Vietnamese,Malay,Spanish": 0.44, - "Vietnamese,Malay,English": 0.36666666666666664, - "Vietnamese,Filipino,Indonesian": 0.32666666666666666, - "Vietnamese,Filipino,Chinese": 0.3466666666666667, - "Vietnamese,Filipino,Spanish": 0.30666666666666664, - "Vietnamese,Filipino,English": 0.3, - "Vietnamese,Indonesian,Chinese": 0.43333333333333335, - "Vietnamese,Indonesian,Spanish": 0.48, - "Vietnamese,Indonesian,English": 0.38, - "Vietnamese,Chinese,Spanish": 0.4, - "Vietnamese,Chinese,English": 0.3466666666666667, - "Vietnamese,Spanish,English": 0.36666666666666664, - "Malay,Filipino,Indonesian": 0.41333333333333333, - "Malay,Filipino,Chinese": 0.35333333333333333, - "Malay,Filipino,Spanish": 0.37333333333333335, - "Malay,Filipino,English": 0.32666666666666666, - "Malay,Indonesian,Chinese": 0.5266666666666666, - "Malay,Indonesian,Spanish": 0.6333333333333333, - "Malay,Indonesian,English": 0.5, - "Malay,Chinese,Spanish": 0.4866666666666667, - "Malay,Chinese,English": 0.3933333333333333, - "Malay,Spanish,English": 0.47333333333333333, - "Filipino,Indonesian,Chinese": 0.38, - "Filipino,Indonesian,Spanish": 0.42, - "Filipino,Indonesian,English": 0.37333333333333335, - "Filipino,Chinese,Spanish": 0.3333333333333333, - "Filipino,Chinese,English": 0.3333333333333333, - "Filipino,Spanish,English": 0.36666666666666664, - "Indonesian,Chinese,Spanish": 0.5733333333333334, - "Indonesian,Chinese,English": 0.4866666666666667, - "Indonesian,Spanish,English": 0.5666666666666667, - "Chinese,Spanish,English": 0.46 - }, - "4_combine": { - "Vietnamese,Malay,Filipino,Indonesian": 0.29333333333333333, - "Vietnamese,Malay,Filipino,Chinese": 0.2866666666666667, - "Vietnamese,Malay,Filipino,Spanish": 0.26666666666666666, - "Vietnamese,Malay,Filipino,English": 0.24666666666666667, - "Vietnamese,Malay,Indonesian,Chinese": 0.3933333333333333, - "Vietnamese,Malay,Indonesian,Spanish": 0.4266666666666667, - "Vietnamese,Malay,Indonesian,English": 0.3466666666666667, - "Vietnamese,Malay,Chinese,Spanish": 0.36, - "Vietnamese,Malay,Chinese,English": 0.30666666666666664, - "Vietnamese,Malay,Spanish,English": 0.32666666666666666, - "Vietnamese,Filipino,Indonesian,Chinese": 0.28, - "Vietnamese,Filipino,Indonesian,Spanish": 0.29333333333333333, - "Vietnamese,Filipino,Indonesian,English": 0.25333333333333335, - "Vietnamese,Filipino,Chinese,Spanish": 0.26, - "Vietnamese,Filipino,Chinese,English": 0.25333333333333335, - "Vietnamese,Filipino,Spanish,English": 0.24666666666666667, - "Vietnamese,Indonesian,Chinese,Spanish": 0.38666666666666666, - "Vietnamese,Indonesian,Chinese,English": 0.32666666666666666, - "Vietnamese,Indonesian,Spanish,English": 0.35333333333333333, - "Vietnamese,Chinese,Spanish,English": 0.30666666666666664, - "Malay,Filipino,Indonesian,Chinese": 0.31333333333333335, - "Malay,Filipino,Indonesian,Spanish": 0.36666666666666664, - "Malay,Filipino,Indonesian,English": 0.31333333333333335, - "Malay,Filipino,Chinese,Spanish": 0.28, - "Malay,Filipino,Chinese,English": 0.24666666666666667, - "Malay,Filipino,Spanish,English": 0.2866666666666667, - "Malay,Indonesian,Chinese,Spanish": 0.4666666666666667, - "Malay,Indonesian,Chinese,English": 0.38, - "Malay,Indonesian,Spanish,English": 0.46, - "Malay,Chinese,Spanish,English": 0.36, - "Filipino,Indonesian,Chinese,Spanish": 0.31333333333333335, - "Filipino,Indonesian,Chinese,English": 0.3, - "Filipino,Indonesian,Spanish,English": 0.32, + "Filipino,Vietnamese,Chinese": 0.32, + "Filipino,Vietnamese,Spanish": 0.29333333333333333, + "Filipino,Vietnamese,Malay": 0.29333333333333333, + "Filipino,Vietnamese,Indonesian": 0.3, + "Filipino,Vietnamese,English": 0.28, + "Filipino,Chinese,Spanish": 0.32666666666666666, + "Filipino,Chinese,Malay": 0.3333333333333333, + "Filipino,Chinese,Indonesian": 0.36, + "Filipino,Chinese,English": 0.34, + "Filipino,Spanish,Malay": 0.36, + "Filipino,Spanish,Indonesian": 0.41333333333333333, + "Filipino,Spanish,English": 0.37333333333333335, + "Filipino,Malay,Indonesian": 0.38666666666666666, + "Filipino,Malay,English": 0.31333333333333335, + "Filipino,Indonesian,English": 0.36666666666666664, + "Vietnamese,Chinese,Spanish": 0.37333333333333335, + "Vietnamese,Chinese,Malay": 0.3933333333333333, + "Vietnamese,Chinese,Indonesian": 0.3933333333333333, + "Vietnamese,Chinese,English": 0.35333333333333333, + "Vietnamese,Spanish,Malay": 0.4066666666666667, + "Vietnamese,Spanish,Indonesian": 0.43333333333333335, + "Vietnamese,Spanish,English": 0.36, + "Vietnamese,Malay,Indonesian": 0.44666666666666666, + "Vietnamese,Malay,English": 0.35333333333333333, + "Vietnamese,Indonesian,English": 0.37333333333333335, + "Chinese,Spanish,Malay": 0.44, + "Chinese,Spanish,Indonesian": 0.52, + "Chinese,Spanish,English": 0.4666666666666667, + "Chinese,Malay,Indonesian": 0.4866666666666667, + "Chinese,Malay,English": 0.3933333333333333, + "Chinese,Indonesian,English": 0.48, + "Spanish,Malay,Indonesian": 0.5733333333333334, + "Spanish,Malay,English": 0.4666666666666667, + "Spanish,Indonesian,English": 0.5666666666666667, + "Malay,Indonesian,English": 0.49333333333333335 + }, + "4_combine": { + "Filipino,Vietnamese,Chinese,Spanish": 0.24666666666666667, + "Filipino,Vietnamese,Chinese,Malay": 0.25333333333333335, + "Filipino,Vietnamese,Chinese,Indonesian": 0.26, + "Filipino,Vietnamese,Chinese,English": 0.24666666666666667, + "Filipino,Vietnamese,Spanish,Malay": 0.25333333333333335, + "Filipino,Vietnamese,Spanish,Indonesian": 0.2733333333333333, + "Filipino,Vietnamese,Spanish,English": 0.23333333333333334, + "Filipino,Vietnamese,Malay,Indonesian": 0.26, + "Filipino,Vietnamese,Malay,English": 0.22666666666666666, + "Filipino,Vietnamese,Indonesian,English": 0.24666666666666667, + "Filipino,Chinese,Spanish,Malay": 0.2733333333333333, + "Filipino,Chinese,Spanish,Indonesian": 0.30666666666666664, "Filipino,Chinese,Spanish,English": 0.2733333333333333, - "Indonesian,Chinese,Spanish,English": 0.41333333333333333 + "Filipino,Chinese,Malay,Indonesian": 0.29333333333333333, + "Filipino,Chinese,Malay,English": 0.25333333333333335, + "Filipino,Chinese,Indonesian,English": 0.29333333333333333, + "Filipino,Spanish,Malay,Indonesian": 0.34, + "Filipino,Spanish,Malay,English": 0.2866666666666667, + "Filipino,Spanish,Indonesian,English": 0.3333333333333333, + "Filipino,Malay,Indonesian,English": 0.30666666666666664, + "Vietnamese,Chinese,Spanish,Malay": 0.32, + "Vietnamese,Chinese,Spanish,Indonesian": 0.34, + "Vietnamese,Chinese,Spanish,English": 0.29333333333333333, + "Vietnamese,Chinese,Malay,Indonesian": 0.34, + "Vietnamese,Chinese,Malay,English": 0.2866666666666667, + "Vietnamese,Chinese,Indonesian,English": 0.31333333333333335, + "Vietnamese,Spanish,Malay,Indonesian": 0.38, + "Vietnamese,Spanish,Malay,English": 0.31333333333333335, + "Vietnamese,Spanish,Indonesian,English": 0.3333333333333333, + "Vietnamese,Malay,Indonesian,English": 0.32666666666666666, + "Chinese,Spanish,Malay,Indonesian": 0.41333333333333333, + "Chinese,Spanish,Malay,English": 0.36, + "Chinese,Spanish,Indonesian,English": 0.42, + "Chinese,Malay,Indonesian,English": 0.38, + "Spanish,Malay,Indonesian,English": 0.44666666666666666 }, "5_combine": { - "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.26, - "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.26666666666666666, - "Vietnamese,Malay,Filipino,Indonesian,English": 0.23333333333333334, - "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.24, - "Vietnamese,Malay,Filipino,Chinese,English": 0.21333333333333335, - "Vietnamese,Malay,Filipino,Spanish,English": 0.22, - "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.36, - "Vietnamese,Malay,Indonesian,Chinese,English": 0.30666666666666664, - "Vietnamese,Malay,Indonesian,Spanish,English": 0.32666666666666666, - "Vietnamese,Malay,Chinese,Spanish,English": 0.2866666666666667, - "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.25333333333333335, - "Vietnamese,Filipino,Indonesian,Chinese,English": 0.23333333333333334, - "Vietnamese,Filipino,Indonesian,Spanish,English": 0.23333333333333334, - "Vietnamese,Filipino,Chinese,Spanish,English": 0.22, - "Vietnamese,Indonesian,Chinese,Spanish,English": 0.3, - "Malay,Filipino,Indonesian,Chinese,Spanish": 0.28, - "Malay,Filipino,Indonesian,Chinese,English": 0.24666666666666667, - "Malay,Filipino,Indonesian,Spanish,English": 0.2866666666666667, - "Malay,Filipino,Chinese,Spanish,English": 0.22666666666666666, - "Malay,Indonesian,Chinese,Spanish,English": 0.3466666666666667, - "Filipino,Indonesian,Chinese,Spanish,English": 0.25333333333333335 + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.22666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.24, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.20666666666666667, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.22666666666666666, + "Filipino,Vietnamese,Chinese,Malay,English": 0.2, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.22666666666666666, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.24666666666666667, + "Filipino,Vietnamese,Spanish,Malay,English": 0.21333333333333335, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.22666666666666666, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.22, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.26666666666666666, + "Filipino,Chinese,Spanish,Malay,English": 0.23333333333333334, + "Filipino,Chinese,Spanish,Indonesian,English": 0.26, + "Filipino,Chinese,Malay,Indonesian,English": 0.25333333333333335, + "Filipino,Spanish,Malay,Indonesian,English": 0.2866666666666667, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.30666666666666664, + "Vietnamese,Chinese,Spanish,Malay,English": 0.26666666666666666, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.28, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.28, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.3, + "Chinese,Spanish,Malay,Indonesian,English": 0.3466666666666667 }, "6_combine": { - "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.24, - "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.21333333333333335, - "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.22, - "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.2, - "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.2866666666666667, - "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.21333333333333335, - "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.22666666666666666 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.22, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.19333333333333333, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.20666666666666667, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.2, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.21333333333333335, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.23333333333333334, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.26 }, "7_combine": { - "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.2 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.19333333333333333 } }, - "AC3_2": 0.5431178411460643, - "AC3_3": 0.45825768691354696, - "AC3_4": 0.39625996130144875, - "AC3_5": 0.35049948552174104, - "AC3_6": 0.3161219243054812, - "AC3_7": 0.28770053471897256 + "AC3_2": 0.533000692950828, + "AC3_3": 0.44406898974308984, + "AC3_4": 0.3819286327319795, + "AC3_5": 0.33749547286266424, + "AC3_6": 0.3049321068348429, + "AC3_7": 0.279873015833034 }, "prompt_5": { - "overall_acc": 0.4961904761904762, + "overall_acc": 0.49047619047619045, "language_acc": { - "Vietnamese": 0.38, - "Malay": 0.5066666666666667, - "Filipino": 0.42, - "Indonesian": 0.5266666666666666, - "Chinese": 0.5066666666666667, - "Spanish": 0.52, - "English": 0.6133333333333333 + "Filipino": 0.4066666666666667, + "Vietnamese": 0.3933333333333333, + "Chinese": 0.5, + "Spanish": 0.49333333333333335, + "Malay": 0.49333333333333335, + "Indonesian": 0.5466666666666666, + "English": 0.6 }, - "consistency_score_2": 0.5580952380952381, - "consistency_score_3": 0.39066666666666655, - "consistency_score_4": 0.2980952380952381, - "consistency_score_5": 0.23999999999999996, - "consistency_score_6": 0.20095238095238094, + "consistency_score_2": 0.5482539682539683, + "consistency_score_3": 0.38019047619047625, + "consistency_score_4": 0.291047619047619, + "consistency_score_5": 0.2358730158730159, + "consistency_score_6": 0.19904761904761906, "consistency_score_7": 0.17333333333333334, "detailed_consistency_score": { "2_combine": { - "Vietnamese,Malay": 0.56, - "Vietnamese,Filipino": 0.5, - "Vietnamese,Indonesian": 0.5466666666666666, + "Filipino,Vietnamese": 0.48, + "Filipino,Chinese": 0.4866666666666667, + "Filipino,Spanish": 0.46, + "Filipino,Malay": 0.47333333333333333, + "Filipino,Indonesian": 0.4666666666666667, + "Filipino,English": 0.4666666666666667, "Vietnamese,Chinese": 0.5133333333333333, - "Vietnamese,Spanish": 0.4533333333333333, - "Vietnamese,English": 0.3933333333333333, - "Malay,Filipino": 0.5066666666666667, - "Malay,Indonesian": 0.7733333333333333, - "Malay,Chinese": 0.64, - "Malay,Spanish": 0.6733333333333333, - "Malay,English": 0.56, - "Filipino,Indonesian": 0.4866666666666667, - "Filipino,Chinese": 0.5, - "Filipino,Spanish": 0.48, - "Filipino,English": 0.44, - "Indonesian,Chinese": 0.66, - "Indonesian,Spanish": 0.7, - "Indonesian,English": 0.5933333333333334, - "Chinese,Spanish": 0.5866666666666667, - "Chinese,English": 0.5733333333333334, - "Spanish,English": 0.58 + "Vietnamese,Spanish": 0.49333333333333335, + "Vietnamese,Malay": 0.5866666666666667, + "Vietnamese,Indonesian": 0.5666666666666667, + "Vietnamese,English": 0.4266666666666667, + "Chinese,Spanish": 0.5466666666666666, + "Chinese,Malay": 0.5933333333333334, + "Chinese,Indonesian": 0.66, + "Chinese,English": 0.5266666666666666, + "Spanish,Malay": 0.6866666666666666, + "Spanish,Indonesian": 0.6866666666666666, + "Spanish,English": 0.6, + "Malay,Indonesian": 0.72, + "Malay,English": 0.5133333333333333, + "Indonesian,English": 0.56 }, "3_combine": { - "Vietnamese,Malay,Filipino": 0.34, - "Vietnamese,Malay,Indonesian": 0.4666666666666667, - "Vietnamese,Malay,Chinese": 0.41333333333333333, - "Vietnamese,Malay,Spanish": 0.41333333333333333, - "Vietnamese,Malay,English": 0.32, - "Vietnamese,Filipino,Indonesian": 0.3333333333333333, - "Vietnamese,Filipino,Chinese": 0.3333333333333333, - "Vietnamese,Filipino,Spanish": 0.29333333333333333, - "Vietnamese,Filipino,English": 0.2733333333333333, - "Vietnamese,Indonesian,Chinese": 0.42, - "Vietnamese,Indonesian,Spanish": 0.41333333333333333, - "Vietnamese,Indonesian,English": 0.32666666666666666, - "Vietnamese,Chinese,Spanish": 0.35333333333333333, - "Vietnamese,Chinese,English": 0.32, - "Vietnamese,Spanish,English": 0.29333333333333333, - "Malay,Filipino,Indonesian": 0.4066666666666667, - "Malay,Filipino,Chinese": 0.37333333333333335, - "Malay,Filipino,Spanish": 0.38, - "Malay,Filipino,English": 0.32, - "Malay,Indonesian,Chinese": 0.5533333333333333, - "Malay,Indonesian,Spanish": 0.6066666666666667, - "Malay,Indonesian,English": 0.49333333333333335, - "Malay,Chinese,Spanish": 0.4866666666666667, - "Malay,Chinese,English": 0.42, - "Malay,Spanish,English": 0.44666666666666666, - "Filipino,Indonesian,Chinese": 0.36, - "Filipino,Indonesian,Spanish": 0.38666666666666666, - "Filipino,Indonesian,English": 0.30666666666666664, - "Filipino,Chinese,Spanish": 0.3466666666666667, - "Filipino,Chinese,English": 0.32, + "Filipino,Vietnamese,Chinese": 0.32666666666666666, + "Filipino,Vietnamese,Spanish": 0.2866666666666667, + "Filipino,Vietnamese,Malay": 0.34, + "Filipino,Vietnamese,Indonesian": 0.3333333333333333, + "Filipino,Vietnamese,English": 0.2866666666666667, + "Filipino,Chinese,Spanish": 0.32666666666666666, + "Filipino,Chinese,Malay": 0.35333333333333333, + "Filipino,Chinese,Indonesian": 0.36, + "Filipino,Chinese,English": 0.32666666666666666, + "Filipino,Spanish,Malay": 0.36, + "Filipino,Spanish,Indonesian": 0.36666666666666664, "Filipino,Spanish,English": 0.31333333333333335, - "Indonesian,Chinese,Spanish": 0.5066666666666667, - "Indonesian,Chinese,English": 0.4533333333333333, - "Indonesian,Spanish,English": 0.47333333333333333, - "Chinese,Spanish,English": 0.4066666666666667 - }, - "4_combine": { - "Vietnamese,Malay,Filipino,Indonesian": 0.29333333333333333, - "Vietnamese,Malay,Filipino,Chinese": 0.28, - "Vietnamese,Malay,Filipino,Spanish": 0.26666666666666666, - "Vietnamese,Malay,Filipino,English": 0.22666666666666666, - "Vietnamese,Malay,Indonesian,Chinese": 0.38, - "Vietnamese,Malay,Indonesian,Spanish": 0.3933333333333333, + "Filipino,Malay,Indonesian": 0.38, + "Filipino,Malay,English": 0.30666666666666664, + "Filipino,Indonesian,English": 0.31333333333333335, + "Vietnamese,Chinese,Spanish": 0.35333333333333333, + "Vietnamese,Chinese,Malay": 0.41333333333333333, + "Vietnamese,Chinese,Indonesian": 0.43333333333333335, + "Vietnamese,Chinese,English": 0.3333333333333333, + "Vietnamese,Spanish,Malay": 0.4266666666666667, + "Vietnamese,Spanish,Indonesian": 0.42, + "Vietnamese,Spanish,English": 0.32, + "Vietnamese,Malay,Indonesian": 0.4666666666666667, + "Vietnamese,Malay,English": 0.3333333333333333, + "Vietnamese,Indonesian,English": 0.3333333333333333, + "Chinese,Spanish,Malay": 0.4533333333333333, + "Chinese,Spanish,Indonesian": 0.48, + "Chinese,Spanish,English": 0.38666666666666666, + "Chinese,Malay,Indonesian": 0.5133333333333333, + "Chinese,Malay,English": 0.36666666666666664, + "Chinese,Indonesian,English": 0.41333333333333333, + "Spanish,Malay,Indonesian": 0.5733333333333334, + "Spanish,Malay,English": 0.4266666666666667, + "Spanish,Indonesian,English": 0.4533333333333333, + "Malay,Indonesian,English": 0.4266666666666667 + }, + "4_combine": { + "Filipino,Vietnamese,Chinese,Spanish": 0.24, + "Filipino,Vietnamese,Chinese,Malay": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,Indonesian": 0.2866666666666667, + "Filipino,Vietnamese,Chinese,English": 0.24, + "Filipino,Vietnamese,Spanish,Malay": 0.26666666666666666, + "Filipino,Vietnamese,Spanish,Indonesian": 0.2733333333333333, + "Filipino,Vietnamese,Spanish,English": 0.2, + "Filipino,Vietnamese,Malay,Indonesian": 0.29333333333333333, + "Filipino,Vietnamese,Malay,English": 0.23333333333333334, + "Filipino,Vietnamese,Indonesian,English": 0.23333333333333334, + "Filipino,Chinese,Spanish,Malay": 0.2866666666666667, + "Filipino,Chinese,Spanish,Indonesian": 0.29333333333333333, + "Filipino,Chinese,Spanish,English": 0.24666666666666667, + "Filipino,Chinese,Malay,Indonesian": 0.30666666666666664, + "Filipino,Chinese,Malay,English": 0.25333333333333335, + "Filipino,Chinese,Indonesian,English": 0.26666666666666666, + "Filipino,Spanish,Malay,Indonesian": 0.3333333333333333, + "Filipino,Spanish,Malay,English": 0.25333333333333335, + "Filipino,Spanish,Indonesian,English": 0.26666666666666666, + "Filipino,Malay,Indonesian,English": 0.26, + "Vietnamese,Chinese,Spanish,Malay": 0.3333333333333333, + "Vietnamese,Chinese,Spanish,Indonesian": 0.3333333333333333, + "Vietnamese,Chinese,Spanish,English": 0.26, + "Vietnamese,Chinese,Malay,Indonesian": 0.38666666666666666, + "Vietnamese,Chinese,Malay,English": 0.2866666666666667, + "Vietnamese,Chinese,Indonesian,English": 0.29333333333333333, + "Vietnamese,Spanish,Malay,Indonesian": 0.37333333333333335, + "Vietnamese,Spanish,Malay,English": 0.2733333333333333, + "Vietnamese,Spanish,Indonesian,English": 0.28, "Vietnamese,Malay,Indonesian,English": 0.2866666666666667, - "Vietnamese,Malay,Chinese,Spanish": 0.34, - "Vietnamese,Malay,Chinese,English": 0.2866666666666667, - "Vietnamese,Malay,Spanish,English": 0.2733333333333333, - "Vietnamese,Filipino,Indonesian,Chinese": 0.2733333333333333, - "Vietnamese,Filipino,Indonesian,Spanish": 0.2733333333333333, - "Vietnamese,Filipino,Indonesian,English": 0.21333333333333335, - "Vietnamese,Filipino,Chinese,Spanish": 0.24666666666666667, - "Vietnamese,Filipino,Chinese,English": 0.22666666666666666, - "Vietnamese,Filipino,Spanish,English": 0.20666666666666667, - "Vietnamese,Indonesian,Chinese,Spanish": 0.3466666666666667, - "Vietnamese,Indonesian,Chinese,English": 0.2866666666666667, - "Vietnamese,Indonesian,Spanish,English": 0.28, - "Vietnamese,Chinese,Spanish,English": 0.25333333333333335, - "Malay,Filipino,Indonesian,Chinese": 0.32, - "Malay,Filipino,Indonesian,Spanish": 0.3466666666666667, - "Malay,Filipino,Indonesian,English": 0.26666666666666666, - "Malay,Filipino,Chinese,Spanish": 0.3, - "Malay,Filipino,Chinese,English": 0.26, - "Malay,Filipino,Spanish,English": 0.26666666666666666, - "Malay,Indonesian,Chinese,Spanish": 0.44666666666666666, - "Malay,Indonesian,Chinese,English": 0.37333333333333335, - "Malay,Indonesian,Spanish,English": 0.4266666666666667, - "Malay,Chinese,Spanish,English": 0.34, - "Filipino,Indonesian,Chinese,Spanish": 0.30666666666666664, - "Filipino,Indonesian,Chinese,English": 0.25333333333333335, - "Filipino,Indonesian,Spanish,English": 0.2733333333333333, - "Filipino,Chinese,Spanish,English": 0.26, - "Indonesian,Chinese,Spanish,English": 0.36 + "Chinese,Spanish,Malay,Indonesian": 0.4266666666666667, + "Chinese,Spanish,Malay,English": 0.31333333333333335, + "Chinese,Spanish,Indonesian,English": 0.3333333333333333, + "Chinese,Malay,Indonesian,English": 0.32, + "Spanish,Malay,Indonesian,English": 0.38 }, "5_combine": { - "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.25333333333333335, - "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.26, - "Vietnamese,Malay,Filipino,Indonesian,English": 0.19333333333333333, - "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.23333333333333334, - "Vietnamese,Malay,Filipino,Chinese,English": 0.20666666666666667, - "Vietnamese,Malay,Filipino,Spanish,English": 0.19333333333333333, - "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.3333333333333333, - "Vietnamese,Malay,Indonesian,Chinese,English": 0.26, - "Vietnamese,Malay,Indonesian,Spanish,English": 0.26666666666666666, - "Vietnamese,Malay,Chinese,Spanish,English": 0.24666666666666667, - "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.24, - "Vietnamese,Filipino,Indonesian,Chinese,English": 0.19333333333333333, - "Vietnamese,Filipino,Indonesian,Spanish,English": 0.19333333333333333, - "Vietnamese,Filipino,Chinese,Spanish,English": 0.18666666666666668, - "Vietnamese,Indonesian,Chinese,Spanish,English": 0.24666666666666667, - "Malay,Filipino,Indonesian,Chinese,Spanish": 0.28, - "Malay,Filipino,Indonesian,Chinese,English": 0.22, - "Malay,Filipino,Indonesian,Spanish,English": 0.25333333333333335, - "Malay,Filipino,Chinese,Spanish,English": 0.22666666666666666, - "Malay,Indonesian,Chinese,Spanish,English": 0.32, - "Filipino,Indonesian,Chinese,Spanish,English": 0.23333333333333334 + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.23333333333333334, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.24, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.18, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.26, + "Filipino,Vietnamese,Chinese,Malay,English": 0.20666666666666667, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.22, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.26, + "Filipino,Vietnamese,Spanish,Malay,English": 0.18666666666666668, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.19333333333333333, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.20666666666666667, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.2733333333333333, + "Filipino,Chinese,Spanish,Malay,English": 0.21333333333333335, + "Filipino,Chinese,Spanish,Indonesian,English": 0.22, + "Filipino,Chinese,Malay,Indonesian,English": 0.22, + "Filipino,Spanish,Malay,Indonesian,English": 0.24, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.32, + "Vietnamese,Chinese,Spanish,Malay,English": 0.24, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.24, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.26, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.25333333333333335, + "Chinese,Spanish,Malay,Indonesian,English": 0.2866666666666667 }, "6_combine": { - "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.22666666666666666, - "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.18, - "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.18666666666666668, - "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.18, - "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.24, - "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.18, - "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.21333333333333335 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.23333333333333334, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.17333333333333334, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.18, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.19333333333333333, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.18666666666666668, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.2, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.22666666666666666 }, "7_combine": { - "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.17333333333333334 + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.17333333333333334 } }, - "AC3_2": 0.5253254182837208, - "AC3_3": 0.43715062996216253, - "AC3_4": 0.37244033340063765, - "AC3_5": 0.3235187580414366, - "AC3_6": 0.2860551651944606, - "AC3_7": 0.2569179705638079 + "AC3_2": 0.5177581790164023, + "AC3_3": 0.42834849095516725, + "AC3_4": 0.3653168993184125, + "AC3_5": 0.3185522810084245, + "AC3_6": 0.28317548009573784, + "AC3_7": 0.2561453849446744 } }, "cross_logiqa": { "prompt_1": { - "overall_acc": 0.4456168831168831, + "overall_acc": 0.4537337662337663, "language_acc": { + "Indonesian": 0.5, "English": 0.5397727272727273, - "Filipino": 0.3409090909090909, - "Vietnamese": 0.3693181818181818, - "Chinese": 0.4772727272727273, - "Indonesian": 0.4943181818181818, - "Malay": 0.42045454545454547, - "Spanish": 0.4772727272727273 + "Filipino": 0.3522727272727273, + "Spanish": 0.4943181818181818, + "Chinese": 0.4943181818181818, + "Malay": 0.4147727272727273, + "Vietnamese": 0.3806818181818182 }, - "consistency_score_2": 0.5698051948051948, - "consistency_score_3": 0.40178571428571425, - "consistency_score_4": 0.3077922077922078, - "consistency_score_5": 0.2467532467532467, - "consistency_score_6": 0.20373376623376624, - "consistency_score_7": 0.17045454545454544, + "consistency_score_2": 0.5676406926406927, + "consistency_score_3": 0.3974025974025973, + "consistency_score_4": 0.30227272727272725, + "consistency_score_5": 0.240530303030303, + "consistency_score_6": 0.19724025974025974, + "consistency_score_7": 0.16477272727272727, "detailed_consistency_score": { "2_combine": { - "English,Filipino": 0.4659090909090909, - "English,Vietnamese": 0.5056818181818182, - "English,Chinese": 0.625, - "English,Indonesian": 0.7159090909090909, - "English,Malay": 0.6363636363636364, - "English,Spanish": 0.6647727272727273, - "Filipino,Vietnamese": 0.5397727272727273, - "Filipino,Chinese": 0.44886363636363635, - "Filipino,Indonesian": 0.4943181818181818, - "Filipino,Malay": 0.5170454545454546, + "Indonesian,English": 0.7215909090909091, + "Indonesian,Filipino": 0.4943181818181818, + "Indonesian,Spanish": 0.7443181818181818, + "Indonesian,Chinese": 0.6477272727272727, + "Indonesian,Malay": 0.7045454545454546, + "Indonesian,Vietnamese": 0.5056818181818182, + "English,Filipino": 0.45454545454545453, + "English,Spanish": 0.6875, + "English,Chinese": 0.6363636363636364, + "English,Malay": 0.6079545454545454, + "English,Vietnamese": 0.48295454545454547, "Filipino,Spanish": 0.45454545454545453, - "Vietnamese,Chinese": 0.5056818181818182, - "Vietnamese,Indonesian": 0.5056818181818182, - "Vietnamese,Malay": 0.5170454545454546, - "Vietnamese,Spanish": 0.5113636363636364, - "Chinese,Indonesian": 0.6306818181818182, - "Chinese,Malay": 0.5738636363636364, - "Chinese,Spanish": 0.6079545454545454, - "Indonesian,Malay": 0.6988636363636364, - "Indonesian,Spanish": 0.7215909090909091, - "Malay,Spanish": 0.625 + "Filipino,Chinese": 0.4318181818181818, + "Filipino,Malay": 0.5, + "Filipino,Vietnamese": 0.5170454545454546, + "Spanish,Chinese": 0.6193181818181818, + "Spanish,Malay": 0.6079545454545454, + "Spanish,Vietnamese": 0.5113636363636364, + "Chinese,Malay": 0.5909090909090909, + "Chinese,Vietnamese": 0.48863636363636365, + "Malay,Vietnamese": 0.5113636363636364 }, "3_combine": { - "English,Filipino,Vietnamese": 0.3181818181818182, - "English,Filipino,Chinese": 0.3409090909090909, - "English,Filipino,Indonesian": 0.375, - "English,Filipino,Malay": 0.3693181818181818, - "English,Filipino,Spanish": 0.3409090909090909, - "English,Vietnamese,Chinese": 0.3806818181818182, - "English,Vietnamese,Indonesian": 0.3977272727272727, - "English,Vietnamese,Malay": 0.38636363636363635, - "English,Vietnamese,Spanish": 0.38636363636363635, - "English,Chinese,Indonesian": 0.5227272727272727, - "English,Chinese,Malay": 0.45454545454545453, - "English,Chinese,Spanish": 0.5, - "English,Indonesian,Malay": 0.5568181818181818, - "English,Indonesian,Spanish": 0.5681818181818182, - "English,Malay,Spanish": 0.5056818181818182, - "Filipino,Vietnamese,Chinese": 0.3068181818181818, - "Filipino,Vietnamese,Indonesian": 0.32954545454545453, - "Filipino,Vietnamese,Malay": 0.3352272727272727, - "Filipino,Vietnamese,Spanish": 0.3181818181818182, - "Filipino,Chinese,Indonesian": 0.3522727272727273, - "Filipino,Chinese,Malay": 0.32386363636363635, - "Filipino,Chinese,Spanish": 0.32386363636363635, - "Filipino,Indonesian,Malay": 0.39204545454545453, - "Filipino,Indonesian,Spanish": 0.375, - "Filipino,Malay,Spanish": 0.3522727272727273, - "Vietnamese,Chinese,Indonesian": 0.3693181818181818, - "Vietnamese,Chinese,Malay": 0.35795454545454547, - "Vietnamese,Chinese,Spanish": 0.3693181818181818, - "Vietnamese,Indonesian,Malay": 0.4034090909090909, - "Vietnamese,Indonesian,Spanish": 0.3977272727272727, - "Vietnamese,Malay,Spanish": 0.38636363636363635, - "Chinese,Indonesian,Malay": 0.4772727272727273, - "Chinese,Indonesian,Spanish": 0.5056818181818182, - "Chinese,Malay,Spanish": 0.4375, - "Indonesian,Malay,Spanish": 0.5454545454545454 - }, - "4_combine": { - "English,Filipino,Vietnamese,Chinese": 0.2556818181818182, - "English,Filipino,Vietnamese,Indonesian": 0.26136363636363635, - "English,Filipino,Vietnamese,Malay": 0.2556818181818182, - "English,Filipino,Vietnamese,Spanish": 0.24431818181818182, - "English,Filipino,Chinese,Indonesian": 0.30113636363636365, - "English,Filipino,Chinese,Malay": 0.2840909090909091, - "English,Filipino,Chinese,Spanish": 0.2897727272727273, - "English,Filipino,Indonesian,Malay": 0.32386363636363635, - "English,Filipino,Indonesian,Spanish": 0.29545454545454547, - "English,Filipino,Malay,Spanish": 0.2897727272727273, - "English,Vietnamese,Chinese,Indonesian": 0.3181818181818182, - "English,Vietnamese,Chinese,Malay": 0.3068181818181818, - "English,Vietnamese,Chinese,Spanish": 0.3181818181818182, - "English,Vietnamese,Indonesian,Malay": 0.3522727272727273, - "English,Vietnamese,Indonesian,Spanish": 0.32954545454545453, - "English,Vietnamese,Malay,Spanish": 0.32386363636363635, - "English,Chinese,Indonesian,Malay": 0.42045454545454547, - "English,Chinese,Indonesian,Spanish": 0.4431818181818182, - "English,Chinese,Malay,Spanish": 0.38636363636363635, - "English,Indonesian,Malay,Spanish": 0.4602272727272727, - "Filipino,Vietnamese,Chinese,Indonesian": 0.2556818181818182, - "Filipino,Vietnamese,Chinese,Malay": 0.25, - "Filipino,Vietnamese,Chinese,Spanish": 0.25, - "Filipino,Vietnamese,Indonesian,Malay": 0.26704545454545453, - "Filipino,Vietnamese,Indonesian,Spanish": 0.2556818181818182, - "Filipino,Vietnamese,Malay,Spanish": 0.25, - "Filipino,Chinese,Indonesian,Malay": 0.30113636363636365, - "Filipino,Chinese,Indonesian,Spanish": 0.2840909090909091, - "Filipino,Chinese,Malay,Spanish": 0.26136363636363635, - "Filipino,Indonesian,Malay,Spanish": 0.3068181818181818, - "Vietnamese,Chinese,Indonesian,Malay": 0.3125, - "Vietnamese,Chinese,Indonesian,Spanish": 0.30113636363636365, - "Vietnamese,Chinese,Malay,Spanish": 0.29545454545454547, - "Vietnamese,Indonesian,Malay,Spanish": 0.32954545454545453, - "Chinese,Indonesian,Malay,Spanish": 0.39204545454545453 + "Indonesian,English,Filipino": 0.38636363636363635, + "Indonesian,English,Spanish": 0.5909090909090909, + "Indonesian,English,Chinese": 0.5397727272727273, + "Indonesian,English,Malay": 0.5454545454545454, + "Indonesian,English,Vietnamese": 0.38636363636363635, + "Indonesian,Filipino,Spanish": 0.38636363636363635, + "Indonesian,Filipino,Chinese": 0.3522727272727273, + "Indonesian,Filipino,Malay": 0.4034090909090909, + "Indonesian,Filipino,Vietnamese": 0.3181818181818182, + "Indonesian,Spanish,Chinese": 0.5284090909090909, + "Indonesian,Spanish,Malay": 0.5454545454545454, + "Indonesian,Spanish,Vietnamese": 0.4090909090909091, + "Indonesian,Chinese,Malay": 0.4943181818181818, + "Indonesian,Chinese,Vietnamese": 0.35795454545454547, + "Indonesian,Malay,Vietnamese": 0.3977272727272727, + "English,Filipino,Spanish": 0.3465909090909091, + "English,Filipino,Chinese": 0.3352272727272727, + "English,Filipino,Malay": 0.3465909090909091, + "English,Filipino,Vietnamese": 0.2897727272727273, + "English,Spanish,Chinese": 0.5170454545454546, + "English,Spanish,Malay": 0.48295454545454547, + "English,Spanish,Vietnamese": 0.3806818181818182, + "English,Chinese,Malay": 0.4602272727272727, + "English,Chinese,Vietnamese": 0.36363636363636365, + "English,Malay,Vietnamese": 0.3522727272727273, + "Filipino,Spanish,Chinese": 0.3181818181818182, + "Filipino,Spanish,Malay": 0.3409090909090909, + "Filipino,Spanish,Vietnamese": 0.3068181818181818, + "Filipino,Chinese,Malay": 0.3181818181818182, + "Filipino,Chinese,Vietnamese": 0.2840909090909091, + "Filipino,Malay,Vietnamese": 0.3125, + "Spanish,Chinese,Malay": 0.4431818181818182, + "Spanish,Chinese,Vietnamese": 0.35795454545454547, + "Spanish,Malay,Vietnamese": 0.36363636363636365, + "Chinese,Malay,Vietnamese": 0.3465909090909091 + }, + "4_combine": { + "Indonesian,English,Filipino,Spanish": 0.3181818181818182, + "Indonesian,English,Filipino,Chinese": 0.3125, + "Indonesian,English,Filipino,Malay": 0.32386363636363635, + "Indonesian,English,Filipino,Vietnamese": 0.25, + "Indonesian,English,Spanish,Chinese": 0.4715909090909091, + "Indonesian,English,Spanish,Malay": 0.45454545454545453, + "Indonesian,English,Spanish,Vietnamese": 0.3352272727272727, + "Indonesian,English,Chinese,Malay": 0.42613636363636365, + "Indonesian,English,Chinese,Vietnamese": 0.3068181818181818, + "Indonesian,English,Malay,Vietnamese": 0.32386363636363635, + "Indonesian,Filipino,Spanish,Chinese": 0.29545454545454547, + "Indonesian,Filipino,Spanish,Malay": 0.3181818181818182, + "Indonesian,Filipino,Spanish,Vietnamese": 0.2556818181818182, + "Indonesian,Filipino,Chinese,Malay": 0.30113636363636365, + "Indonesian,Filipino,Chinese,Vietnamese": 0.24431818181818182, + "Indonesian,Filipino,Malay,Vietnamese": 0.26136363636363635, + "Indonesian,Spanish,Chinese,Malay": 0.4034090909090909, + "Indonesian,Spanish,Chinese,Vietnamese": 0.3068181818181818, + "Indonesian,Spanish,Malay,Vietnamese": 0.32386363636363635, + "Indonesian,Chinese,Malay,Vietnamese": 0.30113636363636365, + "English,Filipino,Spanish,Chinese": 0.29545454545454547, + "English,Filipino,Spanish,Malay": 0.2727272727272727, + "English,Filipino,Spanish,Vietnamese": 0.23295454545454544, + "English,Filipino,Chinese,Malay": 0.2784090909090909, + "English,Filipino,Chinese,Vietnamese": 0.23863636363636365, + "English,Filipino,Malay,Vietnamese": 0.22727272727272727, + "English,Spanish,Chinese,Malay": 0.39204545454545453, + "English,Spanish,Chinese,Vietnamese": 0.3068181818181818, + "English,Spanish,Malay,Vietnamese": 0.29545454545454547, + "English,Chinese,Malay,Vietnamese": 0.2784090909090909, + "Filipino,Spanish,Chinese,Malay": 0.2556818181818182, + "Filipino,Spanish,Chinese,Vietnamese": 0.23295454545454544, + "Filipino,Spanish,Malay,Vietnamese": 0.22727272727272727, + "Filipino,Chinese,Malay,Vietnamese": 0.23295454545454544, + "Spanish,Chinese,Malay,Vietnamese": 0.2784090909090909 }, "5_combine": { - "English,Filipino,Vietnamese,Chinese,Indonesian": 0.2215909090909091, - "English,Filipino,Vietnamese,Chinese,Malay": 0.2159090909090909, - "English,Filipino,Vietnamese,Chinese,Spanish": 0.2215909090909091, - "English,Filipino,Vietnamese,Indonesian,Malay": 0.23295454545454544, - "English,Filipino,Vietnamese,Indonesian,Spanish": 0.21022727272727273, - "English,Filipino,Vietnamese,Malay,Spanish": 0.21022727272727273, - "English,Filipino,Chinese,Indonesian,Malay": 0.26704545454545453, - "English,Filipino,Chinese,Indonesian,Spanish": 0.2556818181818182, - "English,Filipino,Chinese,Malay,Spanish": 0.24431818181818182, - "English,Filipino,Indonesian,Malay,Spanish": 0.26136363636363635, - "English,Vietnamese,Chinese,Indonesian,Malay": 0.2840909090909091, - "English,Vietnamese,Chinese,Indonesian,Spanish": 0.2727272727272727, - "English,Vietnamese,Chinese,Malay,Spanish": 0.26704545454545453, - "English,Vietnamese,Indonesian,Malay,Spanish": 0.29545454545454547, - "English,Chinese,Indonesian,Malay,Spanish": 0.35795454545454547, - "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.22727272727272727, - "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.21022727272727273, - "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.20454545454545456, - "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.2159090909090909, - "Filipino,Chinese,Indonesian,Malay,Spanish": 0.24431818181818182, - "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.26136363636363635 + "Indonesian,English,Filipino,Spanish,Chinese": 0.2784090909090909, + "Indonesian,English,Filipino,Spanish,Malay": 0.26704545454545453, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.21022727272727273, + "Indonesian,English,Filipino,Chinese,Malay": 0.26704545454545453, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.2159090909090909, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.2159090909090909, + "Indonesian,English,Spanish,Chinese,Malay": 0.3693181818181818, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.2784090909090909, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.2784090909090909, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.26136363636363635, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.25, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.21022727272727273, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.21022727272727273, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.2159090909090909, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.2556818181818182, + "English,Filipino,Spanish,Chinese,Malay": 0.23863636363636365, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.21022727272727273, + "English,Filipino,Spanish,Malay,Vietnamese": 0.1875, + "English,Filipino,Chinese,Malay,Vietnamese": 0.19886363636363635, + "English,Spanish,Chinese,Malay,Vietnamese": 0.24431818181818182, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1875 }, "6_combine": { - "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.19886363636363635, - "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.1875, - "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.1875, - "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.19318181818181818, - "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.22727272727272727, - "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.24431818181818182, - "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.1875 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.23295454545454544, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.19318181818181818, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.18181818181818182, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.1875, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.23295454545454544, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.18181818181818182, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.17045454545454544 }, "7_combine": { - "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.17045454545454544 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.16477272727272727 } }, - "AC3_2": 0.5001167896358829, - "AC3_3": 0.42256773394028146, - "AC3_4": 0.36409808673883565, - "AC3_5": 0.31762610187096524, - "AC3_6": 0.27962459411278195, - "AC3_7": 0.2465864174950085 + "AC3_2": 0.5043355981272797, + "AC3_3": 0.42370408529840126, + "AC3_4": 0.3628311241638483, + "AC3_5": 0.31439541552900846, + "AC3_6": 0.274955873261516, + "AC3_7": 0.2417531614975395 }, "prompt_2": { - "overall_acc": 0.4383116883116883, + "overall_acc": 0.4310064935064935, "language_acc": { - "English": 0.48863636363636365, - "Filipino": 0.36363636363636365, - "Vietnamese": 0.3181818181818182, - "Chinese": 0.4943181818181818, - "Indonesian": 0.4943181818181818, - "Malay": 0.4375, - "Spanish": 0.4715909090909091 + "Indonesian": 0.48863636363636365, + "English": 0.4659090909090909, + "Filipino": 0.3465909090909091, + "Spanish": 0.4943181818181818, + "Chinese": 0.48863636363636365, + "Malay": 0.4090909090909091, + "Vietnamese": 0.32386363636363635 }, - "consistency_score_2": 0.5638528138528138, - "consistency_score_3": 0.3915584415584415, - "consistency_score_4": 0.2904220779220779, - "consistency_score_5": 0.22077922077922074, - "consistency_score_6": 0.1712662337662338, - "consistency_score_7": 0.13636363636363635, + "consistency_score_2": 0.5546536796536797, + "consistency_score_3": 0.3774350649350649, + "consistency_score_4": 0.27402597402597406, + "consistency_score_5": 0.20427489177489178, + "consistency_score_6": 0.15503246753246752, + "consistency_score_7": 0.11931818181818182, "detailed_consistency_score": { "2_combine": { - "English,Filipino": 0.4715909090909091, + "Indonesian,English": 0.6931818181818182, + "Indonesian,Filipino": 0.4715909090909091, + "Indonesian,Spanish": 0.7159090909090909, + "Indonesian,Chinese": 0.6420454545454546, + "Indonesian,Malay": 0.6988636363636364, + "Indonesian,Vietnamese": 0.5056818181818182, + "English,Filipino": 0.4602272727272727, + "English,Spanish": 0.6363636363636364, + "English,Chinese": 0.5852272727272727, + "English,Malay": 0.5909090909090909, "English,Vietnamese": 0.4431818181818182, - "English,Chinese": 0.6136363636363636, - "English,Indonesian": 0.7102272727272727, - "English,Malay": 0.6420454545454546, - "English,Spanish": 0.6647727272727273, - "Filipino,Vietnamese": 0.5397727272727273, + "Filipino,Spanish": 0.4772727272727273, "Filipino,Chinese": 0.4602272727272727, - "Filipino,Indonesian": 0.5227272727272727, - "Filipino,Malay": 0.4943181818181818, - "Filipino,Spanish": 0.4715909090909091, - "Vietnamese,Chinese": 0.4431818181818182, - "Vietnamese,Indonesian": 0.48863636363636365, - "Vietnamese,Malay": 0.4772727272727273, - "Vietnamese,Spanish": 0.4772727272727273, - "Chinese,Indonesian": 0.6477272727272727, - "Chinese,Malay": 0.5965909090909091, - "Chinese,Spanish": 0.5965909090909091, - "Indonesian,Malay": 0.7159090909090909, - "Indonesian,Spanish": 0.6818181818181818, - "Malay,Spanish": 0.6818181818181818 - }, - "3_combine": { + "Filipino,Malay": 0.5056818181818182, + "Filipino,Vietnamese": 0.5284090909090909, + "Spanish,Chinese": 0.5738636363636364, + "Spanish,Malay": 0.6477272727272727, + "Spanish,Vietnamese": 0.48295454545454547, + "Chinese,Malay": 0.5852272727272727, + "Chinese,Vietnamese": 0.4375, + "Malay,Vietnamese": 0.5056818181818182 + }, + "3_combine": { + "Indonesian,English,Filipino": 0.36363636363636365, + "Indonesian,English,Spanish": 0.5511363636363636, + "Indonesian,English,Chinese": 0.5, + "Indonesian,English,Malay": 0.5113636363636364, + "Indonesian,English,Vietnamese": 0.3465909090909091, + "Indonesian,Filipino,Spanish": 0.375, + "Indonesian,Filipino,Chinese": 0.32954545454545453, + "Indonesian,Filipino,Malay": 0.39204545454545453, + "Indonesian,Filipino,Vietnamese": 0.3125, + "Indonesian,Spanish,Chinese": 0.4943181818181818, + "Indonesian,Spanish,Malay": 0.5454545454545454, + "Indonesian,Spanish,Vietnamese": 0.39204545454545453, + "Indonesian,Chinese,Malay": 0.48863636363636365, + "Indonesian,Chinese,Vietnamese": 0.32954545454545453, + "Indonesian,Malay,Vietnamese": 0.38636363636363635, + "English,Filipino,Spanish": 0.3409090909090909, + "English,Filipino,Chinese": 0.32386363636363635, + "English,Filipino,Malay": 0.3465909090909091, "English,Filipino,Vietnamese": 0.2784090909090909, - "English,Filipino,Chinese": 0.3352272727272727, - "English,Filipino,Indonesian": 0.4034090909090909, - "English,Filipino,Malay": 0.3693181818181818, - "English,Filipino,Spanish": 0.35795454545454547, - "English,Vietnamese,Chinese": 0.32954545454545453, - "English,Vietnamese,Indonesian": 0.3522727272727273, - "English,Vietnamese,Malay": 0.3409090909090909, - "English,Vietnamese,Spanish": 0.3465909090909091, - "English,Chinese,Indonesian": 0.5170454545454546, - "English,Chinese,Malay": 0.45454545454545453, - "English,Chinese,Spanish": 0.48863636363636365, - "English,Indonesian,Malay": 0.5568181818181818, - "English,Indonesian,Spanish": 0.5511363636363636, - "English,Malay,Spanish": 0.5227272727272727, - "Filipino,Vietnamese,Chinese": 0.2897727272727273, - "Filipino,Vietnamese,Indonesian": 0.32386363636363635, - "Filipino,Vietnamese,Malay": 0.3068181818181818, - "Filipino,Vietnamese,Spanish": 0.3068181818181818, - "Filipino,Chinese,Indonesian": 0.36363636363636365, + "English,Spanish,Chinese": 0.44886363636363635, + "English,Spanish,Malay": 0.4602272727272727, + "English,Spanish,Vietnamese": 0.32954545454545453, + "English,Chinese,Malay": 0.4147727272727273, + "English,Chinese,Vietnamese": 0.30113636363636365, + "English,Malay,Vietnamese": 0.32386363636363635, + "Filipino,Spanish,Chinese": 0.3068181818181818, + "Filipino,Spanish,Malay": 0.3693181818181818, + "Filipino,Spanish,Vietnamese": 0.3181818181818182, "Filipino,Chinese,Malay": 0.3181818181818182, - "Filipino,Chinese,Spanish": 0.3125, - "Filipino,Indonesian,Malay": 0.42613636363636365, - "Filipino,Indonesian,Spanish": 0.38636363636363635, - "Filipino,Malay,Spanish": 0.3806818181818182, - "Vietnamese,Chinese,Indonesian": 0.32954545454545453, - "Vietnamese,Chinese,Malay": 0.3068181818181818, - "Vietnamese,Chinese,Spanish": 0.3181818181818182, - "Vietnamese,Indonesian,Malay": 0.375, - "Vietnamese,Indonesian,Spanish": 0.36363636363636365, - "Vietnamese,Malay,Spanish": 0.3693181818181818, - "Chinese,Indonesian,Malay": 0.5056818181818182, - "Chinese,Indonesian,Spanish": 0.48863636363636365, - "Chinese,Malay,Spanish": 0.4659090909090909, - "Indonesian,Malay,Spanish": 0.5625 - }, - "4_combine": { - "English,Filipino,Vietnamese,Chinese": 0.2215909090909091, - "English,Filipino,Vietnamese,Indonesian": 0.23863636363636365, - "English,Filipino,Vietnamese,Malay": 0.2159090909090909, - "English,Filipino,Vietnamese,Spanish": 0.2159090909090909, - "English,Filipino,Chinese,Indonesian": 0.3068181818181818, - "English,Filipino,Chinese,Malay": 0.2727272727272727, - "English,Filipino,Chinese,Spanish": 0.26704545454545453, - "English,Filipino,Indonesian,Malay": 0.3409090909090909, - "English,Filipino,Indonesian,Spanish": 0.3068181818181818, - "English,Filipino,Malay,Spanish": 0.30113636363636365, - "English,Vietnamese,Chinese,Indonesian": 0.2784090909090909, - "English,Vietnamese,Chinese,Malay": 0.26136363636363635, - "English,Vietnamese,Chinese,Spanish": 0.2784090909090909, - "English,Vietnamese,Indonesian,Malay": 0.2897727272727273, - "English,Vietnamese,Indonesian,Spanish": 0.2784090909090909, - "English,Vietnamese,Malay,Spanish": 0.2784090909090909, - "English,Chinese,Indonesian,Malay": 0.42045454545454547, - "English,Chinese,Indonesian,Spanish": 0.42613636363636365, - "English,Chinese,Malay,Spanish": 0.3977272727272727, - "English,Indonesian,Malay,Spanish": 0.4602272727272727, - "Filipino,Vietnamese,Chinese,Indonesian": 0.23863636363636365, - "Filipino,Vietnamese,Chinese,Malay": 0.2159090909090909, - "Filipino,Vietnamese,Chinese,Spanish": 0.21022727272727273, - "Filipino,Vietnamese,Indonesian,Malay": 0.2727272727272727, - "Filipino,Vietnamese,Indonesian,Spanish": 0.25, - "Filipino,Vietnamese,Malay,Spanish": 0.24431818181818182, - "Filipino,Chinese,Indonesian,Malay": 0.30113636363636365, - "Filipino,Chinese,Indonesian,Spanish": 0.2784090909090909, - "Filipino,Chinese,Malay,Spanish": 0.26136363636363635, - "Filipino,Indonesian,Malay,Spanish": 0.3352272727272727, - "Vietnamese,Chinese,Indonesian,Malay": 0.26704545454545453, - "Vietnamese,Chinese,Indonesian,Spanish": 0.2556818181818182, - "Vietnamese,Chinese,Malay,Spanish": 0.26136363636363635, - "Vietnamese,Indonesian,Malay,Spanish": 0.30113636363636365, - "Chinese,Indonesian,Malay,Spanish": 0.4147727272727273 + "Filipino,Chinese,Vietnamese": 0.2897727272727273, + "Filipino,Malay,Vietnamese": 0.3181818181818182, + "Spanish,Chinese,Malay": 0.42613636363636365, + "Spanish,Chinese,Vietnamese": 0.3068181818181818, + "Spanish,Malay,Vietnamese": 0.36363636363636365, + "Chinese,Malay,Vietnamese": 0.3068181818181818 + }, + "4_combine": { + "Indonesian,English,Filipino,Spanish": 0.29545454545454547, + "Indonesian,English,Filipino,Chinese": 0.2840909090909091, + "Indonesian,English,Filipino,Malay": 0.30113636363636365, + "Indonesian,English,Filipino,Vietnamese": 0.22727272727272727, + "Indonesian,English,Spanish,Chinese": 0.4147727272727273, + "Indonesian,English,Spanish,Malay": 0.42613636363636365, + "Indonesian,English,Spanish,Vietnamese": 0.2840909090909091, + "Indonesian,English,Chinese,Malay": 0.3806818181818182, + "Indonesian,English,Chinese,Vietnamese": 0.26136363636363635, + "Indonesian,English,Malay,Vietnamese": 0.2784090909090909, + "Indonesian,Filipino,Spanish,Chinese": 0.26136363636363635, + "Indonesian,Filipino,Spanish,Malay": 0.3125, + "Indonesian,Filipino,Spanish,Vietnamese": 0.2556818181818182, + "Indonesian,Filipino,Chinese,Malay": 0.2784090909090909, + "Indonesian,Filipino,Chinese,Vietnamese": 0.22727272727272727, + "Indonesian,Filipino,Malay,Vietnamese": 0.26704545454545453, + "Indonesian,Spanish,Chinese,Malay": 0.39204545454545453, + "Indonesian,Spanish,Chinese,Vietnamese": 0.26136363636363635, + "Indonesian,Spanish,Malay,Vietnamese": 0.3181818181818182, + "Indonesian,Chinese,Malay,Vietnamese": 0.26136363636363635, + "English,Filipino,Spanish,Chinese": 0.25, + "English,Filipino,Spanish,Malay": 0.25, + "English,Filipino,Spanish,Vietnamese": 0.21022727272727273, + "English,Filipino,Chinese,Malay": 0.26136363636363635, + "English,Filipino,Chinese,Vietnamese": 0.2159090909090909, + "English,Filipino,Malay,Vietnamese": 0.21022727272727273, + "English,Spanish,Chinese,Malay": 0.3465909090909091, + "English,Spanish,Chinese,Vietnamese": 0.24431818181818182, + "English,Spanish,Malay,Vietnamese": 0.24431818181818182, + "English,Chinese,Malay,Vietnamese": 0.22727272727272727, + "Filipino,Spanish,Chinese,Malay": 0.24431818181818182, + "Filipino,Spanish,Chinese,Vietnamese": 0.21022727272727273, + "Filipino,Spanish,Malay,Vietnamese": 0.24431818181818182, + "Filipino,Chinese,Malay,Vietnamese": 0.2159090909090909, + "Spanish,Chinese,Malay,Vietnamese": 0.22727272727272727 }, "5_combine": { - "English,Filipino,Vietnamese,Chinese,Indonesian": 0.19886363636363635, - "English,Filipino,Vietnamese,Chinese,Malay": 0.17613636363636365, - "English,Filipino,Vietnamese,Chinese,Spanish": 0.17613636363636365, - "English,Filipino,Vietnamese,Indonesian,Malay": 0.19886363636363635, - "English,Filipino,Vietnamese,Indonesian,Spanish": 0.18181818181818182, - "English,Filipino,Vietnamese,Malay,Spanish": 0.17613636363636365, - "English,Filipino,Chinese,Indonesian,Malay": 0.26136363636363635, - "English,Filipino,Chinese,Indonesian,Spanish": 0.24431818181818182, - "English,Filipino,Chinese,Malay,Spanish": 0.22727272727272727, - "English,Filipino,Indonesian,Malay,Spanish": 0.2727272727272727, - "English,Vietnamese,Chinese,Indonesian,Malay": 0.22727272727272727, - "English,Vietnamese,Chinese,Indonesian,Spanish": 0.22727272727272727, - "English,Vietnamese,Chinese,Malay,Spanish": 0.22727272727272727, - "English,Vietnamese,Indonesian,Malay,Spanish": 0.23295454545454544, - "English,Chinese,Indonesian,Malay,Spanish": 0.36363636363636365, - "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.20454545454545456, - "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.18181818181818182, - "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.17613636363636365, - "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.2159090909090909, - "Filipino,Chinese,Indonesian,Malay,Spanish": 0.24431818181818182, - "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.2215909090909091 + "Indonesian,English,Filipino,Spanish,Chinese": 0.23295454545454544, + "Indonesian,English,Filipino,Spanish,Malay": 0.23863636363636365, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.18181818181818182, + "Indonesian,English,Filipino,Chinese,Malay": 0.23863636363636365, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.1875, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.1875, + "Indonesian,English,Spanish,Chinese,Malay": 0.32954545454545453, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.2159090909090909, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.22727272727272727, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.20454545454545456, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.2215909090909091, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.17613636363636365, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.2159090909090909, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.19318181818181818, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.21022727272727273, + "English,Filipino,Spanish,Chinese,Malay": 0.19886363636363635, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.16477272727272727, + "English,Filipino,Spanish,Malay,Vietnamese": 0.1534090909090909, + "English,Filipino,Chinese,Malay,Vietnamese": 0.17045454545454544, + "English,Spanish,Chinese,Malay,Vietnamese": 0.18181818181818182, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1590909090909091 }, "6_combine": { - "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.16477272727272727, - "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.1534090909090909, - "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.14772727272727273, - "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.1590909090909091, - "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.2159090909090909, - "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.19318181818181818, - "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.16477272727272727 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.19318181818181818, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.14772727272727273, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.14772727272727273, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.1534090909090909, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.17045454545454544, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14772727272727273, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.125 }, "7_combine": { - "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.13636363636363635 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.11931818181818182 } }, - "AC3_2": 0.49321898399050307, - "AC3_3": 0.4136180720189349, - "AC3_4": 0.3493604857571875, - "AC3_5": 0.2936472394154939, - "AC3_6": 0.24629498327188942, - "AC3_7": 0.20801232662019795 + "AC3_2": 0.4850745603067087, + "AC3_3": 0.40244582220025865, + "AC3_4": 0.33503981622646506, + "AC3_5": 0.27718049620628593, + "AC3_6": 0.22803944666396958, + "AC3_7": 0.18689662104407584 }, "prompt_3": { - "overall_acc": 0.4439935064935065, + "overall_acc": 0.43831168831168826, "language_acc": { - "English": 0.5113636363636364, + "Indonesian": 0.4659090909090909, + "English": 0.5, "Filipino": 0.36363636363636365, - "Vietnamese": 0.3352272727272727, - "Chinese": 0.5, - "Indonesian": 0.48295454545454547, - "Malay": 0.44886363636363635, - "Spanish": 0.4659090909090909 + "Spanish": 0.4715909090909091, + "Chinese": 0.4943181818181818, + "Malay": 0.4318181818181818, + "Vietnamese": 0.3409090909090909 }, - "consistency_score_2": 0.5451839826839827, - "consistency_score_3": 0.36704545454545456, - "consistency_score_4": 0.2637987012987013, - "consistency_score_5": 0.19345238095238096, - "consistency_score_6": 0.14204545454545456, - "consistency_score_7": 0.10227272727272728, + "consistency_score_2": 0.5462662337662337, + "consistency_score_3": 0.36980519480519464, + "consistency_score_4": 0.2683441558441559, + "consistency_score_5": 0.20075757575757583, + "consistency_score_6": 0.15178571428571427, + "consistency_score_7": 0.11363636363636363, "detailed_consistency_score": { "2_combine": { - "English,Filipino": 0.4147727272727273, - "English,Vietnamese": 0.4147727272727273, - "English,Chinese": 0.5909090909090909, - "English,Indonesian": 0.7045454545454546, - "English,Malay": 0.5795454545454546, - "English,Spanish": 0.6136363636363636, - "Filipino,Vietnamese": 0.5056818181818182, + "Indonesian,English": 0.6761363636363636, + "Indonesian,Filipino": 0.4772727272727273, + "Indonesian,Spanish": 0.6988636363636364, + "Indonesian,Chinese": 0.6136363636363636, + "Indonesian,Malay": 0.6875, + "Indonesian,Vietnamese": 0.5113636363636364, + "English,Filipino": 0.4375, + "English,Spanish": 0.625, + "English,Chinese": 0.6022727272727273, + "English,Malay": 0.5738636363636364, + "English,Vietnamese": 0.42045454545454547, + "Filipino,Spanish": 0.44886363636363635, "Filipino,Chinese": 0.4318181818181818, - "Filipino,Indonesian": 0.4772727272727273, - "Filipino,Malay": 0.4772727272727273, - "Filipino,Spanish": 0.45454545454545453, - "Vietnamese,Chinese": 0.4318181818181818, - "Vietnamese,Indonesian": 0.5113636363636364, - "Vietnamese,Malay": 0.5340909090909091, - "Vietnamese,Spanish": 0.5170454545454546, - "Chinese,Indonesian": 0.6136363636363636, - "Chinese,Malay": 0.5681818181818182, - "Chinese,Spanish": 0.5965909090909091, - "Indonesian,Malay": 0.6931818181818182, - "Indonesian,Spanish": 0.6818181818181818, - "Malay,Spanish": 0.6363636363636364 + "Filipino,Malay": 0.5284090909090909, + "Filipino,Vietnamese": 0.5227272727272727, + "Spanish,Chinese": 0.5852272727272727, + "Spanish,Malay": 0.6136363636363636, + "Spanish,Vietnamese": 0.4943181818181818, + "Chinese,Malay": 0.5625, + "Chinese,Vietnamese": 0.42045454545454547, + "Malay,Vietnamese": 0.5397727272727273 }, "3_combine": { - "English,Filipino,Vietnamese": 0.24431818181818182, - "English,Filipino,Chinese": 0.2897727272727273, - "English,Filipino,Indonesian": 0.3465909090909091, - "English,Filipino,Malay": 0.3068181818181818, - "English,Filipino,Spanish": 0.29545454545454547, - "English,Vietnamese,Chinese": 0.29545454545454547, - "English,Vietnamese,Indonesian": 0.35795454545454547, - "English,Vietnamese,Malay": 0.3068181818181818, - "English,Vietnamese,Spanish": 0.3181818181818182, - "English,Chinese,Indonesian": 0.4943181818181818, + "Indonesian,English,Filipino": 0.35795454545454547, + "Indonesian,English,Spanish": 0.5227272727272727, + "Indonesian,English,Chinese": 0.48295454545454547, + "Indonesian,English,Malay": 0.4943181818181818, + "Indonesian,English,Vietnamese": 0.3522727272727273, + "Indonesian,Filipino,Spanish": 0.36363636363636365, + "Indonesian,Filipino,Chinese": 0.3352272727272727, + "Indonesian,Filipino,Malay": 0.4034090909090909, + "Indonesian,Filipino,Vietnamese": 0.3125, + "Indonesian,Spanish,Chinese": 0.4659090909090909, + "Indonesian,Spanish,Malay": 0.5170454545454546, + "Indonesian,Spanish,Vietnamese": 0.3977272727272727, + "Indonesian,Chinese,Malay": 0.4659090909090909, + "Indonesian,Chinese,Vietnamese": 0.3125, + "Indonesian,Malay,Vietnamese": 0.4034090909090909, + "English,Filipino,Spanish": 0.3125, + "English,Filipino,Chinese": 0.3068181818181818, + "English,Filipino,Malay": 0.3465909090909091, + "English,Filipino,Vietnamese": 0.26136363636363635, + "English,Spanish,Chinese": 0.44886363636363635, + "English,Spanish,Malay": 0.42613636363636365, + "English,Spanish,Vietnamese": 0.32386363636363635, "English,Chinese,Malay": 0.4147727272727273, - "English,Chinese,Spanish": 0.4375, - "English,Indonesian,Malay": 0.5227272727272727, - "English,Indonesian,Spanish": 0.5170454545454546, - "English,Malay,Spanish": 0.42613636363636365, - "Filipino,Vietnamese,Chinese": 0.26704545454545453, - "Filipino,Vietnamese,Indonesian": 0.3181818181818182, - "Filipino,Vietnamese,Malay": 0.30113636363636365, - "Filipino,Vietnamese,Spanish": 0.30113636363636365, - "Filipino,Chinese,Indonesian": 0.3352272727272727, - "Filipino,Chinese,Malay": 0.2897727272727273, - "Filipino,Chinese,Spanish": 0.3125, - "Filipino,Indonesian,Malay": 0.38636363636363635, - "Filipino,Indonesian,Spanish": 0.36363636363636365, - "Filipino,Malay,Spanish": 0.3352272727272727, - "Vietnamese,Chinese,Indonesian": 0.32954545454545453, - "Vietnamese,Chinese,Malay": 0.3068181818181818, - "Vietnamese,Chinese,Spanish": 0.32386363636363635, - "Vietnamese,Indonesian,Malay": 0.4034090909090909, - "Vietnamese,Indonesian,Spanish": 0.4034090909090909, - "Vietnamese,Malay,Spanish": 0.39204545454545453, - "Chinese,Indonesian,Malay": 0.4659090909090909, - "Chinese,Indonesian,Spanish": 0.4715909090909091, - "Chinese,Malay,Spanish": 0.4318181818181818, - "Indonesian,Malay,Spanish": 0.5340909090909091 + "English,Chinese,Vietnamese": 0.2897727272727273, + "English,Malay,Vietnamese": 0.32954545454545453, + "Filipino,Spanish,Chinese": 0.30113636363636365, + "Filipino,Spanish,Malay": 0.35795454545454547, + "Filipino,Spanish,Vietnamese": 0.2897727272727273, + "Filipino,Chinese,Malay": 0.32386363636363635, + "Filipino,Chinese,Vietnamese": 0.2840909090909091, + "Filipino,Malay,Vietnamese": 0.3465909090909091, + "Spanish,Chinese,Malay": 0.4090909090909091, + "Spanish,Chinese,Vietnamese": 0.29545454545454547, + "Spanish,Malay,Vietnamese": 0.38636363636363635, + "Chinese,Malay,Vietnamese": 0.30113636363636365 }, "4_combine": { - "English,Filipino,Vietnamese,Chinese": 0.1875, - "English,Filipino,Vietnamese,Indonesian": 0.2159090909090909, - "English,Filipino,Vietnamese,Malay": 0.18181818181818182, - "English,Filipino,Vietnamese,Spanish": 0.1875, - "English,Filipino,Chinese,Indonesian": 0.26704545454545453, - "English,Filipino,Chinese,Malay": 0.2215909090909091, - "English,Filipino,Chinese,Spanish": 0.22727272727272727, - "English,Filipino,Indonesian,Malay": 0.2784090909090909, - "English,Filipino,Indonesian,Spanish": 0.26136363636363635, - "English,Filipino,Malay,Spanish": 0.2215909090909091, - "English,Vietnamese,Chinese,Indonesian": 0.2727272727272727, - "English,Vietnamese,Chinese,Malay": 0.23295454545454544, - "English,Vietnamese,Chinese,Spanish": 0.23863636363636365, - "English,Vietnamese,Indonesian,Malay": 0.2897727272727273, - "English,Vietnamese,Indonesian,Spanish": 0.2840909090909091, - "English,Vietnamese,Malay,Spanish": 0.23863636363636365, - "English,Chinese,Indonesian,Malay": 0.38636363636363635, - "English,Chinese,Indonesian,Spanish": 0.3977272727272727, - "English,Chinese,Malay,Spanish": 0.32954545454545453, - "English,Indonesian,Malay,Spanish": 0.3977272727272727, - "Filipino,Vietnamese,Chinese,Indonesian": 0.2215909090909091, - "Filipino,Vietnamese,Chinese,Malay": 0.19318181818181818, - "Filipino,Vietnamese,Chinese,Spanish": 0.20454545454545456, - "Filipino,Vietnamese,Indonesian,Malay": 0.2556818181818182, - "Filipino,Vietnamese,Indonesian,Spanish": 0.24431818181818182, - "Filipino,Vietnamese,Malay,Spanish": 0.2215909090909091, - "Filipino,Chinese,Indonesian,Malay": 0.2727272727272727, - "Filipino,Chinese,Indonesian,Spanish": 0.2727272727272727, - "Filipino,Chinese,Malay,Spanish": 0.23863636363636365, - "Filipino,Indonesian,Malay,Spanish": 0.30113636363636365, - "Vietnamese,Chinese,Indonesian,Malay": 0.26704545454545453, - "Vietnamese,Chinese,Indonesian,Spanish": 0.26704545454545453, - "Vietnamese,Chinese,Malay,Spanish": 0.25, - "Vietnamese,Indonesian,Malay,Spanish": 0.32386363636363635, - "Chinese,Indonesian,Malay,Spanish": 0.3806818181818182 + "Indonesian,English,Filipino,Spanish": 0.2727272727272727, + "Indonesian,English,Filipino,Chinese": 0.2727272727272727, + "Indonesian,English,Filipino,Malay": 0.3068181818181818, + "Indonesian,English,Filipino,Vietnamese": 0.2159090909090909, + "Indonesian,English,Spanish,Chinese": 0.39204545454545453, + "Indonesian,English,Spanish,Malay": 0.38636363636363635, + "Indonesian,English,Spanish,Vietnamese": 0.2897727272727273, + "Indonesian,English,Chinese,Malay": 0.375, + "Indonesian,English,Chinese,Vietnamese": 0.2556818181818182, + "Indonesian,English,Malay,Vietnamese": 0.2897727272727273, + "Indonesian,Filipino,Spanish,Chinese": 0.26704545454545453, + "Indonesian,Filipino,Spanish,Malay": 0.3068181818181818, + "Indonesian,Filipino,Spanish,Vietnamese": 0.23863636363636365, + "Indonesian,Filipino,Chinese,Malay": 0.2897727272727273, + "Indonesian,Filipino,Chinese,Vietnamese": 0.2159090909090909, + "Indonesian,Filipino,Malay,Vietnamese": 0.2727272727272727, + "Indonesian,Spanish,Chinese,Malay": 0.36363636363636365, + "Indonesian,Spanish,Chinese,Vietnamese": 0.24431818181818182, + "Indonesian,Spanish,Malay,Vietnamese": 0.32954545454545453, + "Indonesian,Chinese,Malay,Vietnamese": 0.2556818181818182, + "English,Filipino,Spanish,Chinese": 0.23295454545454544, + "English,Filipino,Spanish,Malay": 0.24431818181818182, + "English,Filipino,Spanish,Vietnamese": 0.19318181818181818, + "English,Filipino,Chinese,Malay": 0.26136363636363635, + "English,Filipino,Chinese,Vietnamese": 0.19886363636363635, + "English,Filipino,Malay,Vietnamese": 0.2159090909090909, + "English,Spanish,Chinese,Malay": 0.32386363636363635, + "English,Spanish,Chinese,Vietnamese": 0.23295454545454544, + "English,Spanish,Malay,Vietnamese": 0.25, + "English,Chinese,Malay,Vietnamese": 0.23863636363636365, + "Filipino,Spanish,Chinese,Malay": 0.25, + "Filipino,Spanish,Chinese,Vietnamese": 0.19886363636363635, + "Filipino,Spanish,Malay,Vietnamese": 0.24431818181818182, + "Filipino,Chinese,Malay,Vietnamese": 0.22727272727272727, + "Spanish,Chinese,Malay,Vietnamese": 0.23863636363636365 }, "5_combine": { - "English,Filipino,Vietnamese,Chinese,Indonesian": 0.17613636363636365, - "English,Filipino,Vietnamese,Chinese,Malay": 0.14772727272727273, - "English,Filipino,Vietnamese,Chinese,Spanish": 0.14772727272727273, - "English,Filipino,Vietnamese,Indonesian,Malay": 0.17045454545454544, - "English,Filipino,Vietnamese,Indonesian,Spanish": 0.16477272727272727, - "English,Filipino,Vietnamese,Malay,Spanish": 0.13636363636363635, - "English,Filipino,Chinese,Indonesian,Malay": 0.21022727272727273, - "English,Filipino,Chinese,Indonesian,Spanish": 0.2159090909090909, - "English,Filipino,Chinese,Malay,Spanish": 0.17613636363636365, - "English,Filipino,Indonesian,Malay,Spanish": 0.20454545454545456, - "English,Vietnamese,Chinese,Indonesian,Malay": 0.2215909090909091, - "English,Vietnamese,Chinese,Indonesian,Spanish": 0.2215909090909091, - "English,Vietnamese,Chinese,Malay,Spanish": 0.1875, - "English,Vietnamese,Indonesian,Malay,Spanish": 0.2215909090909091, - "English,Chinese,Indonesian,Malay,Spanish": 0.3125, - "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.18181818181818182, - "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.17613636363636365, - "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.1534090909090909, - "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.19886363636363635, - "Filipino,Chinese,Indonesian,Malay,Spanish": 0.2215909090909091, - "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.2159090909090909 + "Indonesian,English,Filipino,Spanish,Chinese": 0.2159090909090909, + "Indonesian,English,Filipino,Spanish,Malay": 0.22727272727272727, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.17045454545454544, + "Indonesian,English,Filipino,Chinese,Malay": 0.23863636363636365, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.17045454545454544, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.19318181818181818, + "Indonesian,English,Spanish,Chinese,Malay": 0.30113636363636365, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.21022727272727273, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.23295454545454544, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.2159090909090909, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.22727272727272727, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.16477272727272727, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.21022727272727273, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.19318181818181818, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.21022727272727273, + "English,Filipino,Spanish,Chinese,Malay": 0.19318181818181818, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.14772727272727273, + "English,Filipino,Spanish,Malay,Vietnamese": 0.1590909090909091, + "English,Filipino,Chinese,Malay,Vietnamese": 0.17613636363636365, + "English,Spanish,Chinese,Malay,Vietnamese": 0.1875, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.17045454545454544 }, "6_combine": { - "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.13636363636363635, - "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.13636363636363635, - "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.11363636363636363, - "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.125, - "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.16477272727272727, - "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.17613636363636365, - "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.14204545454545456 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.18181818181818182, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.13068181818181818, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.14772727272727273, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.1534090909090909, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.17613636363636365, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14772727272727273, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.125 }, "7_combine": { - "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.10227272727272728 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.11363636363636363 } }, - "AC3_2": 0.4894129734649738, - "AC3_3": 0.40186922260129804, - "AC3_4": 0.33095846236239124, - "AC3_5": 0.2694867207920679, - "AC3_6": 0.21523230920527794, - "AC3_7": 0.16625016882003213 + "AC3_2": 0.48637059555450246, + "AC3_3": 0.4011546909805022, + "AC3_4": 0.33288730561759317, + "AC3_5": 0.27538295738897717, + "AC3_6": 0.22548634305473386, + "AC3_7": 0.18048128338976094 }, "prompt_4": { - "overall_acc": 0.4448051948051948, + "overall_acc": 0.4375, "language_acc": { - "English": 0.5170454545454546, - "Filipino": 0.3693181818181818, - "Vietnamese": 0.3806818181818182, + "Indonesian": 0.4659090909090909, + "English": 0.4943181818181818, + "Filipino": 0.3465909090909091, + "Spanish": 0.48295454545454547, "Chinese": 0.4943181818181818, - "Indonesian": 0.4715909090909091, - "Malay": 0.39204545454545453, - "Spanish": 0.48863636363636365 + "Malay": 0.3977272727272727, + "Vietnamese": 0.3806818181818182 }, - "consistency_score_2": 0.5527597402597403, - "consistency_score_3": 0.3732142857142858, - "consistency_score_4": 0.27094155844155854, - "consistency_score_5": 0.20427489177489183, - "consistency_score_6": 0.15827922077922077, - "consistency_score_7": 0.125, + "consistency_score_2": 0.5514069264069262, + "consistency_score_3": 0.37061688311688307, + "consistency_score_4": 0.2641233766233766, + "consistency_score_5": 0.1931818181818182, + "consistency_score_6": 0.14366883116883117, + "consistency_score_7": 0.10795454545454546, "detailed_consistency_score": { "2_combine": { - "English,Filipino": 0.4431818181818182, + "Indonesian,English": 0.6363636363636364, + "Indonesian,Filipino": 0.5, + "Indonesian,Spanish": 0.6988636363636364, + "Indonesian,Chinese": 0.6477272727272727, + "Indonesian,Malay": 0.7272727272727273, + "Indonesian,Vietnamese": 0.5738636363636364, + "English,Filipino": 0.3806818181818182, + "English,Spanish": 0.6079545454545454, + "English,Chinese": 0.5852272727272727, + "English,Malay": 0.5625, "English,Vietnamese": 0.42045454545454547, - "English,Chinese": 0.5965909090909091, - "English,Indonesian": 0.625, - "English,Malay": 0.5738636363636364, - "English,Spanish": 0.6590909090909091, - "Filipino,Vietnamese": 0.5170454545454546, - "Filipino,Chinese": 0.4659090909090909, - "Filipino,Indonesian": 0.5113636363636364, + "Filipino,Spanish": 0.4659090909090909, + "Filipino,Chinese": 0.44886363636363635, "Filipino,Malay": 0.4943181818181818, - "Filipino,Spanish": 0.4715909090909091, - "Vietnamese,Chinese": 0.4772727272727273, - "Vietnamese,Indonesian": 0.5340909090909091, - "Vietnamese,Malay": 0.5284090909090909, - "Vietnamese,Spanish": 0.5170454545454546, - "Chinese,Indonesian": 0.6136363636363636, - "Chinese,Malay": 0.5511363636363636, - "Chinese,Spanish": 0.6363636363636364, - "Indonesian,Malay": 0.7102272727272727, - "Indonesian,Spanish": 0.6704545454545454, - "Malay,Spanish": 0.5909090909090909 + "Filipino,Vietnamese": 0.5056818181818182, + "Spanish,Chinese": 0.6022727272727273, + "Spanish,Malay": 0.6079545454545454, + "Spanish,Vietnamese": 0.5170454545454546, + "Chinese,Malay": 0.5625, + "Chinese,Vietnamese": 0.4943181818181818, + "Malay,Vietnamese": 0.5397727272727273 }, "3_combine": { - "English,Filipino,Vietnamese": 0.26136363636363635, - "English,Filipino,Chinese": 0.32386363636363635, - "English,Filipino,Indonesian": 0.3522727272727273, - "English,Filipino,Malay": 0.3068181818181818, - "English,Filipino,Spanish": 0.3352272727272727, - "English,Vietnamese,Chinese": 0.30113636363636365, - "English,Vietnamese,Indonesian": 0.32386363636363635, - "English,Vietnamese,Malay": 0.30113636363636365, - "English,Vietnamese,Spanish": 0.32954545454545453, - "English,Chinese,Indonesian": 0.4659090909090909, - "English,Chinese,Malay": 0.4090909090909091, - "English,Chinese,Spanish": 0.4943181818181818, - "English,Indonesian,Malay": 0.48863636363636365, - "English,Indonesian,Spanish": 0.5, - "English,Malay,Spanish": 0.4431818181818182, - "Filipino,Vietnamese,Chinese": 0.29545454545454547, - "Filipino,Vietnamese,Indonesian": 0.3352272727272727, - "Filipino,Vietnamese,Malay": 0.3181818181818182, - "Filipino,Vietnamese,Spanish": 0.3125, - "Filipino,Chinese,Indonesian": 0.3409090909090909, - "Filipino,Chinese,Malay": 0.3068181818181818, - "Filipino,Chinese,Spanish": 0.3352272727272727, - "Filipino,Indonesian,Malay": 0.4034090909090909, - "Filipino,Indonesian,Spanish": 0.3806818181818182, - "Filipino,Malay,Spanish": 0.32954545454545453, - "Vietnamese,Chinese,Indonesian": 0.3409090909090909, - "Vietnamese,Chinese,Malay": 0.32954545454545453, - "Vietnamese,Chinese,Spanish": 0.35795454545454547, - "Vietnamese,Indonesian,Malay": 0.42045454545454547, - "Vietnamese,Indonesian,Spanish": 0.3806818181818182, - "Vietnamese,Malay,Spanish": 0.35795454545454547, - "Chinese,Indonesian,Malay": 0.4659090909090909, - "Chinese,Indonesian,Spanish": 0.48863636363636365, - "Chinese,Malay,Spanish": 0.42613636363636365, - "Indonesian,Malay,Spanish": 0.5 - }, - "4_combine": { - "English,Filipino,Vietnamese,Chinese": 0.2159090909090909, - "English,Filipino,Vietnamese,Indonesian": 0.2215909090909091, - "English,Filipino,Vietnamese,Malay": 0.19318181818181818, - "English,Filipino,Vietnamese,Spanish": 0.21022727272727273, - "English,Filipino,Chinese,Indonesian": 0.2727272727272727, - "English,Filipino,Chinese,Malay": 0.23295454545454544, - "English,Filipino,Chinese,Spanish": 0.26704545454545453, - "English,Filipino,Indonesian,Malay": 0.2784090909090909, - "English,Filipino,Indonesian,Spanish": 0.2784090909090909, - "English,Filipino,Malay,Spanish": 0.23295454545454544, - "English,Vietnamese,Chinese,Indonesian": 0.23863636363636365, - "English,Vietnamese,Chinese,Malay": 0.2215909090909091, - "English,Vietnamese,Chinese,Spanish": 0.2556818181818182, - "English,Vietnamese,Indonesian,Malay": 0.2840909090909091, - "English,Vietnamese,Indonesian,Spanish": 0.2727272727272727, - "English,Vietnamese,Malay,Spanish": 0.25, - "English,Chinese,Indonesian,Malay": 0.3693181818181818, - "English,Chinese,Indonesian,Spanish": 0.3977272727272727, - "English,Chinese,Malay,Spanish": 0.3465909090909091, - "English,Indonesian,Malay,Spanish": 0.3977272727272727, - "Filipino,Vietnamese,Chinese,Indonesian": 0.23295454545454544, - "Filipino,Vietnamese,Chinese,Malay": 0.2215909090909091, - "Filipino,Vietnamese,Chinese,Spanish": 0.23295454545454544, - "Filipino,Vietnamese,Indonesian,Malay": 0.2784090909090909, - "Filipino,Vietnamese,Indonesian,Spanish": 0.2556818181818182, - "Filipino,Vietnamese,Malay,Spanish": 0.23295454545454544, - "Filipino,Chinese,Indonesian,Malay": 0.2784090909090909, - "Filipino,Chinese,Indonesian,Spanish": 0.2727272727272727, - "Filipino,Chinese,Malay,Spanish": 0.23295454545454544, - "Filipino,Indonesian,Malay,Spanish": 0.29545454545454547, - "Vietnamese,Chinese,Indonesian,Malay": 0.2840909090909091, - "Vietnamese,Chinese,Indonesian,Spanish": 0.26704545454545453, - "Vietnamese,Chinese,Malay,Spanish": 0.26704545454545453, - "Vietnamese,Indonesian,Malay,Spanish": 0.3181818181818182, - "Chinese,Indonesian,Malay,Spanish": 0.375 + "Indonesian,English,Filipino": 0.32386363636363635, + "Indonesian,English,Spanish": 0.4943181818181818, + "Indonesian,English,Chinese": 0.48295454545454547, + "Indonesian,English,Malay": 0.4943181818181818, + "Indonesian,English,Vietnamese": 0.3522727272727273, + "Indonesian,Filipino,Spanish": 0.38636363636363635, + "Indonesian,Filipino,Chinese": 0.3409090909090909, + "Indonesian,Filipino,Malay": 0.39204545454545453, + "Indonesian,Filipino,Vietnamese": 0.3352272727272727, + "Indonesian,Spanish,Chinese": 0.5, + "Indonesian,Spanish,Malay": 0.5284090909090909, + "Indonesian,Spanish,Vietnamese": 0.4147727272727273, + "Indonesian,Chinese,Malay": 0.48863636363636365, + "Indonesian,Chinese,Vietnamese": 0.38636363636363635, + "Indonesian,Malay,Vietnamese": 0.4431818181818182, + "English,Filipino,Spanish": 0.2840909090909091, + "English,Filipino,Chinese": 0.2840909090909091, + "English,Filipino,Malay": 0.2784090909090909, + "English,Filipino,Vietnamese": 0.23295454545454544, + "English,Spanish,Chinese": 0.4431818181818182, + "English,Spanish,Malay": 0.42045454545454547, + "English,Spanish,Vietnamese": 0.3181818181818182, + "English,Chinese,Malay": 0.4034090909090909, + "English,Chinese,Vietnamese": 0.3068181818181818, + "English,Malay,Vietnamese": 0.3068181818181818, + "Filipino,Spanish,Chinese": 0.3068181818181818, + "Filipino,Spanish,Malay": 0.3352272727272727, + "Filipino,Spanish,Vietnamese": 0.30113636363636365, + "Filipino,Chinese,Malay": 0.2897727272727273, + "Filipino,Chinese,Vietnamese": 0.2897727272727273, + "Filipino,Malay,Vietnamese": 0.3181818181818182, + "Spanish,Chinese,Malay": 0.42613636363636365, + "Spanish,Chinese,Vietnamese": 0.3465909090909091, + "Spanish,Malay,Vietnamese": 0.375, + "Chinese,Malay,Vietnamese": 0.3409090909090909 + }, + "4_combine": { + "Indonesian,English,Filipino,Spanish": 0.25, + "Indonesian,English,Filipino,Chinese": 0.2556818181818182, + "Indonesian,English,Filipino,Malay": 0.25, + "Indonesian,English,Filipino,Vietnamese": 0.21022727272727273, + "Indonesian,English,Spanish,Chinese": 0.39204545454545453, + "Indonesian,English,Spanish,Malay": 0.39204545454545453, + "Indonesian,English,Spanish,Vietnamese": 0.2897727272727273, + "Indonesian,English,Chinese,Malay": 0.375, + "Indonesian,English,Chinese,Vietnamese": 0.2727272727272727, + "Indonesian,English,Malay,Vietnamese": 0.2840909090909091, + "Indonesian,Filipino,Spanish,Chinese": 0.2727272727272727, + "Indonesian,Filipino,Spanish,Malay": 0.30113636363636365, + "Indonesian,Filipino,Spanish,Vietnamese": 0.2556818181818182, + "Indonesian,Filipino,Chinese,Malay": 0.26704545454545453, + "Indonesian,Filipino,Chinese,Vietnamese": 0.23863636363636365, + "Indonesian,Filipino,Malay,Vietnamese": 0.26704545454545453, + "Indonesian,Spanish,Chinese,Malay": 0.38636363636363635, + "Indonesian,Spanish,Chinese,Vietnamese": 0.2897727272727273, + "Indonesian,Spanish,Malay,Vietnamese": 0.3352272727272727, + "Indonesian,Chinese,Malay,Vietnamese": 0.3068181818181818, + "English,Filipino,Spanish,Chinese": 0.2159090909090909, + "English,Filipino,Spanish,Malay": 0.21022727272727273, + "English,Filipino,Spanish,Vietnamese": 0.17613636363636365, + "English,Filipino,Chinese,Malay": 0.21022727272727273, + "English,Filipino,Chinese,Vietnamese": 0.19318181818181818, + "English,Filipino,Malay,Vietnamese": 0.17045454545454544, + "English,Spanish,Chinese,Malay": 0.32954545454545453, + "English,Spanish,Chinese,Vietnamese": 0.24431818181818182, + "English,Spanish,Malay,Vietnamese": 0.23863636363636365, + "English,Chinese,Malay,Vietnamese": 0.23295454545454544, + "Filipino,Spanish,Chinese,Malay": 0.22727272727272727, + "Filipino,Spanish,Chinese,Vietnamese": 0.21022727272727273, + "Filipino,Spanish,Malay,Vietnamese": 0.2215909090909091, + "Filipino,Chinese,Malay,Vietnamese": 0.21022727272727273, + "Spanish,Chinese,Malay,Vietnamese": 0.26136363636363635 }, "5_combine": { - "English,Filipino,Vietnamese,Chinese,Indonesian": 0.18181818181818182, - "English,Filipino,Vietnamese,Chinese,Malay": 0.1590909090909091, - "English,Filipino,Vietnamese,Chinese,Spanish": 0.17613636363636365, - "English,Filipino,Vietnamese,Indonesian,Malay": 0.1875, - "English,Filipino,Vietnamese,Indonesian,Spanish": 0.18181818181818182, - "English,Filipino,Vietnamese,Malay,Spanish": 0.1590909090909091, - "English,Filipino,Chinese,Indonesian,Malay": 0.2215909090909091, - "English,Filipino,Chinese,Indonesian,Spanish": 0.2215909090909091, - "English,Filipino,Chinese,Malay,Spanish": 0.1875, - "English,Filipino,Indonesian,Malay,Spanish": 0.2159090909090909, - "English,Vietnamese,Chinese,Indonesian,Malay": 0.21022727272727273, - "English,Vietnamese,Chinese,Indonesian,Spanish": 0.20454545454545456, - "English,Vietnamese,Chinese,Malay,Spanish": 0.19318181818181818, - "English,Vietnamese,Indonesian,Malay,Spanish": 0.23863636363636365, - "English,Chinese,Indonesian,Malay,Spanish": 0.3181818181818182, - "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.20454545454545456, - "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.1875, - "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.17613636363636365, - "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.2159090909090909, - "Filipino,Chinese,Indonesian,Malay,Spanish": 0.2159090909090909, - "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.23295454545454544 + "Indonesian,English,Filipino,Spanish,Chinese": 0.19886363636363635, + "Indonesian,English,Filipino,Spanish,Malay": 0.19318181818181818, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.16477272727272727, + "Indonesian,English,Filipino,Chinese,Malay": 0.19886363636363635, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.17613636363636365, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.1590909090909091, + "Indonesian,English,Spanish,Chinese,Malay": 0.3125, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.2215909090909091, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.23295454545454544, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.2215909090909091, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.2159090909090909, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.1875, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.20454545454545456, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.19318181818181818, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.23863636363636365, + "English,Filipino,Spanish,Chinese,Malay": 0.16477272727272727, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.14772727272727273, + "English,Filipino,Spanish,Malay,Vietnamese": 0.13068181818181818, + "English,Filipino,Chinese,Malay,Vietnamese": 0.14772727272727273, + "English,Spanish,Chinese,Malay,Vietnamese": 0.1875, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1590909090909091 }, "6_combine": { - "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, - "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.14772727272727273, - "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.13068181818181818, - "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.1534090909090909, - "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.17613636363636365, - "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.18181818181818182, - "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.16477272727272727 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.1590909090909091, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.13636363636363635, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.125, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.13636363636363635, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.18181818181818182, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.1534090909090909, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.11363636363636363 }, "7_combine": { - "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.125 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.10795454545454546 } }, - "AC3_2": 0.49294115155731894, - "AC3_3": 0.40587701636520457, - "AC3_4": 0.336756575198979, - "AC3_5": 0.27997325721232436, - "AC3_6": 0.23347782765176514, - "AC3_7": 0.19515669512244546 + "AC3_2": 0.48789329680428845, + "AC3_3": 0.40129067893788767, + "AC3_4": 0.32939032850464955, + "AC3_5": 0.2680180179755215, + "AC3_6": 0.21630586588456857, + "AC3_7": 0.1731770833015842 }, "prompt_5": { - "overall_acc": 0.4375000000000001, + "overall_acc": 0.4391233766233766, "language_acc": { - "English": 0.5170454545454546, - "Filipino": 0.3465909090909091, - "Vietnamese": 0.3693181818181818, - "Chinese": 0.4772727272727273, - "Indonesian": 0.4602272727272727, - "Malay": 0.4147727272727273, - "Spanish": 0.4772727272727273 + "Indonesian": 0.4715909090909091, + "English": 0.5113636363636364, + "Filipino": 0.3352272727272727, + "Spanish": 0.4943181818181818, + "Chinese": 0.48295454545454547, + "Malay": 0.39204545454545453, + "Vietnamese": 0.38636363636363635 }, - "consistency_score_2": 0.5392316017316018, - "consistency_score_3": 0.3577922077922079, - "consistency_score_4": 0.2540584415584416, - "consistency_score_5": 0.1856060606060606, + "consistency_score_2": 0.5462662337662338, + "consistency_score_3": 0.3625, + "consistency_score_4": 0.25649350649350655, + "consistency_score_5": 0.18614718614718617, "consistency_score_6": 0.13555194805194806, "consistency_score_7": 0.09659090909090909, "detailed_consistency_score": { "2_combine": { - "English,Filipino": 0.3977272727272727, - "English,Vietnamese": 0.4602272727272727, - "English,Chinese": 0.5284090909090909, - "English,Indonesian": 0.6363636363636364, - "English,Malay": 0.6079545454545454, - "English,Spanish": 0.6136363636363636, - "Filipino,Vietnamese": 0.5056818181818182, - "Filipino,Chinese": 0.4034090909090909, - "Filipino,Indonesian": 0.45454545454545453, - "Filipino,Malay": 0.4715909090909091, - "Filipino,Spanish": 0.45454545454545453, - "Vietnamese,Chinese": 0.4659090909090909, - "Vietnamese,Indonesian": 0.5397727272727273, - "Vietnamese,Malay": 0.5227272727272727, - "Vietnamese,Spanish": 0.5625, - "Chinese,Indonesian": 0.6079545454545454, - "Chinese,Malay": 0.5227272727272727, - "Chinese,Spanish": 0.5738636363636364, - "Indonesian,Malay": 0.7045454545454546, - "Indonesian,Spanish": 0.6818181818181818, - "Malay,Spanish": 0.6079545454545454 + "Indonesian,English": 0.6306818181818182, + "Indonesian,Filipino": 0.4659090909090909, + "Indonesian,Spanish": 0.6875, + "Indonesian,Chinese": 0.6477272727272727, + "Indonesian,Malay": 0.6931818181818182, + "Indonesian,Vietnamese": 0.5397727272727273, + "English,Filipino": 0.4090909090909091, + "English,Spanish": 0.6534090909090909, + "English,Chinese": 0.5625, + "English,Malay": 0.5738636363636364, + "English,Vietnamese": 0.4431818181818182, + "Filipino,Spanish": 0.42045454545454547, + "Filipino,Chinese": 0.4375, + "Filipino,Malay": 0.4659090909090909, + "Filipino,Vietnamese": 0.5113636363636364, + "Spanish,Chinese": 0.5738636363636364, + "Spanish,Malay": 0.6193181818181818, + "Spanish,Vietnamese": 0.5511363636363636, + "Chinese,Malay": 0.5568181818181818, + "Chinese,Vietnamese": 0.5, + "Malay,Vietnamese": 0.5284090909090909 }, "3_combine": { - "English,Filipino,Vietnamese": 0.25, - "English,Filipino,Chinese": 0.25, - "English,Filipino,Indonesian": 0.30113636363636365, - "English,Filipino,Malay": 0.30113636363636365, - "English,Filipino,Spanish": 0.29545454545454547, - "English,Vietnamese,Chinese": 0.30113636363636365, - "English,Vietnamese,Indonesian": 0.35795454545454547, - "English,Vietnamese,Malay": 0.3352272727272727, - "English,Vietnamese,Spanish": 0.3522727272727273, - "English,Chinese,Indonesian": 0.4375, - "English,Chinese,Malay": 0.3693181818181818, - "English,Chinese,Spanish": 0.4147727272727273, - "English,Indonesian,Malay": 0.5056818181818182, - "English,Indonesian,Spanish": 0.4943181818181818, - "English,Malay,Spanish": 0.44886363636363635, - "Filipino,Vietnamese,Chinese": 0.2727272727272727, - "Filipino,Vietnamese,Indonesian": 0.3068181818181818, - "Filipino,Vietnamese,Malay": 0.29545454545454547, - "Filipino,Vietnamese,Spanish": 0.3181818181818182, - "Filipino,Chinese,Indonesian": 0.29545454545454547, - "Filipino,Chinese,Malay": 0.26704545454545453, - "Filipino,Chinese,Spanish": 0.2784090909090909, - "Filipino,Indonesian,Malay": 0.375, - "Filipino,Indonesian,Spanish": 0.3409090909090909, - "Filipino,Malay,Spanish": 0.32386363636363635, - "Vietnamese,Chinese,Indonesian": 0.3465909090909091, - "Vietnamese,Chinese,Malay": 0.3068181818181818, - "Vietnamese,Chinese,Spanish": 0.3409090909090909, - "Vietnamese,Indonesian,Malay": 0.4147727272727273, - "Vietnamese,Indonesian,Spanish": 0.4090909090909091, - "Vietnamese,Malay,Spanish": 0.375, - "Chinese,Indonesian,Malay": 0.4602272727272727, - "Chinese,Indonesian,Spanish": 0.4659090909090909, - "Chinese,Malay,Spanish": 0.39204545454545453, - "Indonesian,Malay,Spanish": 0.5227272727272727 + "Indonesian,English,Filipino": 0.3181818181818182, + "Indonesian,English,Spanish": 0.5056818181818182, + "Indonesian,English,Chinese": 0.4602272727272727, + "Indonesian,English,Malay": 0.4715909090909091, + "Indonesian,English,Vietnamese": 0.3465909090909091, + "Indonesian,Filipino,Spanish": 0.32954545454545453, + "Indonesian,Filipino,Chinese": 0.3181818181818182, + "Indonesian,Filipino,Malay": 0.375, + "Indonesian,Filipino,Vietnamese": 0.3181818181818182, + "Indonesian,Spanish,Chinese": 0.48295454545454547, + "Indonesian,Spanish,Malay": 0.5227272727272727, + "Indonesian,Spanish,Vietnamese": 0.4147727272727273, + "Indonesian,Chinese,Malay": 0.4772727272727273, + "Indonesian,Chinese,Vietnamese": 0.375, + "Indonesian,Malay,Vietnamese": 0.4090909090909091, + "English,Filipino,Spanish": 0.2897727272727273, + "English,Filipino,Chinese": 0.26704545454545453, + "English,Filipino,Malay": 0.2897727272727273, + "English,Filipino,Vietnamese": 0.23863636363636365, + "English,Spanish,Chinese": 0.4375, + "English,Spanish,Malay": 0.44886363636363635, + "English,Spanish,Vietnamese": 0.3522727272727273, + "English,Chinese,Malay": 0.375, + "English,Chinese,Vietnamese": 0.3125, + "English,Malay,Vietnamese": 0.3068181818181818, + "Filipino,Spanish,Chinese": 0.26704545454545453, + "Filipino,Spanish,Malay": 0.3125, + "Filipino,Spanish,Vietnamese": 0.29545454545454547, + "Filipino,Chinese,Malay": 0.2784090909090909, + "Filipino,Chinese,Vietnamese": 0.3068181818181818, + "Filipino,Malay,Vietnamese": 0.30113636363636365, + "Spanish,Chinese,Malay": 0.4147727272727273, + "Spanish,Chinese,Vietnamese": 0.3522727272727273, + "Spanish,Malay,Vietnamese": 0.38636363636363635, + "Chinese,Malay,Vietnamese": 0.32954545454545453 }, "4_combine": { - "English,Filipino,Vietnamese,Chinese": 0.18181818181818182, - "English,Filipino,Vietnamese,Indonesian": 0.21022727272727273, - "English,Filipino,Vietnamese,Malay": 0.1875, - "English,Filipino,Vietnamese,Spanish": 0.19886363636363635, - "English,Filipino,Chinese,Indonesian": 0.22727272727272727, + "Indonesian,English,Filipino,Spanish": 0.24431818181818182, + "Indonesian,English,Filipino,Chinese": 0.24431818181818182, + "Indonesian,English,Filipino,Malay": 0.25, + "Indonesian,English,Filipino,Vietnamese": 0.21022727272727273, + "Indonesian,English,Spanish,Chinese": 0.3806818181818182, + "Indonesian,English,Spanish,Malay": 0.3977272727272727, + "Indonesian,English,Spanish,Vietnamese": 0.2897727272727273, + "Indonesian,English,Chinese,Malay": 0.3352272727272727, + "Indonesian,English,Chinese,Vietnamese": 0.26136363636363635, + "Indonesian,English,Malay,Vietnamese": 0.2727272727272727, + "Indonesian,Filipino,Spanish,Chinese": 0.23863636363636365, + "Indonesian,Filipino,Spanish,Malay": 0.2784090909090909, + "Indonesian,Filipino,Spanish,Vietnamese": 0.23863636363636365, + "Indonesian,Filipino,Chinese,Malay": 0.25, + "Indonesian,Filipino,Chinese,Vietnamese": 0.23863636363636365, + "Indonesian,Filipino,Malay,Vietnamese": 0.24431818181818182, + "Indonesian,Spanish,Chinese,Malay": 0.38636363636363635, + "Indonesian,Spanish,Chinese,Vietnamese": 0.29545454545454547, + "Indonesian,Spanish,Malay,Vietnamese": 0.3409090909090909, + "Indonesian,Chinese,Malay,Vietnamese": 0.2897727272727273, + "English,Filipino,Spanish,Chinese": 0.21022727272727273, + "English,Filipino,Spanish,Malay": 0.2159090909090909, + "English,Filipino,Spanish,Vietnamese": 0.18181818181818182, "English,Filipino,Chinese,Malay": 0.19318181818181818, - "English,Filipino,Chinese,Spanish": 0.20454545454545456, - "English,Filipino,Indonesian,Malay": 0.26136363636363635, - "English,Filipino,Indonesian,Spanish": 0.24431818181818182, - "English,Filipino,Malay,Spanish": 0.22727272727272727, - "English,Vietnamese,Chinese,Indonesian": 0.2556818181818182, - "English,Vietnamese,Chinese,Malay": 0.2215909090909091, - "English,Vietnamese,Chinese,Spanish": 0.24431818181818182, - "English,Vietnamese,Indonesian,Malay": 0.30113636363636365, - "English,Vietnamese,Indonesian,Spanish": 0.2840909090909091, - "English,Vietnamese,Malay,Spanish": 0.26136363636363635, - "English,Chinese,Indonesian,Malay": 0.3522727272727273, - "English,Chinese,Indonesian,Spanish": 0.36363636363636365, - "English,Chinese,Malay,Spanish": 0.3125, - "English,Indonesian,Malay,Spanish": 0.4147727272727273, - "Filipino,Vietnamese,Chinese,Indonesian": 0.21022727272727273, - "Filipino,Vietnamese,Chinese,Malay": 0.18181818181818182, - "Filipino,Vietnamese,Chinese,Spanish": 0.19886363636363635, - "Filipino,Vietnamese,Indonesian,Malay": 0.23863636363636365, - "Filipino,Vietnamese,Indonesian,Spanish": 0.24431818181818182, - "Filipino,Vietnamese,Malay,Spanish": 0.2215909090909091, - "Filipino,Chinese,Indonesian,Malay": 0.24431818181818182, - "Filipino,Chinese,Indonesian,Spanish": 0.23295454545454544, - "Filipino,Chinese,Malay,Spanish": 0.19318181818181818, - "Filipino,Indonesian,Malay,Spanish": 0.2840909090909091, - "Vietnamese,Chinese,Indonesian,Malay": 0.2840909090909091, - "Vietnamese,Chinese,Indonesian,Spanish": 0.2784090909090909, - "Vietnamese,Chinese,Malay,Spanish": 0.23295454545454544, - "Vietnamese,Indonesian,Malay,Spanish": 0.32954545454545453, - "Chinese,Indonesian,Malay,Spanish": 0.3693181818181818 + "English,Filipino,Chinese,Vietnamese": 0.19318181818181818, + "English,Filipino,Malay,Vietnamese": 0.16477272727272727, + "English,Spanish,Chinese,Malay": 0.32386363636363635, + "English,Spanish,Chinese,Vietnamese": 0.25, + "English,Spanish,Malay,Vietnamese": 0.2556818181818182, + "English,Chinese,Malay,Vietnamese": 0.21022727272727273, + "Filipino,Spanish,Chinese,Malay": 0.20454545454545456, + "Filipino,Spanish,Chinese,Vietnamese": 0.20454545454545456, + "Filipino,Spanish,Malay,Vietnamese": 0.2159090909090909, + "Filipino,Chinese,Malay,Vietnamese": 0.21022727272727273, + "Spanish,Chinese,Malay,Vietnamese": 0.2556818181818182 }, "5_combine": { - "English,Filipino,Vietnamese,Chinese,Indonesian": 0.16477272727272727, - "English,Filipino,Vietnamese,Chinese,Malay": 0.13068181818181818, - "English,Filipino,Vietnamese,Chinese,Spanish": 0.14204545454545456, - "English,Filipino,Vietnamese,Indonesian,Malay": 0.17045454545454544, - "English,Filipino,Vietnamese,Indonesian,Spanish": 0.17045454545454544, - "English,Filipino,Vietnamese,Malay,Spanish": 0.14772727272727273, - "English,Filipino,Chinese,Indonesian,Malay": 0.1875, - "English,Filipino,Chinese,Indonesian,Spanish": 0.1875, - "English,Filipino,Chinese,Malay,Spanish": 0.1534090909090909, - "English,Filipino,Indonesian,Malay,Spanish": 0.21022727272727273, - "English,Vietnamese,Chinese,Indonesian,Malay": 0.2159090909090909, - "English,Vietnamese,Chinese,Indonesian,Spanish": 0.21022727272727273, - "English,Vietnamese,Chinese,Malay,Spanish": 0.17613636363636365, - "English,Vietnamese,Indonesian,Malay,Spanish": 0.24431818181818182, - "English,Chinese,Indonesian,Malay,Spanish": 0.3068181818181818, - "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.16477272727272727, - "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.17045454545454544, - "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.13068181818181818, - "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.19318181818181818, - "Filipino,Chinese,Indonesian,Malay,Spanish": 0.19318181818181818, - "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.22727272727272727 + "Indonesian,English,Filipino,Spanish,Chinese": 0.19318181818181818, + "Indonesian,English,Filipino,Spanish,Malay": 0.19886363636363635, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.16477272727272727, + "Indonesian,English,Filipino,Chinese,Malay": 0.18181818181818182, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.17613636363636365, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.1534090909090909, + "Indonesian,English,Spanish,Chinese,Malay": 0.30113636363636365, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.2159090909090909, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.23863636363636365, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.19318181818181818, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.19886363636363635, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.18181818181818182, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.19318181818181818, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.18181818181818182, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.24431818181818182, + "English,Filipino,Spanish,Chinese,Malay": 0.1590909090909091, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.14772727272727273, + "English,Filipino,Spanish,Malay,Vietnamese": 0.13068181818181818, + "English,Filipino,Chinese,Malay,Vietnamese": 0.13068181818181818, + "English,Spanish,Chinese,Malay,Vietnamese": 0.17613636363636365, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14772727272727273 }, "6_combine": { - "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.125, - "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.13068181818181818, - "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.09659090909090909, - "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.13636363636363635, - "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.1534090909090909, - "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.17613636363636365, - "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.13068181818181818 + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.1534090909090909, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.13636363636363635, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.125, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.11931818181818182, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.17045454545454544, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.14204545454545456, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.10227272727272728 }, "7_combine": { - "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.09659090909090909 + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.09659090909090909 } }, - "AC3_2": 0.4830678669865536, - "AC3_3": 0.39365176561696485, - "AC3_4": 0.3214495304699501, - "AC3_5": 0.2606382978305115, - "AC3_6": 0.20697592064376855, - "AC3_7": 0.15824468082143503 + "AC3_2": 0.48686990516893125, + "AC3_3": 0.397149655681522, + "AC3_4": 0.3238342753629844, + "AC3_5": 0.26145987284168354, + "AC3_6": 0.2071570731167882, + "AC3_7": 0.15835055093462852 } }, "sg_eval": { @@ -127415,7 +127580,7 @@ "accuracy": 0.6116504854368932 }, "prompt_4": { - "accuracy": 0.5922330097087378 + "accuracy": 0.5728155339805825 }, "prompt_5": { "accuracy": 0.6310679611650486 @@ -127426,13 +127591,13 @@ "accuracy": 0.4 }, "prompt_2": { - "accuracy": 0.4 + "accuracy": 0.38095238095238093 }, "prompt_3": { "accuracy": 0.41904761904761906 }, "prompt_4": { - "accuracy": 0.38095238095238093 + "accuracy": 0.3904761904761905 }, "prompt_5": { "accuracy": 0.44761904761904764 @@ -127440,24 +127605,24 @@ }, "us_eval": { "prompt_1": { - "accuracy": 0.5981308411214953 + "accuracy": 0.6074766355140186 }, "prompt_2": { - "accuracy": 0.6355140186915887 + "accuracy": 0.6074766355140186 }, "prompt_3": { "accuracy": 0.6448598130841121 }, "prompt_4": { - "accuracy": 0.6448598130841121 + "accuracy": 0.6542056074766355 }, "prompt_5": { - "accuracy": 0.6635514018691588 + "accuracy": 0.6542056074766355 } }, "ph_eval": { "prompt_1": { - "accuracy": 0.39, + "accuracy": 0.38, "category_acc": { "brand": 0.4, "demographics": 0.0, @@ -127466,31 +127631,31 @@ "literature": 0.2, "politics": 0.6, "culture": 0.8, - "film": 0.5, + "film": 0.4, "law": 0.2, "geography": 0.7 } }, "prompt_2": { - "accuracy": 0.38, + "accuracy": 0.39, "category_acc": { "brand": 0.4, - "demographics": 0.0, - "biology": 0.2, + "demographics": 0.2, + "biology": 0.3, "history": 0.2, "literature": 0.2, "politics": 0.6, - "culture": 0.7, + "culture": 0.6, "film": 0.5, "law": 0.2, "geography": 0.7 } }, "prompt_3": { - "accuracy": 0.35, + "accuracy": 0.36, "category_acc": { "brand": 0.3, - "demographics": 0.0, + "demographics": 0.2, "biology": 0.4, "history": 0.26666666666666666, "literature": 0.2, @@ -127517,13 +127682,13 @@ } }, "prompt_5": { - "accuracy": 0.36, + "accuracy": 0.38, "category_acc": { "brand": 0.4, "demographics": 0.0, "biology": 0.2, - "history": 0.2, - "literature": 0.1, + "history": 0.26666666666666666, + "literature": 0.2, "politics": 0.6, "culture": 0.6, "film": 0.4, @@ -127534,379 +127699,379 @@ }, "sing2eng": { "prompt_1": { - "bleu_score": 0.25859969958242307 + "bleu_score": 0.2574017356444316 }, "prompt_2": { - "bleu_score": 0.2700184145316593 + "bleu_score": 0.2697397453044214 }, "prompt_3": { - "bleu_score": 0.27268833004417553 + "bleu_score": 0.2710806888883188 }, "prompt_4": { - "bleu_score": 0.25234038638468387 + "bleu_score": 0.252810003719897 }, "prompt_5": { - "bleu_score": 0.2254835066581723 + "bleu_score": 0.223438870981906 } }, "indommlu": { "prompt_1": { - "accuracy": 0.4956272114293344, + "accuracy": 0.49495961012083584, "category_acc": { - "History": 0.4839357429718876, - "Geography": 0.4714285714285714, + "History": 0.4819277108433735, + "Geography": 0.46938775510204084, "Lampungic": 0.3877551020408163, - "Social science": 0.7312186978297162, - "Balinese": 0.34394904458598724, + "Social science": 0.7328881469115192, + "Balinese": 0.34182590233545646, "Makassarese": 0.34946236559139787, - "Banjarese": 0.3888888888888889, + "Banjarese": 0.4027777777777778, "Chemistry": 0.32554744525547447, - "Biology": 0.4378698224852071, - "Science": 0.6140350877192983, - "Christian religion": 0.5870646766169154, + "Biology": 0.43431952662721895, + "Science": 0.6171310629514963, + "Christian religion": 0.5920398009950248, "Art": 0.6039933444259568, - "Islam religion": 0.6031294452347084, + "Islam religion": 0.6002844950213371, "Hindu religion": 0.5066666666666667, "Madurese": 0.3152542372881356, - "Sport": 0.5405405405405406, - "Indonesian language": 0.571917808219178, - "Physics": 0.4222222222222222, - "Minangkabau culture": 0.3969849246231156, + "Sport": 0.5337837837837838, + "Indonesian language": 0.5703611457036114, + "Physics": 0.4202020202020202, + "Minangkabau culture": 0.39195979899497485, "Dayak language": 0.26605504587155965, - "Sociology": 0.4838709677419355, - "Economy": 0.4713114754098361, + "Sociology": 0.4879032258064516, + "Economy": 0.4733606557377049, "Sundanese": 0.3777009507346586, - "Javanese": 0.3558467741935484, - "Civic education": 0.597997138769671 + "Javanese": 0.3538306451612903, + "Civic education": 0.5951359084406295 } }, "prompt_2": { - "accuracy": 0.4966953735229321, + "accuracy": 0.4969624140463315, "category_acc": { - "History": 0.4839357429718876, - "Geography": 0.4714285714285714, - "Lampungic": 0.3333333333333333, - "Social science": 0.7412353923205343, - "Balinese": 0.3333333333333333, - "Makassarese": 0.3655913978494624, - "Banjarese": 0.3611111111111111, + "History": 0.4799196787148594, + "Geography": 0.47346938775510206, + "Lampungic": 0.3469387755102041, + "Social science": 0.7445742904841403, + "Balinese": 0.3375796178343949, + "Makassarese": 0.3602150537634409, + "Banjarese": 0.3680555555555556, "Chemistry": 0.32408759124087594, - "Biology": 0.4437869822485207, - "Science": 0.6233230134158927, - "Christian religion": 0.6169154228855721, - "Art": 0.6156405990016639, - "Islam religion": 0.6372688477951636, + "Biology": 0.44260355029585796, + "Science": 0.6243550051599587, + "Christian religion": 0.6119402985074627, + "Art": 0.6173044925124792, + "Islam religion": 0.6287339971550497, "Hindu religion": 0.52, - "Madurese": 0.31186440677966104, - "Sport": 0.5202702702702703, - "Indonesian language": 0.5594645080946451, - "Physics": 0.402020202020202, - "Minangkabau culture": 0.38190954773869346, - "Dayak language": 0.26605504587155965, - "Sociology": 0.4838709677419355, - "Economy": 0.4692622950819672, - "Sundanese": 0.38720829732065687, - "Javanese": 0.375, + "Madurese": 0.3152542372881356, + "Sport": 0.527027027027027, + "Indonesian language": 0.5625778331257784, + "Physics": 0.40404040404040403, + "Minangkabau culture": 0.3768844221105528, + "Dayak language": 0.27522935779816515, + "Sociology": 0.48185483870967744, + "Economy": 0.4651639344262295, + "Sundanese": 0.38547968885047534, + "Javanese": 0.3719758064516129, "Civic education": 0.597997138769671 } }, "prompt_3": { - "accuracy": 0.493490887242139, + "accuracy": 0.49302356632619, "category_acc": { - "History": 0.4678714859437751, - "Geography": 0.45714285714285713, - "Lampungic": 0.3333333333333333, + "History": 0.4738955823293173, + "Geography": 0.463265306122449, + "Lampungic": 0.3401360544217687, "Social science": 0.7345575959933222, - "Balinese": 0.3099787685774947, - "Makassarese": 0.3655913978494624, - "Banjarese": 0.4166666666666667, - "Chemistry": 0.31386861313868614, + "Balinese": 0.31210191082802546, + "Makassarese": 0.3548387096774194, + "Banjarese": 0.4097222222222222, + "Chemistry": 0.31532846715328466, "Biology": 0.44023668639053254, - "Science": 0.6191950464396285, - "Christian religion": 0.6119402985074627, - "Art": 0.6123128119800333, - "Islam religion": 0.6273115220483642, - "Hindu religion": 0.5, - "Madurese": 0.30847457627118646, - "Sport": 0.5337837837837838, - "Indonesian language": 0.5610211706102117, - "Physics": 0.4121212121212121, - "Minangkabau culture": 0.41708542713567837, - "Dayak language": 0.30275229357798167, - "Sociology": 0.4717741935483871, - "Economy": 0.4569672131147541, - "Sundanese": 0.3837510803802939, - "Javanese": 0.3790322580645161, + "Science": 0.6181630546955624, + "Christian religion": 0.6069651741293532, + "Art": 0.610648918469218, + "Islam religion": 0.6244665718349929, + "Hindu religion": 0.5133333333333333, + "Madurese": 0.3050847457627119, + "Sport": 0.527027027027027, + "Indonesian language": 0.5619551681195517, + "Physics": 0.4080808080808081, + "Minangkabau culture": 0.40703517587939697, + "Dayak language": 0.3211009174311927, + "Sociology": 0.4798387096774194, + "Economy": 0.45901639344262296, + "Sundanese": 0.37251512532411407, + "Javanese": 0.3780241935483871, "Civic education": 0.5851216022889842 } }, "prompt_4": { - "accuracy": 0.4932906068495894, + "accuracy": 0.4936244075038387, "category_acc": { - "History": 0.4799196787148594, - "Geography": 0.4530612244897959, - "Lampungic": 0.36054421768707484, + "History": 0.4738955823293173, + "Geography": 0.45510204081632655, + "Lampungic": 0.35374149659863946, "Social science": 0.7245409015025042, - "Balinese": 0.3375796178343949, - "Makassarese": 0.3817204301075269, - "Banjarese": 0.3958333333333333, + "Balinese": 0.34182590233545646, + "Makassarese": 0.3763440860215054, + "Banjarese": 0.3888888888888889, "Chemistry": 0.3167883211678832, - "Biology": 0.4437869822485207, - "Science": 0.6171310629514963, - "Christian religion": 0.6019900497512438, + "Biology": 0.4461538461538462, + "Science": 0.6160990712074303, + "Christian religion": 0.6069651741293532, "Art": 0.6073211314475874, - "Islam religion": 0.6073968705547653, + "Islam religion": 0.6116642958748222, "Hindu religion": 0.5, - "Madurese": 0.288135593220339, - "Sport": 0.5405405405405406, - "Indonesian language": 0.5666251556662516, - "Physics": 0.40404040404040403, + "Madurese": 0.2847457627118644, + "Sport": 0.5337837837837838, + "Indonesian language": 0.5675591531755916, + "Physics": 0.402020202020202, "Minangkabau culture": 0.38190954773869346, "Dayak language": 0.27522935779816515, - "Sociology": 0.4899193548387097, - "Economy": 0.4692622950819672, - "Sundanese": 0.37683664649956783, - "Javanese": 0.3659274193548387, - "Civic education": 0.592274678111588 + "Sociology": 0.49193548387096775, + "Economy": 0.4672131147540984, + "Sundanese": 0.3777009507346586, + "Javanese": 0.3629032258064516, + "Civic education": 0.5994277539341917 } }, "prompt_5": { - "accuracy": 0.4938246878963883, + "accuracy": 0.4945590493357367, "category_acc": { - "History": 0.4819277108433735, - "Geography": 0.44693877551020406, - "Lampungic": 0.35374149659863946, - "Social science": 0.7128547579298832, + "History": 0.4779116465863454, + "Geography": 0.4448979591836735, + "Lampungic": 0.3469387755102041, + "Social science": 0.7145242070116862, "Balinese": 0.3333333333333333, - "Makassarese": 0.3655913978494624, + "Makassarese": 0.3709677419354839, "Banjarese": 0.4097222222222222, - "Chemistry": 0.32554744525547447, - "Biology": 0.4414201183431953, + "Chemistry": 0.32408759124087594, + "Biology": 0.44260355029585796, "Science": 0.6202270381836945, - "Christian religion": 0.6218905472636815, - "Art": 0.6006655574043261, + "Christian religion": 0.6268656716417911, + "Art": 0.6023294509151415, "Islam religion": 0.6031294452347084, "Hindu religion": 0.5, - "Madurese": 0.3220338983050847, + "Madurese": 0.3152542372881356, "Sport": 0.5540540540540541, - "Indonesian language": 0.5675591531755916, - "Physics": 0.4080808080808081, - "Minangkabau culture": 0.39195979899497485, + "Indonesian language": 0.5691158156911582, + "Physics": 0.41414141414141414, + "Minangkabau culture": 0.3969849246231156, "Dayak language": 0.28440366972477066, "Sociology": 0.4717741935483871, - "Economy": 0.4713114754098361, - "Sundanese": 0.3777009507346586, + "Economy": 0.4672131147540984, + "Sundanese": 0.3802938634399309, "Javanese": 0.37399193548387094, - "Civic education": 0.586552217453505 + "Civic education": 0.5908440629470673 } } }, "flores_ind2eng": { "prompt_1": { - "bleu_score": 0.2945734165524307 + "bleu_score": 0.2904210030825523 }, "prompt_2": { - "bleu_score": 0.3692955172898761 + "bleu_score": 0.36955498730132713 }, "prompt_3": { - "bleu_score": 0.364828613086595 + "bleu_score": 0.36547048717499736 }, "prompt_4": { - "bleu_score": 0.3655536803035773 + "bleu_score": 0.3656155033164473 }, "prompt_5": { - "bleu_score": 0.36007330333317294 + "bleu_score": 0.3613834087838442 } }, "flores_vie2eng": { "prompt_1": { - "bleu_score": 0.1403105435966192 + "bleu_score": 0.1361309104530537 }, "prompt_2": { - "bleu_score": 0.22544940331341598 + "bleu_score": 0.22571437892643548 }, "prompt_3": { - "bleu_score": 0.22079608054335573 + "bleu_score": 0.2207638338470559 }, "prompt_4": { - "bleu_score": 0.22036498649478095 + "bleu_score": 0.21907103091395946 }, "prompt_5": { - "bleu_score": 0.1860033226221875 + "bleu_score": 0.18624794146447873 } }, "flores_zho2eng": { "prompt_1": { - "bleu_score": 0.21427609114132906 + "bleu_score": 0.2141645930096053 }, "prompt_2": { - "bleu_score": 0.22296732240907358 + "bleu_score": 0.22386319490546705 }, "prompt_3": { - "bleu_score": 0.22041723456504964 + "bleu_score": 0.22159896503568985 }, "prompt_4": { - "bleu_score": 0.2200073679825278 + "bleu_score": 0.22119680625556593 }, "prompt_5": { - "bleu_score": 0.2075904622418602 + "bleu_score": 0.20898856132141055 } }, "flores_zsm2eng": { "prompt_1": { - "bleu_score": 0.20811074696851103 + "bleu_score": 0.20246195784882284 }, "prompt_2": { - "bleu_score": 0.3429854927377655 + "bleu_score": 0.34339277822910913 }, "prompt_3": { - "bleu_score": 0.34166049307528756 + "bleu_score": 0.3414705878211933 }, "prompt_4": { - "bleu_score": 0.3417635271630678 + "bleu_score": 0.3421444160411124 }, "prompt_5": { - "bleu_score": 0.313538277245778 + "bleu_score": 0.31648254812909576 } }, "mmlu": { "prompt_1": { - "accuracy": 0.5484247374562428 + "accuracy": 0.544924154025671 }, "prompt_2": { "accuracy": 0.5694282380396732 }, "prompt_3": { - "accuracy": 0.5495915985997666 + "accuracy": 0.5472578763127188 }, "prompt_4": { - "accuracy": 0.5565927654609102 + "accuracy": 0.5530921820303384 }, "prompt_5": { - "accuracy": 0.5239206534422404 + "accuracy": 0.5390898483080513 } }, "mmlu_full": { "prompt_1": { - "accuracy": 0.557311405076868, + "accuracy": 0.5572398998927423, "category_acc": { - "high_school_european_history": 0.7012195121951219, - "business_ethics": 0.6161616161616161, - "clinical_knowledge": 0.6401515151515151, + "high_school_european_history": 0.7073170731707317, + "business_ethics": 0.6262626262626263, + "clinical_knowledge": 0.6363636363636364, "medical_genetics": 0.6262626262626263, "high_school_us_history": 0.7438423645320197, - "high_school_physics": 0.3, + "high_school_physics": 0.26666666666666666, "high_school_world_history": 0.7288135593220338, "virology": 0.4666666666666667, - "high_school_microeconomics": 0.5611814345991561, - "econometrics": 0.36283185840707965, - "college_computer_science": 0.5656565656565656, - "high_school_biology": 0.6634304207119741, - "abstract_algebra": 0.3434343434343434, - "professional_accounting": 0.41637010676156583, - "philosophy": 0.5935483870967742, - "professional_medicine": 0.6531365313653137, - "nutrition": 0.5836065573770491, - "global_facts": 0.3434343434343434, - "machine_learning": 0.40540540540540543, - "security_studies": 0.5819672131147541, - "public_relations": 0.6513761467889908, - "professional_psychology": 0.5368248772504092, - "prehistory": 0.5851393188854489, + "high_school_microeconomics": 0.5864978902953587, + "econometrics": 0.34513274336283184, + "college_computer_science": 0.5555555555555556, + "high_school_biology": 0.6699029126213593, + "abstract_algebra": 0.35353535353535354, + "professional_accounting": 0.4128113879003559, + "philosophy": 0.5903225806451613, + "professional_medicine": 0.6494464944649446, + "nutrition": 0.5868852459016394, + "global_facts": 0.35353535353535354, + "machine_learning": 0.3963963963963964, + "security_studies": 0.5737704918032787, + "public_relations": 0.6605504587155964, + "professional_psychology": 0.5417348608837971, + "prehistory": 0.5882352941176471, "anatomy": 0.5298507462686567, - "human_sexuality": 0.6538461538461539, + "human_sexuality": 0.6384615384615384, "college_medicine": 0.6104651162790697, - "high_school_government_and_politics": 0.8072916666666666, - "college_chemistry": 0.3939393939393939, + "high_school_government_and_politics": 0.8020833333333334, + "college_chemistry": 0.37373737373737376, "logical_fallacies": 0.7037037037037037, - "high_school_geography": 0.7411167512690355, - "elementary_mathematics": 0.3713527851458886, - "human_aging": 0.6576576576576577, + "high_school_geography": 0.7258883248730964, + "elementary_mathematics": 0.3740053050397878, + "human_aging": 0.6441441441441441, "college_mathematics": 0.3434343434343434, - "high_school_psychology": 0.7849264705882353, + "high_school_psychology": 0.7867647058823529, "formal_logic": 0.48, - "high_school_statistics": 0.4046511627906977, - "international_law": 0.7083333333333334, - "high_school_mathematics": 0.30855018587360594, - "high_school_computer_science": 0.5858585858585859, - "conceptual_physics": 0.46153846153846156, - "miscellaneous": 0.789002557544757, - "high_school_chemistry": 0.4306930693069307, - "marketing": 0.8454935622317596, - "professional_law": 0.42204827136333983, - "management": 0.7549019607843137, + "high_school_statistics": 0.413953488372093, + "international_law": 0.7, + "high_school_mathematics": 0.3159851301115242, + "high_school_computer_science": 0.5959595959595959, + "conceptual_physics": 0.45726495726495725, + "miscellaneous": 0.7902813299232737, + "high_school_chemistry": 0.43564356435643564, + "marketing": 0.8412017167381974, + "professional_law": 0.4187866927592955, + "management": 0.7647058823529411, "college_physics": 0.36633663366336633, - "jurisprudence": 0.616822429906542, - "world_religions": 0.8294117647058824, - "sociology": 0.77, + "jurisprudence": 0.6261682242990654, + "world_religions": 0.8411764705882353, + "sociology": 0.765, "us_foreign_policy": 0.7474747474747475, - "high_school_macroeconomics": 0.532133676092545, + "high_school_macroeconomics": 0.5295629820051414, "computer_security": 0.7272727272727273, - "moral_scenarios": 0.2516778523489933, + "moral_scenarios": 0.25727069351230425, "moral_disputes": 0.6202898550724638, - "electrical_engineering": 0.5069444444444444, - "astronomy": 0.5761589403973509, - "college_biology": 0.6293706293706294 + "electrical_engineering": 0.5138888888888888, + "astronomy": 0.5695364238410596, + "college_biology": 0.6223776223776224 } }, "prompt_2": { - "accuracy": 0.5629603146228102, + "accuracy": 0.5602431176260279, "category_acc": { "high_school_european_history": 0.7195121951219512, - "business_ethics": 0.5656565656565656, - "clinical_knowledge": 0.6628787878787878, - "medical_genetics": 0.6363636363636364, + "business_ethics": 0.5555555555555556, + "clinical_knowledge": 0.6553030303030303, + "medical_genetics": 0.6262626262626263, "high_school_us_history": 0.7536945812807881, - "high_school_physics": 0.3333333333333333, - "high_school_world_history": 0.7457627118644068, - "virology": 0.48484848484848486, - "high_school_microeconomics": 0.6160337552742616, - "econometrics": 0.37168141592920356, - "college_computer_science": 0.5555555555555556, - "high_school_biology": 0.6828478964401294, - "abstract_algebra": 0.3333333333333333, - "professional_accounting": 0.4234875444839858, - "philosophy": 0.6129032258064516, - "professional_medicine": 0.6457564575645757, - "nutrition": 0.580327868852459, - "global_facts": 0.35353535353535354, - "machine_learning": 0.3963963963963964, - "security_studies": 0.6229508196721312, + "high_school_physics": 0.32666666666666666, + "high_school_world_history": 0.7372881355932204, + "virology": 0.4727272727272727, + "high_school_microeconomics": 0.6033755274261603, + "econometrics": 0.3805309734513274, + "college_computer_science": 0.5454545454545454, + "high_school_biology": 0.6763754045307443, + "abstract_algebra": 0.31313131313131315, + "professional_accounting": 0.4234875444839858, + "philosophy": 0.6096774193548387, + "professional_medicine": 0.6346863468634686, + "nutrition": 0.5770491803278689, + "global_facts": 0.36363636363636365, + "machine_learning": 0.4144144144144144, + "security_studies": 0.6311475409836066, "public_relations": 0.6697247706422018, - "professional_psychology": 0.5450081833060556, - "prehistory": 0.6160990712074303, + "professional_psychology": 0.5482815057283142, + "prehistory": 0.6130030959752322, "anatomy": 0.5298507462686567, - "human_sexuality": 0.6538461538461539, - "college_medicine": 0.5872093023255814, - "high_school_government_and_politics": 0.8020833333333334, + "human_sexuality": 0.6384615384615384, + "college_medicine": 0.5988372093023255, + "high_school_government_and_politics": 0.796875, "college_chemistry": 0.42424242424242425, - "logical_fallacies": 0.691358024691358, - "high_school_geography": 0.7563451776649747, - "elementary_mathematics": 0.38461538461538464, - "human_aging": 0.6351351351351351, + "logical_fallacies": 0.6975308641975309, + "high_school_geography": 0.751269035532995, + "elementary_mathematics": 0.3819628647214854, + "human_aging": 0.6306306306306306, "college_mathematics": 0.30303030303030304, - "high_school_psychology": 0.7904411764705882, + "high_school_psychology": 0.7849264705882353, "formal_logic": 0.464, - "high_school_statistics": 0.4511627906976744, + "high_school_statistics": 0.4372093023255814, "international_law": 0.7, "high_school_mathematics": 0.2788104089219331, "high_school_computer_science": 0.5959595959595959, "conceptual_physics": 0.48717948717948717, - "miscellaneous": 0.7902813299232737, - "high_school_chemistry": 0.4306930693069307, - "marketing": 0.8540772532188842, - "professional_law": 0.3979125896934116, + "miscellaneous": 0.789002557544757, + "high_school_chemistry": 0.4405940594059406, + "marketing": 0.8583690987124464, + "professional_law": 0.39399869536855836, "management": 0.7647058823529411, - "college_physics": 0.42574257425742573, - "jurisprudence": 0.6448598130841121, - "world_religions": 0.8235294117647058, - "sociology": 0.785, - "us_foreign_policy": 0.7777777777777778, - "high_school_macroeconomics": 0.5501285347043702, + "college_physics": 0.4158415841584158, + "jurisprudence": 0.6542056074766355, + "world_religions": 0.8176470588235294, + "sociology": 0.77, + "us_foreign_policy": 0.7676767676767676, + "high_school_macroeconomics": 0.5424164524421594, "computer_security": 0.7171717171717171, - "moral_scenarios": 0.25615212527964204, - "moral_disputes": 0.6376811594202898, - "electrical_engineering": 0.5416666666666666, + "moral_scenarios": 0.25279642058165547, + "moral_disputes": 0.6434782608695652, + "electrical_engineering": 0.5347222222222222, "astronomy": 0.5629139072847682, "college_biology": 0.6713286713286714 } @@ -127914,210 +128079,210 @@ "prompt_3": { "accuracy": 0.5461565963532357, "category_acc": { - "high_school_european_history": 0.7012195121951219, + "high_school_european_history": 0.6951219512195121, "business_ethics": 0.6161616161616161, - "clinical_knowledge": 0.6060606060606061, - "medical_genetics": 0.6363636363636364, - "high_school_us_history": 0.7536945812807881, - "high_school_physics": 0.30666666666666664, - "high_school_world_history": 0.7288135593220338, - "virology": 0.47878787878787876, - "high_school_microeconomics": 0.5738396624472574, - "econometrics": 0.336283185840708, - "college_computer_science": 0.5454545454545454, + "clinical_knowledge": 0.6212121212121212, + "medical_genetics": 0.6262626262626263, + "high_school_us_history": 0.7586206896551724, + "high_school_physics": 0.31333333333333335, + "high_school_world_history": 0.7245762711864406, + "virology": 0.4727272727272727, + "high_school_microeconomics": 0.6118143459915611, + "econometrics": 0.36283185840707965, + "college_computer_science": 0.5353535353535354, "high_school_biology": 0.6472491909385113, - "abstract_algebra": 0.30303030303030304, - "professional_accounting": 0.41637010676156583, + "abstract_algebra": 0.29292929292929293, + "professional_accounting": 0.40213523131672596, "philosophy": 0.603225806451613, - "professional_medicine": 0.6199261992619927, - "nutrition": 0.5573770491803278, - "global_facts": 0.3434343434343434, - "machine_learning": 0.3783783783783784, - "security_studies": 0.6147540983606558, - "public_relations": 0.6788990825688074, + "professional_medicine": 0.6125461254612546, + "nutrition": 0.5540983606557377, + "global_facts": 0.35353535353535354, + "machine_learning": 0.3963963963963964, + "security_studies": 0.6065573770491803, + "public_relations": 0.6880733944954128, "professional_psychology": 0.5270049099836334, "prehistory": 0.5882352941176471, - "anatomy": 0.5149253731343284, - "human_sexuality": 0.6230769230769231, + "anatomy": 0.4925373134328358, + "human_sexuality": 0.6153846153846154, "college_medicine": 0.5523255813953488, - "high_school_government_and_politics": 0.78125, - "college_chemistry": 0.41414141414141414, + "high_school_government_and_politics": 0.7708333333333334, + "college_chemistry": 0.42424242424242425, "logical_fallacies": 0.6604938271604939, - "high_school_geography": 0.700507614213198, - "elementary_mathematics": 0.3660477453580902, - "human_aging": 0.6396396396396397, - "college_mathematics": 0.29292929292929293, - "high_school_psychology": 0.7463235294117647, - "formal_logic": 0.472, - "high_school_statistics": 0.413953488372093, - "international_law": 0.7, - "high_school_mathematics": 0.29739776951672864, - "high_school_computer_science": 0.5555555555555556, - "conceptual_physics": 0.452991452991453, - "miscellaneous": 0.7787723785166241, - "high_school_chemistry": 0.4207920792079208, - "marketing": 0.7896995708154506, - "professional_law": 0.410958904109589, + "high_school_geography": 0.7055837563451777, + "elementary_mathematics": 0.3740053050397878, + "human_aging": 0.6261261261261262, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.7536764705882353, + "formal_logic": 0.48, + "high_school_statistics": 0.4046511627906977, + "international_law": 0.6916666666666667, + "high_school_mathematics": 0.30111524163568776, + "high_school_computer_science": 0.5757575757575758, + "conceptual_physics": 0.44017094017094016, + "miscellaneous": 0.7774936061381074, + "high_school_chemistry": 0.43564356435643564, + "marketing": 0.7939914163090128, + "professional_law": 0.4090019569471624, "management": 0.7549019607843137, "college_physics": 0.3564356435643564, - "jurisprudence": 0.6822429906542056, - "world_religions": 0.8058823529411765, - "sociology": 0.735, - "us_foreign_policy": 0.7272727272727273, - "high_school_macroeconomics": 0.5115681233933161, - "computer_security": 0.696969696969697, - "moral_scenarios": 0.25279642058165547, + "jurisprudence": 0.6635514018691588, + "world_religions": 0.8176470588235294, + "sociology": 0.715, + "us_foreign_policy": 0.7171717171717171, + "high_school_macroeconomics": 0.519280205655527, + "computer_security": 0.6868686868686869, + "moral_scenarios": 0.25615212527964204, "moral_disputes": 0.6144927536231884, - "electrical_engineering": 0.5277777777777778, - "astronomy": 0.5562913907284768, - "college_biology": 0.6433566433566433 + "electrical_engineering": 0.5347222222222222, + "astronomy": 0.5496688741721855, + "college_biology": 0.6153846153846154 } }, "prompt_4": { - "accuracy": 0.5483017518770111, + "accuracy": 0.5447264926707186, "category_acc": { - "high_school_european_history": 0.7073170731707317, + "high_school_european_history": 0.7134146341463414, "business_ethics": 0.5858585858585859, - "clinical_knowledge": 0.5984848484848485, - "medical_genetics": 0.6363636363636364, - "high_school_us_history": 0.7536945812807881, - "high_school_physics": 0.32666666666666666, - "high_school_world_history": 0.7076271186440678, - "virology": 0.48484848484848486, - "high_school_microeconomics": 0.5822784810126582, - "econometrics": 0.34513274336283184, - "college_computer_science": 0.5454545454545454, - "high_school_biology": 0.6504854368932039, - "abstract_algebra": 0.30303030303030304, - "professional_accounting": 0.42704626334519574, - "philosophy": 0.603225806451613, - "professional_medicine": 0.6236162361623616, - "nutrition": 0.5934426229508196, + "clinical_knowledge": 0.5909090909090909, + "medical_genetics": 0.6262626262626263, + "high_school_us_history": 0.7586206896551724, + "high_school_physics": 0.29333333333333333, + "high_school_world_history": 0.7161016949152542, + "virology": 0.47878787878787876, + "high_school_microeconomics": 0.6033755274261603, + "econometrics": 0.35398230088495575, + "college_computer_science": 0.5252525252525253, + "high_school_biology": 0.6601941747572816, + "abstract_algebra": 0.29292929292929293, + "professional_accounting": 0.4128113879003559, + "philosophy": 0.5838709677419355, + "professional_medicine": 0.6051660516605166, + "nutrition": 0.6, "global_facts": 0.35353535353535354, - "machine_learning": 0.3783783783783784, - "security_studies": 0.5737704918032787, + "machine_learning": 0.35135135135135137, + "security_studies": 0.5942622950819673, "public_relations": 0.6422018348623854, - "professional_psychology": 0.513911620294599, + "professional_psychology": 0.5122749590834698, "prehistory": 0.6037151702786377, - "anatomy": 0.5149253731343284, - "human_sexuality": 0.6307692307692307, - "college_medicine": 0.5581395348837209, - "high_school_government_and_politics": 0.78125, - "college_chemistry": 0.40404040404040403, - "logical_fallacies": 0.691358024691358, + "anatomy": 0.5074626865671642, + "human_sexuality": 0.6461538461538462, + "college_medicine": 0.5523255813953488, + "high_school_government_and_politics": 0.765625, + "college_chemistry": 0.42424242424242425, + "logical_fallacies": 0.6728395061728395, "high_school_geography": 0.7360406091370558, - "elementary_mathematics": 0.3660477453580902, - "human_aging": 0.6486486486486487, - "college_mathematics": 0.29292929292929293, - "high_school_psychology": 0.7573529411764706, - "formal_logic": 0.48, - "high_school_statistics": 0.4232558139534884, - "international_law": 0.6833333333333333, - "high_school_mathematics": 0.31226765799256506, - "high_school_computer_science": 0.5757575757575758, - "conceptual_physics": 0.46153846153846156, - "miscellaneous": 0.7659846547314578, - "high_school_chemistry": 0.43564356435643564, - "marketing": 0.8197424892703863, - "professional_law": 0.41030658838878015, - "management": 0.7549019607843137, + "elementary_mathematics": 0.3793103448275862, + "human_aging": 0.6441441441441441, + "college_mathematics": 0.32323232323232326, + "high_school_psychology": 0.7555147058823529, + "formal_logic": 0.488, + "high_school_statistics": 0.3953488372093023, + "international_law": 0.7, + "high_school_mathematics": 0.30111524163568776, + "high_school_computer_science": 0.5959595959595959, + "conceptual_physics": 0.452991452991453, + "miscellaneous": 0.7583120204603581, + "high_school_chemistry": 0.4158415841584158, + "marketing": 0.8240343347639485, + "professional_law": 0.40769732550554466, + "management": 0.7647058823529411, "college_physics": 0.3564356435643564, "jurisprudence": 0.6261682242990654, "world_religions": 0.8176470588235294, - "sociology": 0.76, - "us_foreign_policy": 0.7676767676767676, - "high_school_macroeconomics": 0.519280205655527, - "computer_security": 0.696969696969697, - "moral_scenarios": 0.2516778523489933, - "moral_disputes": 0.6057971014492753, - "electrical_engineering": 0.5416666666666666, - "astronomy": 0.5496688741721855, + "sociology": 0.75, + "us_foreign_policy": 0.7070707070707071, + "high_school_macroeconomics": 0.4910025706940874, + "computer_security": 0.6868686868686869, + "moral_scenarios": 0.2550335570469799, + "moral_disputes": 0.5942028985507246, + "electrical_engineering": 0.5208333333333334, + "astronomy": 0.5562913907284768, "college_biology": 0.6293706293706294 } }, "prompt_5": { - "accuracy": 0.5329281372899535, + "accuracy": 0.5319985698963174, "category_acc": { - "high_school_european_history": 0.6890243902439024, - "business_ethics": 0.5555555555555556, - "clinical_knowledge": 0.5984848484848485, + "high_school_european_history": 0.6829268292682927, + "business_ethics": 0.5757575757575758, + "clinical_knowledge": 0.5871212121212122, "medical_genetics": 0.5858585858585859, - "high_school_us_history": 0.7438423645320197, - "high_school_physics": 0.29333333333333333, + "high_school_us_history": 0.7536945812807881, + "high_school_physics": 0.2733333333333333, "high_school_world_history": 0.7076271186440678, - "virology": 0.4484848484848485, - "high_school_microeconomics": 0.5864978902953587, - "econometrics": 0.3274336283185841, - "college_computer_science": 0.5151515151515151, - "high_school_biology": 0.6537216828478964, + "virology": 0.44242424242424244, + "high_school_microeconomics": 0.5738396624472574, + "econometrics": 0.35398230088495575, + "college_computer_science": 0.5050505050505051, + "high_school_biology": 0.6472491909385113, "abstract_algebra": 0.2727272727272727, - "professional_accounting": 0.40569395017793597, - "philosophy": 0.567741935483871, - "professional_medicine": 0.5904059040590406, - "nutrition": 0.5606557377049181, - "global_facts": 0.37373737373737376, - "machine_learning": 0.38738738738738737, - "security_studies": 0.5819672131147541, - "public_relations": 0.6238532110091743, - "professional_psychology": 0.49918166939443537, - "prehistory": 0.5944272445820433, - "anatomy": 0.5149253731343284, - "human_sexuality": 0.6307692307692307, - "college_medicine": 0.5406976744186046, + "professional_accounting": 0.4199288256227758, + "philosophy": 0.5580645161290323, + "professional_medicine": 0.5977859778597786, + "nutrition": 0.5540983606557377, + "global_facts": 0.36363636363636365, + "machine_learning": 0.3783783783783784, + "security_studies": 0.6024590163934426, + "public_relations": 0.5963302752293578, + "professional_psychology": 0.5090016366612111, + "prehistory": 0.5758513931888545, + "anatomy": 0.4925373134328358, + "human_sexuality": 0.6615384615384615, + "college_medicine": 0.5465116279069767, "high_school_government_and_politics": 0.7552083333333334, - "college_chemistry": 0.3939393939393939, - "logical_fallacies": 0.6604938271604939, - "high_school_geography": 0.7360406091370558, - "elementary_mathematics": 0.3952254641909814, - "human_aging": 0.6261261261261262, - "college_mathematics": 0.30303030303030304, - "high_school_psychology": 0.7279411764705882, - "formal_logic": 0.464, + "college_chemistry": 0.36363636363636365, + "logical_fallacies": 0.6296296296296297, + "high_school_geography": 0.7309644670050761, + "elementary_mathematics": 0.3925729442970822, + "human_aging": 0.6306306306306306, + "college_mathematics": 0.29292929292929293, + "high_school_psychology": 0.7316176470588235, + "formal_logic": 0.48, "high_school_statistics": 0.4232558139534884, - "international_law": 0.675, - "high_school_mathematics": 0.2788104089219331, - "high_school_computer_science": 0.5252525252525253, - "conceptual_physics": 0.452991452991453, - "miscellaneous": 0.7493606138107417, - "high_school_chemistry": 0.4405940594059406, - "marketing": 0.7939914163090128, - "professional_law": 0.41291585127201563, - "management": 0.7254901960784313, - "college_physics": 0.32673267326732675, - "jurisprudence": 0.6355140186915887, - "world_religions": 0.8, + "international_law": 0.6916666666666667, + "high_school_mathematics": 0.2936802973977695, + "high_school_computer_science": 0.5454545454545454, + "conceptual_physics": 0.44871794871794873, + "miscellaneous": 0.7583120204603581, + "high_school_chemistry": 0.4306930693069307, + "marketing": 0.759656652360515, + "professional_law": 0.41226353555120676, + "management": 0.7450980392156863, + "college_physics": 0.3465346534653465, + "jurisprudence": 0.6728971962616822, + "world_religions": 0.7823529411764706, "sociology": 0.755, - "us_foreign_policy": 0.6868686868686869, - "high_school_macroeconomics": 0.4755784061696658, - "computer_security": 0.6868686868686869, - "moral_scenarios": 0.22483221476510068, - "moral_disputes": 0.5681159420289855, + "us_foreign_policy": 0.6767676767676768, + "high_school_macroeconomics": 0.4832904884318766, + "computer_security": 0.6666666666666666, + "moral_scenarios": 0.22930648769574943, + "moral_disputes": 0.5594202898550724, "electrical_engineering": 0.5, - "astronomy": 0.5364238410596026, - "college_biology": 0.6013986013986014 + "astronomy": 0.48344370860927155, + "college_biology": 0.5734265734265734 } } }, "c_eval": { "prompt_1": { - "accuracy": 0.42719167904903416 + "accuracy": 0.4227340267459138 }, "prompt_2": { - "accuracy": 0.42421991084695393 + "accuracy": 0.4279346210995542 }, "prompt_3": { - "accuracy": 0.43164933135215455 + "accuracy": 0.4323922734026746 }, "prompt_4": { - "accuracy": 0.40416047548291234 + "accuracy": 0.4078751857355126 }, "prompt_5": { - "accuracy": 0.4294205052005943 + "accuracy": 0.43313521545319467 } }, "c_eval_full": { "prompt_1": { - "accuracy": 0.42403486924034867, + "accuracy": 0.4246575342465753, "category_acc": { "computer_network": 0.5, "operating_system": 0.5416666666666666, @@ -128126,7 +128291,7 @@ "college_physics": 0.4166666666666667, "college_chemistry": 0.20689655172413793, "advanced_mathematics": 0.375, - "probability_and_statistics": 0.2608695652173913, + "probability_and_statistics": 0.21739130434782608, "discrete_mathematics": 0.47619047619047616, "electrical_engineer": 0.35714285714285715, "metrology_engineer": 0.41379310344827586, @@ -128139,7 +128304,7 @@ "middle_school_physics": 0.625, "middle_school_chemistry": 0.56, "veterinary_medicine": 0.39285714285714285, - "college_economics": 0.4666666666666667, + "college_economics": 0.45, "business_administration": 0.4473684210526316, "marxism": 0.4166666666666667, "mao_zedong_thought": 0.6206896551724138, @@ -128150,23 +128315,23 @@ "middle_school_politics": 0.5384615384615384, "middle_school_geography": 0.47058823529411764, "modern_chinese_history": 0.4642857142857143, - "ideological_and_moral_cultivation": 0.7916666666666666, + "ideological_and_moral_cultivation": 0.75, "logic": 0.48148148148148145, "law": 0.3793103448275862, "chinese_language_and_literature": 0.39285714285714285, "art_studies": 0.3684210526315789, - "professional_tour_guide": 0.47058823529411764, + "professional_tour_guide": 0.5, "legal_professional": 0.5, "high_school_chinese": 0.375, "high_school_history": 0.56, "middle_school_history": 0.6296296296296297, "civil_servant": 0.36538461538461536, - "sports_science": 0.3333333333333333, - "plant_protection": 0.4074074074074074, - "basic_medicine": 0.5, + "sports_science": 0.375, + "plant_protection": 0.4444444444444444, + "basic_medicine": 0.4583333333333333, "clinical_medicine": 0.37037037037037035, "urban_and_rural_planner": 0.45098039215686275, - "accountant": 0.3333333333333333, + "accountant": 0.37037037037037035, "fire_engineer": 0.3888888888888889, "environmental_impact_assessment_engineer": 0.4166666666666667, "tax_accountant": 0.2962962962962963, @@ -128174,7 +128339,7 @@ } }, "prompt_2": { - "accuracy": 0.4277708592777086, + "accuracy": 0.42528019925280197, "category_acc": { "computer_network": 0.5416666666666666, "operating_system": 0.5833333333333334, @@ -128182,24 +128347,24 @@ "college_programming": 0.42857142857142855, "college_physics": 0.375, "college_chemistry": 0.20689655172413793, - "advanced_mathematics": 0.375, + "advanced_mathematics": 0.3333333333333333, "probability_and_statistics": 0.17391304347826086, "discrete_mathematics": 0.42857142857142855, - "electrical_engineer": 0.3333333333333333, + "electrical_engineer": 0.35714285714285715, "metrology_engineer": 0.5172413793103449, - "high_school_mathematics": 0.43478260869565216, + "high_school_mathematics": 0.34782608695652173, "high_school_physics": 0.25, "high_school_chemistry": 0.2916666666666667, "high_school_biology": 0.2916666666666667, "middle_school_mathematics": 0.20833333333333334, - "middle_school_biology": 0.5, + "middle_school_biology": 0.46153846153846156, "middle_school_physics": 0.625, - "middle_school_chemistry": 0.48, - "veterinary_medicine": 0.42857142857142855, + "middle_school_chemistry": 0.52, + "veterinary_medicine": 0.39285714285714285, "college_economics": 0.4166666666666667, "business_administration": 0.42105263157894735, "marxism": 0.4583333333333333, - "mao_zedong_thought": 0.6551724137931034, + "mao_zedong_thought": 0.6896551724137931, "education_science": 0.47058823529411764, "teacher_qualification": 0.6326530612244898, "high_school_politics": 0.375, @@ -128207,23 +128372,23 @@ "middle_school_politics": 0.6538461538461539, "middle_school_geography": 0.29411764705882354, "modern_chinese_history": 0.42857142857142855, - "ideological_and_moral_cultivation": 0.7083333333333334, + "ideological_and_moral_cultivation": 0.6666666666666666, "logic": 0.5185185185185185, "law": 0.3793103448275862, "chinese_language_and_literature": 0.39285714285714285, "art_studies": 0.3684210526315789, "professional_tour_guide": 0.5294117647058824, - "legal_professional": 0.5, + "legal_professional": 0.5357142857142857, "high_school_chinese": 0.375, - "high_school_history": 0.56, + "high_school_history": 0.52, "middle_school_history": 0.6666666666666666, - "civil_servant": 0.3269230769230769, + "civil_servant": 0.34615384615384615, "sports_science": 0.25, - "plant_protection": 0.48148148148148145, - "basic_medicine": 0.5416666666666666, - "clinical_medicine": 0.3333333333333333, - "urban_and_rural_planner": 0.47058823529411764, - "accountant": 0.35185185185185186, + "plant_protection": 0.4444444444444444, + "basic_medicine": 0.5, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.43137254901960786, + "accountant": 0.37037037037037035, "fire_engineer": 0.3611111111111111, "environmental_impact_assessment_engineer": 0.4444444444444444, "tax_accountant": 0.2962962962962963, @@ -128231,7 +128396,7 @@ } }, "prompt_3": { - "accuracy": 0.43773349937733497, + "accuracy": 0.4339975093399751, "category_acc": { "computer_network": 0.5416666666666666, "operating_system": 0.5833333333333334, @@ -128241,9 +128406,9 @@ "college_chemistry": 0.2413793103448276, "advanced_mathematics": 0.375, "probability_and_statistics": 0.21739130434782608, - "discrete_mathematics": 0.42857142857142855, + "discrete_mathematics": 0.47619047619047616, "electrical_engineer": 0.38095238095238093, - "metrology_engineer": 0.4827586206896552, + "metrology_engineer": 0.4482758620689655, "high_school_mathematics": 0.34782608695652173, "high_school_physics": 0.25, "high_school_chemistry": 0.3333333333333333, @@ -128252,11 +128417,11 @@ "middle_school_biology": 0.46153846153846156, "middle_school_physics": 0.6666666666666666, "middle_school_chemistry": 0.52, - "veterinary_medicine": 0.4642857142857143, - "college_economics": 0.4666666666666667, + "veterinary_medicine": 0.42857142857142855, + "college_economics": 0.45, "business_administration": 0.42105263157894735, "marxism": 0.5, - "mao_zedong_thought": 0.6551724137931034, + "mao_zedong_thought": 0.6206896551724138, "education_science": 0.5, "teacher_qualification": 0.6122448979591837, "high_school_politics": 0.4166666666666667, @@ -128274,21 +128439,21 @@ "high_school_chinese": 0.2916666666666667, "high_school_history": 0.52, "middle_school_history": 0.6296296296296297, - "civil_servant": 0.4230769230769231, + "civil_servant": 0.40384615384615385, "sports_science": 0.25, "plant_protection": 0.5185185185185185, "basic_medicine": 0.5, - "clinical_medicine": 0.3333333333333333, - "urban_and_rural_planner": 0.43137254901960786, - "accountant": 0.4074074074074074, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.4117647058823529, + "accountant": 0.3888888888888889, "fire_engineer": 0.3611111111111111, "environmental_impact_assessment_engineer": 0.5, - "tax_accountant": 0.2962962962962963, + "tax_accountant": 0.25925925925925924, "physician": 0.4444444444444444 } }, "prompt_4": { - "accuracy": 0.4178082191780822, + "accuracy": 0.41594022415940224, "category_acc": { "computer_network": 0.4583333333333333, "operating_system": 0.5833333333333334, @@ -128297,26 +128462,26 @@ "college_physics": 0.375, "college_chemistry": 0.1724137931034483, "advanced_mathematics": 0.25, - "probability_and_statistics": 0.21739130434782608, + "probability_and_statistics": 0.2608695652173913, "discrete_mathematics": 0.38095238095238093, - "electrical_engineer": 0.2857142857142857, + "electrical_engineer": 0.30952380952380953, "metrology_engineer": 0.41379310344827586, - "high_school_mathematics": 0.5217391304347826, - "high_school_physics": 0.2916666666666667, - "high_school_chemistry": 0.3333333333333333, + "high_school_mathematics": 0.391304347826087, + "high_school_physics": 0.25, + "high_school_chemistry": 0.375, "high_school_biology": 0.375, - "middle_school_mathematics": 0.375, - "middle_school_biology": 0.46153846153846156, + "middle_school_mathematics": 0.25, + "middle_school_biology": 0.4230769230769231, "middle_school_physics": 0.625, - "middle_school_chemistry": 0.64, + "middle_school_chemistry": 0.6, "veterinary_medicine": 0.39285714285714285, "college_economics": 0.36666666666666664, - "business_administration": 0.39473684210526316, + "business_administration": 0.42105263157894735, "marxism": 0.375, "mao_zedong_thought": 0.6896551724137931, "education_science": 0.5, - "teacher_qualification": 0.5306122448979592, - "high_school_politics": 0.4166666666666667, + "teacher_qualification": 0.5102040816326531, + "high_school_politics": 0.375, "high_school_geography": 0.3333333333333333, "middle_school_politics": 0.5384615384615384, "middle_school_geography": 0.5294117647058824, @@ -128325,18 +128490,18 @@ "logic": 0.48148148148148145, "law": 0.3448275862068966, "chinese_language_and_literature": 0.42857142857142855, - "art_studies": 0.3684210526315789, + "art_studies": 0.39473684210526316, "professional_tour_guide": 0.4411764705882353, "legal_professional": 0.5, "high_school_chinese": 0.3333333333333333, "high_school_history": 0.48, "middle_school_history": 0.6296296296296297, - "civil_servant": 0.3076923076923077, + "civil_servant": 0.34615384615384615, "sports_science": 0.375, "plant_protection": 0.4444444444444444, "basic_medicine": 0.5, "clinical_medicine": 0.4074074074074074, - "urban_and_rural_planner": 0.39215686274509803, + "urban_and_rural_planner": 0.4117647058823529, "accountant": 0.37037037037037035, "fire_engineer": 0.3888888888888889, "environmental_impact_assessment_engineer": 0.3611111111111111, @@ -128345,29 +128510,29 @@ } }, "prompt_5": { - "accuracy": 0.4190535491905355, + "accuracy": 0.4246575342465753, "category_acc": { - "computer_network": 0.5416666666666666, + "computer_network": 0.5833333333333334, "operating_system": 0.5416666666666666, "computer_architecture": 0.4230769230769231, - "college_programming": 0.4523809523809524, + "college_programming": 0.42857142857142855, "college_physics": 0.4166666666666667, "college_chemistry": 0.20689655172413793, - "advanced_mathematics": 0.25, + "advanced_mathematics": 0.375, "probability_and_statistics": 0.21739130434782608, - "discrete_mathematics": 0.3333333333333333, + "discrete_mathematics": 0.2857142857142857, "electrical_engineer": 0.3333333333333333, "metrology_engineer": 0.4482758620689655, - "high_school_mathematics": 0.43478260869565216, + "high_school_mathematics": 0.30434782608695654, "high_school_physics": 0.2916666666666667, "high_school_chemistry": 0.2916666666666667, "high_school_biology": 0.3333333333333333, - "middle_school_mathematics": 0.08333333333333333, - "middle_school_biology": 0.4230769230769231, + "middle_school_mathematics": 0.2916666666666667, + "middle_school_biology": 0.38461538461538464, "middle_school_physics": 0.625, - "middle_school_chemistry": 0.48, + "middle_school_chemistry": 0.44, "veterinary_medicine": 0.39285714285714285, - "college_economics": 0.4666666666666667, + "college_economics": 0.48333333333333334, "business_administration": 0.42105263157894735, "marxism": 0.4166666666666667, "mao_zedong_thought": 0.6896551724137931, @@ -128376,25 +128541,25 @@ "high_school_politics": 0.4583333333333333, "high_school_geography": 0.3333333333333333, "middle_school_politics": 0.5, - "middle_school_geography": 0.47058823529411764, - "modern_chinese_history": 0.35714285714285715, + "middle_school_geography": 0.5294117647058824, + "modern_chinese_history": 0.39285714285714285, "ideological_and_moral_cultivation": 0.7916666666666666, "logic": 0.48148148148148145, "law": 0.3103448275862069, "chinese_language_and_literature": 0.39285714285714285, "art_studies": 0.39473684210526316, - "professional_tour_guide": 0.4411764705882353, + "professional_tour_guide": 0.47058823529411764, "legal_professional": 0.5357142857142857, "high_school_chinese": 0.3333333333333333, - "high_school_history": 0.56, + "high_school_history": 0.52, "middle_school_history": 0.6296296296296297, "civil_servant": 0.36538461538461536, "sports_science": 0.2916666666666667, - "plant_protection": 0.48148148148148145, + "plant_protection": 0.5555555555555556, "basic_medicine": 0.4166666666666667, "clinical_medicine": 0.37037037037037035, "urban_and_rural_planner": 0.43137254901960786, - "accountant": 0.3148148148148148, + "accountant": 0.35185185185185186, "fire_engineer": 0.3888888888888889, "environmental_impact_assessment_engineer": 0.4444444444444444, "tax_accountant": 0.2962962962962963, @@ -128404,379 +128569,379 @@ }, "cmmlu": { "prompt_1": { - "accuracy": 0.40860215053763443 + "accuracy": 0.4157706093189964 }, "prompt_2": { - "accuracy": 0.3942652329749104 + "accuracy": 0.3978494623655914 }, "prompt_3": { - "accuracy": 0.40860215053763443 + "accuracy": 0.4121863799283154 }, "prompt_4": { - "accuracy": 0.3942652329749104 + "accuracy": 0.4014336917562724 }, "prompt_5": { - "accuracy": 0.3870967741935484 + "accuracy": 0.3942652329749104 } }, "cmmlu_full": { "prompt_1": { - "accuracy": 0.41495423933690206, + "accuracy": 0.41504058021067175, "category_acc": { - "agronomy": 0.3431952662721893, - "anatomy": 0.25, + "agronomy": 0.34911242603550297, + "anatomy": 0.22297297297297297, "ancient_chinese": 0.25609756097560976, - "arts": 0.425, + "arts": 0.43125, "astronomy": 0.3090909090909091, "business_ethics": 0.46411483253588515, - "chinese_civil_service_exam": 0.34375, + "chinese_civil_service_exam": 0.3375, "chinese_driving_rule": 0.6183206106870229, "chinese_food_culture": 0.3161764705882353, - "chinese_foreign_policy": 0.4766355140186916, - "chinese_history": 0.43343653250773995, + "chinese_foreign_policy": 0.48598130841121495, + "chinese_history": 0.43653250773993807, "chinese_literature": 0.3088235294117647, "chinese_teacher_qualification": 0.4860335195530726, - "clinical_knowledge": 0.33755274261603374, - "college_actuarial_science": 0.27358490566037735, + "clinical_knowledge": 0.34177215189873417, + "college_actuarial_science": 0.2830188679245283, "college_education": 0.4766355140186916, - "college_engineering_hydrology": 0.46226415094339623, - "college_law": 0.37962962962962965, - "college_mathematics": 0.2857142857142857, + "college_engineering_hydrology": 0.4528301886792453, + "college_law": 0.3888888888888889, + "college_mathematics": 0.2761904761904762, "college_medical_statistics": 0.36792452830188677, - "college_medicine": 0.37362637362637363, + "college_medicine": 0.38095238095238093, "computer_science": 0.47058823529411764, "computer_security": 0.5263157894736842, - "conceptual_physics": 0.43537414965986393, - "construction_project_management": 0.3597122302158273, - "economics": 0.49056603773584906, - "education": 0.4539877300613497, - "electrical_engineering": 0.43023255813953487, + "conceptual_physics": 0.4421768707482993, + "construction_project_management": 0.3669064748201439, + "economics": 0.4968553459119497, + "education": 0.44785276073619634, + "electrical_engineering": 0.42441860465116277, "elementary_chinese": 0.30158730158730157, - "elementary_commonsense": 0.3888888888888889, + "elementary_commonsense": 0.37373737373737376, "elementary_information_and_technology": 0.6176470588235294, - "elementary_mathematics": 0.30869565217391304, - "ethnology": 0.48148148148148145, - "food_science": 0.46153846153846156, + "elementary_mathematics": 0.30434782608695654, + "ethnology": 0.4888888888888889, + "food_science": 0.45454545454545453, "genetics": 0.3409090909090909, - "global_facts": 0.46308724832214765, - "high_school_biology": 0.3431952662721893, + "global_facts": 0.4563758389261745, + "high_school_biology": 0.34911242603550297, "high_school_chemistry": 0.32575757575757575, - "high_school_geography": 0.4067796610169492, - "high_school_mathematics": 0.2804878048780488, - "high_school_physics": 0.3181818181818182, - "high_school_politics": 0.40559440559440557, + "high_school_geography": 0.4152542372881356, + "high_school_mathematics": 0.2621951219512195, + "high_school_physics": 0.3090909090909091, + "high_school_politics": 0.4195804195804196, "human_sexuality": 0.4365079365079365, - "international_law": 0.372972972972973, - "journalism": 0.5, - "jurisprudence": 0.48905109489051096, - "legal_and_moral_basis": 0.7710280373831776, - "logical": 0.36585365853658536, - "machine_learning": 0.30327868852459017, - "management": 0.5238095238095238, - "marketing": 0.5722222222222222, + "international_law": 0.3783783783783784, + "journalism": 0.47674418604651164, + "jurisprudence": 0.49148418491484186, + "legal_and_moral_basis": 0.7757009345794392, + "logical": 0.37398373983739835, + "machine_learning": 0.3114754098360656, + "management": 0.5190476190476191, + "marketing": 0.5666666666666667, "marxist_theory": 0.48148148148148145, - "modern_chinese": 0.35344827586206895, + "modern_chinese": 0.3620689655172414, "nutrition": 0.3793103448275862, - "philosophy": 0.45714285714285713, - "professional_accounting": 0.44, - "professional_law": 0.3412322274881517, + "philosophy": 0.47619047619047616, + "professional_accounting": 0.4342857142857143, + "professional_law": 0.3459715639810427, "professional_medicine": 0.28191489361702127, - "professional_psychology": 0.49137931034482757, - "public_relations": 0.5632183908045977, - "security_study": 0.45185185185185184, - "sociology": 0.5, + "professional_psychology": 0.4827586206896552, + "public_relations": 0.5574712643678161, + "security_study": 0.45925925925925926, + "sociology": 0.504424778761062, "sports_science": 0.41818181818181815, - "traditional_chinese_medicine": 0.31351351351351353, - "virology": 0.3905325443786982, - "world_history": 0.38509316770186336, - "world_religions": 0.40625 + "traditional_chinese_medicine": 0.3027027027027027, + "virology": 0.39644970414201186, + "world_history": 0.391304347826087, + "world_religions": 0.41875 } }, "prompt_2": { - "accuracy": 0.4178034881713003, + "accuracy": 0.41849421516145746, "category_acc": { "agronomy": 0.33727810650887574, - "anatomy": 0.21621621621621623, + "anatomy": 0.22297297297297297, "ancient_chinese": 0.2804878048780488, - "arts": 0.41875, - "astronomy": 0.296969696969697, + "arts": 0.4125, + "astronomy": 0.3090909090909091, "business_ethics": 0.44019138755980863, - "chinese_civil_service_exam": 0.325, - "chinese_driving_rule": 0.6259541984732825, + "chinese_civil_service_exam": 0.33125, + "chinese_driving_rule": 0.6335877862595419, "chinese_food_culture": 0.3088235294117647, "chinese_foreign_policy": 0.4766355140186916, - "chinese_history": 0.4117647058823529, + "chinese_history": 0.4086687306501548, "chinese_literature": 0.3137254901960784, "chinese_teacher_qualification": 0.4972067039106145, - "clinical_knowledge": 0.3628691983122363, - "college_actuarial_science": 0.27358490566037735, - "college_education": 0.514018691588785, - "college_engineering_hydrology": 0.49056603773584906, - "college_law": 0.35185185185185186, - "college_mathematics": 0.26666666666666666, - "college_medical_statistics": 0.3584905660377358, - "college_medicine": 0.3516483516483517, + "clinical_knowledge": 0.3459915611814346, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.5233644859813084, + "college_engineering_hydrology": 0.4811320754716981, + "college_law": 0.37037037037037035, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.3490566037735849, + "college_medicine": 0.34798534798534797, "computer_science": 0.4803921568627451, "computer_security": 0.4853801169590643, "conceptual_physics": 0.4421768707482993, "construction_project_management": 0.38848920863309355, - "economics": 0.48427672955974843, + "economics": 0.4779874213836478, "education": 0.48466257668711654, - "electrical_engineering": 0.4186046511627907, - "elementary_chinese": 0.2976190476190476, - "elementary_commonsense": 0.3838383838383838, - "elementary_information_and_technology": 0.6386554621848739, + "electrical_engineering": 0.43023255813953487, + "elementary_chinese": 0.30158730158730157, + "elementary_commonsense": 0.3888888888888889, + "elementary_information_and_technology": 0.6428571428571429, "elementary_mathematics": 0.3, "ethnology": 0.4962962962962963, - "food_science": 0.44755244755244755, + "food_science": 0.46153846153846156, "genetics": 0.3693181818181818, - "global_facts": 0.4563758389261745, + "global_facts": 0.4429530201342282, "high_school_biology": 0.35502958579881655, "high_school_chemistry": 0.29545454545454547, - "high_school_geography": 0.4152542372881356, - "high_school_mathematics": 0.2682926829268293, - "high_school_physics": 0.3181818181818182, + "high_school_geography": 0.3983050847457627, + "high_school_mathematics": 0.2621951219512195, + "high_school_physics": 0.33636363636363636, "high_school_politics": 0.43356643356643354, - "human_sexuality": 0.4523809523809524, - "international_law": 0.3783783783783784, + "human_sexuality": 0.4444444444444444, + "international_law": 0.3837837837837838, "journalism": 0.5232558139534884, - "jurisprudence": 0.48905109489051096, - "legal_and_moral_basis": 0.7663551401869159, + "jurisprudence": 0.49391727493917276, + "legal_and_moral_basis": 0.7757009345794392, "logical": 0.3983739837398374, "machine_learning": 0.36065573770491804, - "management": 0.5380952380952381, - "marketing": 0.5611111111111111, + "management": 0.5428571428571428, + "marketing": 0.5555555555555556, "marxist_theory": 0.4708994708994709, "modern_chinese": 0.35344827586206895, - "nutrition": 0.4206896551724138, + "nutrition": 0.42758620689655175, "philosophy": 0.45714285714285713, - "professional_accounting": 0.4514285714285714, - "professional_law": 0.3412322274881517, + "professional_accounting": 0.44571428571428573, + "professional_law": 0.33175355450236965, "professional_medicine": 0.3058510638297872, - "professional_psychology": 0.46551724137931033, - "public_relations": 0.5459770114942529, + "professional_psychology": 0.4698275862068966, + "public_relations": 0.5517241379310345, "security_study": 0.5185185185185185, - "sociology": 0.48672566371681414, + "sociology": 0.4823008849557522, "sports_science": 0.41818181818181815, "traditional_chinese_medicine": 0.3081081081081081, - "virology": 0.4260355029585799, - "world_history": 0.39751552795031053, + "virology": 0.4319526627218935, + "world_history": 0.40993788819875776, "world_religions": 0.43125 } }, "prompt_3": { - "accuracy": 0.4170264203073735, + "accuracy": 0.41745812467622173, "category_acc": { - "agronomy": 0.33727810650887574, - "anatomy": 0.21621621621621623, - "ancient_chinese": 0.2682926829268293, + "agronomy": 0.3254437869822485, + "anatomy": 0.20270270270270271, + "ancient_chinese": 0.2621951219512195, "arts": 0.41875, - "astronomy": 0.2606060606060606, - "business_ethics": 0.46411483253588515, - "chinese_civil_service_exam": 0.33125, - "chinese_driving_rule": 0.6106870229007634, + "astronomy": 0.2545454545454545, + "business_ethics": 0.45933014354066987, + "chinese_civil_service_exam": 0.325, + "chinese_driving_rule": 0.5954198473282443, "chinese_food_culture": 0.3161764705882353, "chinese_foreign_policy": 0.48598130841121495, - "chinese_history": 0.3993808049535604, - "chinese_literature": 0.29901960784313725, + "chinese_history": 0.3931888544891641, + "chinese_literature": 0.3137254901960784, "chinese_teacher_qualification": 0.5027932960893855, - "clinical_knowledge": 0.33755274261603374, - "college_actuarial_science": 0.29245283018867924, + "clinical_knowledge": 0.34177215189873417, + "college_actuarial_science": 0.3018867924528302, "college_education": 0.5233644859813084, "college_engineering_hydrology": 0.49056603773584906, - "college_law": 0.3611111111111111, - "college_mathematics": 0.29523809523809524, - "college_medical_statistics": 0.3584905660377358, - "college_medicine": 0.358974358974359, - "computer_science": 0.4852941176470588, - "computer_security": 0.5029239766081871, + "college_law": 0.37037037037037035, + "college_mathematics": 0.2857142857142857, + "college_medical_statistics": 0.3490566037735849, + "college_medicine": 0.34798534798534797, + "computer_science": 0.49019607843137253, + "computer_security": 0.49707602339181284, "conceptual_physics": 0.41496598639455784, "construction_project_management": 0.381294964028777, - "economics": 0.5094339622641509, + "economics": 0.5031446540880503, "education": 0.49693251533742333, - "electrical_engineering": 0.43023255813953487, + "electrical_engineering": 0.436046511627907, "elementary_chinese": 0.30158730158730157, "elementary_commonsense": 0.3888888888888889, - "elementary_information_and_technology": 0.6554621848739496, - "elementary_mathematics": 0.2956521739130435, + "elementary_information_and_technology": 0.6596638655462185, + "elementary_mathematics": 0.3, "ethnology": 0.4740740740740741, - "food_science": 0.46853146853146854, - "genetics": 0.3693181818181818, + "food_science": 0.46153846153846156, + "genetics": 0.375, "global_facts": 0.42953020134228187, "high_school_biology": 0.3609467455621302, - "high_school_chemistry": 0.29545454545454547, - "high_school_geography": 0.3983050847457627, - "high_school_mathematics": 0.2621951219512195, + "high_school_chemistry": 0.30303030303030304, + "high_school_geography": 0.423728813559322, + "high_school_mathematics": 0.27439024390243905, "high_school_physics": 0.34545454545454546, - "high_school_politics": 0.42657342657342656, - "human_sexuality": 0.4523809523809524, - "international_law": 0.3891891891891892, - "journalism": 0.4941860465116279, + "high_school_politics": 0.4195804195804196, + "human_sexuality": 0.46825396825396826, + "international_law": 0.3837837837837838, + "journalism": 0.4883720930232558, "jurisprudence": 0.48418491484184917, - "legal_and_moral_basis": 0.780373831775701, - "logical": 0.3902439024390244, - "machine_learning": 0.36065573770491804, + "legal_and_moral_basis": 0.7757009345794392, + "logical": 0.3821138211382114, + "machine_learning": 0.36885245901639346, "management": 0.5142857142857142, "marketing": 0.5555555555555556, - "marxist_theory": 0.4708994708994709, + "marxist_theory": 0.4656084656084656, "modern_chinese": 0.3620689655172414, - "nutrition": 0.41379310344827586, + "nutrition": 0.4206896551724138, "philosophy": 0.4380952380952381, "professional_accounting": 0.42857142857142855, - "professional_law": 0.3412322274881517, + "professional_law": 0.3459715639810427, "professional_medicine": 0.324468085106383, - "professional_psychology": 0.46551724137931033, - "public_relations": 0.5402298850574713, + "professional_psychology": 0.47844827586206895, + "public_relations": 0.5517241379310345, "security_study": 0.5111111111111111, - "sociology": 0.48672566371681414, + "sociology": 0.4823008849557522, "sports_science": 0.4, - "traditional_chinese_medicine": 0.33513513513513515, + "traditional_chinese_medicine": 0.34054054054054056, "virology": 0.4319526627218935, "world_history": 0.40372670807453415, - "world_religions": 0.39375 + "world_religions": 0.41875 } }, "prompt_4": { - "accuracy": 0.4105508547746503, + "accuracy": 0.40960110516318426, "category_acc": { - "agronomy": 0.33727810650887574, - "anatomy": 0.21621621621621623, - "ancient_chinese": 0.25, - "arts": 0.40625, - "astronomy": 0.296969696969697, - "business_ethics": 0.46411483253588515, + "agronomy": 0.33136094674556216, + "anatomy": 0.22297297297297297, + "ancient_chinese": 0.24390243902439024, + "arts": 0.4, + "astronomy": 0.30303030303030304, + "business_ethics": 0.45933014354066987, "chinese_civil_service_exam": 0.3625, "chinese_driving_rule": 0.6106870229007634, "chinese_food_culture": 0.3014705882352941, - "chinese_foreign_policy": 0.4485981308411215, - "chinese_history": 0.4148606811145511, - "chinese_literature": 0.29411764705882354, + "chinese_foreign_policy": 0.45794392523364486, + "chinese_history": 0.4117647058823529, + "chinese_literature": 0.30392156862745096, "chinese_teacher_qualification": 0.5139664804469274, - "clinical_knowledge": 0.3459915611814346, - "college_actuarial_science": 0.27358490566037735, - "college_education": 0.5046728971962616, - "college_engineering_hydrology": 0.46226415094339623, + "clinical_knowledge": 0.350210970464135, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.514018691588785, + "college_engineering_hydrology": 0.44339622641509435, "college_law": 0.39814814814814814, "college_mathematics": 0.3333333333333333, - "college_medical_statistics": 0.4339622641509434, - "college_medicine": 0.32234432234432236, - "computer_science": 0.4803921568627451, - "computer_security": 0.5263157894736842, - "conceptual_physics": 0.42857142857142855, - "construction_project_management": 0.37410071942446044, + "college_medical_statistics": 0.39622641509433965, + "college_medicine": 0.31868131868131866, + "computer_science": 0.49019607843137253, + "computer_security": 0.5087719298245614, + "conceptual_physics": 0.41496598639455784, + "construction_project_management": 0.381294964028777, "economics": 0.48427672955974843, - "education": 0.4785276073619632, + "education": 0.48466257668711654, "electrical_engineering": 0.4127906976744186, - "elementary_chinese": 0.2777777777777778, - "elementary_commonsense": 0.3787878787878788, - "elementary_information_and_technology": 0.634453781512605, - "elementary_mathematics": 0.3173913043478261, - "ethnology": 0.4740740740740741, - "food_science": 0.45454545454545453, - "genetics": 0.3522727272727273, + "elementary_chinese": 0.2857142857142857, + "elementary_commonsense": 0.37373737373737376, + "elementary_information_and_technology": 0.6386554621848739, + "elementary_mathematics": 0.2782608695652174, + "ethnology": 0.4888888888888889, + "food_science": 0.44755244755244755, + "genetics": 0.3465909090909091, "global_facts": 0.4228187919463087, - "high_school_biology": 0.3254437869822485, - "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.33727810650887574, + "high_school_chemistry": 0.3409090909090909, "high_school_geography": 0.3898305084745763, - "high_school_mathematics": 0.21341463414634146, - "high_school_physics": 0.3090909090909091, - "high_school_politics": 0.44755244755244755, - "human_sexuality": 0.4365079365079365, + "high_school_mathematics": 0.21951219512195122, + "high_school_physics": 0.3181818181818182, + "high_school_politics": 0.45454545454545453, + "human_sexuality": 0.42857142857142855, "international_law": 0.3621621621621622, - "journalism": 0.47093023255813954, - "jurisprudence": 0.45012165450121655, + "journalism": 0.47674418604651164, + "jurisprudence": 0.44768856447688565, "legal_and_moral_basis": 0.7523364485981309, - "logical": 0.36585365853658536, + "logical": 0.37398373983739835, "machine_learning": 0.3524590163934426, - "management": 0.5047619047619047, - "marketing": 0.5444444444444444, - "marxist_theory": 0.4973544973544973, - "modern_chinese": 0.3793103448275862, - "nutrition": 0.42758620689655175, + "management": 0.5142857142857142, + "marketing": 0.55, + "marxist_theory": 0.48148148148148145, + "modern_chinese": 0.3879310344827586, + "nutrition": 0.43448275862068964, "philosophy": 0.44761904761904764, "professional_accounting": 0.41714285714285715, "professional_law": 0.35071090047393366, - "professional_medicine": 0.27925531914893614, + "professional_medicine": 0.2872340425531915, "professional_psychology": 0.47844827586206895, "public_relations": 0.5344827586206896, - "security_study": 0.4666666666666667, + "security_study": 0.45925925925925926, "sociology": 0.49557522123893805, "sports_science": 0.41818181818181815, - "traditional_chinese_medicine": 0.32972972972972975, - "virology": 0.39644970414201186, - "world_history": 0.391304347826087, - "world_religions": 0.4125 + "traditional_chinese_medicine": 0.31351351351351353, + "virology": 0.40236686390532544, + "world_history": 0.37888198757763975, + "world_religions": 0.4 } }, "prompt_5": { - "accuracy": 0.4197893282680021, + "accuracy": 0.41875323778276635, "category_acc": { "agronomy": 0.33136094674556216, - "anatomy": 0.25675675675675674, - "ancient_chinese": 0.2865853658536585, - "arts": 0.425, + "anatomy": 0.24324324324324326, + "ancient_chinese": 0.2804878048780488, + "arts": 0.4125, "astronomy": 0.296969696969697, "business_ethics": 0.47368421052631576, - "chinese_civil_service_exam": 0.35625, + "chinese_civil_service_exam": 0.35, "chinese_driving_rule": 0.6259541984732825, - "chinese_food_culture": 0.3161764705882353, + "chinese_food_culture": 0.3235294117647059, "chinese_foreign_policy": 0.4953271028037383, "chinese_history": 0.43962848297213625, "chinese_literature": 0.30392156862745096, "chinese_teacher_qualification": 0.5083798882681564, "clinical_knowledge": 0.35864978902953587, - "college_actuarial_science": 0.29245283018867924, + "college_actuarial_science": 0.25471698113207547, "college_education": 0.5420560747663551, - "college_engineering_hydrology": 0.4716981132075472, + "college_engineering_hydrology": 0.4811320754716981, "college_law": 0.35185185185185186, "college_mathematics": 0.23809523809523808, - "college_medical_statistics": 0.4056603773584906, - "college_medicine": 0.36996336996337, + "college_medical_statistics": 0.41509433962264153, + "college_medicine": 0.3772893772893773, "computer_science": 0.47058823529411764, "computer_security": 0.5029239766081871, "conceptual_physics": 0.4421768707482993, "construction_project_management": 0.381294964028777, - "economics": 0.4716981132075472, + "economics": 0.48427672955974843, "education": 0.44785276073619634, - "electrical_engineering": 0.4186046511627907, - "elementary_chinese": 0.3055555555555556, + "electrical_engineering": 0.4127906976744186, + "elementary_chinese": 0.30158730158730157, "elementary_commonsense": 0.40404040404040403, - "elementary_information_and_technology": 0.6554621848739496, - "elementary_mathematics": 0.34347826086956523, + "elementary_information_and_technology": 0.6596638655462185, + "elementary_mathematics": 0.33043478260869563, "ethnology": 0.5037037037037037, - "food_science": 0.46853146853146854, + "food_science": 0.45454545454545453, "genetics": 0.35795454545454547, - "global_facts": 0.436241610738255, + "global_facts": 0.44966442953020136, "high_school_biology": 0.35502958579881655, "high_school_chemistry": 0.25, "high_school_geography": 0.4152542372881356, - "high_school_mathematics": 0.2865853658536585, - "high_school_physics": 0.34545454545454546, - "high_school_politics": 0.42657342657342656, + "high_school_mathematics": 0.25, + "high_school_physics": 0.35454545454545455, + "high_school_politics": 0.4195804195804196, "human_sexuality": 0.4444444444444444, - "international_law": 0.3891891891891892, - "journalism": 0.5, - "jurisprudence": 0.49391727493917276, + "international_law": 0.3837837837837838, + "journalism": 0.5058139534883721, + "jurisprudence": 0.48905109489051096, "legal_and_moral_basis": 0.7616822429906542, - "logical": 0.3821138211382114, - "machine_learning": 0.36065573770491804, - "management": 0.5238095238095238, + "logical": 0.3902439024390244, + "machine_learning": 0.36885245901639346, + "management": 0.5190476190476191, "marketing": 0.5388888888888889, "marxist_theory": 0.48148148148148145, - "modern_chinese": 0.39655172413793105, + "modern_chinese": 0.3706896551724138, "nutrition": 0.36551724137931035, - "philosophy": 0.4666666666666667, - "professional_accounting": 0.4342857142857143, - "professional_law": 0.3412322274881517, - "professional_medicine": 0.30319148936170215, - "professional_psychology": 0.46551724137931033, + "philosophy": 0.47619047619047616, + "professional_accounting": 0.44, + "professional_law": 0.35071090047393366, + "professional_medicine": 0.3058510638297872, + "professional_psychology": 0.4698275862068966, "public_relations": 0.5574712643678161, "security_study": 0.4666666666666667, "sociology": 0.48672566371681414, - "sports_science": 0.42424242424242425, + "sports_science": 0.41818181818181815, "traditional_chinese_medicine": 0.31351351351351353, - "virology": 0.41420118343195267, - "world_history": 0.38509316770186336, + "virology": 0.40828402366863903, + "world_history": 0.391304347826087, "world_religions": 0.4 } } @@ -128800,36 +128965,36 @@ }, "ind_emotion": { "prompt_1": { - "accuracy": 0.7068181818181818 + "accuracy": 0.7090909090909091 }, "prompt_2": { - "accuracy": 0.6636363636363637 + "accuracy": 0.6545454545454545 }, "prompt_3": { "accuracy": 0.6613636363636364 }, "prompt_4": { - "accuracy": 0.6863636363636364 + "accuracy": 0.6909090909090909 }, "prompt_5": { - "accuracy": 0.6840909090909091 + "accuracy": 0.6818181818181818 } }, "ocnli": { "prompt_1": { - "accuracy": 0.40711864406779663 + "accuracy": 0.4105084745762712 }, "prompt_2": { - "accuracy": 0.428135593220339 + "accuracy": 0.42915254237288136 }, "prompt_3": { - "accuracy": 0.408135593220339 + "accuracy": 0.4105084745762712 }, "prompt_4": { - "accuracy": 0.4206779661016949 + "accuracy": 0.41966101694915253 }, "prompt_5": { - "accuracy": 0.40711864406779663 + "accuracy": 0.408135593220339 } }, "c3": { @@ -128837,33 +129002,33 @@ "accuracy": 0.7857142857142857 }, "prompt_2": { - "accuracy": 0.7729992520568437 + "accuracy": 0.7756170531039641 }, "prompt_3": { - "accuracy": 0.7711293941660434 + "accuracy": 0.7729992520568437 }, "prompt_4": { - "accuracy": 0.7816005983545251 + "accuracy": 0.7789827973074046 }, "prompt_5": { - "accuracy": 0.7729992520568437 + "accuracy": 0.7756170531039641 } }, "dream": { "prompt_1": { - "accuracy": 0.8569328760411563 + "accuracy": 0.8564429201371877 }, "prompt_2": { - "accuracy": 0.8486036256736894 + "accuracy": 0.850563449289564 }, "prompt_3": { - "accuracy": 0.8388045075943165 + "accuracy": 0.842234198922097 }, "prompt_4": { - "accuracy": 0.8549730524252818 + "accuracy": 0.8593826555609995 }, "prompt_5": { - "accuracy": 0.854483096521313 + "accuracy": 0.8535031847133758 } }, "samsum": { @@ -128902,10 +129067,10 @@ }, "sst2": { "prompt_1": { - "accuracy": 0.9139908256880734 + "accuracy": 0.9151376146788991 }, "prompt_2": { - "accuracy": 0.8990825688073395 + "accuracy": 0.9002293577981652 }, "prompt_3": { "accuracy": 0.9139908256880734 @@ -128919,87 +129084,87 @@ }, "cola": { "prompt_1": { - "accuracy": 0.7775647171620326 + "accuracy": 0.7813998082454459 }, "prompt_2": { - "accuracy": 0.7775647171620326 + "accuracy": 0.7785234899328859 }, "prompt_3": { - "accuracy": 0.7766059443911792 + "accuracy": 0.7727708533077661 }, "prompt_4": { - "accuracy": 0.7804410354745925 + "accuracy": 0.7813998082454459 }, "prompt_5": { - "accuracy": 0.7804410354745925 + "accuracy": 0.7785234899328859 } }, "qqp": { "prompt_1": { - "accuracy": 0.694 + "accuracy": 0.693 }, "prompt_2": { - "accuracy": 0.697 + "accuracy": 0.695 }, "prompt_3": { - "accuracy": 0.6845 + "accuracy": 0.6865 }, "prompt_4": { - "accuracy": 0.691 + "accuracy": 0.6905 }, "prompt_5": { - "accuracy": 0.69 + "accuracy": 0.688 } }, "mnli": { "prompt_1": { - "accuracy": 0.561 + "accuracy": 0.5645 }, "prompt_2": { - "accuracy": 0.5865 + "accuracy": 0.587 }, "prompt_3": { - "accuracy": 0.56 + "accuracy": 0.5575 }, "prompt_4": { - "accuracy": 0.5725 + "accuracy": 0.5735 }, "prompt_5": { - "accuracy": 0.5665 + "accuracy": 0.5555 } }, "qnli": { "prompt_1": { - "accuracy": 0.605 + "accuracy": 0.6055 }, "prompt_2": { - "accuracy": 0.638 + "accuracy": 0.6415 }, "prompt_3": { - "accuracy": 0.6495 + "accuracy": 0.649 }, "prompt_4": { "accuracy": 0.629 }, "prompt_5": { - "accuracy": 0.6375 + "accuracy": 0.6405 } }, "wnli": { "prompt_1": { - "accuracy": 0.5492957746478874 + "accuracy": 0.5633802816901409 }, "prompt_2": { - "accuracy": 0.5774647887323944 + "accuracy": 0.5633802816901409 }, "prompt_3": { "accuracy": 0.5211267605633803 }, "prompt_4": { - "accuracy": 0.5915492957746479 + "accuracy": 0.5492957746478874 }, "prompt_5": { - "accuracy": 0.5774647887323944 + "accuracy": 0.5915492957746479 } }, "rte": { @@ -129007,13 +129172,13 @@ "accuracy": 0.6209386281588448 }, "prompt_2": { - "accuracy": 0.5703971119133574 + "accuracy": 0.5740072202166066 }, "prompt_3": { - "accuracy": 0.6498194945848376 + "accuracy": 0.6534296028880866 }, "prompt_4": { - "accuracy": 0.7184115523465704 + "accuracy": 0.7220216606498195 }, "prompt_5": { "accuracy": 0.592057761732852 @@ -129021,16 +129186,16 @@ }, "mrpc": { "prompt_1": { - "accuracy": 0.696078431372549 + "accuracy": 0.6936274509803921 }, "prompt_2": { - "accuracy": 0.7132352941176471 + "accuracy": 0.7058823529411765 }, "prompt_3": { "accuracy": 0.6544117647058824 }, "prompt_4": { - "accuracy": 0.6985294117647058 + "accuracy": 0.7034313725490197 }, "prompt_5": { "accuracy": 0.6151960784313726 @@ -129039,76 +129204,686 @@ }, "five_shot": { "cross_xquad": { - "prompt_1": -1 + "prompt_1": { + "overall_acc": 0.8642857142857143, + "language_acc": { + "Spanish": 0.8865546218487395, + "English": 0.9184873949579831, + "Chinese": 0.8504201680672269, + "Vietnamese": 0.8016806722689076 + }, + "consistency_score_2": 0.8295518207282914, + "consistency_score_3": 0.7560924369747899, + "consistency_score_4": 0.704201680672269, + "detailed_consistency_score": { + "2_combine": { + "Spanish,English": 0.8991596638655462, + "Spanish,Chinese": 0.8470588235294118, + "Spanish,Vietnamese": 0.7924369747899159, + "English,Chinese": 0.8546218487394958, + "English,Vietnamese": 0.8042016806722689, + "Chinese,Vietnamese": 0.7798319327731092 + }, + "3_combine": { + "Spanish,English,Chinese": 0.8092436974789916, + "Spanish,English,Vietnamese": 0.7571428571428571, + "Spanish,Chinese,Vietnamese": 0.7243697478991596, + "English,Chinese,Vietnamese": 0.7336134453781512 + }, + "4_combine": { + "Spanish,English,Chinese,Vietnamese": 0.704201680672269 + } + }, + "AC3_2": 0.8465626402202849, + "AC3_3": 0.8065770220464982, + "AC3_4": 0.7760743943479798 + } }, "cross_mmlu": { - "prompt_1": -1 + "prompt_1": { + "overall_acc": 0.52, + "language_acc": { + "Filipino": 0.46, + "Vietnamese": 0.41333333333333333, + "Chinese": 0.5066666666666667, + "Spanish": 0.5666666666666667, + "Malay": 0.48, + "Indonesian": 0.58, + "English": 0.6333333333333333 + }, + "consistency_score_2": 0.5955555555555557, + "consistency_score_3": 0.43542857142857133, + "consistency_score_4": 0.34323809523809534, + "consistency_score_5": 0.2844444444444444, + "consistency_score_6": 0.2457142857142857, + "consistency_score_7": 0.22, + "detailed_consistency_score": { + "2_combine": { + "Filipino,Vietnamese": 0.49333333333333335, + "Filipino,Chinese": 0.52, + "Filipino,Spanish": 0.5333333333333333, + "Filipino,Malay": 0.5066666666666667, + "Filipino,Indonesian": 0.56, + "Filipino,English": 0.56, + "Vietnamese,Chinese": 0.44, + "Vietnamese,Spanish": 0.5266666666666666, + "Vietnamese,Malay": 0.5933333333333334, + "Vietnamese,Indonesian": 0.5333333333333333, + "Vietnamese,English": 0.5066666666666667, + "Chinese,Spanish": 0.6333333333333333, + "Chinese,Malay": 0.6, + "Chinese,Indonesian": 0.6533333333333333, + "Chinese,English": 0.66, + "Spanish,Malay": 0.64, + "Spanish,Indonesian": 0.72, + "Spanish,English": 0.7466666666666667, + "Malay,Indonesian": 0.6866666666666666, + "Malay,English": 0.6533333333333333, + "Indonesian,English": 0.74 + }, + "3_combine": { + "Filipino,Vietnamese,Chinese": 0.30666666666666664, + "Filipino,Vietnamese,Spanish": 0.34, + "Filipino,Vietnamese,Malay": 0.36, + "Filipino,Vietnamese,Indonesian": 0.35333333333333333, + "Filipino,Vietnamese,English": 0.3466666666666667, + "Filipino,Chinese,Spanish": 0.4, + "Filipino,Chinese,Malay": 0.36, + "Filipino,Chinese,Indonesian": 0.41333333333333333, + "Filipino,Chinese,English": 0.4066666666666667, + "Filipino,Spanish,Malay": 0.38666666666666666, + "Filipino,Spanish,Indonesian": 0.4533333333333333, + "Filipino,Spanish,English": 0.44666666666666666, + "Filipino,Malay,Indonesian": 0.42, + "Filipino,Malay,English": 0.3933333333333333, + "Filipino,Indonesian,English": 0.46, + "Vietnamese,Chinese,Spanish": 0.36666666666666664, + "Vietnamese,Chinese,Malay": 0.37333333333333335, + "Vietnamese,Chinese,Indonesian": 0.36666666666666664, + "Vietnamese,Chinese,English": 0.36, + "Vietnamese,Spanish,Malay": 0.4266666666666667, + "Vietnamese,Spanish,Indonesian": 0.43333333333333335, + "Vietnamese,Spanish,English": 0.42, + "Vietnamese,Malay,Indonesian": 0.4533333333333333, + "Vietnamese,Malay,English": 0.43333333333333335, + "Vietnamese,Indonesian,English": 0.4266666666666667, + "Chinese,Spanish,Malay": 0.47333333333333333, + "Chinese,Spanish,Indonesian": 0.52, + "Chinese,Spanish,English": 0.5466666666666666, + "Chinese,Malay,Indonesian": 0.5, + "Chinese,Malay,English": 0.4866666666666667, + "Chinese,Indonesian,English": 0.5533333333333333, + "Spanish,Malay,Indonesian": 0.54, + "Spanish,Malay,English": 0.5333333333333333, + "Spanish,Indonesian,English": 0.62, + "Malay,Indonesian,English": 0.56 + }, + "4_combine": { + "Filipino,Vietnamese,Chinese,Spanish": 0.26, + "Filipino,Vietnamese,Chinese,Malay": 0.26666666666666666, + "Filipino,Vietnamese,Chinese,Indonesian": 0.2733333333333333, + "Filipino,Vietnamese,Chinese,English": 0.2733333333333333, + "Filipino,Vietnamese,Spanish,Malay": 0.2866666666666667, + "Filipino,Vietnamese,Spanish,Indonesian": 0.3, + "Filipino,Vietnamese,Spanish,English": 0.28, + "Filipino,Vietnamese,Malay,Indonesian": 0.30666666666666664, + "Filipino,Vietnamese,Malay,English": 0.2866666666666667, + "Filipino,Vietnamese,Indonesian,English": 0.3, + "Filipino,Chinese,Spanish,Malay": 0.30666666666666664, + "Filipino,Chinese,Spanish,Indonesian": 0.3466666666666667, + "Filipino,Chinese,Spanish,English": 0.35333333333333333, + "Filipino,Chinese,Malay,Indonesian": 0.3333333333333333, + "Filipino,Chinese,Malay,English": 0.31333333333333335, + "Filipino,Chinese,Indonesian,English": 0.37333333333333335, + "Filipino,Spanish,Malay,Indonesian": 0.36, + "Filipino,Spanish,Malay,English": 0.34, + "Filipino,Spanish,Indonesian,English": 0.4066666666666667, + "Filipino,Malay,Indonesian,English": 0.35333333333333333, + "Vietnamese,Chinese,Spanish,Malay": 0.3333333333333333, + "Vietnamese,Chinese,Spanish,Indonesian": 0.3333333333333333, + "Vietnamese,Chinese,Spanish,English": 0.32666666666666666, + "Vietnamese,Chinese,Malay,Indonesian": 0.3333333333333333, + "Vietnamese,Chinese,Malay,English": 0.32666666666666666, + "Vietnamese,Chinese,Indonesian,English": 0.3333333333333333, + "Vietnamese,Spanish,Malay,Indonesian": 0.38, + "Vietnamese,Spanish,Malay,English": 0.38, + "Vietnamese,Spanish,Indonesian,English": 0.37333333333333335, + "Vietnamese,Malay,Indonesian,English": 0.38, + "Chinese,Spanish,Malay,Indonesian": 0.41333333333333333, + "Chinese,Spanish,Malay,English": 0.41333333333333333, + "Chinese,Spanish,Indonesian,English": 0.4666666666666667, + "Chinese,Malay,Indonesian,English": 0.43333333333333335, + "Spanish,Malay,Indonesian,English": 0.4666666666666667 + }, + "5_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay": 0.24, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian": 0.24, + "Filipino,Vietnamese,Chinese,Spanish,English": 0.24, + "Filipino,Vietnamese,Chinese,Malay,Indonesian": 0.25333333333333335, + "Filipino,Vietnamese,Chinese,Malay,English": 0.24666666666666667, + "Filipino,Vietnamese,Chinese,Indonesian,English": 0.26, + "Filipino,Vietnamese,Spanish,Malay,Indonesian": 0.26666666666666666, + "Filipino,Vietnamese,Spanish,Malay,English": 0.25333333333333335, + "Filipino,Vietnamese,Spanish,Indonesian,English": 0.26666666666666666, + "Filipino,Vietnamese,Malay,Indonesian,English": 0.26, + "Filipino,Chinese,Spanish,Malay,Indonesian": 0.2866666666666667, + "Filipino,Chinese,Spanish,Malay,English": 0.28, + "Filipino,Chinese,Spanish,Indonesian,English": 0.32666666666666666, + "Filipino,Chinese,Malay,Indonesian,English": 0.3, + "Filipino,Spanish,Malay,Indonesian,English": 0.32, + "Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.30666666666666664, + "Vietnamese,Chinese,Spanish,Malay,English": 0.30666666666666664, + "Vietnamese,Chinese,Spanish,Indonesian,English": 0.30666666666666664, + "Vietnamese,Chinese,Malay,Indonesian,English": 0.30666666666666664, + "Vietnamese,Spanish,Malay,Indonesian,English": 0.34, + "Chinese,Spanish,Malay,Indonesian,English": 0.36666666666666664 + }, + "6_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian": 0.22666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,Malay,English": 0.22666666666666666, + "Filipino,Vietnamese,Chinese,Spanish,Indonesian,English": 0.23333333333333334, + "Filipino,Vietnamese,Chinese,Malay,Indonesian,English": 0.24, + "Filipino,Vietnamese,Spanish,Malay,Indonesian,English": 0.24, + "Filipino,Chinese,Spanish,Malay,Indonesian,English": 0.26666666666666666, + "Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.2866666666666667 + }, + "7_combine": { + "Filipino,Vietnamese,Chinese,Spanish,Malay,Indonesian,English": 0.22 + } + }, + "AC3_2": 0.5552191234562055, + "AC3_3": 0.47397129181642045, + "AC3_4": 0.4135216239591574, + "AC3_5": 0.3677348065841213, + "AC3_6": 0.33373134323999776, + "AC3_7": 0.3091891891474069 + } }, "cross_logiqa": { - "prompt_1": -1 + "prompt_1": { + "overall_acc": 0.44155844155844154, + "language_acc": { + "Indonesian": 0.4715909090909091, + "English": 0.5454545454545454, + "Filipino": 0.3068181818181818, + "Spanish": 0.5056818181818182, + "Chinese": 0.4943181818181818, + "Malay": 0.44886363636363635, + "Vietnamese": 0.3181818181818182 + }, + "consistency_score_2": 0.5197510822510824, + "consistency_score_3": 0.33344155844155837, + "consistency_score_4": 0.2375, + "consistency_score_5": 0.1810064935064935, + "consistency_score_6": 0.1444805194805195, + "consistency_score_7": 0.11931818181818182, + "detailed_consistency_score": { + "2_combine": { + "Indonesian,English": 0.6647727272727273, + "Indonesian,Filipino": 0.45454545454545453, + "Indonesian,Spanish": 0.6534090909090909, + "Indonesian,Chinese": 0.5568181818181818, + "Indonesian,Malay": 0.6477272727272727, + "Indonesian,Vietnamese": 0.48295454545454547, + "English,Filipino": 0.4318181818181818, + "English,Spanish": 0.6477272727272727, + "English,Chinese": 0.6079545454545454, + "English,Malay": 0.6136363636363636, + "English,Vietnamese": 0.4147727272727273, + "Filipino,Spanish": 0.4147727272727273, + "Filipino,Chinese": 0.4090909090909091, + "Filipino,Malay": 0.4318181818181818, + "Filipino,Vietnamese": 0.4943181818181818, + "Spanish,Chinese": 0.5511363636363636, + "Spanish,Malay": 0.6022727272727273, + "Spanish,Vietnamese": 0.44886363636363635, + "Chinese,Malay": 0.5284090909090909, + "Chinese,Vietnamese": 0.39204545454545453, + "Malay,Vietnamese": 0.4659090909090909 + }, + "3_combine": { + "Indonesian,English,Filipino": 0.32954545454545453, + "Indonesian,English,Spanish": 0.5113636363636364, + "Indonesian,English,Chinese": 0.45454545454545453, + "Indonesian,English,Malay": 0.4943181818181818, + "Indonesian,English,Vietnamese": 0.32386363636363635, + "Indonesian,Filipino,Spanish": 0.3125, + "Indonesian,Filipino,Chinese": 0.2840909090909091, + "Indonesian,Filipino,Malay": 0.32386363636363635, + "Indonesian,Filipino,Vietnamese": 0.26704545454545453, + "Indonesian,Spanish,Chinese": 0.4090909090909091, + "Indonesian,Spanish,Malay": 0.48863636363636365, + "Indonesian,Spanish,Vietnamese": 0.3409090909090909, + "Indonesian,Chinese,Malay": 0.4147727272727273, + "Indonesian,Chinese,Vietnamese": 0.2727272727272727, + "Indonesian,Malay,Vietnamese": 0.32954545454545453, + "English,Filipino,Spanish": 0.30113636363636365, + "English,Filipino,Chinese": 0.3125, + "English,Filipino,Malay": 0.30113636363636365, + "English,Filipino,Vietnamese": 0.24431818181818182, + "English,Spanish,Chinese": 0.4659090909090909, + "English,Spanish,Malay": 0.4715909090909091, + "English,Spanish,Vietnamese": 0.30113636363636365, + "English,Chinese,Malay": 0.42613636363636365, + "English,Chinese,Vietnamese": 0.2727272727272727, + "English,Malay,Vietnamese": 0.29545454545454547, + "Filipino,Spanish,Chinese": 0.2556818181818182, + "Filipino,Spanish,Malay": 0.2897727272727273, + "Filipino,Spanish,Vietnamese": 0.24431818181818182, + "Filipino,Chinese,Malay": 0.26136363636363635, + "Filipino,Chinese,Vietnamese": 0.2215909090909091, + "Filipino,Malay,Vietnamese": 0.24431818181818182, + "Spanish,Chinese,Malay": 0.3977272727272727, + "Spanish,Chinese,Vietnamese": 0.25, + "Spanish,Malay,Vietnamese": 0.30113636363636365, + "Chinese,Malay,Vietnamese": 0.2556818181818182 + }, + "4_combine": { + "Indonesian,English,Filipino,Spanish": 0.26136363636363635, + "Indonesian,English,Filipino,Chinese": 0.25, + "Indonesian,English,Filipino,Malay": 0.26136363636363635, + "Indonesian,English,Filipino,Vietnamese": 0.1875, + "Indonesian,English,Spanish,Chinese": 0.36363636363636365, + "Indonesian,English,Spanish,Malay": 0.4147727272727273, + "Indonesian,English,Spanish,Vietnamese": 0.26704545454545453, + "Indonesian,English,Chinese,Malay": 0.3693181818181818, + "Indonesian,English,Chinese,Vietnamese": 0.24431818181818182, + "Indonesian,English,Malay,Vietnamese": 0.24431818181818182, + "Indonesian,Filipino,Spanish,Chinese": 0.2215909090909091, + "Indonesian,Filipino,Spanish,Malay": 0.26136363636363635, + "Indonesian,Filipino,Spanish,Vietnamese": 0.18181818181818182, + "Indonesian,Filipino,Chinese,Malay": 0.21022727272727273, + "Indonesian,Filipino,Chinese,Vietnamese": 0.17045454545454544, + "Indonesian,Filipino,Malay,Vietnamese": 0.1875, + "Indonesian,Spanish,Chinese,Malay": 0.3409090909090909, + "Indonesian,Spanish,Chinese,Vietnamese": 0.2215909090909091, + "Indonesian,Spanish,Malay,Vietnamese": 0.2556818181818182, + "Indonesian,Chinese,Malay,Vietnamese": 0.2215909090909091, + "English,Filipino,Spanish,Chinese": 0.23863636363636365, + "English,Filipino,Spanish,Malay": 0.25, + "English,Filipino,Spanish,Vietnamese": 0.17045454545454544, + "English,Filipino,Chinese,Malay": 0.2215909090909091, + "English,Filipino,Chinese,Vietnamese": 0.18181818181818182, + "English,Filipino,Malay,Vietnamese": 0.17613636363636365, + "English,Spanish,Chinese,Malay": 0.35795454545454547, + "English,Spanish,Chinese,Vietnamese": 0.23295454545454544, + "English,Spanish,Malay,Vietnamese": 0.23863636363636365, + "English,Chinese,Malay,Vietnamese": 0.2215909090909091, + "Filipino,Spanish,Chinese,Malay": 0.19886363636363635, + "Filipino,Spanish,Chinese,Vietnamese": 0.1590909090909091, + "Filipino,Spanish,Malay,Vietnamese": 0.1590909090909091, + "Filipino,Chinese,Malay,Vietnamese": 0.17045454545454544, + "Spanish,Chinese,Malay,Vietnamese": 0.19886363636363635 + }, + "5_combine": { + "Indonesian,English,Filipino,Spanish,Chinese": 0.20454545454545456, + "Indonesian,English,Filipino,Spanish,Malay": 0.23295454545454544, + "Indonesian,English,Filipino,Spanish,Vietnamese": 0.1534090909090909, + "Indonesian,English,Filipino,Chinese,Malay": 0.19886363636363635, + "Indonesian,English,Filipino,Chinese,Vietnamese": 0.1590909090909091, + "Indonesian,English,Filipino,Malay,Vietnamese": 0.1534090909090909, + "Indonesian,English,Spanish,Chinese,Malay": 0.3181818181818182, + "Indonesian,English,Spanish,Chinese,Vietnamese": 0.21022727272727273, + "Indonesian,English,Spanish,Malay,Vietnamese": 0.2159090909090909, + "Indonesian,English,Chinese,Malay,Vietnamese": 0.20454545454545456, + "Indonesian,Filipino,Spanish,Chinese,Malay": 0.1875, + "Indonesian,Filipino,Spanish,Chinese,Vietnamese": 0.14204545454545456, + "Indonesian,Filipino,Spanish,Malay,Vietnamese": 0.14204545454545456, + "Indonesian,Filipino,Chinese,Malay,Vietnamese": 0.14204545454545456, + "Indonesian,Spanish,Chinese,Malay,Vietnamese": 0.1875, + "English,Filipino,Spanish,Chinese,Malay": 0.19318181818181818, + "English,Filipino,Spanish,Chinese,Vietnamese": 0.1534090909090909, + "English,Filipino,Spanish,Malay,Vietnamese": 0.13636363636363635, + "English,Filipino,Chinese,Malay,Vietnamese": 0.14772727272727273, + "English,Spanish,Chinese,Malay,Vietnamese": 0.19318181818181818, + "Filipino,Spanish,Chinese,Malay,Vietnamese": 0.125 + }, + "6_combine": { + "Indonesian,English,Filipino,Spanish,Chinese,Malay": 0.18181818181818182, + "Indonesian,English,Filipino,Spanish,Chinese,Vietnamese": 0.13636363636363635, + "Indonesian,English,Filipino,Spanish,Malay,Vietnamese": 0.13068181818181818, + "Indonesian,English,Filipino,Chinese,Malay,Vietnamese": 0.13636363636363635, + "Indonesian,English,Spanish,Chinese,Malay,Vietnamese": 0.18181818181818182, + "Indonesian,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.11931818181818182, + "English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.125 + }, + "7_combine": { + "Indonesian,English,Filipino,Spanish,Chinese,Malay,Vietnamese": 0.11931818181818182 + } + }, + "AC3_2": 0.477474678381947, + "AC3_3": 0.3799585416187732, + "AC3_4": 0.308869232562691, + "AC3_5": 0.2567601889223365, + "AC3_6": 0.2177213367977589, + "AC3_7": 0.18787001706951698 + } }, "sg_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.6504854368932039 + } }, "cn_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.3238095238095238 + } }, "us_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.6542056074766355 + } }, "ph_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.42, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.4, + "history": 0.2, + "literature": 0.2, + "politics": 0.7, + "culture": 0.6, + "film": 0.4, + "law": 0.4, + "geography": 0.7 + } + } }, "sing2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.30846091455013425 + } }, "indommlu": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.4982308565324788, + "category_acc": { + "History": 0.5160642570281124, + "Geography": 0.44693877551020406, + "Lampungic": 0.3129251700680272, + "Social science": 0.7395659432387313, + "Balinese": 0.32908704883227174, + "Makassarese": 0.41397849462365593, + "Banjarese": 0.4305555555555556, + "Chemistry": 0.30802919708029197, + "Biology": 0.46982248520710057, + "Science": 0.6099071207430341, + "Christian religion": 0.6467661691542289, + "Art": 0.6173044925124792, + "Islam religion": 0.6017069701280228, + "Hindu religion": 0.5, + "Madurese": 0.288135593220339, + "Sport": 0.5540540540540541, + "Indonesian language": 0.571917808219178, + "Physics": 0.4121212121212121, + "Minangkabau culture": 0.3768844221105528, + "Dayak language": 0.25688073394495414, + "Sociology": 0.4838709677419355, + "Economy": 0.4979508196721312, + "Sundanese": 0.3863439930855661, + "Javanese": 0.34274193548387094, + "Civic education": 0.6080114449213162 + } + } }, "flores_ind2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.37647845181633705 + } }, "flores_vie2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.22941829839366545 + } }, "flores_zho2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.2291244503731969 + } }, "flores_zsm2eng": { - "prompt_1": -1 + "prompt_1": { + "bleu_score": 0.35845911812249015 + } }, "mmlu": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.5834305717619603 + } }, "mmlu_full": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.5736145870575616, + "category_acc": { + "high_school_european_history": 0.7317073170731707, + "business_ethics": 0.5656565656565656, + "clinical_knowledge": 0.625, + "medical_genetics": 0.5757575757575758, + "high_school_us_history": 0.7635467980295566, + "high_school_physics": 0.36666666666666664, + "high_school_world_history": 0.7457627118644068, + "virology": 0.4727272727272727, + "high_school_microeconomics": 0.620253164556962, + "econometrics": 0.3893805309734513, + "college_computer_science": 0.5050505050505051, + "high_school_biology": 0.7152103559870551, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.4234875444839858, + "philosophy": 0.6451612903225806, + "professional_medicine": 0.6383763837638377, + "nutrition": 0.6295081967213115, + "global_facts": 0.3434343434343434, + "machine_learning": 0.3963963963963964, + "security_studies": 0.6598360655737705, + "public_relations": 0.6697247706422018, + "professional_psychology": 0.5875613747954174, + "prehistory": 0.6253869969040248, + "anatomy": 0.5, + "human_sexuality": 0.7, + "college_medicine": 0.5813953488372093, + "high_school_government_and_politics": 0.8177083333333334, + "college_chemistry": 0.37373737373737376, + "logical_fallacies": 0.691358024691358, + "high_school_geography": 0.7766497461928934, + "elementary_mathematics": 0.3793103448275862, + "human_aging": 0.6531531531531531, + "college_mathematics": 0.3434343434343434, + "high_school_psychology": 0.7996323529411765, + "formal_logic": 0.44, + "high_school_statistics": 0.3767441860465116, + "international_law": 0.725, + "high_school_mathematics": 0.30111524163568776, + "high_school_computer_science": 0.6161616161616161, + "conceptual_physics": 0.48717948717948717, + "miscellaneous": 0.789002557544757, + "high_school_chemistry": 0.4801980198019802, + "marketing": 0.8454935622317596, + "professional_law": 0.4272667971298108, + "management": 0.7843137254901961, + "college_physics": 0.32673267326732675, + "jurisprudence": 0.6728971962616822, + "world_religions": 0.8058823529411765, + "sociology": 0.825, + "us_foreign_policy": 0.8383838383838383, + "high_school_macroeconomics": 0.5861182519280206, + "computer_security": 0.6666666666666666, + "moral_scenarios": 0.2796420581655481, + "moral_disputes": 0.6579710144927536, + "electrical_engineering": 0.5208333333333334, + "astronomy": 0.5894039735099338, + "college_biology": 0.6643356643356644 + } + } }, "c_eval": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.4539375928677563 + } }, "c_eval_full": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.42714819427148193, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.5416666666666666, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.5, + "college_physics": 0.25, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.30952380952380953, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.2608695652173913, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.5769230769230769, + "middle_school_physics": 0.5833333333333334, + "middle_school_chemistry": 0.48, + "veterinary_medicine": 0.39285714285714285, + "college_economics": 0.5, + "business_administration": 0.42105263157894735, + "marxism": 0.5, + "mao_zedong_thought": 0.6551724137931034, + "education_science": 0.5, + "teacher_qualification": 0.7142857142857143, + "high_school_politics": 0.4583333333333333, + "high_school_geography": 0.4166666666666667, + "middle_school_politics": 0.5384615384615384, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.75, + "logic": 0.5925925925925926, + "law": 0.41379310344827586, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.2631578947368421, + "professional_tour_guide": 0.47058823529411764, + "legal_professional": 0.4642857142857143, + "high_school_chinese": 0.25, + "high_school_history": 0.52, + "middle_school_history": 0.5925925925925926, + "civil_servant": 0.36538461538461536, + "sports_science": 0.20833333333333334, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.4444444444444444, + "urban_and_rural_planner": 0.5294117647058824, + "accountant": 0.3888888888888889, + "fire_engineer": 0.3611111111111111, + "environmental_impact_assessment_engineer": 0.4722222222222222, + "tax_accountant": 0.3148148148148148, + "physician": 0.4444444444444444 + } + } }, "cmmlu": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.3942652329749104 + } }, "cmmlu_full": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.4283370747711967, + "category_acc": { + "agronomy": 0.40236686390532544, + "anatomy": 0.2972972972972973, + "ancient_chinese": 0.23170731707317074, + "arts": 0.375, + "astronomy": 0.296969696969697, + "business_ethics": 0.5263157894736842, + "chinese_civil_service_exam": 0.39375, + "chinese_driving_rule": 0.6564885496183206, + "chinese_food_culture": 0.3161764705882353, + "chinese_foreign_policy": 0.4953271028037383, + "chinese_history": 0.4613003095975232, + "chinese_literature": 0.30392156862745096, + "chinese_teacher_qualification": 0.5251396648044693, + "clinical_knowledge": 0.35864978902953587, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.514018691588785, + "college_engineering_hydrology": 0.46226415094339623, + "college_law": 0.35185185185185186, + "college_mathematics": 0.2761904761904762, + "college_medical_statistics": 0.39622641509433965, + "college_medicine": 0.336996336996337, + "computer_science": 0.5049019607843137, + "computer_security": 0.5497076023391813, + "conceptual_physics": 0.3945578231292517, + "construction_project_management": 0.4316546762589928, + "economics": 0.4716981132075472, + "education": 0.5030674846625767, + "electrical_engineering": 0.4069767441860465, + "elementary_chinese": 0.3055555555555556, + "elementary_commonsense": 0.42424242424242425, + "elementary_information_and_technology": 0.6974789915966386, + "elementary_mathematics": 0.3347826086956522, + "ethnology": 0.43703703703703706, + "food_science": 0.45454545454545453, + "genetics": 0.42045454545454547, + "global_facts": 0.42953020134228187, + "high_school_biology": 0.3431952662721893, + "high_school_chemistry": 0.3181818181818182, + "high_school_geography": 0.423728813559322, + "high_school_mathematics": 0.23170731707317074, + "high_school_physics": 0.35454545454545455, + "high_school_politics": 0.4125874125874126, + "human_sexuality": 0.48412698412698413, + "international_law": 0.3567567567567568, + "journalism": 0.5232558139534884, + "jurisprudence": 0.49878345498783455, + "legal_and_moral_basis": 0.7897196261682243, + "logical": 0.34959349593495936, + "machine_learning": 0.4344262295081967, + "management": 0.5142857142857142, + "marketing": 0.55, + "marxist_theory": 0.5343915343915344, + "modern_chinese": 0.3017241379310345, + "nutrition": 0.4482758620689655, + "philosophy": 0.4666666666666667, + "professional_accounting": 0.4857142857142857, + "professional_law": 0.3459715639810427, + "professional_medicine": 0.300531914893617, + "professional_psychology": 0.44396551724137934, + "public_relations": 0.47701149425287354, + "security_study": 0.5481481481481482, + "sociology": 0.4646017699115044, + "sports_science": 0.41818181818181815, + "traditional_chinese_medicine": 0.3567567567567568, + "virology": 0.4319526627218935, + "world_history": 0.453416149068323, + "world_religions": 0.44375 + } + } }, "zbench": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.36363636363636365 + } }, "ind_emotion": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.6704545454545454 + } }, "ocnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.4471186440677966 + } }, "c3": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.7868362004487659 + } }, "dream": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.8588926996570309 + } }, "samsum": { "prompt_1": -1 @@ -129117,28 +129892,44 @@ "prompt_1": -1 }, "sst2": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.9369266055045872 + } }, "cola": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.7181208053691275 + } }, "qqp": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.6595 + } }, "mnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.5855 + } }, "qnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.5955 + } }, "wnli": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.5633802816901409 + } }, "rte": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.6534296028880866 + } }, "mrpc": { - "prompt_1": -1 + "prompt_1": { + "accuracy": 0.5098039215686274 + } } } },