Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- results/cross_lingual/zero_shot/cross_logiqa.csv +2 -0
- results/cross_lingual/zero_shot/cross_mmlu.csv +2 -0
- results/cross_lingual/zero_shot/cross_xquad.csv +1 -0
- results/cultural_reasoning/zero_shot/cn_eval.csv +2 -0
- results/cultural_reasoning/zero_shot/ph_eval.csv +2 -0
- results/cultural_reasoning/zero_shot/sg_eval.csv +2 -0
- results/cultural_reasoning/zero_shot/sg_eval_v1_cleaned.csv +2 -0
- results/cultural_reasoning/zero_shot/sg_eval_v2_mcq.csv +1 -2
- results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv +1 -0
- results/cultural_reasoning/zero_shot/us_eval.csv +2 -0
- results/dialogue/zero_shot/dialogsum.csv +2 -0
- results/dialogue/zero_shot/dream.csv +2 -0
- results/dialogue/zero_shot/samsum.csv +2 -0
- results/emotion/zero_shot/ind_emotion.csv +2 -0
- results/emotion/zero_shot/sst2.csv +2 -0
- results/flores_translation/zero_shot/ind2eng.csv +2 -0
- results/flores_translation/zero_shot/vie2eng.csv +2 -0
- results/flores_translation/zero_shot/zho2eng.csv +2 -0
- results/flores_translation/zero_shot/zsm2eng.csv +2 -0
- results/fundamental_nlp_tasks/zero_shot/c3.csv +2 -0
- results/fundamental_nlp_tasks/zero_shot/cola.csv +2 -0
- results/fundamental_nlp_tasks/zero_shot/mnli.csv +2 -0
- results/fundamental_nlp_tasks/zero_shot/mrpc.csv +1 -0
- results/fundamental_nlp_tasks/zero_shot/ocnli.csv +1 -0
- results/fundamental_nlp_tasks/zero_shot/qnli.csv +1 -0
- results/fundamental_nlp_tasks/zero_shot/qqp.csv +2 -0
- results/fundamental_nlp_tasks/zero_shot/rte.csv +1 -0
- results/fundamental_nlp_tasks/zero_shot/wnli.csv +1 -0
- results/general_reasoning/zero_shot/c_eval.csv +2 -0
- results/general_reasoning/zero_shot/cmmlu.csv +1 -0
- results/general_reasoning/zero_shot/indommlu.csv +2 -0
- results/general_reasoning/zero_shot/mmlu.csv +1 -0
- results/general_reasoning/zero_shot/zbench.csv +2 -0
results/cross_lingual/zero_shot/cross_logiqa.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
2 |
Qwen2-7B-Instruct,0.564935064935065,0.48279220779220783,0.5206435955861558,0.6590909090909091,0.7045454545454546,0.5340909090909091,0.5738636363636364,0.5397727272727273,0.5113636363636364,0.4318181818181818
|
3 |
Meta-Llama-3.1-8B-Instruct,0.4472402597402597,0.43717532467532455,0.44215052105151864,0.5227272727272727,0.4602272727272727,0.4715909090909091,0.4715909090909091,0.4147727272727273,0.3977272727272727,0.39204545454545453
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.6931818181818182,0.6397727272727273,0.6654072695772988,0.7727272727272727,0.7897727272727273,0.6704545454545454,0.6761363636363636,0.6875,0.6875,0.5681818181818182
|
5 |
Qwen2_5_7B_Instruct,0.599025974025974,0.5034090909090908,0.5470709896292291,0.7102272727272727,0.7215909090909091,0.6136363636363636,0.6022727272727273,0.5738636363636364,0.5511363636363636,0.42045454545454547
|
6 |
Qwen2_5_1_5B_Instruct,0.46834415584415584,0.348538961038961,0.3996561615557665,0.5511363636363636,0.5909090909090909,0.4659090909090909,0.5113636363636364,0.4375,0.375,0.3465909090909091
|
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
2 |
Qwen2-7B-Instruct,0.564935064935065,0.48279220779220783,0.5206435955861558,0.6590909090909091,0.7045454545454546,0.5340909090909091,0.5738636363636364,0.5397727272727273,0.5113636363636364,0.4318181818181818
|
3 |
Meta-Llama-3.1-8B-Instruct,0.4472402597402597,0.43717532467532455,0.44215052105151864,0.5227272727272727,0.4602272727272727,0.4715909090909091,0.4715909090909091,0.4147727272727273,0.3977272727272727,0.39204545454545453
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.49918831168831174,0.45974025974025984,0.4786528859163277,0.5454545454545454,0.5340909090909091,0.5340909090909091,0.5340909090909091,0.5,0.45454545454545453,0.39204545454545453
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.5844155844155844,0.5899350649350649,0.5871623537698057,0.6534090909090909,0.6136363636363636,0.5625,0.5795454545454546,0.5795454545454546,0.5738636363636364,0.5284090909090909
|
6 |
Qwen2_5_32B_Instruct,0.6931818181818182,0.6397727272727273,0.6654072695772988,0.7727272727272727,0.7897727272727273,0.6704545454545454,0.6761363636363636,0.6875,0.6875,0.5681818181818182
|
7 |
Qwen2_5_7B_Instruct,0.599025974025974,0.5034090909090908,0.5470709896292291,0.7102272727272727,0.7215909090909091,0.6136363636363636,0.6022727272727273,0.5738636363636364,0.5511363636363636,0.42045454545454547
|
8 |
Qwen2_5_1_5B_Instruct,0.46834415584415584,0.348538961038961,0.3996561615557665,0.5511363636363636,0.5909090909090909,0.4659090909090909,0.5113636363636364,0.4375,0.375,0.3465909090909091
|
results/cross_lingual/zero_shot/cross_mmlu.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
2 |
Qwen2-7B-Instruct,0.6628571428571428,0.5257142857142858,0.5863736263242921,0.76,0.6666666666666666,0.72,0.5933333333333334,0.7066666666666667,0.6133333333333333,0.58
|
3 |
Meta-Llama-3.1-8B-Instruct,0.5619047619047618,0.5020952380952383,0.5303189947159841,0.66,0.5266666666666666,0.5733333333333334,0.5266666666666666,0.5533333333333333,0.5533333333333333,0.54
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.8019047619047619,0.7386666666666668,0.7689878008073214,0.8533333333333334,0.8533333333333334,0.82,0.7933333333333333,0.8,0.7866666666666666,0.7066666666666667
|
5 |
Qwen2_5_7B_Instruct,0.6733333333333332,0.580952380952381,0.6237408250578389,0.7666666666666667,0.7066666666666667,0.72,0.6666666666666666,0.6866666666666666,0.6266666666666667,0.54
|
6 |
Qwen2_5_1_5B_Instruct,0.5076190476190475,0.3721904761904762,0.42948154099799957,0.6,0.6066666666666667,0.5333333333333333,0.4866666666666667,0.5666666666666667,0.4,0.36
|
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
2 |
Qwen2-7B-Instruct,0.6628571428571428,0.5257142857142858,0.5863736263242921,0.76,0.6666666666666666,0.72,0.5933333333333334,0.7066666666666667,0.6133333333333333,0.58
|
3 |
Meta-Llama-3.1-8B-Instruct,0.5619047619047618,0.5020952380952383,0.5303189947159841,0.66,0.5266666666666666,0.5733333333333334,0.5266666666666666,0.5533333333333333,0.5533333333333333,0.54
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.6114285714285713,0.5478095238095239,0.5778733392299966,0.72,0.6,0.6066666666666667,0.62,0.6466666666666666,0.56,0.5266666666666666
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.7114285714285715,0.7266666666666667,0.7189668873672241,0.7666666666666667,0.72,0.72,0.66,0.72,0.7266666666666667,0.6666666666666666
|
6 |
Qwen2_5_32B_Instruct,0.8019047619047619,0.7386666666666668,0.7689878008073214,0.8533333333333334,0.8533333333333334,0.82,0.7933333333333333,0.8,0.7866666666666666,0.7066666666666667
|
7 |
Qwen2_5_7B_Instruct,0.6733333333333332,0.580952380952381,0.6237408250578389,0.7666666666666667,0.7066666666666667,0.72,0.6666666666666666,0.6866666666666666,0.6266666666666667,0.54
|
8 |
Qwen2_5_1_5B_Instruct,0.5076190476190475,0.3721904761904762,0.42948154099799957,0.6,0.6066666666666667,0.5333333333333333,0.4866666666666667,0.5666666666666667,0.4,0.36
|
results/cross_lingual/zero_shot/cross_xquad.csv
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
2 |
Qwen2-7B-Instruct,0.9418067226890756,0.9046218487394958,0.9228398561109394,0.957983193277311,0.9336134453781513,0.9436974789915966,0.9319327731092437,,,
|
3 |
Meta-Llama-3.1-8B-Instruct,0.9287815126050419,0.8867647058823529,0.9072869161050563,0.9420168067226891,0.9193277310924369,0.9361344537815126,0.9176470588235294,,,
|
|
|
4 |
Qwen2_5_7B_Instruct,0.9460084033613445,0.9178571428571428,0.9317201790045005,0.9554621848739496,0.9487394957983193,0.9445378151260504,0.9352941176470588,,,
|
5 |
Qwen2_5_1_5B_Instruct,0.8939075630252101,0.8308823529411764,0.8612434620121144,0.9100840336134454,0.9,0.8957983193277311,0.8697478991596639,,,
|
6 |
Qwen2-72B-Instruct,0.9613445378151261,0.9516806722689075,0.956488195931227,0.9638655462184874,0.9596638655462185,0.9596638655462185,0.9621848739495799,,,
|
|
|
1 |
Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
|
2 |
Qwen2-7B-Instruct,0.9418067226890756,0.9046218487394958,0.9228398561109394,0.957983193277311,0.9336134453781513,0.9436974789915966,0.9319327731092437,,,
|
3 |
Meta-Llama-3.1-8B-Instruct,0.9287815126050419,0.8867647058823529,0.9072869161050563,0.9420168067226891,0.9193277310924369,0.9361344537815126,0.9176470588235294,,,
|
4 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.9594537815126051,0.944327731092437,0.9518306660058761,0.9663865546218487,0.9546218487394958,0.9605042016806723,0.9563025210084034,,,
|
5 |
Qwen2_5_7B_Instruct,0.9460084033613445,0.9178571428571428,0.9317201790045005,0.9554621848739496,0.9487394957983193,0.9445378151260504,0.9352941176470588,,,
|
6 |
Qwen2_5_1_5B_Instruct,0.8939075630252101,0.8308823529411764,0.8612434620121144,0.9100840336134454,0.9,0.8957983193277311,0.8697478991596639,,,
|
7 |
Qwen2-72B-Instruct,0.9613445378151261,0.9516806722689075,0.956488195931227,0.9638655462184874,0.9596638655462185,0.9596638655462185,0.9621848739495799,,,
|
results/cultural_reasoning/zero_shot/cn_eval.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.8285714285714286
|
3 |
Meta-Llama-3.1-8B-Instruct,0.4857142857142857
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.8476190476190476
|
5 |
Qwen2_5_7B_Instruct,0.8
|
6 |
Qwen2_5_1_5B_Instruct,0.5523809523809524
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.8285714285714286
|
3 |
Meta-Llama-3.1-8B-Instruct,0.4857142857142857
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.5047619047619047
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.5523809523809524
|
6 |
Qwen2_5_32B_Instruct,0.8476190476190476
|
7 |
Qwen2_5_7B_Instruct,0.8
|
8 |
Qwen2_5_1_5B_Instruct,0.5523809523809524
|
results/cultural_reasoning/zero_shot/ph_eval.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.52
|
3 |
Meta-Llama-3.1-8B-Instruct,0.6
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.7
|
5 |
Qwen2_5_7B_Instruct,0.55
|
6 |
Qwen2_5_1_5B_Instruct,0.37
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.52
|
3 |
Meta-Llama-3.1-8B-Instruct,0.6
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.56
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.6
|
6 |
Qwen2_5_32B_Instruct,0.7
|
7 |
Qwen2_5_7B_Instruct,0.55
|
8 |
Qwen2_5_1_5B_Instruct,0.37
|
results/cultural_reasoning/zero_shot/sg_eval.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.6796116504854369
|
3 |
Meta-Llama-3.1-8B-Instruct,0.5728155339805825
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.7184466019417476
|
5 |
Qwen2_5_7B_Instruct,0.6699029126213593
|
6 |
Qwen2_5_1_5B_Instruct,0.5048543689320388
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.6796116504854369
|
3 |
Meta-Llama-3.1-8B-Instruct,0.5728155339805825
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.6601941747572816
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.6893203883495146
|
6 |
Qwen2_5_32B_Instruct,0.7184466019417476
|
7 |
Qwen2_5_7B_Instruct,0.6699029126213593
|
8 |
Qwen2_5_1_5B_Instruct,0.5048543689320388
|
results/cultural_reasoning/zero_shot/sg_eval_v1_cleaned.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.6323529411764706
|
3 |
Meta-Llama-3.1-8B-Instruct,0.5294117647058824
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.6470588235294118
|
5 |
Qwen2_5_7B_Instruct,0.5882352941176471
|
6 |
Qwen2_5_1_5B_Instruct,0.47058823529411764
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.6323529411764706
|
3 |
Meta-Llama-3.1-8B-Instruct,0.5294117647058824
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.6617647058823529
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.6176470588235294
|
6 |
Qwen2_5_32B_Instruct,0.6470588235294118
|
7 |
Qwen2_5_7B_Instruct,0.5882352941176471
|
8 |
Qwen2_5_1_5B_Instruct,0.47058823529411764
|
results/cultural_reasoning/zero_shot/sg_eval_v2_mcq.csv
CHANGED
@@ -2,6 +2,7 @@ Model,Accuracy
|
|
2 |
Qwen2-7B-Instruct,0.7872727272727272
|
3 |
Meta-Llama-3.1-8B-Instruct,0.7854545454545454
|
4 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.7836363636363637
|
|
|
5 |
Qwen2_5_32B_Instruct,0.8436363636363636
|
6 |
Qwen2_5_7B_Instruct,0.78
|
7 |
Qwen2_5_1_5B_Instruct,0.6636363636363637
|
@@ -18,7 +19,6 @@ gemma-2-9b-it,0.8036363636363636
|
|
18 |
Meta-Llama-3-70B-Instruct,0.8381818181818181
|
19 |
Qwen2_5_14B_Instruct,0.8345454545454546
|
20 |
sg_llama3_8192_8b,0.76
|
21 |
-
Meta-Llama-3.1-70B,0.44181818181818183
|
22 |
sg_llama3_70b_inst,0.8436363636363636
|
23 |
cross_openhermes_llama3_8b_2048_inst,0.7781818181818182
|
24 |
gemma-2-2b-it,0.7163636363636363
|
@@ -29,4 +29,3 @@ GPT4o_0513,0.8709090909090909
|
|
29 |
cross_openhermes_llama3_8b_8192_inst,0.78
|
30 |
cross_openhermes_llama3_70b_4096_inst,0.8381818181818181
|
31 |
cross_openhermes_llama3_8b_4096_2_inst,0.7654545454545455
|
32 |
-
Meta-Llama-3.1-8B,0.4381818181818182
|
|
|
2 |
Qwen2-7B-Instruct,0.7872727272727272
|
3 |
Meta-Llama-3.1-8B-Instruct,0.7854545454545454
|
4 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.7836363636363637
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.8327272727272728
|
6 |
Qwen2_5_32B_Instruct,0.8436363636363636
|
7 |
Qwen2_5_7B_Instruct,0.78
|
8 |
Qwen2_5_1_5B_Instruct,0.6636363636363637
|
|
|
19 |
Meta-Llama-3-70B-Instruct,0.8381818181818181
|
20 |
Qwen2_5_14B_Instruct,0.8345454545454546
|
21 |
sg_llama3_8192_8b,0.76
|
|
|
22 |
sg_llama3_70b_inst,0.8436363636363636
|
23 |
cross_openhermes_llama3_8b_2048_inst,0.7781818181818182
|
24 |
gemma-2-2b-it,0.7163636363636363
|
|
|
29 |
cross_openhermes_llama3_8b_8192_inst,0.78
|
30 |
cross_openhermes_llama3_70b_4096_inst,0.8381818181818181
|
31 |
cross_openhermes_llama3_8b_4096_2_inst,0.7654545454545455
|
|
results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv
CHANGED
@@ -2,6 +2,7 @@ Model,Accuracy
|
|
2 |
Qwen2-7B-Instruct,56.559999999999995
|
3 |
Meta-Llama-3.1-8B-Instruct,49.2
|
4 |
llama3-8b-cpt-sea-lionv2.1-instruct,50.03999999999999
|
|
|
5 |
Qwen2_5_32B_Instruct,53.2
|
6 |
Qwen2_5_7B_Instruct,50.279999999999994
|
7 |
Qwen2_5_1_5B_Instruct,44.480000000000004
|
|
|
2 |
Qwen2-7B-Instruct,56.559999999999995
|
3 |
Meta-Llama-3.1-8B-Instruct,49.2
|
4 |
llama3-8b-cpt-sea-lionv2.1-instruct,50.03999999999999
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,49.2
|
6 |
Qwen2_5_32B_Instruct,53.2
|
7 |
Qwen2_5_7B_Instruct,50.279999999999994
|
8 |
Qwen2_5_1_5B_Instruct,44.480000000000004
|
results/cultural_reasoning/zero_shot/us_eval.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.7289719626168224
|
3 |
Meta-Llama-3.1-8B-Instruct,0.7289719626168224
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.8411214953271028
|
5 |
Qwen2_5_7B_Instruct,0.7663551401869159
|
6 |
Qwen2_5_1_5B_Instruct,0.5981308411214953
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.7289719626168224
|
3 |
Meta-Llama-3.1-8B-Instruct,0.7289719626168224
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.7102803738317757
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.794392523364486
|
6 |
Qwen2_5_32B_Instruct,0.8411214953271028
|
7 |
Qwen2_5_7B_Instruct,0.7663551401869159
|
8 |
Qwen2_5_1_5B_Instruct,0.5981308411214953
|
results/dialogue/zero_shot/dialogsum.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Average,ROUGE-1,ROUGE-2,ROUGE-L
|
2 |
Qwen2-7B-Instruct,0.2092663759873139,0.30486100228371826,0.09413830506038247,0.22879982061784096
|
3 |
Meta-Llama-3.1-8B-Instruct,0.24990743661648132,0.3515557454075673,0.12563120411564133,0.2725353603262354
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.2393912015484827,0.3451081398022419,0.11160543395371676,0.26146003088948944
|
5 |
Qwen2_5_7B_Instruct,0.2502928721533066,0.35566069744050016,0.12210269253668227,0.27311522648273734
|
6 |
Qwen2_5_1_5B_Instruct,0.20263242988485167,0.30002072253966694,0.08416670238558713,0.22370986472930096
|
|
|
1 |
Model,Average,ROUGE-1,ROUGE-2,ROUGE-L
|
2 |
Qwen2-7B-Instruct,0.2092663759873139,0.30486100228371826,0.09413830506038247,0.22879982061784096
|
3 |
Meta-Llama-3.1-8B-Instruct,0.24990743661648132,0.3515557454075673,0.12563120411564133,0.2725353603262354
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.2538187048721643,0.3556160487203703,0.12835761178300684,0.27748245411311584
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.2604699718482717,0.3565321533422258,0.1435636503831484,0.2813141118194409
|
6 |
Qwen2_5_32B_Instruct,0.2393912015484827,0.3451081398022419,0.11160543395371676,0.26146003088948944
|
7 |
Qwen2_5_7B_Instruct,0.2502928721533066,0.35566069744050016,0.12210269253668227,0.27311522648273734
|
8 |
Qwen2_5_1_5B_Instruct,0.20263242988485167,0.30002072253966694,0.08416670238558713,0.22370986472930096
|
results/dialogue/zero_shot/dream.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.9353258206761391
|
3 |
Meta-Llama-3.1-8B-Instruct,0.9039686428221461
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.9559039686428221
|
5 |
Qwen2_5_7B_Instruct,0.9348358647721705
|
6 |
Qwen2_5_1_5B_Instruct,0.8314551690347869
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.9353258206761391
|
3 |
Meta-Llama-3.1-8B-Instruct,0.9039686428221461
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.8838804507594317
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.9338559529642332
|
6 |
Qwen2_5_32B_Instruct,0.9559039686428221
|
7 |
Qwen2_5_7B_Instruct,0.9348358647721705
|
8 |
Qwen2_5_1_5B_Instruct,0.8314551690347869
|
results/dialogue/zero_shot/samsum.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Average,ROUGE-1,ROUGE-2,ROUGE-L
|
2 |
Qwen2-7B-Instruct,0.25668781132950264,0.36375948458827556,0.12939804942125302,0.27690589997897935
|
3 |
Meta-Llama-3.1-8B-Instruct,0.2891505262763006,0.4001228010515775,0.15677431231732958,0.31055446545999466
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.2844232627209405,0.3986263552639068,0.14766658533002341,0.3069768475688912
|
5 |
Qwen2_5_7B_Instruct,0.2987576845890178,0.4163299367235864,0.1599063413842216,0.32003677565924527
|
6 |
Qwen2_5_1_5B_Instruct,0.2333120091694482,0.34339111721032756,0.10195887716459845,0.25458603313341865
|
|
|
1 |
Model,Average,ROUGE-1,ROUGE-2,ROUGE-L
|
2 |
Qwen2-7B-Instruct,0.25668781132950264,0.36375948458827556,0.12939804942125302,0.27690589997897935
|
3 |
Meta-Llama-3.1-8B-Instruct,0.2891505262763006,0.4001228010515775,0.15677431231732958,0.31055446545999466
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.3049906423685726,0.42002411403511675,0.16877419641049218,0.32617361666010874
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.31654854892297785,0.42724204522960013,0.18677459831281892,0.3356290032265146
|
6 |
Qwen2_5_32B_Instruct,0.2844232627209405,0.3986263552639068,0.14766658533002341,0.3069768475688912
|
7 |
Qwen2_5_7B_Instruct,0.2987576845890178,0.4163299367235864,0.1599063413842216,0.32003677565924527
|
8 |
Qwen2_5_1_5B_Instruct,0.2333120091694482,0.34339111721032756,0.10195887716459845,0.25458603313341865
|
results/emotion/zero_shot/ind_emotion.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.6545454545454545
|
3 |
Meta-Llama-3.1-8B-Instruct,0.6545454545454545
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.6909090909090909
|
5 |
Qwen2_5_7B_Instruct,0.6636363636363637
|
6 |
Qwen2_5_1_5B_Instruct,0.5795454545454546
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.6545454545454545
|
3 |
Meta-Llama-3.1-8B-Instruct,0.6545454545454545
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.6681818181818182
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.759090909090909
|
6 |
Qwen2_5_32B_Instruct,0.6909090909090909
|
7 |
Qwen2_5_7B_Instruct,0.6636363636363637
|
8 |
Qwen2_5_1_5B_Instruct,0.5795454545454546
|
results/emotion/zero_shot/sst2.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.9346330275229358
|
3 |
Meta-Llama-3.1-8B-Instruct,0.8646788990825688
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.9472477064220184
|
5 |
Qwen2_5_7B_Instruct,0.9254587155963303
|
6 |
Qwen2_5_1_5B_Instruct,0.9231651376146789
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.9346330275229358
|
3 |
Meta-Llama-3.1-8B-Instruct,0.8646788990825688
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.9174311926605505
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.9403669724770642
|
6 |
Qwen2_5_32B_Instruct,0.9472477064220184
|
7 |
Qwen2_5_7B_Instruct,0.9254587155963303
|
8 |
Qwen2_5_1_5B_Instruct,0.9231651376146789
|
results/flores_translation/zero_shot/ind2eng.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,BLEU
|
2 |
Qwen2-7B-Instruct,0.29408553325533265
|
3 |
Meta-Llama-3.1-8B-Instruct,0.3765752579792989
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.3923422946746861
|
5 |
Qwen2_5_7B_Instruct,0.36472669481333536
|
6 |
Qwen2_5_1_5B_Instruct,0.2624938515155373
|
|
|
1 |
Model,BLEU
|
2 |
Qwen2-7B-Instruct,0.29408553325533265
|
3 |
Meta-Llama-3.1-8B-Instruct,0.3765752579792989
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.38890283520513874
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.4036192393932773
|
6 |
Qwen2_5_32B_Instruct,0.3923422946746861
|
7 |
Qwen2_5_7B_Instruct,0.36472669481333536
|
8 |
Qwen2_5_1_5B_Instruct,0.2624938515155373
|
results/flores_translation/zero_shot/vie2eng.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,BLEU
|
2 |
Qwen2-7B-Instruct,0.24106736560355876
|
3 |
Meta-Llama-3.1-8B-Instruct,0.31019605539004524
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.33791529833420336
|
5 |
Qwen2_5_7B_Instruct,0.3027564749728372
|
6 |
Qwen2_5_1_5B_Instruct,0.21935649300365245
|
|
|
1 |
Model,BLEU
|
2 |
Qwen2-7B-Instruct,0.24106736560355876
|
3 |
Meta-Llama-3.1-8B-Instruct,0.31019605539004524
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.32831099820283755
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.33299539463896577
|
6 |
Qwen2_5_32B_Instruct,0.33791529833420336
|
7 |
Qwen2_5_7B_Instruct,0.3027564749728372
|
8 |
Qwen2_5_1_5B_Instruct,0.21935649300365245
|
results/flores_translation/zero_shot/zho2eng.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,BLEU
|
2 |
Qwen2-7B-Instruct,0.2113761361724575
|
3 |
Meta-Llama-3.1-8B-Instruct,0.23889886925287113
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.26924811164378015
|
5 |
Qwen2_5_7B_Instruct,0.2437311220019033
|
6 |
Qwen2_5_1_5B_Instruct,0.18420680441018222
|
|
|
1 |
Model,BLEU
|
2 |
Qwen2-7B-Instruct,0.2113761361724575
|
3 |
Meta-Llama-3.1-8B-Instruct,0.23889886925287113
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.2378480031503388
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.26203143663551526
|
6 |
Qwen2_5_32B_Instruct,0.26924811164378015
|
7 |
Qwen2_5_7B_Instruct,0.2437311220019033
|
8 |
Qwen2_5_1_5B_Instruct,0.18420680441018222
|
results/flores_translation/zero_shot/zsm2eng.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,BLEU
|
2 |
Qwen2-7B-Instruct,0.28031997065822994
|
3 |
Meta-Llama-3.1-8B-Instruct,0.3700921225177551
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.40310877536446654
|
5 |
Qwen2_5_7B_Instruct,0.3466422765302921
|
6 |
Qwen2_5_1_5B_Instruct,0.22890805100949677
|
|
|
1 |
Model,BLEU
|
2 |
Qwen2-7B-Instruct,0.28031997065822994
|
3 |
Meta-Llama-3.1-8B-Instruct,0.3700921225177551
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.39042133634273773
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.4085009869473552
|
6 |
Qwen2_5_32B_Instruct,0.40310877536446654
|
7 |
Qwen2_5_7B_Instruct,0.3466422765302921
|
8 |
Qwen2_5_1_5B_Instruct,0.22890805100949677
|
results/fundamental_nlp_tasks/zero_shot/c3.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.9244577412116679
|
3 |
Meta-Llama-3.1-8B-Instruct,0.8672400897531788
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.9603590127150337
|
5 |
Qwen2_5_7B_Instruct,0.9121166791323859
|
6 |
Qwen2_5_1_5B_Instruct,0.793941660433807
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.9244577412116679
|
3 |
Meta-Llama-3.1-8B-Instruct,0.8672400897531788
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.8676140613313388
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.9083769633507853
|
6 |
Qwen2_5_32B_Instruct,0.9603590127150337
|
7 |
Qwen2_5_7B_Instruct,0.9121166791323859
|
8 |
Qwen2_5_1_5B_Instruct,0.793941660433807
|
results/fundamental_nlp_tasks/zero_shot/cola.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.7871524448705657
|
3 |
Meta-Llama-3.1-8B-Instruct,0.6673058485139022
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.8427612655800575
|
5 |
Qwen2_5_7B_Instruct,0.7909875359539789
|
6 |
Qwen2_5_1_5B_Instruct,0.7497603068072867
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.7871524448705657
|
3 |
Meta-Llama-3.1-8B-Instruct,0.6673058485139022
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.613614573346117
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.7698945349952061
|
6 |
Qwen2_5_32B_Instruct,0.8427612655800575
|
7 |
Qwen2_5_7B_Instruct,0.7909875359539789
|
8 |
Qwen2_5_1_5B_Instruct,0.7497603068072867
|
results/fundamental_nlp_tasks/zero_shot/mnli.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.7295
|
3 |
Meta-Llama-3.1-8B-Instruct,0.4825
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.8715
|
5 |
Qwen2_5_7B_Instruct,0.8105
|
6 |
Qwen2_5_1_5B_Instruct,0.6045
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.7295
|
3 |
Meta-Llama-3.1-8B-Instruct,0.4825
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.5775
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.682
|
6 |
Qwen2_5_32B_Instruct,0.8715
|
7 |
Qwen2_5_7B_Instruct,0.8105
|
8 |
Qwen2_5_1_5B_Instruct,0.6045
|
results/fundamental_nlp_tasks/zero_shot/mrpc.csv
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.7867647058823529
|
3 |
Meta-Llama-3.1-8B-Instruct,0.6740196078431373
|
|
|
4 |
Qwen2_5_32B_Instruct,0.7745098039215687
|
5 |
Qwen2_5_7B_Instruct,0.7058823529411765
|
6 |
Qwen2_5_1_5B_Instruct,0.6838235294117647
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.7867647058823529
|
3 |
Meta-Llama-3.1-8B-Instruct,0.6740196078431373
|
4 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.7279411764705882
|
5 |
Qwen2_5_32B_Instruct,0.7745098039215687
|
6 |
Qwen2_5_7B_Instruct,0.7058823529411765
|
7 |
Qwen2_5_1_5B_Instruct,0.6838235294117647
|
results/fundamental_nlp_tasks/zero_shot/ocnli.csv
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.6542372881355932
|
3 |
Meta-Llama-3.1-8B-Instruct,0.40983050847457625
|
|
|
4 |
Qwen2_5_32B_Instruct,0.7742372881355932
|
5 |
Qwen2_5_7B_Instruct,0.6732203389830509
|
6 |
Qwen2_5_1_5B_Instruct,0.5135593220338983
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.6542372881355932
|
3 |
Meta-Llama-3.1-8B-Instruct,0.40983050847457625
|
4 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.5732203389830508
|
5 |
Qwen2_5_32B_Instruct,0.7742372881355932
|
6 |
Qwen2_5_7B_Instruct,0.6732203389830509
|
7 |
Qwen2_5_1_5B_Instruct,0.5135593220338983
|
results/fundamental_nlp_tasks/zero_shot/qnli.csv
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.8154859967051071
|
3 |
Meta-Llama-3.1-8B-Instruct,0.5777045579352005
|
|
|
4 |
Qwen2_5_32B_Instruct,0.9062786015010068
|
5 |
Qwen2_5_7B_Instruct,0.8652754896576972
|
6 |
Qwen2_5_1_5B_Instruct,0.6148636280431997
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.8154859967051071
|
3 |
Meta-Llama-3.1-8B-Instruct,0.5777045579352005
|
4 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.8943803770821893
|
5 |
Qwen2_5_32B_Instruct,0.9062786015010068
|
6 |
Qwen2_5_7B_Instruct,0.8652754896576972
|
7 |
Qwen2_5_1_5B_Instruct,0.6148636280431997
|
results/fundamental_nlp_tasks/zero_shot/qqp.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.781
|
3 |
Meta-Llama-3.1-8B-Instruct,0.5645
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.8315
|
5 |
Qwen2_5_7B_Instruct,0.76
|
6 |
Qwen2_5_1_5B_Instruct,0.731
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.781
|
3 |
Meta-Llama-3.1-8B-Instruct,0.5645
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.625
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.7875
|
6 |
Qwen2_5_32B_Instruct,0.8315
|
7 |
Qwen2_5_7B_Instruct,0.76
|
8 |
Qwen2_5_1_5B_Instruct,0.731
|
results/fundamental_nlp_tasks/zero_shot/rte.csv
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.8231046931407943
|
3 |
Meta-Llama-3.1-8B-Instruct,0.6750902527075813
|
|
|
4 |
Qwen2_5_32B_Instruct,0.9097472924187726
|
5 |
Qwen2_5_7B_Instruct,0.8592057761732852
|
6 |
Qwen2_5_1_5B_Instruct,0.703971119133574
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.8231046931407943
|
3 |
Meta-Llama-3.1-8B-Instruct,0.6750902527075813
|
4 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.7256317689530686
|
5 |
Qwen2_5_32B_Instruct,0.9097472924187726
|
6 |
Qwen2_5_7B_Instruct,0.8592057761732852
|
7 |
Qwen2_5_1_5B_Instruct,0.703971119133574
|
results/fundamental_nlp_tasks/zero_shot/wnli.csv
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.7183098591549296
|
3 |
Meta-Llama-3.1-8B-Instruct,0.49295774647887325
|
|
|
4 |
Qwen2_5_32B_Instruct,0.8732394366197183
|
5 |
Qwen2_5_7B_Instruct,0.7605633802816901
|
6 |
Qwen2_5_1_5B_Instruct,0.4647887323943662
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.7183098591549296
|
3 |
Meta-Llama-3.1-8B-Instruct,0.49295774647887325
|
4 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.647887323943662
|
5 |
Qwen2_5_32B_Instruct,0.8732394366197183
|
6 |
Qwen2_5_7B_Instruct,0.7605633802816901
|
7 |
Qwen2_5_1_5B_Instruct,0.4647887323943662
|
results/general_reasoning/zero_shot/c_eval.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.7615193026151931
|
3 |
Meta-Llama-3.1-8B-Instruct,0.5149439601494396
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.8262764632627646
|
5 |
Qwen2_5_7B_Instruct,0.7459526774595268
|
6 |
Qwen2_5_1_5B_Instruct,0.5971357409713575
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.7615193026151931
|
3 |
Meta-Llama-3.1-8B-Instruct,0.5149439601494396
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.50186799501868
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.5560398505603985
|
6 |
Qwen2_5_32B_Instruct,0.8262764632627646
|
7 |
Qwen2_5_7B_Instruct,0.7459526774595268
|
8 |
Qwen2_5_1_5B_Instruct,0.5971357409713575
|
results/general_reasoning/zero_shot/cmmlu.csv
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.7727508202383008
|
3 |
Meta-Llama-3.1-8B-Instruct,0.5246934898981178
|
|
|
4 |
Qwen2_5_32B_Instruct,0.8273182524607149
|
5 |
Qwen2_5_7B_Instruct,0.7486617164565705
|
6 |
Qwen2_5_1_5B_Instruct,0.5975651873596961
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.7727508202383008
|
3 |
Meta-Llama-3.1-8B-Instruct,0.5246934898981178
|
4 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.5615610429977551
|
5 |
Qwen2_5_32B_Instruct,0.8273182524607149
|
6 |
Qwen2_5_7B_Instruct,0.7486617164565705
|
7 |
Qwen2_5_1_5B_Instruct,0.5975651873596961
|
results/general_reasoning/zero_shot/indommlu.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.5385539755657921
|
3 |
Meta-Llama-3.1-8B-Instruct,0.5252687095266707
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.6314840777087923
|
5 |
Qwen2_5_7B_Instruct,0.5600507376994459
|
6 |
Qwen2_5_1_5B_Instruct,0.4295346818879765
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.5385539755657921
|
3 |
Meta-Llama-3.1-8B-Instruct,0.5252687095266707
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.5269377127979171
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.6051805861539489
|
6 |
Qwen2_5_32B_Instruct,0.6314840777087923
|
7 |
Qwen2_5_7B_Instruct,0.5600507376994459
|
8 |
Qwen2_5_1_5B_Instruct,0.4295346818879765
|
results/general_reasoning/zero_shot/mmlu.csv
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.672506256703611
|
3 |
Meta-Llama-3.1-8B-Instruct,0.6037182695745441
|
|
|
4 |
Qwen2_5_32B_Instruct,0.7996424740793707
|
5 |
Qwen2_5_7B_Instruct,0.6935287808366106
|
6 |
Qwen2_5_1_5B_Instruct,0.5646764390418305
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.672506256703611
|
3 |
Meta-Llama-3.1-8B-Instruct,0.6037182695745441
|
4 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.7028244547729711
|
5 |
Qwen2_5_32B_Instruct,0.7996424740793707
|
6 |
Qwen2_5_7B_Instruct,0.6935287808366106
|
7 |
Qwen2_5_1_5B_Instruct,0.5646764390418305
|
results/general_reasoning/zero_shot/zbench.csv
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.7272727272727273
|
3 |
Meta-Llama-3.1-8B-Instruct,0.42424242424242425
|
|
|
|
|
4 |
Qwen2_5_32B_Instruct,0.6060606060606061
|
5 |
Qwen2_5_7B_Instruct,0.6666666666666666
|
6 |
Qwen2_5_1_5B_Instruct,0.42424242424242425
|
|
|
1 |
Model,Accuracy
|
2 |
Qwen2-7B-Instruct,0.7272727272727273
|
3 |
Meta-Llama-3.1-8B-Instruct,0.42424242424242425
|
4 |
+
llama3-8b-cpt-sea-lionv2.1-instruct,0.2727272727272727
|
5 |
+
Gemma-2-9b-it-sg-ultrachat-sft,0.36363636363636365
|
6 |
Qwen2_5_32B_Instruct,0.6060606060606061
|
7 |
Qwen2_5_7B_Instruct,0.6666666666666666
|
8 |
Qwen2_5_1_5B_Instruct,0.42424242424242425
|