zhuohan-7 commited on
Commit
4c054d2
1 Parent(s): 41ef209

Upload folder using huggingface_hub

Browse files
Files changed (32) hide show
  1. results/cross_lingual/zero_shot/cross_logiqa.csv +1 -0
  2. results/cross_lingual/zero_shot/cross_mmlu.csv +1 -0
  3. results/cross_lingual/zero_shot/cross_xquad.csv +1 -0
  4. results/cultural_reasoning/zero_shot/cn_eval.csv +1 -0
  5. results/cultural_reasoning/zero_shot/ph_eval.csv +1 -0
  6. results/cultural_reasoning/zero_shot/sg_eval.csv +1 -0
  7. results/cultural_reasoning/zero_shot/sg_eval_v1_cleaned.csv +2 -1
  8. results/cultural_reasoning/zero_shot/sg_eval_v2_mcq.csv +2 -0
  9. results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv +3 -0
  10. results/cultural_reasoning/zero_shot/us_eval.csv +1 -0
  11. results/dialogue/zero_shot/dialogsum.csv +1 -0
  12. results/dialogue/zero_shot/dream.csv +1 -0
  13. results/dialogue/zero_shot/samsum.csv +1 -0
  14. results/emotion/zero_shot/ind_emotion.csv +1 -0
  15. results/emotion/zero_shot/sst2.csv +1 -0
  16. results/flores_translation/zero_shot/ind2eng.csv +1 -0
  17. results/flores_translation/zero_shot/vie2eng.csv +1 -0
  18. results/flores_translation/zero_shot/zho2eng.csv +1 -0
  19. results/flores_translation/zero_shot/zsm2eng.csv +1 -0
  20. results/fundamental_nlp_tasks/zero_shot/c3.csv +1 -0
  21. results/fundamental_nlp_tasks/zero_shot/cola.csv +1 -0
  22. results/fundamental_nlp_tasks/zero_shot/mnli.csv +1 -0
  23. results/fundamental_nlp_tasks/zero_shot/mrpc.csv +1 -0
  24. results/fundamental_nlp_tasks/zero_shot/ocnli.csv +1 -0
  25. results/fundamental_nlp_tasks/zero_shot/qnli.csv +1 -0
  26. results/fundamental_nlp_tasks/zero_shot/rte.csv +1 -0
  27. results/fundamental_nlp_tasks/zero_shot/wnli.csv +1 -0
  28. results/general_reasoning/zero_shot/c_eval.csv +1 -0
  29. results/general_reasoning/zero_shot/cmmlu.csv +1 -0
  30. results/general_reasoning/zero_shot/indommlu.csv +1 -0
  31. results/general_reasoning/zero_shot/mmlu.csv +1 -4
  32. results/general_reasoning/zero_shot/zbench.csv +1 -0
results/cross_lingual/zero_shot/cross_logiqa.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.4496753246753246,0.49188311688311687,0.46
9
  Meta-Llama-3-8B-Instruct,0.4610389610389611,0.45097402597402597,0.4559509553669637,0.5965909090909091,0.48295454545454547,0.5,0.4602272727272727,0.42045454545454547,0.4034090909090909,0.36363636363636365
10
  Meta-Llama-3.1-70B-Instruct,0.6566558441558442,0.598051948051948,0.6259852839118454,0.7443181818181818,0.7215909090909091,0.6647727272727273,0.6534090909090909,0.6193181818181818,0.625,0.5681818181818182
11
  Qwen2_5_3B_Instruct,0.4878246753246754,0.3594155844155844,0.41388918606681485,0.6079545454545454,0.6420454545454546,0.45454545454545453,0.4602272727272727,0.48295454545454547,0.42045454545454547,0.3465909090909091
 
12
  SeaLLMs-v3-7B-Chat,0.5551948051948051,0.5142857142857142,0.5339578453833284,0.6022727272727273,0.6647727272727273,0.5738636363636364,0.5454545454545454,0.5170454545454546,0.5,0.48295454545454547
13
  Qwen2_5_72B_Instruct,0.7248376623376623,0.6852272727272727,0.7044761161663122,0.8011363636363636,0.7954545454545454,0.7272727272727273,0.6704545454545454,0.7159090909090909,0.7159090909090909,0.6477272727272727
14
  gemma-2-9b-it,0.6185064935064934,0.5592532467532466,0.5873893507784849,0.6647727272727273,0.6761363636363636,0.5625,0.6193181818181818,0.5795454545454546,0.6420454545454546,0.5852272727272727
 
9
  Meta-Llama-3-8B-Instruct,0.4610389610389611,0.45097402597402597,0.4559509553669637,0.5965909090909091,0.48295454545454547,0.5,0.4602272727272727,0.42045454545454547,0.4034090909090909,0.36363636363636365
10
  Meta-Llama-3.1-70B-Instruct,0.6566558441558442,0.598051948051948,0.6259852839118454,0.7443181818181818,0.7215909090909091,0.6647727272727273,0.6534090909090909,0.6193181818181818,0.625,0.5681818181818182
11
  Qwen2_5_3B_Instruct,0.4878246753246754,0.3594155844155844,0.41388918606681485,0.6079545454545454,0.6420454545454546,0.45454545454545453,0.4602272727272727,0.48295454545454547,0.42045454545454547,0.3465909090909091
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.6542207792207791,0.5862012987012987,0.618346089144296,0.7329545454545454,0.7045454545454546,0.6477272727272727,0.6420454545454546,0.625,0.6647727272727273,0.5625
13
  SeaLLMs-v3-7B-Chat,0.5551948051948051,0.5142857142857142,0.5339578453833284,0.6022727272727273,0.6647727272727273,0.5738636363636364,0.5454545454545454,0.5170454545454546,0.5,0.48295454545454547
14
  Qwen2_5_72B_Instruct,0.7248376623376623,0.6852272727272727,0.7044761161663122,0.8011363636363636,0.7954545454545454,0.7272727272727273,0.6704545454545454,0.7159090909090909,0.7159090909090909,0.6477272727272727
15
  gemma-2-9b-it,0.6185064935064934,0.5592532467532466,0.5873893507784849,0.6647727272727273,0.6761363636363636,0.5625,0.6193181818181818,0.5795454545454546,0.6420454545454546,0.5852272727272727
results/cross_lingual/zero_shot/cross_mmlu.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.5523809523809523,0.5384761904761904,0.545
9
  Meta-Llama-3-8B-Instruct,0.5733333333333334,0.4742857142857144,0.5191272726777197,0.7133333333333334,0.5866666666666667,0.5733333333333334,0.5866666666666667,0.5066666666666667,0.5333333333333333,0.5133333333333333
10
  Meta-Llama-3.1-70B-Instruct,0.7638095238095238,0.7716190476190474,0.7676944251955988,0.8,0.74,0.7666666666666667,0.7666666666666667,0.76,0.7666666666666667,0.7466666666666667
11
  Qwen2_5_3B_Instruct,0.5857142857142856,0.48952380952380964,0.533316462053399,0.6933333333333334,0.6666666666666666,0.64,0.5266666666666666,0.6333333333333333,0.5466666666666666,0.3933333333333333
 
12
  SeaLLMs-v3-7B-Chat,0.6628571428571429,0.6135238095238095,0.6372370860992635,0.74,0.6933333333333334,0.6933333333333334,0.6466666666666666,0.68,0.6,0.5866666666666667
13
  Qwen2_5_72B_Instruct,0.8123809523809525,0.8140952380952383,0.8132371917701643,0.8533333333333334,0.8333333333333334,0.84,0.7933333333333333,0.8066666666666666,0.7733333333333333,0.7866666666666666
14
  gemma-2-9b-it,0.7161904761904762,0.7163809523809525,0.7162857015727578,0.7733333333333333,0.74,0.7066666666666667,0.64,0.7266666666666667,0.6933333333333334,0.7333333333333333
 
9
  Meta-Llama-3-8B-Instruct,0.5733333333333334,0.4742857142857144,0.5191272726777197,0.7133333333333334,0.5866666666666667,0.5733333333333334,0.5866666666666667,0.5066666666666667,0.5333333333333333,0.5133333333333333
10
  Meta-Llama-3.1-70B-Instruct,0.7638095238095238,0.7716190476190474,0.7676944251955988,0.8,0.74,0.7666666666666667,0.7666666666666667,0.76,0.7666666666666667,0.7466666666666667
11
  Qwen2_5_3B_Instruct,0.5857142857142856,0.48952380952380964,0.533316462053399,0.6933333333333334,0.6666666666666666,0.64,0.5266666666666666,0.6333333333333333,0.5466666666666666,0.3933333333333333
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.7466666666666667,0.7828571428571428,0.7643337483933655,0.8,0.72,0.7333333333333333,0.72,0.7866666666666666,0.7466666666666667,0.72
13
  SeaLLMs-v3-7B-Chat,0.6628571428571429,0.6135238095238095,0.6372370860992635,0.74,0.6933333333333334,0.6933333333333334,0.6466666666666666,0.68,0.6,0.5866666666666667
14
  Qwen2_5_72B_Instruct,0.8123809523809525,0.8140952380952383,0.8132371917701643,0.8533333333333334,0.8333333333333334,0.84,0.7933333333333333,0.8066666666666666,0.7733333333333333,0.7866666666666666
15
  gemma-2-9b-it,0.7161904761904762,0.7163809523809525,0.7162857015727578,0.7733333333333333,0.74,0.7066666666666667,0.64,0.7266666666666667,0.6933333333333334,0.7333333333333333
results/cross_lingual/zero_shot/cross_xquad.csv CHANGED
@@ -8,6 +8,7 @@ cross_openhermes_llama3_8b_4096_inst,0.9052521008403361,0.8705882352941177,0.887
8
  Meta-Llama-3-8B-Instruct,0.9210084033613445,0.880672268907563,0.9003888121913395,0.9411764705882353,0.9033613445378151,0.9260504201680673,0.9134453781512605,,,
9
  Meta-Llama-3.1-70B-Instruct,0.9615546218487395,0.9512605042016806,0.9563798632627071,0.9647058823529412,0.9512605042016806,0.9647058823529412,0.965546218487395,,,
10
  Qwen2_5_3B_Instruct,0.9378151260504202,0.8924369747899159,0.9145635113049859,0.9504201680672268,0.9327731092436975,0.9378151260504202,0.9302521008403362,,,
 
11
  SeaLLMs-v3-7B-Chat,0.9403361344537815,0.917016806722689,0.9285300818164836,0.9537815126050421,0.9378151260504202,0.9394957983193277,0.9302521008403362,,,
12
  Qwen2_5_72B_Instruct,0.9682773109243697,0.9632352941176471,0.9657497216354985,0.9714285714285714,0.9638655462184874,0.9680672268907563,0.9697478991596639,,,
13
  gemma-2-9b-it,0.9567226890756303,0.9350840336134454,0.9457796088507574,0.9663865546218487,0.9411764705882353,0.9588235294117647,0.9605042016806723,,,
 
8
  Meta-Llama-3-8B-Instruct,0.9210084033613445,0.880672268907563,0.9003888121913395,0.9411764705882353,0.9033613445378151,0.9260504201680673,0.9134453781512605,,,
9
  Meta-Llama-3.1-70B-Instruct,0.9615546218487395,0.9512605042016806,0.9563798632627071,0.9647058823529412,0.9512605042016806,0.9647058823529412,0.965546218487395,,,
10
  Qwen2_5_3B_Instruct,0.9378151260504202,0.8924369747899159,0.9145635113049859,0.9504201680672268,0.9327731092436975,0.9378151260504202,0.9302521008403362,,,
11
+ cross_openhermes_llama3_70b_4096_inst_2,0.959873949579832,0.9579831932773109,0.9589276393593623,0.965546218487395,0.9554621848739496,0.9621848739495799,0.9563025210084034,,,
12
  SeaLLMs-v3-7B-Chat,0.9403361344537815,0.917016806722689,0.9285300818164836,0.9537815126050421,0.9378151260504202,0.9394957983193277,0.9302521008403362,,,
13
  Qwen2_5_72B_Instruct,0.9682773109243697,0.9632352941176471,0.9657497216354985,0.9714285714285714,0.9638655462184874,0.9680672268907563,0.9697478991596639,,,
14
  gemma-2-9b-it,0.9567226890756303,0.9350840336134454,0.9457796088507574,0.9663865546218487,0.9411764705882353,0.9588235294117647,0.9605042016806723,,,
results/cultural_reasoning/zero_shot/cn_eval.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.47619047619047616
9
  Meta-Llama-3-8B-Instruct,0.4666666666666667
10
  Meta-Llama-3.1-70B-Instruct,0.5428571428571428
11
  Qwen2_5_3B_Instruct,0.7142857142857143
 
12
  SeaLLMs-v3-7B-Chat,0.819047619047619
13
  Qwen2_5_72B_Instruct,0.8761904761904762
14
  gemma-2-9b-it,0.580952380952381
 
9
  Meta-Llama-3-8B-Instruct,0.4666666666666667
10
  Meta-Llama-3.1-70B-Instruct,0.5428571428571428
11
  Qwen2_5_3B_Instruct,0.7142857142857143
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.638095238095238
13
  SeaLLMs-v3-7B-Chat,0.819047619047619
14
  Qwen2_5_72B_Instruct,0.8761904761904762
15
  gemma-2-9b-it,0.580952380952381
results/cultural_reasoning/zero_shot/ph_eval.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.47
9
  Meta-Llama-3-8B-Instruct,0.58
10
  Meta-Llama-3.1-70B-Instruct,0.68
11
  Qwen2_5_3B_Instruct,0.4
 
12
  SeaLLMs-v3-7B-Chat,0.47
13
  Qwen2_5_72B_Instruct,0.72
14
  gemma-2-9b-it,0.58
 
9
  Meta-Llama-3-8B-Instruct,0.58
10
  Meta-Llama-3.1-70B-Instruct,0.68
11
  Qwen2_5_3B_Instruct,0.4
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.66
13
  SeaLLMs-v3-7B-Chat,0.47
14
  Qwen2_5_72B_Instruct,0.72
15
  gemma-2-9b-it,0.58
results/cultural_reasoning/zero_shot/sg_eval.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.6019417475728155
9
  Meta-Llama-3-8B-Instruct,0.6504854368932039
10
  Meta-Llama-3.1-70B-Instruct,0.7184466019417476
11
  Qwen2_5_3B_Instruct,0.6310679611650486
 
12
  SeaLLMs-v3-7B-Chat,0.7184466019417476
13
  Qwen2_5_72B_Instruct,0.7669902912621359
14
  gemma-2-9b-it,0.6699029126213593
 
9
  Meta-Llama-3-8B-Instruct,0.6504854368932039
10
  Meta-Llama-3.1-70B-Instruct,0.7184466019417476
11
  Qwen2_5_3B_Instruct,0.6310679611650486
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.7087378640776699
13
  SeaLLMs-v3-7B-Chat,0.7184466019417476
14
  Qwen2_5_72B_Instruct,0.7669902912621359
15
  gemma-2-9b-it,0.6699029126213593
results/cultural_reasoning/zero_shot/sg_eval_v1_cleaned.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.6029411764705882
9
  Meta-Llama-3-8B-Instruct,0.5882352941176471
10
  Meta-Llama-3.1-70B-Instruct,0.6617647058823529
11
  Qwen2_5_3B_Instruct,0.5882352941176471
 
12
  SeaLLMs-v3-7B-Chat,0.5882352941176471
13
  Qwen2_5_72B_Instruct,0.7205882352941176
14
  gemma-2-9b-it,0.6029411764705882
@@ -16,7 +17,7 @@ Meta-Llama-3-70B-Instruct,0.6617647058823529
16
  Qwen2_5_14B_Instruct,0.6911764705882353
17
  sg_llama3_8192_8b,0.47058823529411764
18
  sg_llama3_70b_inst,0.6176470588235294
19
- gemma-2-2b-it,0.4852941176470588
20
  llama3-8b-cpt-sea-lionv2-instruct,0.6617647058823529
21
  Qwen2_5_0_5B_Instruct,0.36764705882352944
22
  GPT4o_0513,0.8088235294117647
 
9
  Meta-Llama-3-8B-Instruct,0.5882352941176471
10
  Meta-Llama-3.1-70B-Instruct,0.6617647058823529
11
  Qwen2_5_3B_Instruct,0.5882352941176471
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.6470588235294118
13
  SeaLLMs-v3-7B-Chat,0.5882352941176471
14
  Qwen2_5_72B_Instruct,0.7205882352941176
15
  gemma-2-9b-it,0.6029411764705882
 
17
  Qwen2_5_14B_Instruct,0.6911764705882353
18
  sg_llama3_8192_8b,0.47058823529411764
19
  sg_llama3_70b_inst,0.6176470588235294
20
+ gemma-2-2b-it,0.5147058823529411
21
  llama3-8b-cpt-sea-lionv2-instruct,0.6617647058823529
22
  Qwen2_5_0_5B_Instruct,0.36764705882352944
23
  GPT4o_0513,0.8088235294117647
results/cultural_reasoning/zero_shot/sg_eval_v2_mcq.csv CHANGED
@@ -1,9 +1,11 @@
1
  Model,Accuracy
 
2
  Qwen2_5_32B_Instruct,0.8436363636363636
3
  Qwen2_5_7B_Instruct,0.78
4
  Qwen2_5_1_5B_Instruct,0.6636363636363637
5
  cross_openhermes_llama3_8b_4096_inst,0.7490909090909091
6
  Qwen2_5_3B_Instruct,0.72
 
7
  Qwen2_5_72B_Instruct,0.8618181818181818
8
  Meta-Llama-3-70B-Instruct,0.8381818181818181
9
  Qwen2_5_14B_Instruct,0.8345454545454546
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.7854545454545454
3
  Qwen2_5_32B_Instruct,0.8436363636363636
4
  Qwen2_5_7B_Instruct,0.78
5
  Qwen2_5_1_5B_Instruct,0.6636363636363637
6
  cross_openhermes_llama3_8b_4096_inst,0.7490909090909091
7
  Qwen2_5_3B_Instruct,0.72
8
+ cross_openhermes_llama3_70b_4096_inst_2,0.8618181818181818
9
  Qwen2_5_72B_Instruct,0.8618181818181818
10
  Meta-Llama-3-70B-Instruct,0.8381818181818181
11
  Qwen2_5_14B_Instruct,0.8345454545454546
results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv CHANGED
@@ -1,14 +1,17 @@
1
  Model,Accuracy
 
2
  Qwen2_5_32B_Instruct,53.2
3
  Qwen2_5_7B_Instruct,50.279999999999994
4
  Qwen2_5_1_5B_Instruct,44.480000000000004
5
  cross_openhermes_llama3_8b_4096_inst,51.6
6
  Qwen2_5_3B_Instruct,47.24
 
7
  Qwen2_5_72B_Instruct,53.32
8
  Meta-Llama-3-70B-Instruct,50.599999999999994
9
  Qwen2_5_14B_Instruct,53.2
10
  sg_llama3_8192_8b,53.4
11
  sg_llama3_70b_inst,51.959999999999994
 
12
  Qwen2_5_0_5B_Instruct,35.28
13
  cross_openhermes_llama3_70b_4096_inst,53.2
14
  cross_openhermes_llama3_8b_4096_2_inst,52.28
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,49.2
3
  Qwen2_5_32B_Instruct,53.2
4
  Qwen2_5_7B_Instruct,50.279999999999994
5
  Qwen2_5_1_5B_Instruct,44.480000000000004
6
  cross_openhermes_llama3_8b_4096_inst,51.6
7
  Qwen2_5_3B_Instruct,47.24
8
+ cross_openhermes_llama3_70b_4096_inst_2,55.8
9
  Qwen2_5_72B_Instruct,53.32
10
  Meta-Llama-3-70B-Instruct,50.599999999999994
11
  Qwen2_5_14B_Instruct,53.2
12
  sg_llama3_8192_8b,53.4
13
  sg_llama3_70b_inst,51.959999999999994
14
+ gemma-2-2b-it,52.08
15
  Qwen2_5_0_5B_Instruct,35.28
16
  cross_openhermes_llama3_70b_4096_inst,53.2
17
  cross_openhermes_llama3_8b_4096_2_inst,52.28
results/cultural_reasoning/zero_shot/us_eval.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.6448598130841121
9
  Meta-Llama-3-8B-Instruct,0.7009345794392523
10
  Meta-Llama-3.1-70B-Instruct,0.8411214953271028
11
  Qwen2_5_3B_Instruct,0.6728971962616822
 
12
  SeaLLMs-v3-7B-Chat,0.6915887850467289
13
  Qwen2_5_72B_Instruct,0.8598130841121495
14
  gemma-2-9b-it,0.8130841121495327
 
9
  Meta-Llama-3-8B-Instruct,0.7009345794392523
10
  Meta-Llama-3.1-70B-Instruct,0.8411214953271028
11
  Qwen2_5_3B_Instruct,0.6728971962616822
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.8598130841121495
13
  SeaLLMs-v3-7B-Chat,0.6915887850467289
14
  Qwen2_5_72B_Instruct,0.8598130841121495
15
  gemma-2-9b-it,0.8130841121495327
results/dialogue/zero_shot/dialogsum.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.2519360474995096,0.3481981488260775,0.132
9
  Meta-Llama-3-8B-Instruct,0.23978455271183616,0.33971099717559883,0.1203340311564728,0.2593086298034369
10
  Meta-Llama-3.1-70B-Instruct,0.2526239717396146,0.35714386898604744,0.1258832921736473,0.27484475405914904
11
  Qwen2_5_3B_Instruct,0.22107390172674926,0.32206286484028823,0.10065030710901035,0.24050853323094928
 
12
  SeaLLMs-v3-7B-Chat,0.24891094210680076,0.35393482223136147,0.12172072639345373,0.27107727769558715
13
  Qwen2_5_72B_Instruct,0.23460549655507293,0.3373580017785426,0.10893746645433498,0.25752102143234123
14
  gemma-2-9b-it,0.2560682231168516,0.36247455000865003,0.12571639767749476,0.2800137216644101
 
9
  Meta-Llama-3-8B-Instruct,0.23978455271183616,0.33971099717559883,0.1203340311564728,0.2593086298034369
10
  Meta-Llama-3.1-70B-Instruct,0.2526239717396146,0.35714386898604744,0.1258832921736473,0.27484475405914904
11
  Qwen2_5_3B_Instruct,0.22107390172674926,0.32206286484028823,0.10065030710901035,0.24050853323094928
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.2696117118557506,0.3743401999085179,0.14161035346358444,0.2928845821951494
13
  SeaLLMs-v3-7B-Chat,0.24891094210680076,0.35393482223136147,0.12172072639345373,0.27107727769558715
14
  Qwen2_5_72B_Instruct,0.23460549655507293,0.3373580017785426,0.10893746645433498,0.25752102143234123
15
  gemma-2-9b-it,0.2560682231168516,0.36247455000865003,0.12571639767749476,0.2800137216644101
results/dialogue/zero_shot/dream.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.8613424791768741
9
  Meta-Llama-3-8B-Instruct,0.8946594806467418
10
  Meta-Llama-3.1-70B-Instruct,0.9559039686428221
11
  Qwen2_5_3B_Instruct,0.9029887310142087
 
12
  SeaLLMs-v3-7B-Chat,0.9265066144047036
13
  Qwen2_5_72B_Instruct,0.9627633512983832
14
  gemma-2-9b-it,0.9416952474277315
 
9
  Meta-Llama-3-8B-Instruct,0.8946594806467418
10
  Meta-Llama-3.1-70B-Instruct,0.9559039686428221
11
  Qwen2_5_3B_Instruct,0.9029887310142087
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.9549240568348849
13
  SeaLLMs-v3-7B-Chat,0.9265066144047036
14
  Qwen2_5_72B_Instruct,0.9627633512983832
15
  gemma-2-9b-it,0.9416952474277315
results/dialogue/zero_shot/samsum.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.2961783902880866,0.40739117705606903,0.16
9
  Meta-Llama-3-8B-Instruct,0.2846315092346869,0.39397110152251813,0.154320846916639,0.30560257926490364
10
  Meta-Llama-3.1-70B-Instruct,0.28934874612070227,0.4036295731242805,0.15211190810296196,0.31230475713486433
11
  Qwen2_5_3B_Instruct,0.26935624341081515,0.380865832002109,0.13872106416227833,0.28848183406805816
 
12
  SeaLLMs-v3-7B-Chat,0.2959981719045788,0.4078820748825196,0.16338306782652476,0.316729373004692
13
  Qwen2_5_72B_Instruct,0.28852247889830335,0.3996215000271418,0.15494490129237035,0.31100103537539775
14
  gemma-2-9b-it,0.3100514077180449,0.4289412957792292,0.16727050182456474,0.3339424255503407
 
9
  Meta-Llama-3-8B-Instruct,0.2846315092346869,0.39397110152251813,0.154320846916639,0.30560257926490364
10
  Meta-Llama-3.1-70B-Instruct,0.28934874612070227,0.4036295731242805,0.15211190810296196,0.31230475713486433
11
  Qwen2_5_3B_Instruct,0.26935624341081515,0.380865832002109,0.13872106416227833,0.28848183406805816
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.31836805920341904,0.432137300473344,0.18274124314511622,0.34022563399179695
13
  SeaLLMs-v3-7B-Chat,0.2959981719045788,0.4078820748825196,0.16338306782652476,0.316729373004692
14
  Qwen2_5_72B_Instruct,0.28852247889830335,0.3996215000271418,0.15494490129237035,0.31100103537539775
15
  gemma-2-9b-it,0.3100514077180449,0.4289412957792292,0.16727050182456474,0.3339424255503407
results/emotion/zero_shot/ind_emotion.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.6704545454545454
9
  Meta-Llama-3-8B-Instruct,0.6522727272727272
10
  Meta-Llama-3.1-70B-Instruct,0.7159090909090909
11
  Qwen2_5_3B_Instruct,0.5522727272727272
 
12
  SeaLLMs-v3-7B-Chat,0.6454545454545455
13
  Qwen2_5_72B_Instruct,0.7068181818181818
14
  gemma-2-9b-it,0.7477272727272727
 
9
  Meta-Llama-3-8B-Instruct,0.6522727272727272
10
  Meta-Llama-3.1-70B-Instruct,0.7159090909090909
11
  Qwen2_5_3B_Instruct,0.5522727272727272
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.7068181818181818
13
  SeaLLMs-v3-7B-Chat,0.6454545454545455
14
  Qwen2_5_72B_Instruct,0.7068181818181818
15
  gemma-2-9b-it,0.7477272727272727
results/emotion/zero_shot/sst2.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.926605504587156
9
  Meta-Llama-3-8B-Instruct,0.8784403669724771
10
  Meta-Llama-3.1-70B-Instruct,0.9529816513761468
11
  Qwen2_5_3B_Instruct,0.8245412844036697
 
12
  SeaLLMs-v3-7B-Chat,0.9403669724770642
13
  Qwen2_5_72B_Instruct,0.9334862385321101
14
  gemma-2-9b-it,0.9311926605504587
 
9
  Meta-Llama-3-8B-Instruct,0.8784403669724771
10
  Meta-Llama-3.1-70B-Instruct,0.9529816513761468
11
  Qwen2_5_3B_Instruct,0.8245412844036697
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.9415137614678899
13
  SeaLLMs-v3-7B-Chat,0.9403669724770642
14
  Qwen2_5_72B_Instruct,0.9334862385321101
15
  gemma-2-9b-it,0.9311926605504587
results/flores_translation/zero_shot/ind2eng.csv CHANGED
@@ -7,6 +7,7 @@ cross_openhermes_llama3_8b_4096_inst,0.37782883404862155
7
  Meta-Llama-3-8B-Instruct,0.33079891679041123
8
  Meta-Llama-3.1-70B-Instruct,0.43366494500251235
9
  Qwen2_5_3B_Instruct,0.3316936422167389
 
10
  SeaLLMs-v3-7B-Chat,0.3594829412574955
11
  gemma-2-9b-it,0.40786563079141763
12
  Meta-Llama-3-70B-Instruct,0.3830092775167675
 
7
  Meta-Llama-3-8B-Instruct,0.33079891679041123
8
  Meta-Llama-3.1-70B-Instruct,0.43366494500251235
9
  Qwen2_5_3B_Instruct,0.3316936422167389
10
+ cross_openhermes_llama3_70b_4096_inst_2,0.41785038798707536
11
  SeaLLMs-v3-7B-Chat,0.3594829412574955
12
  gemma-2-9b-it,0.40786563079141763
13
  Meta-Llama-3-70B-Instruct,0.3830092775167675
results/flores_translation/zero_shot/vie2eng.csv CHANGED
@@ -7,6 +7,7 @@ cross_openhermes_llama3_8b_4096_inst,0.28905588559612455
7
  Meta-Llama-3-8B-Instruct,0.2647448190950291
8
  Meta-Llama-3.1-70B-Instruct,0.37244508311079816
9
  Qwen2_5_3B_Instruct,0.27312609009801636
 
10
  SeaLLMs-v3-7B-Chat,0.30981028289420137
11
  gemma-2-9b-it,0.3367700653885
12
  Meta-Llama-3-70B-Instruct,0.3230140263371192
 
7
  Meta-Llama-3-8B-Instruct,0.2647448190950291
8
  Meta-Llama-3.1-70B-Instruct,0.37244508311079816
9
  Qwen2_5_3B_Instruct,0.27312609009801636
10
+ cross_openhermes_llama3_70b_4096_inst_2,0.35523251361264646
11
  SeaLLMs-v3-7B-Chat,0.30981028289420137
12
  gemma-2-9b-it,0.3367700653885
13
  Meta-Llama-3-70B-Instruct,0.3230140263371192
results/flores_translation/zero_shot/zho2eng.csv CHANGED
@@ -7,6 +7,7 @@ cross_openhermes_llama3_8b_4096_inst,0.2258901846942186
7
  Meta-Llama-3-8B-Instruct,0.199495011482748
8
  Meta-Llama-3.1-70B-Instruct,0.2832594176173152
9
  Qwen2_5_3B_Instruct,0.2245195134637718
 
10
  SeaLLMs-v3-7B-Chat,0.2516593644617717
11
  gemma-2-9b-it,0.267527968123433
12
  Meta-Llama-3-70B-Instruct,0.24397819518058994
 
7
  Meta-Llama-3-8B-Instruct,0.199495011482748
8
  Meta-Llama-3.1-70B-Instruct,0.2832594176173152
9
  Qwen2_5_3B_Instruct,0.2245195134637718
10
+ cross_openhermes_llama3_70b_4096_inst_2,0.272938440868618
11
  SeaLLMs-v3-7B-Chat,0.2516593644617717
12
  gemma-2-9b-it,0.267527968123433
13
  Meta-Llama-3-70B-Instruct,0.24397819518058994
results/flores_translation/zero_shot/zsm2eng.csv CHANGED
@@ -7,6 +7,7 @@ cross_openhermes_llama3_8b_4096_inst,0.37996622288549425
7
  Meta-Llama-3-8B-Instruct,0.31625368345049
8
  Meta-Llama-3.1-70B-Instruct,0.4462132282683508
9
  Qwen2_5_3B_Instruct,0.31056841204320457
 
10
  SeaLLMs-v3-7B-Chat,0.3484133510670942
11
  gemma-2-9b-it,0.4234100394581857
12
  Meta-Llama-3-70B-Instruct,0.3957287030176054
 
7
  Meta-Llama-3-8B-Instruct,0.31625368345049
8
  Meta-Llama-3.1-70B-Instruct,0.4462132282683508
9
  Qwen2_5_3B_Instruct,0.31056841204320457
10
+ cross_openhermes_llama3_70b_4096_inst_2,0.439568878723126
11
  SeaLLMs-v3-7B-Chat,0.3484133510670942
12
  gemma-2-9b-it,0.4234100394581857
13
  Meta-Llama-3-70B-Instruct,0.3957287030176054
results/fundamental_nlp_tasks/zero_shot/c3.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.7718773373223635
9
  Meta-Llama-3-8B-Instruct,0.8515332834704562
10
  Meta-Llama-3.1-70B-Instruct,0.9603590127150337
11
  Qwen2_5_3B_Instruct,0.8668661181750187
 
12
  SeaLLMs-v3-7B-Chat,0.9143605086013463
13
  Qwen2_5_72B_Instruct,0.9596110695587136
14
  gemma-2-9b-it,0.9222139117427075
 
9
  Meta-Llama-3-8B-Instruct,0.8515332834704562
10
  Meta-Llama-3.1-70B-Instruct,0.9603590127150337
11
  Qwen2_5_3B_Instruct,0.8668661181750187
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.9420344053851907
13
  SeaLLMs-v3-7B-Chat,0.9143605086013463
14
  Qwen2_5_72B_Instruct,0.9596110695587136
15
  gemma-2-9b-it,0.9222139117427075
results/fundamental_nlp_tasks/zero_shot/cola.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.7660594439117929
9
  Meta-Llama-3-8B-Instruct,0.6548418024928092
10
  Meta-Llama-3.1-70B-Instruct,0.850431447746884
11
  Qwen2_5_3B_Instruct,0.6644295302013423
 
12
  SeaLLMs-v3-7B-Chat,0.785234899328859
13
  Qwen2_5_72B_Instruct,0.8571428571428571
14
  gemma-2-9b-it,0.7938638542665388
 
9
  Meta-Llama-3-8B-Instruct,0.6548418024928092
10
  Meta-Llama-3.1-70B-Instruct,0.850431447746884
11
  Qwen2_5_3B_Instruct,0.6644295302013423
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.8609779482262704
13
  SeaLLMs-v3-7B-Chat,0.785234899328859
14
  Qwen2_5_72B_Instruct,0.8571428571428571
15
  gemma-2-9b-it,0.7938638542665388
results/fundamental_nlp_tasks/zero_shot/mnli.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.57
9
  Meta-Llama-3-8B-Instruct,0.546
10
  Meta-Llama-3.1-70B-Instruct,0.7015
11
  Qwen2_5_3B_Instruct,0.7465
 
12
  SeaLLMs-v3-7B-Chat,0.653
13
  Qwen2_5_72B_Instruct,0.8445
14
  gemma-2-9b-it,0.716
 
9
  Meta-Llama-3-8B-Instruct,0.546
10
  Meta-Llama-3.1-70B-Instruct,0.7015
11
  Qwen2_5_3B_Instruct,0.7465
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.7481549345956126
13
  SeaLLMs-v3-7B-Chat,0.653
14
  Qwen2_5_72B_Instruct,0.8445
15
  gemma-2-9b-it,0.716
results/fundamental_nlp_tasks/zero_shot/mrpc.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.7303921568627451
9
  Meta-Llama-3-8B-Instruct,0.678921568627451
10
  Meta-Llama-3.1-70B-Instruct,0.7696078431372549
11
  Qwen2_5_3B_Instruct,0.5661764705882353
 
12
  SeaLLMs-v3-7B-Chat,0.7475490196078431
13
  Qwen2_5_72B_Instruct,0.8014705882352942
14
  gemma-2-9b-it,0.7401960784313726
 
9
  Meta-Llama-3-8B-Instruct,0.678921568627451
10
  Meta-Llama-3.1-70B-Instruct,0.7696078431372549
11
  Qwen2_5_3B_Instruct,0.5661764705882353
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.7916666666666666
13
  SeaLLMs-v3-7B-Chat,0.7475490196078431
14
  Qwen2_5_72B_Instruct,0.8014705882352942
15
  gemma-2-9b-it,0.7401960784313726
results/fundamental_nlp_tasks/zero_shot/ocnli.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.5183050847457628
9
  Meta-Llama-3-8B-Instruct,0.44033898305084745
10
  Meta-Llama-3.1-70B-Instruct,0.6423728813559322
11
  Qwen2_5_3B_Instruct,0.6145762711864406
 
12
  SeaLLMs-v3-7B-Chat,0.5698305084745763
13
  Qwen2_5_72B_Instruct,0.7684745762711864
14
  gemma-2-9b-it,0.6189830508474576
 
9
  Meta-Llama-3-8B-Instruct,0.44033898305084745
10
  Meta-Llama-3.1-70B-Instruct,0.6423728813559322
11
  Qwen2_5_3B_Instruct,0.6145762711864406
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.6759322033898305
13
  SeaLLMs-v3-7B-Chat,0.5698305084745763
14
  Qwen2_5_72B_Instruct,0.7684745762711864
15
  gemma-2-9b-it,0.6189830508474576
results/fundamental_nlp_tasks/zero_shot/qnli.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.7596558667398865
9
  Meta-Llama-3-8B-Instruct,0.6025993044114956
10
  Meta-Llama-3.1-70B-Instruct,0.9026176093721399
11
  Qwen2_5_3B_Instruct,0.7645982061138569
 
12
  SeaLLMs-v3-7B-Chat,0.7159070107999268
13
  Qwen2_5_72B_Instruct,0.9082921471718836
14
  gemma-2-9b-it,0.9070107999267801
 
9
  Meta-Llama-3-8B-Instruct,0.6025993044114956
10
  Meta-Llama-3.1-70B-Instruct,0.9026176093721399
11
  Qwen2_5_3B_Instruct,0.7645982061138569
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.9086582463847702
13
  SeaLLMs-v3-7B-Chat,0.7159070107999268
14
  Qwen2_5_72B_Instruct,0.9082921471718836
15
  gemma-2-9b-it,0.9070107999267801
results/fundamental_nlp_tasks/zero_shot/rte.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.6967509025270758
9
  Meta-Llama-3-8B-Instruct,0.6173285198555957
10
  Meta-Llama-3.1-70B-Instruct,0.8483754512635379
11
  Qwen2_5_3B_Instruct,0.779783393501805
 
12
  SeaLLMs-v3-7B-Chat,0.7870036101083032
13
  Qwen2_5_72B_Instruct,0.9025270758122743
14
  gemma-2-9b-it,0.7472924187725631
 
9
  Meta-Llama-3-8B-Instruct,0.6173285198555957
10
  Meta-Llama-3.1-70B-Instruct,0.8483754512635379
11
  Qwen2_5_3B_Instruct,0.779783393501805
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.8916967509025271
13
  SeaLLMs-v3-7B-Chat,0.7870036101083032
14
  Qwen2_5_72B_Instruct,0.9025270758122743
15
  gemma-2-9b-it,0.7472924187725631
results/fundamental_nlp_tasks/zero_shot/wnli.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.4647887323943662
9
  Meta-Llama-3-8B-Instruct,0.4788732394366197
10
  Meta-Llama-3.1-70B-Instruct,0.8450704225352113
11
  Qwen2_5_3B_Instruct,0.647887323943662
 
12
  SeaLLMs-v3-7B-Chat,0.5915492957746479
13
  Qwen2_5_72B_Instruct,0.8169014084507042
14
  gemma-2-9b-it,0.7746478873239436
 
9
  Meta-Llama-3-8B-Instruct,0.4788732394366197
10
  Meta-Llama-3.1-70B-Instruct,0.8450704225352113
11
  Qwen2_5_3B_Instruct,0.647887323943662
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.8450704225352113
13
  SeaLLMs-v3-7B-Chat,0.5915492957746479
14
  Qwen2_5_72B_Instruct,0.8169014084507042
15
  gemma-2-9b-it,0.7746478873239436
results/general_reasoning/zero_shot/c_eval.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.44707347447073476
9
  Meta-Llama-3-8B-Instruct,0.4775840597758406
10
  Meta-Llama-3.1-70B-Instruct,0.6612702366127023
11
  Qwen2_5_3B_Instruct,0.6537982565379825
 
12
  SeaLLMs-v3-7B-Chat,0.7658779576587795
13
  Qwen2_5_72B_Instruct,0.8325031133250311
14
  gemma-2-9b-it,0.5523038605230386
 
9
  Meta-Llama-3-8B-Instruct,0.4775840597758406
10
  Meta-Llama-3.1-70B-Instruct,0.6612702366127023
11
  Qwen2_5_3B_Instruct,0.6537982565379825
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.6133250311332503
13
  SeaLLMs-v3-7B-Chat,0.7658779576587795
14
  Qwen2_5_72B_Instruct,0.8325031133250311
15
  gemma-2-9b-it,0.5523038605230386
results/general_reasoning/zero_shot/cmmlu.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.4547573821447073
9
  Meta-Llama-3-8B-Instruct,0.4839405974788465
10
  Meta-Llama-3.1-70B-Instruct,0.6814885166637886
11
  Qwen2_5_3B_Instruct,0.6621481609393887
 
12
  SeaLLMs-v3-7B-Chat,0.7684337765498187
13
  Qwen2_5_72B_Instruct,0.8343982041098256
14
  gemma-2-9b-it,0.5700224486271801
 
9
  Meta-Llama-3-8B-Instruct,0.4839405974788465
10
  Meta-Llama-3.1-70B-Instruct,0.6814885166637886
11
  Qwen2_5_3B_Instruct,0.6621481609393887
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.6392678293904335
13
  SeaLLMs-v3-7B-Chat,0.7684337765498187
14
  Qwen2_5_72B_Instruct,0.8343982041098256
15
  gemma-2-9b-it,0.5700224486271801
results/general_reasoning/zero_shot/indommlu.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.5097803591695039
9
  Meta-Llama-3-8B-Instruct,0.5264703918819681
10
  Meta-Llama-3.1-70B-Instruct,0.6740770411910008
11
  Qwen2_5_3B_Instruct,0.49656185326123237
 
12
  SeaLLMs-v3-7B-Chat,0.5267374324053675
13
  Qwen2_5_72B_Instruct,0.6380933306629281
14
  gemma-2-9b-it,0.606983109686895
 
9
  Meta-Llama-3-8B-Instruct,0.5264703918819681
10
  Meta-Llama-3.1-70B-Instruct,0.6740770411910008
11
  Qwen2_5_3B_Instruct,0.49656185326123237
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.6559182855998398
13
  SeaLLMs-v3-7B-Chat,0.5267374324053675
14
  Qwen2_5_72B_Instruct,0.6380933306629281
15
  gemma-2-9b-it,0.606983109686895
results/general_reasoning/zero_shot/mmlu.csv CHANGED
@@ -8,12 +8,10 @@ Qwen2-72B-Instruct,0.7922774401144083
8
  cross_openhermes_llama3_8b_4096_inst,0.556381837683232
9
  Meta-Llama-3-8B-Instruct,0.6005720414730068
10
  Meta-Llama-3.1-70B-Instruct,0.8058634250983197
11
- sg_llama3_8192_8b_np,0.6528423310690025
12
  Qwen2_5_3B_Instruct,0.6118698605648909
13
- Meta-Llama-3-8B-Instruct_np,0.6619949946371112
14
  SeaLLMs-v3-7B-Chat,0.6670003575259207
15
  Qwen2_5_72B_Instruct,0.8129424383267787
16
- cross_openhermes_llama3_8b_4096_inst_np,0.5601001072577761
17
  gemma-2-9b-it,0.7100464783696818
18
  Meta-Llama-3-70B-Instruct,0.7649624597783339
19
  Qwen2_5_14B_Instruct,0.7542366821594566
@@ -23,6 +21,5 @@ gemma-2-2b-it,0.5706828745084018
23
  llama3-8b-cpt-sea-lionv2-instruct,0.6130854486950303
24
  Qwen2_5_0_5B_Instruct,0.461136932427601
25
  GPT4o_0513,0.8308187343582409
26
- llama3-8b-cpt-sea-lionv2-instruct_np,0.6518412584912406
27
  cross_openhermes_llama3_70b_4096_inst,0.7400071505184126
28
  cross_openhermes_llama3_8b_4096_2_inst,0.5785484447622453
 
8
  cross_openhermes_llama3_8b_4096_inst,0.556381837683232
9
  Meta-Llama-3-8B-Instruct,0.6005720414730068
10
  Meta-Llama-3.1-70B-Instruct,0.8058634250983197
 
11
  Qwen2_5_3B_Instruct,0.6118698605648909
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.7551662495530926
13
  SeaLLMs-v3-7B-Chat,0.6670003575259207
14
  Qwen2_5_72B_Instruct,0.8129424383267787
 
15
  gemma-2-9b-it,0.7100464783696818
16
  Meta-Llama-3-70B-Instruct,0.7649624597783339
17
  Qwen2_5_14B_Instruct,0.7542366821594566
 
21
  llama3-8b-cpt-sea-lionv2-instruct,0.6130854486950303
22
  Qwen2_5_0_5B_Instruct,0.461136932427601
23
  GPT4o_0513,0.8308187343582409
 
24
  cross_openhermes_llama3_70b_4096_inst,0.7400071505184126
25
  cross_openhermes_llama3_8b_4096_2_inst,0.5785484447622453
results/general_reasoning/zero_shot/zbench.csv CHANGED
@@ -9,6 +9,7 @@ cross_openhermes_llama3_8b_4096_inst,0.3333333333333333
9
  Meta-Llama-3-8B-Instruct,0.3333333333333333
10
  Meta-Llama-3.1-70B-Instruct,0.48484848484848486
11
  Qwen2_5_3B_Instruct,0.5757575757575758
 
12
  SeaLLMs-v3-7B-Chat,0.5454545454545454
13
  Qwen2_5_72B_Instruct,0.696969696969697
14
  gemma-2-9b-it,0.48484848484848486
 
9
  Meta-Llama-3-8B-Instruct,0.3333333333333333
10
  Meta-Llama-3.1-70B-Instruct,0.48484848484848486
11
  Qwen2_5_3B_Instruct,0.5757575757575758
12
+ cross_openhermes_llama3_70b_4096_inst_2,0.48484848484848486
13
  SeaLLMs-v3-7B-Chat,0.5454545454545454
14
  Qwen2_5_72B_Instruct,0.696969696969697
15
  gemma-2-9b-it,0.48484848484848486