zhuohan-7 commited on
Commit
244bcc5
1 Parent(s): 508092c

Upload folder using huggingface_hub

Browse files
Files changed (33) hide show
  1. results/cross_lingual/zero_shot/cross_logiqa.csv +9 -0
  2. results/cross_lingual/zero_shot/cross_mmlu.csv +9 -0
  3. results/cross_lingual/zero_shot/cross_xquad.csv +8 -0
  4. results/cultural_reasoning/zero_shot/cn_eval.csv +9 -0
  5. results/cultural_reasoning/zero_shot/ph_eval.csv +9 -0
  6. results/cultural_reasoning/zero_shot/sg_eval.csv +9 -0
  7. results/cultural_reasoning/zero_shot/sg_eval_v1_cleaned.csv +9 -0
  8. results/cultural_reasoning/zero_shot/sg_eval_v2_mcq.csv +9 -0
  9. results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv +9 -0
  10. results/cultural_reasoning/zero_shot/us_eval.csv +9 -0
  11. results/dialogue/zero_shot/dialogsum.csv +9 -0
  12. results/dialogue/zero_shot/dream.csv +9 -0
  13. results/dialogue/zero_shot/samsum.csv +9 -0
  14. results/emotion/zero_shot/ind_emotion.csv +9 -0
  15. results/emotion/zero_shot/sst2.csv +9 -0
  16. results/flores_translation/zero_shot/ind2eng.csv +5 -0
  17. results/flores_translation/zero_shot/vie2eng.csv +5 -0
  18. results/flores_translation/zero_shot/zho2eng.csv +5 -0
  19. results/flores_translation/zero_shot/zsm2eng.csv +5 -0
  20. results/fundamental_nlp_tasks/zero_shot/c3.csv +9 -0
  21. results/fundamental_nlp_tasks/zero_shot/cola.csv +9 -0
  22. results/fundamental_nlp_tasks/zero_shot/mnli.csv +8 -0
  23. results/fundamental_nlp_tasks/zero_shot/mrpc.csv +8 -0
  24. results/fundamental_nlp_tasks/zero_shot/ocnli.csv +8 -0
  25. results/fundamental_nlp_tasks/zero_shot/qnli.csv +8 -0
  26. results/fundamental_nlp_tasks/zero_shot/qqp.csv +8 -0
  27. results/fundamental_nlp_tasks/zero_shot/rte.csv +8 -0
  28. results/fundamental_nlp_tasks/zero_shot/wnli.csv +8 -0
  29. results/general_reasoning/zero_shot/c_eval.csv +9 -0
  30. results/general_reasoning/zero_shot/cmmlu.csv +9 -0
  31. results/general_reasoning/zero_shot/indommlu.csv +9 -0
  32. results/general_reasoning/zero_shot/mmlu.csv +9 -0
  33. results/general_reasoning/zero_shot/zbench.csv +9 -0
results/cross_lingual/zero_shot/cross_logiqa.csv CHANGED
@@ -1,14 +1,23 @@
1
  Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
2
  Qwen2-7B-Instruct,0.564935064935065,0.48279220779220783,0.5206435955861558,0.6590909090909091,0.7045454545454546,0.5340909090909091,0.5738636363636364,0.5397727272727273,0.5113636363636364,0.4318181818181818
3
  Meta-Llama-3.1-8B-Instruct,0.4472402597402597,0.43717532467532455,0.44215052105151864,0.5227272727272727,0.4602272727272727,0.4715909090909091,0.4715909090909091,0.4147727272727273,0.3977272727272727,0.39204545454545453
 
 
 
4
  Qwen2-72B-Instruct,0.6728896103896104,0.6762987012987012,0.6745898487968579,0.75,0.8068181818181818,0.6534090909090909,0.6193181818181818,0.625,0.6534090909090909,0.6022727272727273
 
5
  Meta-Llama-3-8B-Instruct,0.4610389610389611,0.45097402597402597,0.4559509553669637,0.5965909090909091,0.48295454545454547,0.5,0.4602272727272727,0.42045454545454547,0.4034090909090909,0.36363636363636365
6
  Meta-Llama-3.1-70B-Instruct,0.6566558441558442,0.598051948051948,0.6259852839118454,0.7443181818181818,0.7215909090909091,0.6647727272727273,0.6534090909090909,0.6193181818181818,0.625,0.5681818181818182
 
7
  SeaLLMs-v3-7B-Chat,0.5551948051948051,0.5142857142857142,0.5339578453833284,0.6022727272727273,0.6647727272727273,0.5738636363636364,0.5454545454545454,0.5170454545454546,0.5,0.48295454545454547
 
8
  gemma-2-9b-it,0.6185064935064934,0.5592532467532466,0.5873893507784849,0.6647727272727273,0.6761363636363636,0.5625,0.6193181818181818,0.5795454545454546,0.6420454545454546,0.5852272727272727
9
  Meta-Llama-3-70B-Instruct,0.6306818181818182,0.6186688311688312,0.6246175698800746,0.7102272727272727,0.6875,0.6420454545454546,0.6193181818181818,0.6022727272727273,0.6136363636363636,0.5397727272727273
 
10
  sg_llama3_70b_inst,0.6217532467532468,0.5629870129870129,0.590912649920049,0.7272727272727273,0.6590909090909091,0.6477272727272727,0.6079545454545454,0.6136363636363636,0.5795454545454546,0.5170454545454546
11
  gemma-2-2b-it,0.4780844155844156,0.4448051948051948,0.46084478401384643,0.5568181818181818,0.5,0.5,0.48863636363636365,0.4375,0.4602272727272727,0.4034090909090909
12
  llama3-8b-cpt-sea-lionv2-instruct,0.48538961038961037,0.4472402597402597,0.46553468284769084,0.5284090909090909,0.5113636363636364,0.5227272727272727,0.5227272727272727,0.48863636363636365,0.44886363636363635,0.375
 
13
  GPT4o_0513,0.7159090909090909,0.6941558441558444,0.7048646724637749,0.7613636363636364,0.7670454545454546,0.6988636363636364,0.6988636363636364,0.7045454545454546,0.6761363636363636,0.7045454545454546
 
14
  Meta-Llama-3.1-8B,0.29464285714285715,0.07857142857142858,0.12406015034269886,0.32954545454545453,0.32386363636363635,0.2840909090909091,0.2727272727272727,0.2840909090909091,0.3125,0.2556818181818182
 
1
  Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
2
  Qwen2-7B-Instruct,0.564935064935065,0.48279220779220783,0.5206435955861558,0.6590909090909091,0.7045454545454546,0.5340909090909091,0.5738636363636364,0.5397727272727273,0.5113636363636364,0.4318181818181818
3
  Meta-Llama-3.1-8B-Instruct,0.4472402597402597,0.43717532467532455,0.44215052105151864,0.5227272727272727,0.4602272727272727,0.4715909090909091,0.4715909090909091,0.4147727272727273,0.3977272727272727,0.39204545454545453
4
+ Qwen2_5_32B_Instruct,0.6931818181818182,0.6397727272727273,0.6654072695772988,0.7727272727272727,0.7897727272727273,0.6704545454545454,0.6761363636363636,0.6875,0.6875,0.5681818181818182
5
+ Qwen2_5_7B_Instruct,0.599025974025974,0.5034090909090908,0.5470709896292291,0.7102272727272727,0.7215909090909091,0.6136363636363636,0.6022727272727273,0.5738636363636364,0.5511363636363636,0.42045454545454547
6
+ Qwen2_5_1_5B_Instruct,0.46834415584415584,0.348538961038961,0.3996561615557665,0.5511363636363636,0.5909090909090909,0.4659090909090909,0.5113636363636364,0.4375,0.375,0.3465909090909091
7
  Qwen2-72B-Instruct,0.6728896103896104,0.6762987012987012,0.6745898487968579,0.75,0.8068181818181818,0.6534090909090909,0.6193181818181818,0.625,0.6534090909090909,0.6022727272727273
8
+ cross_openhermes_llama3_8b_4096_inst,0.4496753246753246,0.49188311688311687,0.4698331840074224,0.5170454545454546,0.4318181818181818,0.4318181818181818,0.4659090909090909,0.4602272727272727,0.4602272727272727,0.3806818181818182
9
  Meta-Llama-3-8B-Instruct,0.4610389610389611,0.45097402597402597,0.4559509553669637,0.5965909090909091,0.48295454545454547,0.5,0.4602272727272727,0.42045454545454547,0.4034090909090909,0.36363636363636365
10
  Meta-Llama-3.1-70B-Instruct,0.6566558441558442,0.598051948051948,0.6259852839118454,0.7443181818181818,0.7215909090909091,0.6647727272727273,0.6534090909090909,0.6193181818181818,0.625,0.5681818181818182
11
+ Qwen2_5_3B_Instruct,0.4878246753246754,0.3594155844155844,0.41388918606681485,0.6079545454545454,0.6420454545454546,0.45454545454545453,0.4602272727272727,0.48295454545454547,0.42045454545454547,0.3465909090909091
12
  SeaLLMs-v3-7B-Chat,0.5551948051948051,0.5142857142857142,0.5339578453833284,0.6022727272727273,0.6647727272727273,0.5738636363636364,0.5454545454545454,0.5170454545454546,0.5,0.48295454545454547
13
+ Qwen2_5_72B_Instruct,0.7248376623376623,0.6852272727272727,0.7044761161663122,0.8011363636363636,0.7954545454545454,0.7272727272727273,0.6704545454545454,0.7159090909090909,0.7159090909090909,0.6477272727272727
14
  gemma-2-9b-it,0.6185064935064934,0.5592532467532466,0.5873893507784849,0.6647727272727273,0.6761363636363636,0.5625,0.6193181818181818,0.5795454545454546,0.6420454545454546,0.5852272727272727
15
  Meta-Llama-3-70B-Instruct,0.6306818181818182,0.6186688311688312,0.6246175698800746,0.7102272727272727,0.6875,0.6420454545454546,0.6193181818181818,0.6022727272727273,0.6136363636363636,0.5397727272727273
16
+ Qwen2_5_14B_Instruct,0.6436688311688312,0.5938311688311688,0.6177464473895627,0.75,0.7386363636363636,0.6306818181818182,0.6420454545454546,0.6136363636363636,0.5965909090909091,0.5340909090909091
17
  sg_llama3_70b_inst,0.6217532467532468,0.5629870129870129,0.590912649920049,0.7272727272727273,0.6590909090909091,0.6477272727272727,0.6079545454545454,0.6136363636363636,0.5795454545454546,0.5170454545454546
18
  gemma-2-2b-it,0.4780844155844156,0.4448051948051948,0.46084478401384643,0.5568181818181818,0.5,0.5,0.48863636363636365,0.4375,0.4602272727272727,0.4034090909090909
19
  llama3-8b-cpt-sea-lionv2-instruct,0.48538961038961037,0.4472402597402597,0.46553468284769084,0.5284090909090909,0.5113636363636364,0.5227272727272727,0.5227272727272727,0.48863636363636365,0.44886363636363635,0.375
20
+ Qwen2_5_0_5B_Instruct,0.3538961038961039,0.1978896103896103,0.25383898238962527,0.45454545454545453,0.39204545454545453,0.3465909090909091,0.375,0.3409090909090909,0.30113636363636365,0.26704545454545453
21
  GPT4o_0513,0.7159090909090909,0.6941558441558444,0.7048646724637749,0.7613636363636364,0.7670454545454546,0.6988636363636364,0.6988636363636364,0.7045454545454546,0.6761363636363636,0.7045454545454546
22
+ cross_openhermes_llama3_70b_4096_inst,0.6071428571428571,0.5717532467532467,0.5889168666140888,0.6988636363636364,0.6363636363636364,0.625,0.5681818181818182,0.5965909090909091,0.5965909090909091,0.5284090909090909
23
  Meta-Llama-3.1-8B,0.29464285714285715,0.07857142857142858,0.12406015034269886,0.32954545454545453,0.32386363636363635,0.2840909090909091,0.2727272727272727,0.2840909090909091,0.3125,0.2556818181818182
results/cross_lingual/zero_shot/cross_mmlu.csv CHANGED
@@ -1,14 +1,23 @@
1
  Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
2
  Qwen2-7B-Instruct,0.6628571428571428,0.5257142857142858,0.5863736263242921,0.76,0.6666666666666666,0.72,0.5933333333333334,0.7066666666666667,0.6133333333333333,0.58
3
  Meta-Llama-3.1-8B-Instruct,0.5619047619047618,0.5020952380952383,0.5303189947159841,0.66,0.5266666666666666,0.5733333333333334,0.5266666666666666,0.5533333333333333,0.5533333333333333,0.54
 
 
 
4
  Qwen2-72B-Instruct,0.779047619047619,0.7611428571428573,0.7699911663398871,0.8133333333333334,0.7933333333333333,0.7933333333333333,0.7333333333333333,0.7666666666666667,0.78,0.7733333333333333
 
5
  Meta-Llama-3-8B-Instruct,0.5733333333333334,0.4742857142857144,0.5191272726777197,0.7133333333333334,0.5866666666666667,0.5733333333333334,0.5866666666666667,0.5066666666666667,0.5333333333333333,0.5133333333333333
6
  Meta-Llama-3.1-70B-Instruct,0.7638095238095238,0.7716190476190474,0.7676944251955988,0.8,0.74,0.7666666666666667,0.7666666666666667,0.76,0.7666666666666667,0.7466666666666667
 
7
  SeaLLMs-v3-7B-Chat,0.6628571428571429,0.6135238095238095,0.6372370860992635,0.74,0.6933333333333334,0.6933333333333334,0.6466666666666666,0.68,0.6,0.5866666666666667
 
8
  gemma-2-9b-it,0.7161904761904762,0.7163809523809525,0.7162857015727578,0.7733333333333333,0.74,0.7066666666666667,0.64,0.7266666666666667,0.6933333333333334,0.7333333333333333
9
  Meta-Llama-3-70B-Instruct,0.758095238095238,0.7316190476190477,0.7446218665971989,0.7933333333333333,0.7466666666666667,0.7733333333333333,0.7466666666666667,0.7733333333333333,0.7333333333333333,0.74
 
10
  sg_llama3_70b_inst,0.7342857142857142,0.7079999999999999,0.7209033280007295,0.82,0.6866666666666666,0.7333333333333333,0.6933333333333334,0.78,0.7266666666666667,0.7
11
  gemma-2-2b-it,0.5780952380952381,0.5480000000000002,0.5626454667971265,0.7,0.5866666666666667,0.5866666666666667,0.5333333333333333,0.5666666666666667,0.5333333333333333,0.54
12
  llama3-8b-cpt-sea-lionv2-instruct,0.6104761904761905,0.5685714285714286,0.5887791368067445,0.72,0.6,0.6133333333333333,0.58,0.6333333333333333,0.5933333333333334,0.5333333333333333
 
13
  GPT4o_0513,0.8038095238095239,0.8506666666666668,0.8265745643832277,0.8266666666666667,0.7933333333333333,0.8,0.7666666666666667,0.7933333333333333,0.8266666666666667,0.82
 
14
  Meta-Llama-3.1-8B,0.42000000000000004,0.1535238095238095,0.22485552968513808,0.4866666666666667,0.43333333333333335,0.44,0.38666666666666666,0.47333333333333333,0.3333333333333333,0.38666666666666666
 
1
  Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
2
  Qwen2-7B-Instruct,0.6628571428571428,0.5257142857142858,0.5863736263242921,0.76,0.6666666666666666,0.72,0.5933333333333334,0.7066666666666667,0.6133333333333333,0.58
3
  Meta-Llama-3.1-8B-Instruct,0.5619047619047618,0.5020952380952383,0.5303189947159841,0.66,0.5266666666666666,0.5733333333333334,0.5266666666666666,0.5533333333333333,0.5533333333333333,0.54
4
+ Qwen2_5_32B_Instruct,0.8019047619047619,0.7386666666666668,0.7689878008073214,0.8533333333333334,0.8533333333333334,0.82,0.7933333333333333,0.8,0.7866666666666666,0.7066666666666667
5
+ Qwen2_5_7B_Instruct,0.6733333333333332,0.580952380952381,0.6237408250578389,0.7666666666666667,0.7066666666666667,0.72,0.6666666666666666,0.6866666666666666,0.6266666666666667,0.54
6
+ Qwen2_5_1_5B_Instruct,0.5076190476190475,0.3721904761904762,0.42948154099799957,0.6,0.6066666666666667,0.5333333333333333,0.4866666666666667,0.5666666666666667,0.4,0.36
7
  Qwen2-72B-Instruct,0.779047619047619,0.7611428571428573,0.7699911663398871,0.8133333333333334,0.7933333333333333,0.7933333333333333,0.7333333333333333,0.7666666666666667,0.78,0.7733333333333333
8
+ cross_openhermes_llama3_8b_4096_inst,0.5523809523809523,0.5384761904761904,0.5453399518902743,0.6533333333333333,0.56,0.5333333333333333,0.5066666666666667,0.5466666666666666,0.5733333333333334,0.49333333333333335
9
  Meta-Llama-3-8B-Instruct,0.5733333333333334,0.4742857142857144,0.5191272726777197,0.7133333333333334,0.5866666666666667,0.5733333333333334,0.5866666666666667,0.5066666666666667,0.5333333333333333,0.5133333333333333
10
  Meta-Llama-3.1-70B-Instruct,0.7638095238095238,0.7716190476190474,0.7676944251955988,0.8,0.74,0.7666666666666667,0.7666666666666667,0.76,0.7666666666666667,0.7466666666666667
11
+ Qwen2_5_3B_Instruct,0.5857142857142856,0.48952380952380964,0.533316462053399,0.6933333333333334,0.6666666666666666,0.64,0.5266666666666666,0.6333333333333333,0.5466666666666666,0.3933333333333333
12
  SeaLLMs-v3-7B-Chat,0.6628571428571429,0.6135238095238095,0.6372370860992635,0.74,0.6933333333333334,0.6933333333333334,0.6466666666666666,0.68,0.6,0.5866666666666667
13
+ Qwen2_5_72B_Instruct,0.8123809523809525,0.8140952380952383,0.8132371917701643,0.8533333333333334,0.8333333333333334,0.84,0.7933333333333333,0.8066666666666666,0.7733333333333333,0.7866666666666666
14
  gemma-2-9b-it,0.7161904761904762,0.7163809523809525,0.7162857015727578,0.7733333333333333,0.74,0.7066666666666667,0.64,0.7266666666666667,0.6933333333333334,0.7333333333333333
15
  Meta-Llama-3-70B-Instruct,0.758095238095238,0.7316190476190477,0.7446218665971989,0.7933333333333333,0.7466666666666667,0.7733333333333333,0.7466666666666667,0.7733333333333333,0.7333333333333333,0.74
16
+ Qwen2_5_14B_Instruct,0.7266666666666666,0.680952380952381,0.7030672078887086,0.78,0.7533333333333333,0.7533333333333333,0.7266666666666667,0.7466666666666667,0.68,0.6466666666666666
17
  sg_llama3_70b_inst,0.7342857142857142,0.7079999999999999,0.7209033280007295,0.82,0.6866666666666666,0.7333333333333333,0.6933333333333334,0.78,0.7266666666666667,0.7
18
  gemma-2-2b-it,0.5780952380952381,0.5480000000000002,0.5626454667971265,0.7,0.5866666666666667,0.5866666666666667,0.5333333333333333,0.5666666666666667,0.5333333333333333,0.54
19
  llama3-8b-cpt-sea-lionv2-instruct,0.6104761904761905,0.5685714285714286,0.5887791368067445,0.72,0.6,0.6133333333333333,0.58,0.6333333333333333,0.5933333333333334,0.5333333333333333
20
+ Qwen2_5_0_5B_Instruct,0.4228571428571429,0.2436190476190476,0.3091364879297727,0.6133333333333333,0.5,0.4266666666666667,0.4066666666666667,0.3933333333333333,0.3333333333333333,0.2866666666666667
21
  GPT4o_0513,0.8038095238095239,0.8506666666666668,0.8265745643832277,0.8266666666666667,0.7933333333333333,0.8,0.7666666666666667,0.7933333333333333,0.8266666666666667,0.82
22
+ cross_openhermes_llama3_70b_4096_inst,0.7257142857142858,0.7375238095238097,0.7315713913287366,0.8,0.7066666666666667,0.7466666666666667,0.6733333333333333,0.7666666666666667,0.7333333333333333,0.6533333333333333
23
  Meta-Llama-3.1-8B,0.42000000000000004,0.1535238095238095,0.22485552968513808,0.4866666666666667,0.43333333333333335,0.44,0.38666666666666666,0.47333333333333333,0.3333333333333333,0.38666666666666666
results/cross_lingual/zero_shot/cross_xquad.csv CHANGED
@@ -1,14 +1,22 @@
1
  Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
2
  Qwen2-7B-Instruct,0.9418067226890756,0.9046218487394958,0.9228398561109394,0.957983193277311,0.9336134453781513,0.9436974789915966,0.9319327731092437,,,
3
  Meta-Llama-3.1-8B-Instruct,0.9287815126050419,0.8867647058823529,0.9072869161050563,0.9420168067226891,0.9193277310924369,0.9361344537815126,0.9176470588235294,,,
 
 
4
  Qwen2-72B-Instruct,0.9613445378151261,0.9516806722689075,0.956488195931227,0.9638655462184874,0.9596638655462185,0.9596638655462185,0.9621848739495799,,,
 
5
  Meta-Llama-3-8B-Instruct,0.9210084033613445,0.880672268907563,0.9003888121913395,0.9411764705882353,0.9033613445378151,0.9260504201680673,0.9134453781512605,,,
6
  Meta-Llama-3.1-70B-Instruct,0.9615546218487395,0.9512605042016806,0.9563798632627071,0.9647058823529412,0.9512605042016806,0.9647058823529412,0.965546218487395,,,
 
7
  SeaLLMs-v3-7B-Chat,0.9403361344537815,0.917016806722689,0.9285300818164836,0.9537815126050421,0.9378151260504202,0.9394957983193277,0.9302521008403362,,,
 
8
  gemma-2-9b-it,0.9567226890756303,0.9350840336134454,0.9457796088507574,0.9663865546218487,0.9411764705882353,0.9588235294117647,0.9605042016806723,,,
9
  Meta-Llama-3-70B-Instruct,0.9592436974789916,0.9422268907563025,0.9506591499208973,0.9714285714285714,0.9403361344537815,0.9596638655462185,0.965546218487395,,,
 
10
  sg_llama3_70b_inst,0.9552521008403361,0.9453781512605042,0.9502894779607259,0.9663865546218487,0.9436974789915966,0.957983193277311,0.9529411764705882,,,
11
  gemma-2-2b-it,0.917016806722689,0.8665966386554622,0.8910940700869288,0.934453781512605,0.9025210084033614,0.9193277310924369,0.9117647058823529,,,
12
  llama3-8b-cpt-sea-lionv2-instruct,0.9365546218487395,0.9086134453781513,0.9223724784871395,0.9420168067226891,0.926890756302521,0.9436974789915966,0.9336134453781513,,,
 
13
  GPT4o_0513,0.9605042016806723,0.951890756302521,0.9561780814209724,0.965546218487395,0.9537815126050421,0.9630252100840336,0.9596638655462185,,,
 
14
  Meta-Llama-3.1-8B,0.5619747899159664,0.21176470588235294,0.307613678067924,0.4756302521008403,0.6579831932773109,0.5571428571428572,0.5571428571428572,,,
 
1
  Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
2
  Qwen2-7B-Instruct,0.9418067226890756,0.9046218487394958,0.9228398561109394,0.957983193277311,0.9336134453781513,0.9436974789915966,0.9319327731092437,,,
3
  Meta-Llama-3.1-8B-Instruct,0.9287815126050419,0.8867647058823529,0.9072869161050563,0.9420168067226891,0.9193277310924369,0.9361344537815126,0.9176470588235294,,,
4
+ Qwen2_5_7B_Instruct,0.9460084033613445,0.9178571428571428,0.9317201790045005,0.9554621848739496,0.9487394957983193,0.9445378151260504,0.9352941176470588,,,
5
+ Qwen2_5_1_5B_Instruct,0.8939075630252101,0.8308823529411764,0.8612434620121144,0.9100840336134454,0.9,0.8957983193277311,0.8697478991596639,,,
6
  Qwen2-72B-Instruct,0.9613445378151261,0.9516806722689075,0.956488195931227,0.9638655462184874,0.9596638655462185,0.9596638655462185,0.9621848739495799,,,
7
+ cross_openhermes_llama3_8b_4096_inst,0.9052521008403361,0.8705882352941177,0.8875818539385091,0.9168067226890756,0.9025210084033614,0.9050420168067227,0.8966386554621849,,,
8
  Meta-Llama-3-8B-Instruct,0.9210084033613445,0.880672268907563,0.9003888121913395,0.9411764705882353,0.9033613445378151,0.9260504201680673,0.9134453781512605,,,
9
  Meta-Llama-3.1-70B-Instruct,0.9615546218487395,0.9512605042016806,0.9563798632627071,0.9647058823529412,0.9512605042016806,0.9647058823529412,0.965546218487395,,,
10
+ Qwen2_5_3B_Instruct,0.9378151260504202,0.8924369747899159,0.9145635113049859,0.9504201680672268,0.9327731092436975,0.9378151260504202,0.9302521008403362,,,
11
  SeaLLMs-v3-7B-Chat,0.9403361344537815,0.917016806722689,0.9285300818164836,0.9537815126050421,0.9378151260504202,0.9394957983193277,0.9302521008403362,,,
12
+ Qwen2_5_72B_Instruct,0.9682773109243697,0.9632352941176471,0.9657497216354985,0.9714285714285714,0.9638655462184874,0.9680672268907563,0.9697478991596639,,,
13
  gemma-2-9b-it,0.9567226890756303,0.9350840336134454,0.9457796088507574,0.9663865546218487,0.9411764705882353,0.9588235294117647,0.9605042016806723,,,
14
  Meta-Llama-3-70B-Instruct,0.9592436974789916,0.9422268907563025,0.9506591499208973,0.9714285714285714,0.9403361344537815,0.9596638655462185,0.965546218487395,,,
15
+ Qwen2_5_14B_Instruct,0.9581932773109244,0.9474789915966386,0.9528060148705768,0.965546218487395,0.9529411764705882,0.9571428571428572,0.9571428571428572,,,
16
  sg_llama3_70b_inst,0.9552521008403361,0.9453781512605042,0.9502894779607259,0.9663865546218487,0.9436974789915966,0.957983193277311,0.9529411764705882,,,
17
  gemma-2-2b-it,0.917016806722689,0.8665966386554622,0.8910940700869288,0.934453781512605,0.9025210084033614,0.9193277310924369,0.9117647058823529,,,
18
  llama3-8b-cpt-sea-lionv2-instruct,0.9365546218487395,0.9086134453781513,0.9223724784871395,0.9420168067226891,0.926890756302521,0.9436974789915966,0.9336134453781513,,,
19
+ Qwen2_5_0_5B_Instruct,0.6584033613445378,0.48172268907563026,0.5563732844778362,0.692436974789916,0.673109243697479,0.653781512605042,0.6142857142857143,,,
20
  GPT4o_0513,0.9605042016806723,0.951890756302521,0.9561780814209724,0.965546218487395,0.9537815126050421,0.9630252100840336,0.9596638655462185,,,
21
+ cross_openhermes_llama3_70b_4096_inst,0.9510504201680672,0.9464285714285714,0.9487338668359928,0.9621848739495799,0.9512605042016806,0.9487394957983193,0.9420168067226891,,,
22
  Meta-Llama-3.1-8B,0.5619747899159664,0.21176470588235294,0.307613678067924,0.4756302521008403,0.6579831932773109,0.5571428571428572,0.5571428571428572,,,
results/cultural_reasoning/zero_shot/cn_eval.csv CHANGED
@@ -1,14 +1,23 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.8285714285714286
3
  Meta-Llama-3.1-8B-Instruct,0.4857142857142857
 
 
 
4
  Qwen2-72B-Instruct,0.8285714285714286
 
5
  Meta-Llama-3-8B-Instruct,0.4666666666666667
6
  Meta-Llama-3.1-70B-Instruct,0.5428571428571428
 
7
  SeaLLMs-v3-7B-Chat,0.819047619047619
 
8
  gemma-2-9b-it,0.580952380952381
9
  Meta-Llama-3-70B-Instruct,0.5333333333333333
 
10
  sg_llama3_70b_inst,0.5523809523809524
11
  gemma-2-2b-it,0.3619047619047619
12
  llama3-8b-cpt-sea-lionv2-instruct,0.49523809523809526
 
13
  GPT4o_0513,0.8095238095238095
 
14
  Meta-Llama-3.1-8B,0.3904761904761905
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.8285714285714286
3
  Meta-Llama-3.1-8B-Instruct,0.4857142857142857
4
+ Qwen2_5_32B_Instruct,0.8476190476190476
5
+ Qwen2_5_7B_Instruct,0.8
6
+ Qwen2_5_1_5B_Instruct,0.5523809523809524
7
  Qwen2-72B-Instruct,0.8285714285714286
8
+ cross_openhermes_llama3_8b_4096_inst,0.47619047619047616
9
  Meta-Llama-3-8B-Instruct,0.4666666666666667
10
  Meta-Llama-3.1-70B-Instruct,0.5428571428571428
11
+ Qwen2_5_3B_Instruct,0.7142857142857143
12
  SeaLLMs-v3-7B-Chat,0.819047619047619
13
+ Qwen2_5_72B_Instruct,0.8761904761904762
14
  gemma-2-9b-it,0.580952380952381
15
  Meta-Llama-3-70B-Instruct,0.5333333333333333
16
+ Qwen2_5_14B_Instruct,0.8285714285714286
17
  sg_llama3_70b_inst,0.5523809523809524
18
  gemma-2-2b-it,0.3619047619047619
19
  llama3-8b-cpt-sea-lionv2-instruct,0.49523809523809526
20
+ Qwen2_5_0_5B_Instruct,0.3619047619047619
21
  GPT4o_0513,0.8095238095238095
22
+ cross_openhermes_llama3_70b_4096_inst,0.6095238095238096
23
  Meta-Llama-3.1-8B,0.3904761904761905
results/cultural_reasoning/zero_shot/ph_eval.csv CHANGED
@@ -1,14 +1,23 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.52
3
  Meta-Llama-3.1-8B-Instruct,0.6
 
 
 
4
  Qwen2-72B-Instruct,0.62
 
5
  Meta-Llama-3-8B-Instruct,0.58
6
  Meta-Llama-3.1-70B-Instruct,0.68
 
7
  SeaLLMs-v3-7B-Chat,0.47
 
8
  gemma-2-9b-it,0.58
9
  Meta-Llama-3-70B-Instruct,0.63
 
10
  sg_llama3_70b_inst,0.69
11
  gemma-2-2b-it,0.4
12
  llama3-8b-cpt-sea-lionv2-instruct,0.56
 
13
  GPT4o_0513,0.77
 
14
  Meta-Llama-3.1-8B,0.43
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.52
3
  Meta-Llama-3.1-8B-Instruct,0.6
4
+ Qwen2_5_32B_Instruct,0.7
5
+ Qwen2_5_7B_Instruct,0.55
6
+ Qwen2_5_1_5B_Instruct,0.37
7
  Qwen2-72B-Instruct,0.62
8
+ cross_openhermes_llama3_8b_4096_inst,0.47
9
  Meta-Llama-3-8B-Instruct,0.58
10
  Meta-Llama-3.1-70B-Instruct,0.68
11
+ Qwen2_5_3B_Instruct,0.4
12
  SeaLLMs-v3-7B-Chat,0.47
13
+ Qwen2_5_72B_Instruct,0.72
14
  gemma-2-9b-it,0.58
15
  Meta-Llama-3-70B-Instruct,0.63
16
+ Qwen2_5_14B_Instruct,0.6
17
  sg_llama3_70b_inst,0.69
18
  gemma-2-2b-it,0.4
19
  llama3-8b-cpt-sea-lionv2-instruct,0.56
20
+ Qwen2_5_0_5B_Instruct,0.27
21
  GPT4o_0513,0.77
22
+ cross_openhermes_llama3_70b_4096_inst,0.68
23
  Meta-Llama-3.1-8B,0.43
results/cultural_reasoning/zero_shot/sg_eval.csv CHANGED
@@ -1,14 +1,23 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.6796116504854369
3
  Meta-Llama-3.1-8B-Instruct,0.5728155339805825
 
 
 
4
  Qwen2-72B-Instruct,0.7378640776699029
 
5
  Meta-Llama-3-8B-Instruct,0.6504854368932039
6
  Meta-Llama-3.1-70B-Instruct,0.7184466019417476
 
7
  SeaLLMs-v3-7B-Chat,0.7184466019417476
 
8
  gemma-2-9b-it,0.6699029126213593
9
  Meta-Llama-3-70B-Instruct,0.7087378640776699
 
10
  sg_llama3_70b_inst,0.6699029126213593
11
  gemma-2-2b-it,0.5533980582524272
12
  llama3-8b-cpt-sea-lionv2-instruct,0.6504854368932039
 
13
  GPT4o_0513,0.8446601941747572
 
14
  Meta-Llama-3.1-8B,0.39805825242718446
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.6796116504854369
3
  Meta-Llama-3.1-8B-Instruct,0.5728155339805825
4
+ Qwen2_5_32B_Instruct,0.7184466019417476
5
+ Qwen2_5_7B_Instruct,0.6699029126213593
6
+ Qwen2_5_1_5B_Instruct,0.5048543689320388
7
  Qwen2-72B-Instruct,0.7378640776699029
8
+ cross_openhermes_llama3_8b_4096_inst,0.6019417475728155
9
  Meta-Llama-3-8B-Instruct,0.6504854368932039
10
  Meta-Llama-3.1-70B-Instruct,0.7184466019417476
11
+ Qwen2_5_3B_Instruct,0.6310679611650486
12
  SeaLLMs-v3-7B-Chat,0.7184466019417476
13
+ Qwen2_5_72B_Instruct,0.7669902912621359
14
  gemma-2-9b-it,0.6699029126213593
15
  Meta-Llama-3-70B-Instruct,0.7087378640776699
16
+ Qwen2_5_14B_Instruct,0.7669902912621359
17
  sg_llama3_70b_inst,0.6699029126213593
18
  gemma-2-2b-it,0.5533980582524272
19
  llama3-8b-cpt-sea-lionv2-instruct,0.6504854368932039
20
+ Qwen2_5_0_5B_Instruct,0.4077669902912621
21
  GPT4o_0513,0.8446601941747572
22
+ cross_openhermes_llama3_70b_4096_inst,0.6796116504854369
23
  Meta-Llama-3.1-8B,0.39805825242718446
results/cultural_reasoning/zero_shot/sg_eval_v1_cleaned.csv CHANGED
@@ -1,14 +1,23 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.6323529411764706
3
  Meta-Llama-3.1-8B-Instruct,0.5294117647058824
 
 
 
4
  Qwen2-72B-Instruct,0.6764705882352942
 
5
  Meta-Llama-3-8B-Instruct,0.5882352941176471
6
  Meta-Llama-3.1-70B-Instruct,0.6617647058823529
 
7
  SeaLLMs-v3-7B-Chat,0.5882352941176471
 
8
  gemma-2-9b-it,0.6029411764705882
9
  Meta-Llama-3-70B-Instruct,0.6617647058823529
 
10
  sg_llama3_70b_inst,0.6176470588235294
11
  gemma-2-2b-it,0.4852941176470588
12
  llama3-8b-cpt-sea-lionv2-instruct,0.6617647058823529
 
13
  GPT4o_0513,0.8088235294117647
 
14
  Meta-Llama-3.1-8B,0.4117647058823529
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.6323529411764706
3
  Meta-Llama-3.1-8B-Instruct,0.5294117647058824
4
+ Qwen2_5_32B_Instruct,0.6470588235294118
5
+ Qwen2_5_7B_Instruct,0.5882352941176471
6
+ Qwen2_5_1_5B_Instruct,0.47058823529411764
7
  Qwen2-72B-Instruct,0.6764705882352942
8
+ cross_openhermes_llama3_8b_4096_inst,0.6029411764705882
9
  Meta-Llama-3-8B-Instruct,0.5882352941176471
10
  Meta-Llama-3.1-70B-Instruct,0.6617647058823529
11
+ Qwen2_5_3B_Instruct,0.5882352941176471
12
  SeaLLMs-v3-7B-Chat,0.5882352941176471
13
+ Qwen2_5_72B_Instruct,0.7205882352941176
14
  gemma-2-9b-it,0.6029411764705882
15
  Meta-Llama-3-70B-Instruct,0.6617647058823529
16
+ Qwen2_5_14B_Instruct,0.6911764705882353
17
  sg_llama3_70b_inst,0.6176470588235294
18
  gemma-2-2b-it,0.4852941176470588
19
  llama3-8b-cpt-sea-lionv2-instruct,0.6617647058823529
20
+ Qwen2_5_0_5B_Instruct,0.36764705882352944
21
  GPT4o_0513,0.8088235294117647
22
+ cross_openhermes_llama3_70b_4096_inst,0.5882352941176471
23
  Meta-Llama-3.1-8B,0.4117647058823529
results/cultural_reasoning/zero_shot/sg_eval_v2_mcq.csv CHANGED
@@ -1,3 +1,12 @@
1
  Model,Accuracy
 
 
 
 
 
 
2
  Meta-Llama-3-70B-Instruct,0.8381818181818181
 
3
  sg_llama3_70b_inst,0.8436363636363636
 
 
 
1
  Model,Accuracy
2
+ Qwen2_5_32B_Instruct,0.8436363636363636
3
+ Qwen2_5_7B_Instruct,0.78
4
+ Qwen2_5_1_5B_Instruct,0.6636363636363637
5
+ cross_openhermes_llama3_8b_4096_inst,0.7490909090909091
6
+ Qwen2_5_3B_Instruct,0.72
7
+ Qwen2_5_72B_Instruct,0.8618181818181818
8
  Meta-Llama-3-70B-Instruct,0.8381818181818181
9
+ Qwen2_5_14B_Instruct,0.8345454545454546
10
  sg_llama3_70b_inst,0.8436363636363636
11
+ Qwen2_5_0_5B_Instruct,0.5727272727272728
12
+ cross_openhermes_llama3_70b_4096_inst,0.8381818181818181
results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv CHANGED
@@ -1,3 +1,12 @@
1
  Model,Accuracy
 
 
 
 
 
 
2
  Meta-Llama-3-70B-Instruct,50.599999999999994
 
3
  sg_llama3_70b_inst,51.959999999999994
 
 
 
1
  Model,Accuracy
2
+ Qwen2_5_32B_Instruct,53.2
3
+ Qwen2_5_7B_Instruct,50.279999999999994
4
+ Qwen2_5_1_5B_Instruct,44.480000000000004
5
+ cross_openhermes_llama3_8b_4096_inst,51.6
6
+ Qwen2_5_3B_Instruct,47.24
7
+ Qwen2_5_72B_Instruct,53.32
8
  Meta-Llama-3-70B-Instruct,50.599999999999994
9
+ Qwen2_5_14B_Instruct,53.2
10
  sg_llama3_70b_inst,51.959999999999994
11
+ Qwen2_5_0_5B_Instruct,35.28
12
+ cross_openhermes_llama3_70b_4096_inst,53.2
results/cultural_reasoning/zero_shot/us_eval.csv CHANGED
@@ -1,14 +1,23 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.7289719626168224
3
  Meta-Llama-3.1-8B-Instruct,0.7289719626168224
 
 
 
4
  Qwen2-72B-Instruct,0.8785046728971962
 
5
  Meta-Llama-3-8B-Instruct,0.7009345794392523
6
  Meta-Llama-3.1-70B-Instruct,0.8411214953271028
 
7
  SeaLLMs-v3-7B-Chat,0.6915887850467289
 
8
  gemma-2-9b-it,0.8130841121495327
9
  Meta-Llama-3-70B-Instruct,0.8691588785046729
 
10
  sg_llama3_70b_inst,0.8598130841121495
11
  gemma-2-2b-it,0.6915887850467289
12
  llama3-8b-cpt-sea-lionv2-instruct,0.7009345794392523
 
13
  GPT4o_0513,0.8691588785046729
 
14
  Meta-Llama-3.1-8B,0.3644859813084112
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.7289719626168224
3
  Meta-Llama-3.1-8B-Instruct,0.7289719626168224
4
+ Qwen2_5_32B_Instruct,0.8411214953271028
5
+ Qwen2_5_7B_Instruct,0.7663551401869159
6
+ Qwen2_5_1_5B_Instruct,0.5981308411214953
7
  Qwen2-72B-Instruct,0.8785046728971962
8
+ cross_openhermes_llama3_8b_4096_inst,0.6448598130841121
9
  Meta-Llama-3-8B-Instruct,0.7009345794392523
10
  Meta-Llama-3.1-70B-Instruct,0.8411214953271028
11
+ Qwen2_5_3B_Instruct,0.6728971962616822
12
  SeaLLMs-v3-7B-Chat,0.6915887850467289
13
+ Qwen2_5_72B_Instruct,0.8598130841121495
14
  gemma-2-9b-it,0.8130841121495327
15
  Meta-Llama-3-70B-Instruct,0.8691588785046729
16
+ Qwen2_5_14B_Instruct,0.822429906542056
17
  sg_llama3_70b_inst,0.8598130841121495
18
  gemma-2-2b-it,0.6915887850467289
19
  llama3-8b-cpt-sea-lionv2-instruct,0.7009345794392523
20
+ Qwen2_5_0_5B_Instruct,0.37383177570093457
21
  GPT4o_0513,0.8691588785046729
22
+ cross_openhermes_llama3_70b_4096_inst,0.8317757009345794
23
  Meta-Llama-3.1-8B,0.3644859813084112
results/dialogue/zero_shot/dialogsum.csv CHANGED
@@ -1,13 +1,22 @@
1
  Model,Average,ROUGE-1,ROUGE-2,ROUGE-L
2
  Qwen2-7B-Instruct,0.2092663759873139,0.30486100228371826,0.09413830506038247,0.22879982061784096
3
  Meta-Llama-3.1-8B-Instruct,0.24990743661648132,0.3515557454075673,0.12563120411564133,0.2725353603262354
 
 
 
4
  Qwen2-72B-Instruct,0.2183280630214023,0.316174552903144,0.10156543495268992,0.23724420120837297
 
5
  Meta-Llama-3-8B-Instruct,0.23978455271183616,0.33971099717559883,0.1203340311564728,0.2593086298034369
6
  Meta-Llama-3.1-70B-Instruct,0.2526239717396146,0.35714386898604744,0.1258832921736473,0.27484475405914904
 
7
  SeaLLMs-v3-7B-Chat,0.24891094210680076,0.35393482223136147,0.12172072639345373,0.27107727769558715
 
8
  gemma-2-9b-it,0.2560682231168516,0.36247455000865003,0.12571639767749476,0.2800137216644101
9
  Meta-Llama-3-70B-Instruct,0.2557065499979308,0.36058417323628,0.12758087337786866,0.2789546033796438
 
10
  sg_llama3_70b_inst,0.26633840691332344,0.3692028513115729,0.1412505883866801,0.2885617810417173
11
  gemma-2-2b-it,0.2597323674875989,0.36848124762381895,0.12622684440269072,0.2844890104362872
12
  llama3-8b-cpt-sea-lionv2-instruct,0.25777587511641403,0.35911990072292727,0.13269121463917308,0.2815165099871418
 
13
  GPT4o_0513,0.2375730297294346,0.3364674648846549,0.11718194476069822,0.25906967954295057
 
 
1
  Model,Average,ROUGE-1,ROUGE-2,ROUGE-L
2
  Qwen2-7B-Instruct,0.2092663759873139,0.30486100228371826,0.09413830506038247,0.22879982061784096
3
  Meta-Llama-3.1-8B-Instruct,0.24990743661648132,0.3515557454075673,0.12563120411564133,0.2725353603262354
4
+ Qwen2_5_32B_Instruct,0.2393912015484827,0.3451081398022419,0.11160543395371676,0.26146003088948944
5
+ Qwen2_5_7B_Instruct,0.2502928721533066,0.35566069744050016,0.12210269253668227,0.27311522648273734
6
+ Qwen2_5_1_5B_Instruct,0.20263242988485167,0.30002072253966694,0.08416670238558713,0.22370986472930096
7
  Qwen2-72B-Instruct,0.2183280630214023,0.316174552903144,0.10156543495268992,0.23724420120837297
8
+ cross_openhermes_llama3_8b_4096_inst,0.2519360474995096,0.3481981488260775,0.13235699958788102,0.2752529940845703
9
  Meta-Llama-3-8B-Instruct,0.23978455271183616,0.33971099717559883,0.1203340311564728,0.2593086298034369
10
  Meta-Llama-3.1-70B-Instruct,0.2526239717396146,0.35714386898604744,0.1258832921736473,0.27484475405914904
11
+ Qwen2_5_3B_Instruct,0.22107390172674926,0.32206286484028823,0.10065030710901035,0.24050853323094928
12
  SeaLLMs-v3-7B-Chat,0.24891094210680076,0.35393482223136147,0.12172072639345373,0.27107727769558715
13
+ Qwen2_5_72B_Instruct,0.23460549655507293,0.3373580017785426,0.10893746645433498,0.25752102143234123
14
  gemma-2-9b-it,0.2560682231168516,0.36247455000865003,0.12571639767749476,0.2800137216644101
15
  Meta-Llama-3-70B-Instruct,0.2557065499979308,0.36058417323628,0.12758087337786866,0.2789546033796438
16
+ Qwen2_5_14B_Instruct,0.2343478938479703,0.3386251381162625,0.10742381514017992,0.2569947282874686
17
  sg_llama3_70b_inst,0.26633840691332344,0.3692028513115729,0.1412505883866801,0.2885617810417173
18
  gemma-2-2b-it,0.2597323674875989,0.36848124762381895,0.12622684440269072,0.2844890104362872
19
  llama3-8b-cpt-sea-lionv2-instruct,0.25777587511641403,0.35911990072292727,0.13269121463917308,0.2815165099871418
20
+ Qwen2_5_0_5B_Instruct,0.19408176276624156,0.28989753303423227,0.07842728643649079,0.21392046882800164
21
  GPT4o_0513,0.2375730297294346,0.3364674648846549,0.11718194476069822,0.25906967954295057
22
+ cross_openhermes_llama3_70b_4096_inst,0.2727448037865066,0.3786585439052446,0.14288118221672744,0.2966946852375478
results/dialogue/zero_shot/dream.csv CHANGED
@@ -1,13 +1,22 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.9353258206761391
3
  Meta-Llama-3.1-8B-Instruct,0.9039686428221461
 
 
 
4
  Qwen2-72B-Instruct,0.9612934835864773
 
5
  Meta-Llama-3-8B-Instruct,0.8946594806467418
6
  Meta-Llama-3.1-70B-Instruct,0.9559039686428221
 
7
  SeaLLMs-v3-7B-Chat,0.9265066144047036
 
8
  gemma-2-9b-it,0.9416952474277315
9
  Meta-Llama-3-70B-Instruct,0.9480646741793238
 
10
  sg_llama3_70b_inst,0.9524742773150416
11
  gemma-2-2b-it,0.8510534051935326
12
  llama3-8b-cpt-sea-lionv2-instruct,0.8858402743753062
 
13
  GPT4o_0513,0.9583537481626654
 
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.9353258206761391
3
  Meta-Llama-3.1-8B-Instruct,0.9039686428221461
4
+ Qwen2_5_32B_Instruct,0.9559039686428221
5
+ Qwen2_5_7B_Instruct,0.9348358647721705
6
+ Qwen2_5_1_5B_Instruct,0.8314551690347869
7
  Qwen2-72B-Instruct,0.9612934835864773
8
+ cross_openhermes_llama3_8b_4096_inst,0.8613424791768741
9
  Meta-Llama-3-8B-Instruct,0.8946594806467418
10
  Meta-Llama-3.1-70B-Instruct,0.9559039686428221
11
+ Qwen2_5_3B_Instruct,0.9029887310142087
12
  SeaLLMs-v3-7B-Chat,0.9265066144047036
13
+ Qwen2_5_72B_Instruct,0.9627633512983832
14
  gemma-2-9b-it,0.9416952474277315
15
  Meta-Llama-3-70B-Instruct,0.9480646741793238
16
+ Qwen2_5_14B_Instruct,0.9461048505634493
17
  sg_llama3_70b_inst,0.9524742773150416
18
  gemma-2-2b-it,0.8510534051935326
19
  llama3-8b-cpt-sea-lionv2-instruct,0.8858402743753062
20
+ Qwen2_5_0_5B_Instruct,0.6526212640862322
21
  GPT4o_0513,0.9583537481626654
22
+ cross_openhermes_llama3_70b_4096_inst,0.9514943655071043
results/dialogue/zero_shot/samsum.csv CHANGED
@@ -1,13 +1,22 @@
1
  Model,Average,ROUGE-1,ROUGE-2,ROUGE-L
2
  Qwen2-7B-Instruct,0.25668781132950264,0.36375948458827556,0.12939804942125302,0.27690589997897935
3
  Meta-Llama-3.1-8B-Instruct,0.2891505262763006,0.4001228010515775,0.15677431231732958,0.31055446545999466
 
 
 
4
  Qwen2-72B-Instruct,0.2800906719573321,0.3887231369098802,0.15237661526996754,0.29917226369214855
 
5
  Meta-Llama-3-8B-Instruct,0.2846315092346869,0.39397110152251813,0.154320846916639,0.30560257926490364
6
  Meta-Llama-3.1-70B-Instruct,0.28934874612070227,0.4036295731242805,0.15211190810296196,0.31230475713486433
 
7
  SeaLLMs-v3-7B-Chat,0.2959981719045788,0.4078820748825196,0.16338306782652476,0.316729373004692
 
8
  gemma-2-9b-it,0.3100514077180449,0.4289412957792292,0.16727050182456474,0.3339424255503407
9
  Meta-Llama-3-70B-Instruct,0.2893525314227379,0.4030746211134018,0.15236139065578,0.3126215824990321
 
10
  sg_llama3_70b_inst,0.3146051103643872,0.4271361513564755,0.18238925099430264,0.33428992874238356
11
  gemma-2-2b-it,0.31118787136959813,0.4324251755711466,0.16441328335793207,0.33672515517971563
12
  llama3-8b-cpt-sea-lionv2-instruct,0.306997595680581,0.4214048099551701,0.1709790451938523,0.3286089318927205
 
13
  GPT4o_0513,0.27736679291505306,0.386750207633093,0.14889081847621596,0.2964593526358502
 
 
1
  Model,Average,ROUGE-1,ROUGE-2,ROUGE-L
2
  Qwen2-7B-Instruct,0.25668781132950264,0.36375948458827556,0.12939804942125302,0.27690589997897935
3
  Meta-Llama-3.1-8B-Instruct,0.2891505262763006,0.4001228010515775,0.15677431231732958,0.31055446545999466
4
+ Qwen2_5_32B_Instruct,0.2844232627209405,0.3986263552639068,0.14766658533002341,0.3069768475688912
5
+ Qwen2_5_7B_Instruct,0.2987576845890178,0.4163299367235864,0.1599063413842216,0.32003677565924527
6
+ Qwen2_5_1_5B_Instruct,0.2333120091694482,0.34339111721032756,0.10195887716459845,0.25458603313341865
7
  Qwen2-72B-Instruct,0.2800906719573321,0.3887231369098802,0.15237661526996754,0.29917226369214855
8
+ cross_openhermes_llama3_8b_4096_inst,0.2961783902880866,0.40739117705606903,0.16227160132916457,0.3188723924790264
9
  Meta-Llama-3-8B-Instruct,0.2846315092346869,0.39397110152251813,0.154320846916639,0.30560257926490364
10
  Meta-Llama-3.1-70B-Instruct,0.28934874612070227,0.4036295731242805,0.15211190810296196,0.31230475713486433
11
+ Qwen2_5_3B_Instruct,0.26935624341081515,0.380865832002109,0.13872106416227833,0.28848183406805816
12
  SeaLLMs-v3-7B-Chat,0.2959981719045788,0.4078820748825196,0.16338306782652476,0.316729373004692
13
+ Qwen2_5_72B_Instruct,0.28852247889830335,0.3996215000271418,0.15494490129237035,0.31100103537539775
14
  gemma-2-9b-it,0.3100514077180449,0.4289412957792292,0.16727050182456474,0.3339424255503407
15
  Meta-Llama-3-70B-Instruct,0.2893525314227379,0.4030746211134018,0.15236139065578,0.3126215824990321
16
+ Qwen2_5_14B_Instruct,0.2713801253928723,0.3836253496005304,0.13683087953788298,0.2936841470402035
17
  sg_llama3_70b_inst,0.3146051103643872,0.4271361513564755,0.18238925099430264,0.33428992874238356
18
  gemma-2-2b-it,0.31118787136959813,0.4324251755711466,0.16441328335793207,0.33672515517971563
19
  llama3-8b-cpt-sea-lionv2-instruct,0.306997595680581,0.4214048099551701,0.1709790451938523,0.3286089318927205
20
+ Qwen2_5_0_5B_Instruct,0.20766179544894214,0.3105872033328297,0.08726222085933319,0.22513596215466355
21
  GPT4o_0513,0.27736679291505306,0.386750207633093,0.14889081847621596,0.2964593526358502
22
+ cross_openhermes_llama3_70b_4096_inst,0.32140993091581,0.43714768967090817,0.18346420469253946,0.3436178983839823
results/emotion/zero_shot/ind_emotion.csv CHANGED
@@ -1,13 +1,22 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.6545454545454545
3
  Meta-Llama-3.1-8B-Instruct,0.6545454545454545
 
 
 
4
  Qwen2-72B-Instruct,0.675
 
5
  Meta-Llama-3-8B-Instruct,0.6522727272727272
6
  Meta-Llama-3.1-70B-Instruct,0.7159090909090909
 
7
  SeaLLMs-v3-7B-Chat,0.6454545454545455
 
8
  gemma-2-9b-it,0.7477272727272727
9
  Meta-Llama-3-70B-Instruct,0.6909090909090909
 
10
  sg_llama3_70b_inst,0.7
11
  gemma-2-2b-it,0.6636363636363637
12
  llama3-8b-cpt-sea-lionv2-instruct,0.6613636363636364
 
13
  GPT4o_0513,0.7068181818181818
 
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.6545454545454545
3
  Meta-Llama-3.1-8B-Instruct,0.6545454545454545
4
+ Qwen2_5_32B_Instruct,0.6909090909090909
5
+ Qwen2_5_7B_Instruct,0.6636363636363637
6
+ Qwen2_5_1_5B_Instruct,0.5795454545454546
7
  Qwen2-72B-Instruct,0.675
8
+ cross_openhermes_llama3_8b_4096_inst,0.6704545454545454
9
  Meta-Llama-3-8B-Instruct,0.6522727272727272
10
  Meta-Llama-3.1-70B-Instruct,0.7159090909090909
11
+ Qwen2_5_3B_Instruct,0.5522727272727272
12
  SeaLLMs-v3-7B-Chat,0.6454545454545455
13
+ Qwen2_5_72B_Instruct,0.7068181818181818
14
  gemma-2-9b-it,0.7477272727272727
15
  Meta-Llama-3-70B-Instruct,0.6909090909090909
16
+ Qwen2_5_14B_Instruct,0.6954545454545454
17
  sg_llama3_70b_inst,0.7
18
  gemma-2-2b-it,0.6636363636363637
19
  llama3-8b-cpt-sea-lionv2-instruct,0.6613636363636364
20
+ Qwen2_5_0_5B_Instruct,0.37727272727272726
21
  GPT4o_0513,0.7068181818181818
22
+ cross_openhermes_llama3_70b_4096_inst,0.6863636363636364
results/emotion/zero_shot/sst2.csv CHANGED
@@ -1,13 +1,22 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.9346330275229358
3
  Meta-Llama-3.1-8B-Instruct,0.8646788990825688
 
 
 
4
  Qwen2-72B-Instruct,0.9346330275229358
 
5
  Meta-Llama-3-8B-Instruct,0.8784403669724771
6
  Meta-Llama-3.1-70B-Instruct,0.9529816513761468
 
7
  SeaLLMs-v3-7B-Chat,0.9403669724770642
 
8
  gemma-2-9b-it,0.9311926605504587
9
  Meta-Llama-3-70B-Instruct,0.9495412844036697
 
10
  sg_llama3_70b_inst,0.9334862385321101
11
  gemma-2-2b-it,0.9243119266055045
12
  llama3-8b-cpt-sea-lionv2-instruct,0.9128440366972477
 
13
  GPT4o_0513,0.9415137614678899
 
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.9346330275229358
3
  Meta-Llama-3.1-8B-Instruct,0.8646788990825688
4
+ Qwen2_5_32B_Instruct,0.9472477064220184
5
+ Qwen2_5_7B_Instruct,0.9254587155963303
6
+ Qwen2_5_1_5B_Instruct,0.9231651376146789
7
  Qwen2-72B-Instruct,0.9346330275229358
8
+ cross_openhermes_llama3_8b_4096_inst,0.926605504587156
9
  Meta-Llama-3-8B-Instruct,0.8784403669724771
10
  Meta-Llama-3.1-70B-Instruct,0.9529816513761468
11
+ Qwen2_5_3B_Instruct,0.8245412844036697
12
  SeaLLMs-v3-7B-Chat,0.9403669724770642
13
+ Qwen2_5_72B_Instruct,0.9334862385321101
14
  gemma-2-9b-it,0.9311926605504587
15
  Meta-Llama-3-70B-Instruct,0.9495412844036697
16
+ Qwen2_5_14B_Instruct,0.9311926605504587
17
  sg_llama3_70b_inst,0.9334862385321101
18
  gemma-2-2b-it,0.9243119266055045
19
  llama3-8b-cpt-sea-lionv2-instruct,0.9128440366972477
20
+ Qwen2_5_0_5B_Instruct,0.7889908256880734
21
  GPT4o_0513,0.9415137614678899
22
+ cross_openhermes_llama3_70b_4096_inst,0.9380733944954128
results/flores_translation/zero_shot/ind2eng.csv CHANGED
@@ -1,14 +1,19 @@
1
  Model,BLEU
2
  Qwen2-7B-Instruct,0.29408553325533265
3
  Meta-Llama-3.1-8B-Instruct,0.3765752579792989
 
4
  Qwen2-72B-Instruct,0.4043588265556185
 
5
  Meta-Llama-3-8B-Instruct,0.33079891679041123
6
  Meta-Llama-3.1-70B-Instruct,0.43366494500251235
 
7
  SeaLLMs-v3-7B-Chat,0.3594829412574955
8
  gemma-2-9b-it,0.40786563079141763
9
  Meta-Llama-3-70B-Instruct,0.3830092775167675
 
10
  sg_llama3_70b_inst,0.4086440304524362
11
  gemma-2-2b-it,0.3482500758113138
12
  llama3-8b-cpt-sea-lionv2-instruct,0.3916108972514423
13
  GPT4o_0513,0.42589589086974855
 
14
  Meta-Llama-3.1-8B,0.008893689222008793
 
1
  Model,BLEU
2
  Qwen2-7B-Instruct,0.29408553325533265
3
  Meta-Llama-3.1-8B-Instruct,0.3765752579792989
4
+ Qwen2_5_7B_Instruct,0.36472669481333536
5
  Qwen2-72B-Instruct,0.4043588265556185
6
+ cross_openhermes_llama3_8b_4096_inst,0.37782883404862155
7
  Meta-Llama-3-8B-Instruct,0.33079891679041123
8
  Meta-Llama-3.1-70B-Instruct,0.43366494500251235
9
+ Qwen2_5_3B_Instruct,0.3316936422167389
10
  SeaLLMs-v3-7B-Chat,0.3594829412574955
11
  gemma-2-9b-it,0.40786563079141763
12
  Meta-Llama-3-70B-Instruct,0.3830092775167675
13
+ Qwen2_5_14B_Instruct,0.3901044620348051
14
  sg_llama3_70b_inst,0.4086440304524362
15
  gemma-2-2b-it,0.3482500758113138
16
  llama3-8b-cpt-sea-lionv2-instruct,0.3916108972514423
17
  GPT4o_0513,0.42589589086974855
18
+ cross_openhermes_llama3_70b_4096_inst,0.4206616934730876
19
  Meta-Llama-3.1-8B,0.008893689222008793
results/flores_translation/zero_shot/vie2eng.csv CHANGED
@@ -1,14 +1,19 @@
1
  Model,BLEU
2
  Qwen2-7B-Instruct,0.24106736560355876
3
  Meta-Llama-3.1-8B-Instruct,0.31019605539004524
 
4
  Qwen2-72B-Instruct,0.33005323227052946
 
5
  Meta-Llama-3-8B-Instruct,0.2647448190950291
6
  Meta-Llama-3.1-70B-Instruct,0.37244508311079816
 
7
  SeaLLMs-v3-7B-Chat,0.30981028289420137
8
  gemma-2-9b-it,0.3367700653885
9
  Meta-Llama-3-70B-Instruct,0.3230140263371192
 
10
  sg_llama3_70b_inst,0.34258533717783785
11
  gemma-2-2b-it,0.27518909199172303
12
  llama3-8b-cpt-sea-lionv2-instruct,0.327781936019637
13
  GPT4o_0513,0.36219303373759176
 
14
  Meta-Llama-3.1-8B,0.0064729173628987014
 
1
  Model,BLEU
2
  Qwen2-7B-Instruct,0.24106736560355876
3
  Meta-Llama-3.1-8B-Instruct,0.31019605539004524
4
+ Qwen2_5_7B_Instruct,0.3027564749728372
5
  Qwen2-72B-Instruct,0.33005323227052946
6
+ cross_openhermes_llama3_8b_4096_inst,0.28905588559612455
7
  Meta-Llama-3-8B-Instruct,0.2647448190950291
8
  Meta-Llama-3.1-70B-Instruct,0.37244508311079816
9
+ Qwen2_5_3B_Instruct,0.27312609009801636
10
  SeaLLMs-v3-7B-Chat,0.30981028289420137
11
  gemma-2-9b-it,0.3367700653885
12
  Meta-Llama-3-70B-Instruct,0.3230140263371192
13
+ Qwen2_5_14B_Instruct,0.32198218156960645
14
  sg_llama3_70b_inst,0.34258533717783785
15
  gemma-2-2b-it,0.27518909199172303
16
  llama3-8b-cpt-sea-lionv2-instruct,0.327781936019637
17
  GPT4o_0513,0.36219303373759176
18
+ cross_openhermes_llama3_70b_4096_inst,0.3538368711937718
19
  Meta-Llama-3.1-8B,0.0064729173628987014
results/flores_translation/zero_shot/zho2eng.csv CHANGED
@@ -1,14 +1,19 @@
1
  Model,BLEU
2
  Qwen2-7B-Instruct,0.2113761361724575
3
  Meta-Llama-3.1-8B-Instruct,0.23889886925287113
 
4
  Qwen2-72B-Instruct,0.23893268538329387
 
5
  Meta-Llama-3-8B-Instruct,0.199495011482748
6
  Meta-Llama-3.1-70B-Instruct,0.2832594176173152
 
7
  SeaLLMs-v3-7B-Chat,0.2516593644617717
8
  gemma-2-9b-it,0.267527968123433
9
  Meta-Llama-3-70B-Instruct,0.24397819518058994
 
10
  sg_llama3_70b_inst,0.26000707510414633
11
  gemma-2-2b-it,0.21164036008441425
12
  llama3-8b-cpt-sea-lionv2-instruct,0.2381535278220489
13
  GPT4o_0513,0.27722306559544163
 
14
  Meta-Llama-3.1-8B,0.0030426517414972854
 
1
  Model,BLEU
2
  Qwen2-7B-Instruct,0.2113761361724575
3
  Meta-Llama-3.1-8B-Instruct,0.23889886925287113
4
+ Qwen2_5_7B_Instruct,0.2437311220019033
5
  Qwen2-72B-Instruct,0.23893268538329387
6
+ cross_openhermes_llama3_8b_4096_inst,0.2258901846942186
7
  Meta-Llama-3-8B-Instruct,0.199495011482748
8
  Meta-Llama-3.1-70B-Instruct,0.2832594176173152
9
+ Qwen2_5_3B_Instruct,0.2245195134637718
10
  SeaLLMs-v3-7B-Chat,0.2516593644617717
11
  gemma-2-9b-it,0.267527968123433
12
  Meta-Llama-3-70B-Instruct,0.24397819518058994
13
+ Qwen2_5_14B_Instruct,0.2627781200417998
14
  sg_llama3_70b_inst,0.26000707510414633
15
  gemma-2-2b-it,0.21164036008441425
16
  llama3-8b-cpt-sea-lionv2-instruct,0.2381535278220489
17
  GPT4o_0513,0.27722306559544163
18
+ cross_openhermes_llama3_70b_4096_inst,0.27230844604661014
19
  Meta-Llama-3.1-8B,0.0030426517414972854
results/flores_translation/zero_shot/zsm2eng.csv CHANGED
@@ -1,14 +1,19 @@
1
  Model,BLEU
2
  Qwen2-7B-Instruct,0.28031997065822994
3
  Meta-Llama-3.1-8B-Instruct,0.3700921225177551
 
4
  Qwen2-72B-Instruct,0.40796892621611885
 
5
  Meta-Llama-3-8B-Instruct,0.31625368345049
6
  Meta-Llama-3.1-70B-Instruct,0.4462132282683508
 
7
  SeaLLMs-v3-7B-Chat,0.3484133510670942
8
  gemma-2-9b-it,0.4234100394581857
9
  Meta-Llama-3-70B-Instruct,0.3957287030176054
 
10
  sg_llama3_70b_inst,0.4163761508073963
11
  gemma-2-2b-it,0.33737270487369614
12
  llama3-8b-cpt-sea-lionv2-instruct,0.38799258214381604
13
  GPT4o_0513,0.451496635720668
 
14
  Meta-Llama-3.1-8B,0.00798239824596684
 
1
  Model,BLEU
2
  Qwen2-7B-Instruct,0.28031997065822994
3
  Meta-Llama-3.1-8B-Instruct,0.3700921225177551
4
+ Qwen2_5_7B_Instruct,0.3466422765302921
5
  Qwen2-72B-Instruct,0.40796892621611885
6
+ cross_openhermes_llama3_8b_4096_inst,0.37996622288549425
7
  Meta-Llama-3-8B-Instruct,0.31625368345049
8
  Meta-Llama-3.1-70B-Instruct,0.4462132282683508
9
+ Qwen2_5_3B_Instruct,0.31056841204320457
10
  SeaLLMs-v3-7B-Chat,0.3484133510670942
11
  gemma-2-9b-it,0.4234100394581857
12
  Meta-Llama-3-70B-Instruct,0.3957287030176054
13
+ Qwen2_5_14B_Instruct,0.3841042767934729
14
  sg_llama3_70b_inst,0.4163761508073963
15
  gemma-2-2b-it,0.33737270487369614
16
  llama3-8b-cpt-sea-lionv2-instruct,0.38799258214381604
17
  GPT4o_0513,0.451496635720668
18
+ cross_openhermes_llama3_70b_4096_inst,0.43447247409976697
19
  Meta-Llama-3.1-8B,0.00798239824596684
results/fundamental_nlp_tasks/zero_shot/c3.csv CHANGED
@@ -1,13 +1,22 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.9244577412116679
3
  Meta-Llama-3.1-8B-Instruct,0.8672400897531788
 
 
 
4
  Qwen2-72B-Instruct,0.9611069558713538
 
5
  Meta-Llama-3-8B-Instruct,0.8515332834704562
6
  Meta-Llama-3.1-70B-Instruct,0.9603590127150337
 
7
  SeaLLMs-v3-7B-Chat,0.9143605086013463
 
8
  gemma-2-9b-it,0.9222139117427075
9
  Meta-Llama-3-70B-Instruct,0.9521316379955124
 
10
  sg_llama3_70b_inst,0.9289454001495886
11
  gemma-2-2b-it,0.7700074794315632
12
  llama3-8b-cpt-sea-lionv2-instruct,0.8672400897531788
 
13
  GPT4o_0513,0.9648466716529543
 
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.9244577412116679
3
  Meta-Llama-3.1-8B-Instruct,0.8672400897531788
4
+ Qwen2_5_32B_Instruct,0.9603590127150337
5
+ Qwen2_5_7B_Instruct,0.9121166791323859
6
+ Qwen2_5_1_5B_Instruct,0.793941660433807
7
  Qwen2-72B-Instruct,0.9611069558713538
8
+ cross_openhermes_llama3_8b_4096_inst,0.7718773373223635
9
  Meta-Llama-3-8B-Instruct,0.8515332834704562
10
  Meta-Llama-3.1-70B-Instruct,0.9603590127150337
11
+ Qwen2_5_3B_Instruct,0.8668661181750187
12
  SeaLLMs-v3-7B-Chat,0.9143605086013463
13
+ Qwen2_5_72B_Instruct,0.9596110695587136
14
  gemma-2-9b-it,0.9222139117427075
15
  Meta-Llama-3-70B-Instruct,0.9521316379955124
16
+ Qwen2_5_14B_Instruct,0.9502617801047121
17
  sg_llama3_70b_inst,0.9289454001495886
18
  gemma-2-2b-it,0.7700074794315632
19
  llama3-8b-cpt-sea-lionv2-instruct,0.8672400897531788
20
+ Qwen2_5_0_5B_Instruct,0.612939416604338
21
  GPT4o_0513,0.9648466716529543
22
+ cross_openhermes_llama3_70b_4096_inst,0.9270755422587883
results/fundamental_nlp_tasks/zero_shot/cola.csv CHANGED
@@ -1,13 +1,22 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.7871524448705657
3
  Meta-Llama-3.1-8B-Instruct,0.6673058485139022
 
 
 
4
  Qwen2-72B-Instruct,0.8341323106423778
 
5
  Meta-Llama-3-8B-Instruct,0.6548418024928092
6
  Meta-Llama-3.1-70B-Instruct,0.850431447746884
 
7
  SeaLLMs-v3-7B-Chat,0.785234899328859
 
8
  gemma-2-9b-it,0.7938638542665388
9
  Meta-Llama-3-70B-Instruct,0.835091083413231
 
10
  sg_llama3_70b_inst,0.8696069031639502
11
  gemma-2-2b-it,0.6749760306807286
12
  llama3-8b-cpt-sea-lionv2-instruct,0.6078619367209971
 
13
  GPT4o_0513,0.8398849472674976
 
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.7871524448705657
3
  Meta-Llama-3.1-8B-Instruct,0.6673058485139022
4
+ Qwen2_5_32B_Instruct,0.8427612655800575
5
+ Qwen2_5_7B_Instruct,0.7909875359539789
6
+ Qwen2_5_1_5B_Instruct,0.7497603068072867
7
  Qwen2-72B-Instruct,0.8341323106423778
8
+ cross_openhermes_llama3_8b_4096_inst,0.7660594439117929
9
  Meta-Llama-3-8B-Instruct,0.6548418024928092
10
  Meta-Llama-3.1-70B-Instruct,0.850431447746884
11
+ Qwen2_5_3B_Instruct,0.6644295302013423
12
  SeaLLMs-v3-7B-Chat,0.785234899328859
13
+ Qwen2_5_72B_Instruct,0.8571428571428571
14
  gemma-2-9b-it,0.7938638542665388
15
  Meta-Llama-3-70B-Instruct,0.835091083413231
16
+ Qwen2_5_14B_Instruct,0.8063279002876318
17
  sg_llama3_70b_inst,0.8696069031639502
18
  gemma-2-2b-it,0.6749760306807286
19
  llama3-8b-cpt-sea-lionv2-instruct,0.6078619367209971
20
+ Qwen2_5_0_5B_Instruct,0.6116970278044104
21
  GPT4o_0513,0.8398849472674976
22
+ cross_openhermes_llama3_70b_4096_inst,0.8456375838926175
results/fundamental_nlp_tasks/zero_shot/mnli.csv CHANGED
@@ -1,13 +1,21 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.7295
3
  Meta-Llama-3.1-8B-Instruct,0.4825
 
 
4
  Qwen2-72B-Instruct,0.7925
 
5
  Meta-Llama-3-8B-Instruct,0.546
6
  Meta-Llama-3.1-70B-Instruct,0.7015
 
7
  SeaLLMs-v3-7B-Chat,0.653
 
8
  gemma-2-9b-it,0.716
9
  Meta-Llama-3-70B-Instruct,0.6709421285692472
 
10
  sg_llama3_70b_inst,0.7685
11
  gemma-2-2b-it,0.6185
12
  llama3-8b-cpt-sea-lionv2-instruct,0.5765
 
13
  GPT4o_0513,0.8335
 
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.7295
3
  Meta-Llama-3.1-8B-Instruct,0.4825
4
+ Qwen2_5_7B_Instruct,0.8105
5
+ Qwen2_5_1_5B_Instruct,0.6045
6
  Qwen2-72B-Instruct,0.7925
7
+ cross_openhermes_llama3_8b_4096_inst,0.57
8
  Meta-Llama-3-8B-Instruct,0.546
9
  Meta-Llama-3.1-70B-Instruct,0.7015
10
+ Qwen2_5_3B_Instruct,0.7465
11
  SeaLLMs-v3-7B-Chat,0.653
12
+ Qwen2_5_72B_Instruct,0.8445
13
  gemma-2-9b-it,0.716
14
  Meta-Llama-3-70B-Instruct,0.6709421285692472
15
+ Qwen2_5_14B_Instruct,0.818
16
  sg_llama3_70b_inst,0.7685
17
  gemma-2-2b-it,0.6185
18
  llama3-8b-cpt-sea-lionv2-instruct,0.5765
19
+ Qwen2_5_0_5B_Instruct,0.5095
20
  GPT4o_0513,0.8335
21
+ cross_openhermes_llama3_70b_4096_inst,0.743
results/fundamental_nlp_tasks/zero_shot/mrpc.csv CHANGED
@@ -1,13 +1,21 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.7867647058823529
3
  Meta-Llama-3.1-8B-Instruct,0.6740196078431373
 
 
4
  Qwen2-72B-Instruct,0.8063725490196079
 
5
  Meta-Llama-3-8B-Instruct,0.678921568627451
6
  Meta-Llama-3.1-70B-Instruct,0.7696078431372549
 
7
  SeaLLMs-v3-7B-Chat,0.7475490196078431
 
8
  gemma-2-9b-it,0.7401960784313726
9
  Meta-Llama-3-70B-Instruct,0.7598039215686274
 
10
  sg_llama3_70b_inst,0.7892156862745098
11
  gemma-2-2b-it,0.7083333333333334
12
  llama3-8b-cpt-sea-lionv2-instruct,0.5833333333333334
 
13
  GPT4o_0513,0.7377450980392157
 
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.7867647058823529
3
  Meta-Llama-3.1-8B-Instruct,0.6740196078431373
4
+ Qwen2_5_7B_Instruct,0.7058823529411765
5
+ Qwen2_5_1_5B_Instruct,0.6838235294117647
6
  Qwen2-72B-Instruct,0.8063725490196079
7
+ cross_openhermes_llama3_8b_4096_inst,0.7303921568627451
8
  Meta-Llama-3-8B-Instruct,0.678921568627451
9
  Meta-Llama-3.1-70B-Instruct,0.7696078431372549
10
+ Qwen2_5_3B_Instruct,0.5661764705882353
11
  SeaLLMs-v3-7B-Chat,0.7475490196078431
12
+ Qwen2_5_72B_Instruct,0.8014705882352942
13
  gemma-2-9b-it,0.7401960784313726
14
  Meta-Llama-3-70B-Instruct,0.7598039215686274
15
+ Qwen2_5_14B_Instruct,0.7794117647058824
16
  sg_llama3_70b_inst,0.7892156862745098
17
  gemma-2-2b-it,0.7083333333333334
18
  llama3-8b-cpt-sea-lionv2-instruct,0.5833333333333334
19
+ Qwen2_5_0_5B_Instruct,0.5759803921568627
20
  GPT4o_0513,0.7377450980392157
21
+ cross_openhermes_llama3_70b_4096_inst,0.7818627450980392
results/fundamental_nlp_tasks/zero_shot/ocnli.csv CHANGED
@@ -1,13 +1,21 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.6542372881355932
3
  Meta-Llama-3.1-8B-Instruct,0.40983050847457625
 
 
4
  Qwen2-72B-Instruct,0.7820338983050847
 
5
  Meta-Llama-3-8B-Instruct,0.44033898305084745
6
  Meta-Llama-3.1-70B-Instruct,0.6423728813559322
 
7
  SeaLLMs-v3-7B-Chat,0.5698305084745763
 
8
  gemma-2-9b-it,0.6189830508474576
9
  Meta-Llama-3-70B-Instruct,0.5928813559322034
 
10
  sg_llama3_70b_inst,0.6420338983050847
11
  gemma-2-2b-it,0.43322033898305085
12
  llama3-8b-cpt-sea-lionv2-instruct,0.45559322033898303
 
13
  GPT4o_0513,0.7308474576271187
 
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.6542372881355932
3
  Meta-Llama-3.1-8B-Instruct,0.40983050847457625
4
+ Qwen2_5_32B_Instruct,0.7742372881355932
5
+ Qwen2_5_1_5B_Instruct,0.5135593220338983
6
  Qwen2-72B-Instruct,0.7820338983050847
7
+ cross_openhermes_llama3_8b_4096_inst,0.5183050847457628
8
  Meta-Llama-3-8B-Instruct,0.44033898305084745
9
  Meta-Llama-3.1-70B-Instruct,0.6423728813559322
10
+ Qwen2_5_3B_Instruct,0.6145762711864406
11
  SeaLLMs-v3-7B-Chat,0.5698305084745763
12
+ Qwen2_5_72B_Instruct,0.7684745762711864
13
  gemma-2-9b-it,0.6189830508474576
14
  Meta-Llama-3-70B-Instruct,0.5928813559322034
15
+ Qwen2_5_14B_Instruct,0.7538983050847458
16
  sg_llama3_70b_inst,0.6420338983050847
17
  gemma-2-2b-it,0.43322033898305085
18
  llama3-8b-cpt-sea-lionv2-instruct,0.45559322033898303
19
+ Qwen2_5_0_5B_Instruct,0.3847457627118644
20
  GPT4o_0513,0.7308474576271187
21
+ cross_openhermes_llama3_70b_4096_inst,0.6647457627118644
results/fundamental_nlp_tasks/zero_shot/qnli.csv CHANGED
@@ -1,13 +1,21 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.8154859967051071
3
  Meta-Llama-3.1-8B-Instruct,0.5777045579352005
 
 
4
  Qwen2-72B-Instruct,0.8887058392824455
 
5
  Meta-Llama-3-8B-Instruct,0.6025993044114956
6
  Meta-Llama-3.1-70B-Instruct,0.9026176093721399
 
7
  SeaLLMs-v3-7B-Chat,0.7159070107999268
 
8
  gemma-2-9b-it,0.9070107999267801
9
  Meta-Llama-3-70B-Instruct,0.876807614863628
 
10
  sg_llama3_70b_inst,0.9004210140948197
11
  gemma-2-2b-it,0.7792421746293245
12
  llama3-8b-cpt-sea-lionv2-instruct,0.6101043382756727
 
13
  GPT4o_0513,0.9304411495515285
 
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.8154859967051071
3
  Meta-Llama-3.1-8B-Instruct,0.5777045579352005
4
+ Qwen2_5_7B_Instruct,0.8652754896576972
5
+ Qwen2_5_1_5B_Instruct,0.6148636280431997
6
  Qwen2-72B-Instruct,0.8887058392824455
7
+ cross_openhermes_llama3_8b_4096_inst,0.7596558667398865
8
  Meta-Llama-3-8B-Instruct,0.6025993044114956
9
  Meta-Llama-3.1-70B-Instruct,0.9026176093721399
10
+ Qwen2_5_3B_Instruct,0.7645982061138569
11
  SeaLLMs-v3-7B-Chat,0.7159070107999268
12
+ Qwen2_5_72B_Instruct,0.9082921471718836
13
  gemma-2-9b-it,0.9070107999267801
14
  Meta-Llama-3-70B-Instruct,0.876807614863628
15
+ Qwen2_5_14B_Instruct,0.9079260479589969
16
  sg_llama3_70b_inst,0.9004210140948197
17
  gemma-2-2b-it,0.7792421746293245
18
  llama3-8b-cpt-sea-lionv2-instruct,0.6101043382756727
19
+ Qwen2_5_0_5B_Instruct,0.5464030752333883
20
  GPT4o_0513,0.9304411495515285
21
+ cross_openhermes_llama3_70b_4096_inst,0.8943803770821893
results/fundamental_nlp_tasks/zero_shot/qqp.csv CHANGED
@@ -1,13 +1,21 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.781
3
  Meta-Llama-3.1-8B-Instruct,0.5645
 
 
4
  Qwen2-72B-Instruct,0.8065
 
5
  Meta-Llama-3-8B-Instruct,0.563
6
  Meta-Llama-3.1-70B-Instruct,0.815
 
7
  SeaLLMs-v3-7B-Chat,0.7625
 
8
  gemma-2-9b-it,0.7775
9
  Meta-Llama-3-70B-Instruct,0.7876082117239673
 
10
  sg_llama3_70b_inst,0.804
11
  gemma-2-2b-it,0.761
12
  llama3-8b-cpt-sea-lionv2-instruct,0.6225
 
13
  GPT4o_0513,0.8085
 
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.781
3
  Meta-Llama-3.1-8B-Instruct,0.5645
4
+ Qwen2_5_7B_Instruct,0.76
5
+ Qwen2_5_1_5B_Instruct,0.731
6
  Qwen2-72B-Instruct,0.8065
7
+ cross_openhermes_llama3_8b_4096_inst,0.7495
8
  Meta-Llama-3-8B-Instruct,0.563
9
  Meta-Llama-3.1-70B-Instruct,0.815
10
+ Qwen2_5_3B_Instruct,0.7415
11
  SeaLLMs-v3-7B-Chat,0.7625
12
+ Qwen2_5_72B_Instruct,0.8315
13
  gemma-2-9b-it,0.7775
14
  Meta-Llama-3-70B-Instruct,0.7876082117239673
15
+ Qwen2_5_14B_Instruct,0.8255
16
  sg_llama3_70b_inst,0.804
17
  gemma-2-2b-it,0.761
18
  llama3-8b-cpt-sea-lionv2-instruct,0.6225
19
+ Qwen2_5_0_5B_Instruct,0.619
20
  GPT4o_0513,0.8085
21
+ cross_openhermes_llama3_70b_4096_inst,0.801
results/fundamental_nlp_tasks/zero_shot/rte.csv CHANGED
@@ -1,13 +1,21 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.8231046931407943
3
  Meta-Llama-3.1-8B-Instruct,0.6750902527075813
 
 
4
  Qwen2-72B-Instruct,0.8447653429602888
 
5
  Meta-Llama-3-8B-Instruct,0.6173285198555957
6
  Meta-Llama-3.1-70B-Instruct,0.8483754512635379
 
7
  SeaLLMs-v3-7B-Chat,0.7870036101083032
 
8
  gemma-2-9b-it,0.7472924187725631
9
  Meta-Llama-3-70B-Instruct,0.8086642599277978
 
10
  sg_llama3_70b_inst,0.8916967509025271
11
  gemma-2-2b-it,0.7292418772563177
12
  llama3-8b-cpt-sea-lionv2-instruct,0.6859205776173285
 
13
  GPT4o_0513,0.8700361010830325
 
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.8231046931407943
3
  Meta-Llama-3.1-8B-Instruct,0.6750902527075813
4
+ Qwen2_5_7B_Instruct,0.8592057761732852
5
+ Qwen2_5_1_5B_Instruct,0.703971119133574
6
  Qwen2-72B-Instruct,0.8447653429602888
7
+ cross_openhermes_llama3_8b_4096_inst,0.6967509025270758
8
  Meta-Llama-3-8B-Instruct,0.6173285198555957
9
  Meta-Llama-3.1-70B-Instruct,0.8483754512635379
10
+ Qwen2_5_3B_Instruct,0.779783393501805
11
  SeaLLMs-v3-7B-Chat,0.7870036101083032
12
+ Qwen2_5_72B_Instruct,0.9025270758122743
13
  gemma-2-9b-it,0.7472924187725631
14
  Meta-Llama-3-70B-Instruct,0.8086642599277978
15
+ Qwen2_5_14B_Instruct,0.8664259927797834
16
  sg_llama3_70b_inst,0.8916967509025271
17
  gemma-2-2b-it,0.7292418772563177
18
  llama3-8b-cpt-sea-lionv2-instruct,0.6859205776173285
19
+ Qwen2_5_0_5B_Instruct,0.5992779783393501
20
  GPT4o_0513,0.8700361010830325
21
+ cross_openhermes_llama3_70b_4096_inst,0.8953068592057761
results/fundamental_nlp_tasks/zero_shot/wnli.csv CHANGED
@@ -1,13 +1,21 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.7183098591549296
3
  Meta-Llama-3.1-8B-Instruct,0.49295774647887325
 
 
4
  Qwen2-72B-Instruct,0.8873239436619719
 
5
  Meta-Llama-3-8B-Instruct,0.4788732394366197
6
  Meta-Llama-3.1-70B-Instruct,0.8450704225352113
 
7
  SeaLLMs-v3-7B-Chat,0.5915492957746479
 
8
  gemma-2-9b-it,0.7746478873239436
9
  Meta-Llama-3-70B-Instruct,0.7887323943661971
 
10
  sg_llama3_70b_inst,0.8309859154929577
11
  gemma-2-2b-it,0.43661971830985913
12
  llama3-8b-cpt-sea-lionv2-instruct,0.5774647887323944
 
13
  GPT4o_0513,0.9295774647887324
 
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.7183098591549296
3
  Meta-Llama-3.1-8B-Instruct,0.49295774647887325
4
+ Qwen2_5_7B_Instruct,0.7605633802816901
5
+ Qwen2_5_1_5B_Instruct,0.4647887323943662
6
  Qwen2-72B-Instruct,0.8873239436619719
7
+ cross_openhermes_llama3_8b_4096_inst,0.4647887323943662
8
  Meta-Llama-3-8B-Instruct,0.4788732394366197
9
  Meta-Llama-3.1-70B-Instruct,0.8450704225352113
10
+ Qwen2_5_3B_Instruct,0.647887323943662
11
  SeaLLMs-v3-7B-Chat,0.5915492957746479
12
+ Qwen2_5_72B_Instruct,0.8169014084507042
13
  gemma-2-9b-it,0.7746478873239436
14
  Meta-Llama-3-70B-Instruct,0.7887323943661971
15
+ Qwen2_5_14B_Instruct,0.8309859154929577
16
  sg_llama3_70b_inst,0.8309859154929577
17
  gemma-2-2b-it,0.43661971830985913
18
  llama3-8b-cpt-sea-lionv2-instruct,0.5774647887323944
19
+ Qwen2_5_0_5B_Instruct,0.43661971830985913
20
  GPT4o_0513,0.9295774647887324
21
+ cross_openhermes_llama3_70b_4096_inst,0.8450704225352113
results/general_reasoning/zero_shot/c_eval.csv CHANGED
@@ -1,14 +1,23 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.7615193026151931
3
  Meta-Llama-3.1-8B-Instruct,0.5149439601494396
 
 
 
4
  Qwen2-72B-Instruct,0.8312577833125778
 
5
  Meta-Llama-3-8B-Instruct,0.4775840597758406
6
  Meta-Llama-3.1-70B-Instruct,0.6612702366127023
 
7
  SeaLLMs-v3-7B-Chat,0.7658779576587795
 
8
  gemma-2-9b-it,0.5523038605230386
9
  Meta-Llama-3-70B-Instruct,0.6220423412204235
 
10
  sg_llama3_70b_inst,0.5722291407222914
11
  gemma-2-2b-it,0.4352428393524284
12
  llama3-8b-cpt-sea-lionv2-instruct,0.49813200498132004
 
13
  GPT4o_0513,0.7073474470734745
 
14
  Meta-Llama-3.1-8B,0.3742216687422167
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.7615193026151931
3
  Meta-Llama-3.1-8B-Instruct,0.5149439601494396
4
+ Qwen2_5_32B_Instruct,0.8262764632627646
5
+ Qwen2_5_7B_Instruct,0.7459526774595268
6
+ Qwen2_5_1_5B_Instruct,0.5971357409713575
7
  Qwen2-72B-Instruct,0.8312577833125778
8
+ cross_openhermes_llama3_8b_4096_inst,0.44707347447073476
9
  Meta-Llama-3-8B-Instruct,0.4775840597758406
10
  Meta-Llama-3.1-70B-Instruct,0.6612702366127023
11
+ Qwen2_5_3B_Instruct,0.6537982565379825
12
  SeaLLMs-v3-7B-Chat,0.7658779576587795
13
+ Qwen2_5_72B_Instruct,0.8325031133250311
14
  gemma-2-9b-it,0.5523038605230386
15
  Meta-Llama-3-70B-Instruct,0.6220423412204235
16
+ Qwen2_5_14B_Instruct,0.7839352428393525
17
  sg_llama3_70b_inst,0.5722291407222914
18
  gemma-2-2b-it,0.4352428393524284
19
  llama3-8b-cpt-sea-lionv2-instruct,0.49813200498132004
20
+ Qwen2_5_0_5B_Instruct,0.41718555417185554
21
  GPT4o_0513,0.7073474470734745
22
+ cross_openhermes_llama3_70b_4096_inst,0.5734744707347447
23
  Meta-Llama-3.1-8B,0.3742216687422167
results/general_reasoning/zero_shot/cmmlu.csv CHANGED
@@ -1,13 +1,22 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.7727508202383008
3
  Meta-Llama-3.1-8B-Instruct,0.5246934898981178
 
 
 
4
  Qwen2-72B-Instruct,0.8293904334311863
 
5
  Meta-Llama-3-8B-Instruct,0.4839405974788465
6
  Meta-Llama-3.1-70B-Instruct,0.6814885166637886
 
7
  SeaLLMs-v3-7B-Chat,0.7684337765498187
 
8
  gemma-2-9b-it,0.5700224486271801
9
  Meta-Llama-3-70B-Instruct,0.6494560524952513
 
10
  sg_llama3_70b_inst,0.6044724572612675
11
  gemma-2-2b-it,0.4412882058366431
12
  llama3-8b-cpt-sea-lionv2-instruct,0.48929373165256435
 
13
  GPT4o_0513,0.7414954239336902
 
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.7727508202383008
3
  Meta-Llama-3.1-8B-Instruct,0.5246934898981178
4
+ Qwen2_5_32B_Instruct,0.8273182524607149
5
+ Qwen2_5_7B_Instruct,0.7486617164565705
6
+ Qwen2_5_1_5B_Instruct,0.5975651873596961
7
  Qwen2-72B-Instruct,0.8293904334311863
8
+ cross_openhermes_llama3_8b_4096_inst,0.4547573821447073
9
  Meta-Llama-3-8B-Instruct,0.4839405974788465
10
  Meta-Llama-3.1-70B-Instruct,0.6814885166637886
11
+ Qwen2_5_3B_Instruct,0.6621481609393887
12
  SeaLLMs-v3-7B-Chat,0.7684337765498187
13
+ Qwen2_5_72B_Instruct,0.8343982041098256
14
  gemma-2-9b-it,0.5700224486271801
15
  Meta-Llama-3-70B-Instruct,0.6494560524952513
16
+ Qwen2_5_14B_Instruct,0.7807805214988776
17
  sg_llama3_70b_inst,0.6044724572612675
18
  gemma-2-2b-it,0.4412882058366431
19
  llama3-8b-cpt-sea-lionv2-instruct,0.48929373165256435
20
+ Qwen2_5_0_5B_Instruct,0.42056639613192887
21
  GPT4o_0513,0.7414954239336902
22
+ cross_openhermes_llama3_70b_4096_inst,0.6177689518217925
results/general_reasoning/zero_shot/indommlu.csv CHANGED
@@ -1,13 +1,22 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.5385539755657921
3
  Meta-Llama-3.1-8B-Instruct,0.5252687095266707
 
 
 
4
  Qwen2-72B-Instruct,0.6385606515788771
 
5
  Meta-Llama-3-8B-Instruct,0.5264703918819681
6
  Meta-Llama-3.1-70B-Instruct,0.6740770411910008
 
7
  SeaLLMs-v3-7B-Chat,0.5267374324053675
 
8
  gemma-2-9b-it,0.606983109686895
9
  Meta-Llama-3-70B-Instruct,0.6323519594098405
 
10
  sg_llama3_70b_inst,0.6394285332799252
11
  gemma-2-2b-it,0.48220842512851325
12
  llama3-8b-cpt-sea-lionv2-instruct,0.5252687095266707
 
13
  GPT4o_0513,0.7584618465852193
 
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.5385539755657921
3
  Meta-Llama-3.1-8B-Instruct,0.5252687095266707
4
+ Qwen2_5_32B_Instruct,0.6314840777087923
5
+ Qwen2_5_7B_Instruct,0.5600507376994459
6
+ Qwen2_5_1_5B_Instruct,0.4295346818879765
7
  Qwen2-72B-Instruct,0.6385606515788771
8
+ cross_openhermes_llama3_8b_4096_inst,0.5097803591695039
9
  Meta-Llama-3-8B-Instruct,0.5264703918819681
10
  Meta-Llama-3.1-70B-Instruct,0.6740770411910008
11
+ Qwen2_5_3B_Instruct,0.49656185326123237
12
  SeaLLMs-v3-7B-Chat,0.5267374324053675
13
+ Qwen2_5_72B_Instruct,0.6380933306629281
14
  gemma-2-9b-it,0.606983109686895
15
  Meta-Llama-3-70B-Instruct,0.6323519594098405
16
+ Qwen2_5_14B_Instruct,0.6009746979104079
17
  sg_llama3_70b_inst,0.6394285332799252
18
  gemma-2-2b-it,0.48220842512851325
19
  llama3-8b-cpt-sea-lionv2-instruct,0.5252687095266707
20
+ Qwen2_5_0_5B_Instruct,0.3279925228653448
21
  GPT4o_0513,0.7584618465852193
22
+ cross_openhermes_llama3_70b_4096_inst,0.6333533613725882
results/general_reasoning/zero_shot/mmlu.csv CHANGED
@@ -1,14 +1,23 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.672506256703611
3
  Meta-Llama-3.1-8B-Instruct,0.6037182695745441
 
 
 
4
  Qwen2-72B-Instruct,0.7922774401144083
 
5
  Meta-Llama-3-8B-Instruct,0.6005720414730068
6
  Meta-Llama-3.1-70B-Instruct,0.8058634250983197
 
7
  SeaLLMs-v3-7B-Chat,0.6670003575259207
 
8
  gemma-2-9b-it,0.7100464783696818
9
  Meta-Llama-3-70B-Instruct,0.7649624597783339
 
10
  sg_llama3_70b_inst,0.7407937075437969
11
  gemma-2-2b-it,0.5706828745084018
12
  llama3-8b-cpt-sea-lionv2-instruct,0.6130854486950303
 
13
  GPT4o_0513,0.8308187343582409
 
14
  Meta-Llama-3.1-8B,0.386271004647837
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.672506256703611
3
  Meta-Llama-3.1-8B-Instruct,0.6037182695745441
4
+ Qwen2_5_32B_Instruct,0.7996424740793707
5
+ Qwen2_5_7B_Instruct,0.6935287808366106
6
+ Qwen2_5_1_5B_Instruct,0.5646764390418305
7
  Qwen2-72B-Instruct,0.7922774401144083
8
+ cross_openhermes_llama3_8b_4096_inst,0.556381837683232
9
  Meta-Llama-3-8B-Instruct,0.6005720414730068
10
  Meta-Llama-3.1-70B-Instruct,0.8058634250983197
11
+ Qwen2_5_3B_Instruct,0.6118698605648909
12
  SeaLLMs-v3-7B-Chat,0.6670003575259207
13
+ Qwen2_5_72B_Instruct,0.8129424383267787
14
  gemma-2-9b-it,0.7100464783696818
15
  Meta-Llama-3-70B-Instruct,0.7649624597783339
16
+ Qwen2_5_14B_Instruct,0.7542366821594566
17
  sg_llama3_70b_inst,0.7407937075437969
18
  gemma-2-2b-it,0.5706828745084018
19
  llama3-8b-cpt-sea-lionv2-instruct,0.6130854486950303
20
+ Qwen2_5_0_5B_Instruct,0.461136932427601
21
  GPT4o_0513,0.8308187343582409
22
+ cross_openhermes_llama3_70b_4096_inst,0.7400071505184126
23
  Meta-Llama-3.1-8B,0.386271004647837
results/general_reasoning/zero_shot/zbench.csv CHANGED
@@ -1,12 +1,21 @@
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.7272727272727273
3
  Meta-Llama-3.1-8B-Instruct,0.42424242424242425
 
 
 
4
  Qwen2-72B-Instruct,0.5757575757575758
 
5
  Meta-Llama-3-8B-Instruct,0.3333333333333333
6
  Meta-Llama-3.1-70B-Instruct,0.48484848484848486
 
7
  SeaLLMs-v3-7B-Chat,0.5454545454545454
 
8
  gemma-2-9b-it,0.48484848484848486
9
  Meta-Llama-3-70B-Instruct,0.5151515151515151
 
10
  sg_llama3_70b_inst,0.42424242424242425
11
  gemma-2-2b-it,0.24242424242424243
12
  llama3-8b-cpt-sea-lionv2-instruct,0.30303030303030304
 
 
 
1
  Model,Accuracy
2
  Qwen2-7B-Instruct,0.7272727272727273
3
  Meta-Llama-3.1-8B-Instruct,0.42424242424242425
4
+ Qwen2_5_32B_Instruct,0.6060606060606061
5
+ Qwen2_5_7B_Instruct,0.6666666666666666
6
+ Qwen2_5_1_5B_Instruct,0.42424242424242425
7
  Qwen2-72B-Instruct,0.5757575757575758
8
+ cross_openhermes_llama3_8b_4096_inst,0.3333333333333333
9
  Meta-Llama-3-8B-Instruct,0.3333333333333333
10
  Meta-Llama-3.1-70B-Instruct,0.48484848484848486
11
+ Qwen2_5_3B_Instruct,0.5757575757575758
12
  SeaLLMs-v3-7B-Chat,0.5454545454545454
13
+ Qwen2_5_72B_Instruct,0.696969696969697
14
  gemma-2-9b-it,0.48484848484848486
15
  Meta-Llama-3-70B-Instruct,0.5151515151515151
16
+ Qwen2_5_14B_Instruct,0.6666666666666666
17
  sg_llama3_70b_inst,0.42424242424242425
18
  gemma-2-2b-it,0.24242424242424243
19
  llama3-8b-cpt-sea-lionv2-instruct,0.30303030303030304
20
+ Qwen2_5_0_5B_Instruct,0.36363636363636365
21
+ cross_openhermes_llama3_70b_4096_inst,0.42424242424242425