zhuohan-7 commited on
Commit
4972444
1 Parent(s): fb8024c

Upload folder using huggingface_hub

Browse files
Files changed (31) hide show
  1. results/cross_lingual/zero_shot/cross_logiqa.csv +1 -0
  2. results/cross_lingual/zero_shot/cross_mmlu.csv +1 -0
  3. results/cultural_reasoning/zero_shot/cn_eval.csv +1 -0
  4. results/cultural_reasoning/zero_shot/ph_eval.csv +1 -0
  5. results/cultural_reasoning/zero_shot/sg_eval.csv +1 -0
  6. results/cultural_reasoning/zero_shot/sg_eval_v1_cleaned.csv +1 -0
  7. results/cultural_reasoning/zero_shot/sg_eval_v2_mcq.csv +1 -0
  8. results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv +1 -0
  9. results/cultural_reasoning/zero_shot/us_eval.csv +1 -0
  10. results/dialogue/zero_shot/dialogsum.csv +1 -0
  11. results/dialogue/zero_shot/dream.csv +1 -0
  12. results/dialogue/zero_shot/samsum.csv +1 -0
  13. results/emotion/zero_shot/ind_emotion.csv +1 -0
  14. results/emotion/zero_shot/sst2.csv +1 -0
  15. results/flores_translation/zero_shot/ind2eng.csv +1 -0
  16. results/flores_translation/zero_shot/vie2eng.csv +1 -0
  17. results/flores_translation/zero_shot/zho2eng.csv +1 -0
  18. results/flores_translation/zero_shot/zsm2eng.csv +1 -0
  19. results/fundamental_nlp_tasks/zero_shot/c3.csv +1 -0
  20. results/fundamental_nlp_tasks/zero_shot/cola.csv +1 -0
  21. results/fundamental_nlp_tasks/zero_shot/mnli.csv +1 -0
  22. results/fundamental_nlp_tasks/zero_shot/mrpc.csv +1 -0
  23. results/fundamental_nlp_tasks/zero_shot/ocnli.csv +1 -0
  24. results/fundamental_nlp_tasks/zero_shot/qnli.csv +1 -0
  25. results/fundamental_nlp_tasks/zero_shot/qqp.csv +1 -0
  26. results/fundamental_nlp_tasks/zero_shot/rte.csv +1 -0
  27. results/fundamental_nlp_tasks/zero_shot/wnli.csv +1 -0
  28. results/general_reasoning/zero_shot/c_eval.csv +1 -0
  29. results/general_reasoning/zero_shot/cmmlu.csv +1 -0
  30. results/general_reasoning/zero_shot/indommlu.csv +1 -0
  31. results/general_reasoning/zero_shot/zbench.csv +1 -0
results/cross_lingual/zero_shot/cross_logiqa.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.6306818181818182,0.6186688311688312,0.62461756988007
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.45860389610389607,0.46087662337662333,0.4597374508818368,0.5568181818181818,0.48295454545454547,0.4659090909090909,0.4431818181818182,0.4431818181818182,0.4318181818181818,0.38636363636363635
21
  Qwen2_5_14B_Instruct,0.6436688311688312,0.5938311688311688,0.6177464473895627,0.75,0.7386363636363636,0.6306818181818182,0.6420454545454546,0.6136363636363636,0.5965909090909091,0.5340909090909091
22
  sg_llama3_8192_8b,0.49594155844155846,0.468831168831169,0.4820054587349027,0.5795454545454546,0.48295454545454547,0.5397727272727273,0.48863636363636365,0.5,0.48295454545454547,0.3977272727272727
 
23
  sg_llama3_70b_inst,0.6217532467532468,0.5629870129870129,0.590912649920049,0.7272727272727273,0.6590909090909091,0.6477272727272727,0.6079545454545454,0.6136363636363636,0.5795454545454546,0.5170454545454546
24
  cross_openhermes_llama3_8b_2048_inst,0.4659090909090909,0.4407467532467531,0.452978757986988,0.5284090909090909,0.5284090909090909,0.4375,0.4659090909090909,0.4375,0.48863636363636365,0.375
25
  gemma-2-2b-it,0.4780844155844156,0.4448051948051948,0.46084478401384643,0.5568181818181818,0.5,0.5,0.48863636363636365,0.4375,0.4602272727272727,0.4034090909090909
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.45860389610389607,0.46087662337662333,0.4597374508818368,0.5568181818181818,0.48295454545454547,0.4659090909090909,0.4431818181818182,0.4431818181818182,0.4318181818181818,0.38636363636363635
21
  Qwen2_5_14B_Instruct,0.6436688311688312,0.5938311688311688,0.6177464473895627,0.75,0.7386363636363636,0.6306818181818182,0.6420454545454546,0.6136363636363636,0.5965909090909091,0.5340909090909091
22
  sg_llama3_8192_8b,0.49594155844155846,0.468831168831169,0.4820054587349027,0.5795454545454546,0.48295454545454547,0.5397727272727273,0.48863636363636365,0.5,0.48295454545454547,0.3977272727272727
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.6055194805194805,0.6220779220779219,0.6136870270197391,0.6590909090909091,0.6590909090909091,0.5625,0.6193181818181818,0.5681818181818182,0.5852272727272727,0.5852272727272727
24
  sg_llama3_70b_inst,0.6217532467532468,0.5629870129870129,0.590912649920049,0.7272727272727273,0.6590909090909091,0.6477272727272727,0.6079545454545454,0.6136363636363636,0.5795454545454546,0.5170454545454546
25
  cross_openhermes_llama3_8b_2048_inst,0.4659090909090909,0.4407467532467531,0.452978757986988,0.5284090909090909,0.5284090909090909,0.4375,0.4659090909090909,0.4375,0.48863636363636365,0.375
26
  gemma-2-2b-it,0.4780844155844156,0.4448051948051948,0.46084478401384643,0.5568181818181818,0.5,0.5,0.48863636363636365,0.4375,0.4602272727272727,0.4034090909090909
results/cross_lingual/zero_shot/cross_mmlu.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.758095238095238,0.7316190476190477,0.744621866597198
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.5447619047619048,0.5127619047619049,0.5282777548560428,0.6533333333333333,0.5333333333333333,0.5066666666666667,0.54,0.5466666666666666,0.5133333333333333,0.52
21
  Qwen2_5_14B_Instruct,0.7266666666666666,0.680952380952381,0.7030672078887086,0.78,0.7533333333333333,0.7533333333333333,0.7266666666666667,0.7466666666666667,0.68,0.6466666666666666
22
  sg_llama3_8192_8b,0.5876190476190476,0.5207619047619048,0.5521740766611207,0.7,0.58,0.6333333333333333,0.5466666666666666,0.5533333333333333,0.5666666666666667,0.5333333333333333
 
23
  sg_llama3_70b_inst,0.7342857142857142,0.7079999999999999,0.7209033280007295,0.82,0.6866666666666666,0.7333333333333333,0.6933333333333334,0.78,0.7266666666666667,0.7
24
  cross_openhermes_llama3_8b_2048_inst,0.5504761904761906,0.528,0.53900388550283,0.6733333333333333,0.5466666666666666,0.52,0.5133333333333333,0.5666666666666667,0.5466666666666666,0.4866666666666667
25
  gemma-2-2b-it,0.5780952380952381,0.5480000000000002,0.5626454667971265,0.7,0.5866666666666667,0.5866666666666667,0.5333333333333333,0.5666666666666667,0.5333333333333333,0.54
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.5447619047619048,0.5127619047619049,0.5282777548560428,0.6533333333333333,0.5333333333333333,0.5066666666666667,0.54,0.5466666666666666,0.5133333333333333,0.52
21
  Qwen2_5_14B_Instruct,0.7266666666666666,0.680952380952381,0.7030672078887086,0.78,0.7533333333333333,0.7533333333333333,0.7266666666666667,0.7466666666666667,0.68,0.6466666666666666
22
  sg_llama3_8192_8b,0.5876190476190476,0.5207619047619048,0.5521740766611207,0.7,0.58,0.6333333333333333,0.5466666666666666,0.5533333333333333,0.5666666666666667,0.5333333333333333
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.7180952380952382,0.7485714285714284,0.7330166975381478,0.78,0.7133333333333334,0.7133333333333334,0.6866666666666666,0.7266666666666667,0.7,0.7066666666666667
24
  sg_llama3_70b_inst,0.7342857142857142,0.7079999999999999,0.7209033280007295,0.82,0.6866666666666666,0.7333333333333333,0.6933333333333334,0.78,0.7266666666666667,0.7
25
  cross_openhermes_llama3_8b_2048_inst,0.5504761904761906,0.528,0.53900388550283,0.6733333333333333,0.5466666666666666,0.52,0.5133333333333333,0.5666666666666667,0.5466666666666666,0.4866666666666667
26
  gemma-2-2b-it,0.5780952380952381,0.5480000000000002,0.5626454667971265,0.7,0.5866666666666667,0.5866666666666667,0.5333333333333333,0.5666666666666667,0.5333333333333333,0.54
results/cultural_reasoning/zero_shot/cn_eval.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.5333333333333333
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.49523809523809526
21
  Qwen2_5_14B_Instruct,0.8285714285714286
22
  sg_llama3_8192_8b,0.47619047619047616
 
23
  sg_llama3_70b_inst,0.5523809523809524
24
  cross_openhermes_llama3_8b_2048_inst,0.5142857142857142
25
  gemma-2-2b-it,0.3619047619047619
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.49523809523809526
21
  Qwen2_5_14B_Instruct,0.8285714285714286
22
  sg_llama3_8192_8b,0.47619047619047616
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.5904761904761905
24
  sg_llama3_70b_inst,0.5523809523809524
25
  cross_openhermes_llama3_8b_2048_inst,0.5142857142857142
26
  gemma-2-2b-it,0.3619047619047619
results/cultural_reasoning/zero_shot/ph_eval.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.63
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.52
21
  Qwen2_5_14B_Instruct,0.6
22
  sg_llama3_8192_8b,0.57
 
23
  sg_llama3_70b_inst,0.69
24
  cross_openhermes_llama3_8b_2048_inst,0.5
25
  gemma-2-2b-it,0.4
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.52
21
  Qwen2_5_14B_Instruct,0.6
22
  sg_llama3_8192_8b,0.57
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.59
24
  sg_llama3_70b_inst,0.69
25
  cross_openhermes_llama3_8b_2048_inst,0.5
26
  gemma-2-2b-it,0.4
results/cultural_reasoning/zero_shot/sg_eval.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.7087378640776699
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.6116504854368932
21
  Qwen2_5_14B_Instruct,0.7669902912621359
22
  sg_llama3_8192_8b,0.5533980582524272
 
23
  sg_llama3_70b_inst,0.6699029126213593
24
  cross_openhermes_llama3_8b_2048_inst,0.6116504854368932
25
  gemma-2-2b-it,0.5533980582524272
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.6116504854368932
21
  Qwen2_5_14B_Instruct,0.7669902912621359
22
  sg_llama3_8192_8b,0.5533980582524272
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.6796116504854369
24
  sg_llama3_70b_inst,0.6699029126213593
25
  cross_openhermes_llama3_8b_2048_inst,0.6116504854368932
26
  gemma-2-2b-it,0.5533980582524272
results/cultural_reasoning/zero_shot/sg_eval_v1_cleaned.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.6617647058823529
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.5735294117647058
21
  Qwen2_5_14B_Instruct,0.6911764705882353
22
  sg_llama3_8192_8b,0.47058823529411764
 
23
  sg_llama3_70b_inst,0.6176470588235294
24
  cross_openhermes_llama3_8b_2048_inst,0.6323529411764706
25
  gemma-2-2b-it,0.5147058823529411
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.5735294117647058
21
  Qwen2_5_14B_Instruct,0.6911764705882353
22
  sg_llama3_8192_8b,0.47058823529411764
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.6029411764705882
24
  sg_llama3_70b_inst,0.6176470588235294
25
  cross_openhermes_llama3_8b_2048_inst,0.6323529411764706
26
  gemma-2-2b-it,0.5147058823529411
results/cultural_reasoning/zero_shot/sg_eval_v2_mcq.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.8381818181818181
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.7927272727272727
21
  Qwen2_5_14B_Instruct,0.8345454545454546
22
  sg_llama3_8192_8b,0.76
 
23
  sg_llama3_70b_inst,0.8436363636363636
24
  cross_openhermes_llama3_8b_2048_inst,0.7781818181818182
25
  gemma-2-2b-it,0.7163636363636363
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.7927272727272727
21
  Qwen2_5_14B_Instruct,0.8345454545454546
22
  sg_llama3_8192_8b,0.76
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.8090909090909091
24
  sg_llama3_70b_inst,0.8436363636363636
25
  cross_openhermes_llama3_8b_2048_inst,0.7781818181818182
26
  gemma-2-2b-it,0.7163636363636363
results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,50.599999999999994
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,49.32000000000001
21
  Qwen2_5_14B_Instruct,53.2
22
  sg_llama3_8192_8b,53.4
 
23
  sg_llama3_70b_inst,51.959999999999994
24
  cross_openhermes_llama3_8b_2048_inst,52.24
25
  gemma-2-2b-it,52.08
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,49.32000000000001
21
  Qwen2_5_14B_Instruct,53.2
22
  sg_llama3_8192_8b,53.4
23
+ gemma2-9b-cpt-sea-lionv3-instruct,55.0
24
  sg_llama3_70b_inst,51.959999999999994
25
  cross_openhermes_llama3_8b_2048_inst,52.24
26
  gemma-2-2b-it,52.08
results/cultural_reasoning/zero_shot/us_eval.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.8691588785046729
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.6635514018691588
21
  Qwen2_5_14B_Instruct,0.822429906542056
22
  sg_llama3_8192_8b,0.7009345794392523
 
23
  sg_llama3_70b_inst,0.8598130841121495
24
  cross_openhermes_llama3_8b_2048_inst,0.7102803738317757
25
  gemma-2-2b-it,0.6915887850467289
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.6635514018691588
21
  Qwen2_5_14B_Instruct,0.822429906542056
22
  sg_llama3_8192_8b,0.7009345794392523
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.8037383177570093
24
  sg_llama3_70b_inst,0.8598130841121495
25
  cross_openhermes_llama3_8b_2048_inst,0.7102803738317757
26
  gemma-2-2b-it,0.6915887850467289
results/dialogue/zero_shot/dialogsum.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.2557065499979308,0.36058417323628,0.1275808733778686
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.23822266043844623,0.33277843672963314,0.1255742736802169,0.2563152709054887
21
  Qwen2_5_14B_Instruct,0.2343478938479703,0.3386251381162625,0.10742381514017992,0.2569947282874686
22
  sg_llama3_8192_8b,0.2708022468830074,0.3774768588431775,0.1387436961438702,0.2961861856619747
 
23
  sg_llama3_70b_inst,0.26633840691332344,0.3692028513115729,0.1412505883866801,0.2885617810417173
24
  cross_openhermes_llama3_8b_2048_inst,0.26075548388204367,0.3613222112557008,0.13618000204638436,0.28476423834404585
25
  gemma-2-2b-it,0.2597323674875989,0.36848124762381895,0.12622684440269072,0.2844890104362872
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.23822266043844623,0.33277843672963314,0.1255742736802169,0.2563152709054887
21
  Qwen2_5_14B_Instruct,0.2343478938479703,0.3386251381162625,0.10742381514017992,0.2569947282874686
22
  sg_llama3_8192_8b,0.2708022468830074,0.3774768588431775,0.1387436961438702,0.2961861856619747
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.23790909190042164,0.34294544984076464,0.1078722585042388,0.26290956735626153
24
  sg_llama3_70b_inst,0.26633840691332344,0.3692028513115729,0.1412505883866801,0.2885617810417173
25
  cross_openhermes_llama3_8b_2048_inst,0.26075548388204367,0.3613222112557008,0.13618000204638436,0.28476423834404585
26
  gemma-2-2b-it,0.2597323674875989,0.36848124762381895,0.12622684440269072,0.2844890104362872
results/dialogue/zero_shot/dream.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.9480646741793238
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.8731014208721215
21
  Qwen2_5_14B_Instruct,0.9461048505634493
22
  sg_llama3_8192_8b,0.9103380695737384
 
23
  sg_llama3_70b_inst,0.9524742773150416
24
  cross_openhermes_llama3_8b_2048_inst,0.8642822146006859
25
  gemma-2-2b-it,0.8510534051935326
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.8731014208721215
21
  Qwen2_5_14B_Instruct,0.9461048505634493
22
  sg_llama3_8192_8b,0.9103380695737384
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.9407153356197943
24
  sg_llama3_70b_inst,0.9524742773150416
25
  cross_openhermes_llama3_8b_2048_inst,0.8642822146006859
26
  gemma-2-2b-it,0.8510534051935326
results/dialogue/zero_shot/samsum.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.2893525314227379,0.4030746211134018,0.15236139065578
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.2813508829464006,0.38872030551635356,0.1571931255091481,0.29813921781370006
21
  Qwen2_5_14B_Instruct,0.2713801253928723,0.3836253496005304,0.13683087953788298,0.2936841470402035
22
  sg_llama3_8192_8b,0.30740523414540055,0.4199805360695743,0.1701793607165699,0.33205580565005743
 
23
  sg_llama3_70b_inst,0.3146051103643872,0.4271361513564755,0.18238925099430264,0.33428992874238356
24
  cross_openhermes_llama3_8b_2048_inst,0.29349656315763506,0.4044332095677647,0.16224920209587818,0.3138072778092623
25
  gemma-2-2b-it,0.31118787136959813,0.4324251755711466,0.16441328335793207,0.33672515517971563
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.2813508829464006,0.38872030551635356,0.1571931255091481,0.29813921781370006
21
  Qwen2_5_14B_Instruct,0.2713801253928723,0.3836253496005304,0.13683087953788298,0.2936841470402035
22
  sg_llama3_8192_8b,0.30740523414540055,0.4199805360695743,0.1701793607165699,0.33205580565005743
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.29509358497900623,0.4129497614150914,0.1502573048316353,0.32207368869029196
24
  sg_llama3_70b_inst,0.3146051103643872,0.4271361513564755,0.18238925099430264,0.33428992874238356
25
  cross_openhermes_llama3_8b_2048_inst,0.29349656315763506,0.4044332095677647,0.16224920209587818,0.3138072778092623
26
  gemma-2-2b-it,0.31118787136959813,0.4324251755711466,0.16441328335793207,0.33672515517971563
results/emotion/zero_shot/ind_emotion.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.6909090909090909
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.6659090909090909
21
  Qwen2_5_14B_Instruct,0.6954545454545454
22
  sg_llama3_8192_8b,0.6045454545454545
 
23
  sg_llama3_70b_inst,0.7
24
  cross_openhermes_llama3_8b_2048_inst,0.6636363636363637
25
  gemma-2-2b-it,0.6636363636363637
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.6659090909090909
21
  Qwen2_5_14B_Instruct,0.6954545454545454
22
  sg_llama3_8192_8b,0.6045454545454545
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.7340909090909091
24
  sg_llama3_70b_inst,0.7
25
  cross_openhermes_llama3_8b_2048_inst,0.6636363636363637
26
  gemma-2-2b-it,0.6636363636363637
results/emotion/zero_shot/sst2.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.9495412844036697
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.911697247706422
21
  Qwen2_5_14B_Instruct,0.9311926605504587
22
  sg_llama3_8192_8b,0.9208715596330275
 
23
  sg_llama3_70b_inst,0.9334862385321101
24
  cross_openhermes_llama3_8b_2048_inst,0.9243119266055045
25
  gemma-2-2b-it,0.9243119266055045
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.911697247706422
21
  Qwen2_5_14B_Instruct,0.9311926605504587
22
  sg_llama3_8192_8b,0.9208715596330275
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.9311926605504587
24
  sg_llama3_70b_inst,0.9334862385321101
25
  cross_openhermes_llama3_8b_2048_inst,0.9243119266055045
26
  gemma-2-2b-it,0.9243119266055045
results/flores_translation/zero_shot/ind2eng.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.3830092775167675
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.3778716126842916
21
  Qwen2_5_14B_Instruct,0.3901044620348051
22
  sg_llama3_8192_8b,0.3758986882788705
 
23
  sg_llama3_70b_inst,0.4086440304524362
24
  cross_openhermes_llama3_8b_2048_inst,0.3904916300086918
25
  gemma-2-2b-it,0.3482500758113138
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.3778716126842916
21
  Qwen2_5_14B_Instruct,0.3901044620348051
22
  sg_llama3_8192_8b,0.3758986882788705
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.3922444585880475
24
  sg_llama3_70b_inst,0.4086440304524362
25
  cross_openhermes_llama3_8b_2048_inst,0.3904916300086918
26
  gemma-2-2b-it,0.3482500758113138
results/flores_translation/zero_shot/vie2eng.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.3230140263371192
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.2881995374548925
21
  Qwen2_5_14B_Instruct,0.32198218156960645
22
  sg_llama3_8192_8b,0.3087032778607667
 
23
  sg_llama3_70b_inst,0.34258533717783785
24
  cross_openhermes_llama3_8b_2048_inst,0.2973194210388712
25
  gemma-2-2b-it,0.27518909199172303
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.2881995374548925
21
  Qwen2_5_14B_Instruct,0.32198218156960645
22
  sg_llama3_8192_8b,0.3087032778607667
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.3176282835521885
24
  sg_llama3_70b_inst,0.34258533717783785
25
  cross_openhermes_llama3_8b_2048_inst,0.2973194210388712
26
  gemma-2-2b-it,0.27518909199172303
results/flores_translation/zero_shot/zho2eng.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.24397819518058994
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.23373987803056884
21
  Qwen2_5_14B_Instruct,0.2627781200417998
22
  sg_llama3_8192_8b,0.23778397807613597
 
23
  sg_llama3_70b_inst,0.26000707510414633
24
  cross_openhermes_llama3_8b_2048_inst,0.23916426190948417
25
  gemma-2-2b-it,0.21164036008441425
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.23373987803056884
21
  Qwen2_5_14B_Instruct,0.2627781200417998
22
  sg_llama3_8192_8b,0.23778397807613597
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.25110750921976727
24
  sg_llama3_70b_inst,0.26000707510414633
25
  cross_openhermes_llama3_8b_2048_inst,0.23916426190948417
26
  gemma-2-2b-it,0.21164036008441425
results/flores_translation/zero_shot/zsm2eng.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.3957287030176054
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.37943052200875504
21
  Qwen2_5_14B_Instruct,0.3841042767934729
22
  sg_llama3_8192_8b,0.376818225699898
 
23
  sg_llama3_70b_inst,0.4163761508073963
24
  cross_openhermes_llama3_8b_2048_inst,0.3904643635616676
25
  gemma-2-2b-it,0.33737270487369614
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.37943052200875504
21
  Qwen2_5_14B_Instruct,0.3841042767934729
22
  sg_llama3_8192_8b,0.376818225699898
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.4059485540822735
24
  sg_llama3_70b_inst,0.4163761508073963
25
  cross_openhermes_llama3_8b_2048_inst,0.3904643635616676
26
  gemma-2-2b-it,0.33737270487369614
results/fundamental_nlp_tasks/zero_shot/c3.csv CHANGED
@@ -21,6 +21,7 @@ llama-own-4096-2-sg-ultrachat-sft-eos-real,0.8358264771877337
21
  Qwen2_5_14B_Instruct,0.9502617801047121
22
  sg_llama3_8192_8b,0.8051608077786089
23
  Meta-Llama-3.1-70B,0.7786088257292446
 
24
  sg_llama3_70b_inst,0.9289454001495886
25
  cross_openhermes_llama3_8b_2048_inst,0.8167539267015707
26
  gemma-2-2b-it,0.7700074794315632
 
21
  Qwen2_5_14B_Instruct,0.9502617801047121
22
  sg_llama3_8192_8b,0.8051608077786089
23
  Meta-Llama-3.1-70B,0.7786088257292446
24
+ gemma2-9b-cpt-sea-lionv3-instruct,0.9255796559461481
25
  sg_llama3_70b_inst,0.9289454001495886
26
  cross_openhermes_llama3_8b_2048_inst,0.8167539267015707
27
  gemma-2-2b-it,0.7700074794315632
results/fundamental_nlp_tasks/zero_shot/cola.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.835091083413231
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.7468839884947267
21
  Qwen2_5_14B_Instruct,0.8063279002876318
22
  sg_llama3_8192_8b,0.8130393096836049
 
23
  sg_llama3_70b_inst,0.8696069031639502
24
  cross_openhermes_llama3_8b_2048_inst,0.7727708533077661
25
  gemma-2-2b-it,0.6749760306807286
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.7468839884947267
21
  Qwen2_5_14B_Instruct,0.8063279002876318
22
  sg_llama3_8192_8b,0.8130393096836049
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.825503355704698
24
  sg_llama3_70b_inst,0.8696069031639502
25
  cross_openhermes_llama3_8b_2048_inst,0.7727708533077661
26
  gemma-2-2b-it,0.6749760306807286
results/fundamental_nlp_tasks/zero_shot/mnli.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.6709421285692472
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.571
21
  Qwen2_5_14B_Instruct,0.818
22
  sg_llama3_8192_8b,0.6605
 
23
  sg_llama3_70b_inst,0.7685
24
  cross_openhermes_llama3_8b_2048_inst,0.57
25
  gemma-2-2b-it,0.6185
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.571
21
  Qwen2_5_14B_Instruct,0.818
22
  sg_llama3_8192_8b,0.6605
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.7445
24
  sg_llama3_70b_inst,0.7685
25
  cross_openhermes_llama3_8b_2048_inst,0.57
26
  gemma-2-2b-it,0.6185
results/fundamental_nlp_tasks/zero_shot/mrpc.csv CHANGED
@@ -19,6 +19,7 @@ Meta-Llama-3-70B-Instruct,0.7598039215686274
19
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.7377450980392157
20
  Qwen2_5_14B_Instruct,0.7794117647058824
21
  sg_llama3_8192_8b,0.7254901960784313
 
22
  sg_llama3_70b_inst,0.7892156862745098
23
  cross_openhermes_llama3_8b_2048_inst,0.7058823529411765
24
  gemma-2-2b-it,0.7083333333333334
 
19
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.7377450980392157
20
  Qwen2_5_14B_Instruct,0.7794117647058824
21
  sg_llama3_8192_8b,0.7254901960784313
22
+ gemma2-9b-cpt-sea-lionv3-instruct,0.7794117647058824
23
  sg_llama3_70b_inst,0.7892156862745098
24
  cross_openhermes_llama3_8b_2048_inst,0.7058823529411765
25
  gemma-2-2b-it,0.7083333333333334
results/fundamental_nlp_tasks/zero_shot/ocnli.csv CHANGED
@@ -19,6 +19,7 @@ Meta-Llama-3-70B-Instruct,0.5928813559322034
19
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.5138983050847458
20
  Qwen2_5_14B_Instruct,0.7538983050847458
21
  sg_llama3_8192_8b,0.5084745762711864
 
22
  sg_llama3_70b_inst,0.6420338983050847
23
  cross_openhermes_llama3_8b_2048_inst,0.5322033898305085
24
  gemma-2-2b-it,0.43322033898305085
 
19
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.5138983050847458
20
  Qwen2_5_14B_Instruct,0.7538983050847458
21
  sg_llama3_8192_8b,0.5084745762711864
22
+ gemma2-9b-cpt-sea-lionv3-instruct,0.6488135593220339
23
  sg_llama3_70b_inst,0.6420338983050847
24
  cross_openhermes_llama3_8b_2048_inst,0.5322033898305085
25
  gemma-2-2b-it,0.43322033898305085
results/fundamental_nlp_tasks/zero_shot/qnli.csv CHANGED
@@ -19,6 +19,7 @@ Meta-Llama-3-70B-Instruct,0.876807614863628
19
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.6829580816401245
20
  Qwen2_5_14B_Instruct,0.9079260479589969
21
  sg_llama3_8192_8b,0.8118250045762402
 
22
  sg_llama3_70b_inst,0.9004210140948197
23
  cross_openhermes_llama3_8b_2048_inst,0.7393373604246751
24
  gemma-2-2b-it,0.7792421746293245
 
19
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.6829580816401245
20
  Qwen2_5_14B_Instruct,0.9079260479589969
21
  sg_llama3_8192_8b,0.8118250045762402
22
+ gemma2-9b-cpt-sea-lionv3-instruct,0.9055464030752334
23
  sg_llama3_70b_inst,0.9004210140948197
24
  cross_openhermes_llama3_8b_2048_inst,0.7393373604246751
25
  gemma-2-2b-it,0.7792421746293245
results/fundamental_nlp_tasks/zero_shot/qqp.csv CHANGED
@@ -19,6 +19,7 @@ Meta-Llama-3-70B-Instruct,0.7876082117239673
19
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.767
20
  Qwen2_5_14B_Instruct,0.8255
21
  sg_llama3_8192_8b,0.8095
 
22
  sg_llama3_70b_inst,0.804
23
  cross_openhermes_llama3_8b_2048_inst,0.722
24
  gemma-2-2b-it,0.761
 
19
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.767
20
  Qwen2_5_14B_Instruct,0.8255
21
  sg_llama3_8192_8b,0.8095
22
+ gemma2-9b-cpt-sea-lionv3-instruct,0.803
23
  sg_llama3_70b_inst,0.804
24
  cross_openhermes_llama3_8b_2048_inst,0.722
25
  gemma-2-2b-it,0.761
results/fundamental_nlp_tasks/zero_shot/rte.csv CHANGED
@@ -19,6 +19,7 @@ Meta-Llama-3-70B-Instruct,0.8086642599277978
19
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.7292418772563177
20
  Qwen2_5_14B_Instruct,0.8664259927797834
21
  sg_llama3_8192_8b,0.7364620938628159
 
22
  sg_llama3_70b_inst,0.8916967509025271
23
  cross_openhermes_llama3_8b_2048_inst,0.7075812274368231
24
  gemma-2-2b-it,0.7292418772563177
 
19
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.7292418772563177
20
  Qwen2_5_14B_Instruct,0.8664259927797834
21
  sg_llama3_8192_8b,0.7364620938628159
22
+ gemma2-9b-cpt-sea-lionv3-instruct,0.8483754512635379
23
  sg_llama3_70b_inst,0.8916967509025271
24
  cross_openhermes_llama3_8b_2048_inst,0.7075812274368231
25
  gemma-2-2b-it,0.7292418772563177
results/fundamental_nlp_tasks/zero_shot/wnli.csv CHANGED
@@ -19,6 +19,7 @@ Meta-Llama-3-70B-Instruct,0.7887323943661971
19
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.5915492957746479
20
  Qwen2_5_14B_Instruct,0.8309859154929577
21
  sg_llama3_8192_8b,0.704225352112676
 
22
  sg_llama3_70b_inst,0.8309859154929577
23
  cross_openhermes_llama3_8b_2048_inst,0.49295774647887325
24
  gemma-2-2b-it,0.43661971830985913
 
19
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.5915492957746479
20
  Qwen2_5_14B_Instruct,0.8309859154929577
21
  sg_llama3_8192_8b,0.704225352112676
22
+ gemma2-9b-cpt-sea-lionv3-instruct,0.7605633802816901
23
  sg_llama3_70b_inst,0.8309859154929577
24
  cross_openhermes_llama3_8b_2048_inst,0.49295774647887325
25
  gemma-2-2b-it,0.43661971830985913
results/general_reasoning/zero_shot/c_eval.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.6220423412204235
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.4800747198007472
21
  Qwen2_5_14B_Instruct,0.7839352428393525
22
  sg_llama3_8192_8b,0.4825653798256538
 
23
  sg_llama3_70b_inst,0.5722291407222914
24
  cross_openhermes_llama3_8b_2048_inst,0.4663760896637609
25
  gemma-2-2b-it,0.4352428393524284
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.4800747198007472
21
  Qwen2_5_14B_Instruct,0.7839352428393525
22
  sg_llama3_8192_8b,0.4825653798256538
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.5722291407222914
24
  sg_llama3_70b_inst,0.5722291407222914
25
  cross_openhermes_llama3_8b_2048_inst,0.4663760896637609
26
  gemma-2-2b-it,0.4352428393524284
results/general_reasoning/zero_shot/cmmlu.csv CHANGED
@@ -19,6 +19,7 @@ Meta-Llama-3-70B-Instruct,0.6494560524952513
19
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.4750474874805733
20
  Qwen2_5_14B_Instruct,0.7807805214988776
21
  sg_llama3_8192_8b,0.49050250388533934
 
22
  sg_llama3_70b_inst,0.6044724572612675
23
  cross_openhermes_llama3_8b_2048_inst,0.47064410291832154
24
  gemma-2-2b-it,0.4412882058366431
 
19
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.4750474874805733
20
  Qwen2_5_14B_Instruct,0.7807805214988776
21
  sg_llama3_8192_8b,0.49050250388533934
22
+ gemma2-9b-cpt-sea-lionv3-instruct,0.5796062856156105
23
  sg_llama3_70b_inst,0.6044724572612675
24
  cross_openhermes_llama3_8b_2048_inst,0.47064410291832154
25
  gemma-2-2b-it,0.4412882058366431
results/general_reasoning/zero_shot/indommlu.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.6323519594098405
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.5312103611723079
21
  Qwen2_5_14B_Instruct,0.6009746979104079
22
  sg_llama3_8192_8b,0.5021697042526203
 
23
  sg_llama3_70b_inst,0.6394285332799252
24
  cross_openhermes_llama3_8b_2048_inst,0.528873756592563
25
  gemma-2-2b-it,0.48220842512851325
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.5312103611723079
21
  Qwen2_5_14B_Instruct,0.6009746979104079
22
  sg_llama3_8192_8b,0.5021697042526203
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.6196007744175178
24
  sg_llama3_70b_inst,0.6394285332799252
25
  cross_openhermes_llama3_8b_2048_inst,0.528873756592563
26
  gemma-2-2b-it,0.48220842512851325
results/general_reasoning/zero_shot/zbench.csv CHANGED
@@ -20,6 +20,7 @@ Meta-Llama-3-70B-Instruct,0.5151515151515151
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.36363636363636365
21
  Qwen2_5_14B_Instruct,0.6666666666666666
22
  sg_llama3_8192_8b,0.30303030303030304
 
23
  sg_llama3_70b_inst,0.42424242424242425
24
  cross_openhermes_llama3_8b_2048_inst,0.3333333333333333
25
  gemma-2-2b-it,0.24242424242424243
 
20
  llama-own-4096-2-sg-ultrachat-sft-eos-real,0.36363636363636365
21
  Qwen2_5_14B_Instruct,0.6666666666666666
22
  sg_llama3_8192_8b,0.30303030303030304
23
+ gemma2-9b-cpt-sea-lionv3-instruct,0.42424242424242425
24
  sg_llama3_70b_inst,0.42424242424242425
25
  cross_openhermes_llama3_8b_2048_inst,0.3333333333333333
26
  gemma-2-2b-it,0.24242424242424243