zhuohan-7 committed on
Commit
90da191
·
verified ·
1 Parent(s): c7ff547

Upload folder using huggingface_hub

Browse files
Files changed (42) hide show
  1. results/cultural_reasoning/few_shot/cn_eval.csv +0 -4
  2. results/cultural_reasoning/few_shot/ph_eval.csv +0 -4
  3. results/cultural_reasoning/few_shot/sg_eval_v2_open.csv +1 -0
  4. results/cultural_reasoning/few_shot/us_eval.csv +0 -4
  5. results/cultural_reasoning/zero_shot/cn_eval.csv +6 -9
  6. results/cultural_reasoning/zero_shot/ph_eval.csv +5 -8
  7. results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv +3 -0
  8. results/cultural_reasoning/zero_shot/us_eval.csv +5 -8
  9. results/dialogue/few_shot/dream.csv +0 -4
  10. results/dialogue/zero_shot/dream.csv +3 -8
  11. results/emotion/few_shot/ind_emotion.csv +0 -4
  12. results/emotion/few_shot/sst2.csv +0 -4
  13. results/emotion/zero_shot/ind_emotion.csv +3 -8
  14. results/emotion/zero_shot/sst2.csv +3 -8
  15. results/fundamental_nlp_tasks/few_shot/c3.csv +0 -4
  16. results/fundamental_nlp_tasks/few_shot/cola.csv +0 -4
  17. results/fundamental_nlp_tasks/few_shot/mnli.csv +0 -4
  18. results/fundamental_nlp_tasks/few_shot/mrpc.csv +0 -4
  19. results/fundamental_nlp_tasks/few_shot/ocnli.csv +0 -4
  20. results/fundamental_nlp_tasks/few_shot/qnli.csv +0 -4
  21. results/fundamental_nlp_tasks/few_shot/qqp.csv +0 -4
  22. results/fundamental_nlp_tasks/few_shot/rte.csv +0 -4
  23. results/fundamental_nlp_tasks/few_shot/wnli.csv +0 -4
  24. results/fundamental_nlp_tasks/zero_shot/c3.csv +3 -8
  25. results/fundamental_nlp_tasks/zero_shot/cola.csv +3 -8
  26. results/fundamental_nlp_tasks/zero_shot/mnli.csv +3 -8
  27. results/fundamental_nlp_tasks/zero_shot/mrpc.csv +1 -7
  28. results/fundamental_nlp_tasks/zero_shot/ocnli.csv +3 -8
  29. results/fundamental_nlp_tasks/zero_shot/qnli.csv +2 -8
  30. results/fundamental_nlp_tasks/zero_shot/qqp.csv +3 -8
  31. results/fundamental_nlp_tasks/zero_shot/rte.csv +2 -8
  32. results/fundamental_nlp_tasks/zero_shot/wnli.csv +2 -8
  33. results/general_reasoning/few_shot/c_eval.csv +0 -4
  34. results/general_reasoning/few_shot/cmmlu.csv +0 -4
  35. results/general_reasoning/few_shot/indommlu.csv +0 -4
  36. results/general_reasoning/few_shot/mmlu.csv +0 -4
  37. results/general_reasoning/few_shot/zbench.csv +0 -4
  38. results/general_reasoning/zero_shot/c_eval.csv +6 -9
  39. results/general_reasoning/zero_shot/cmmlu.csv +6 -9
  40. results/general_reasoning/zero_shot/indommlu.csv +3 -8
  41. results/general_reasoning/zero_shot/mmlu.csv +6 -9
  42. results/general_reasoning/zero_shot/zbench.csv +4 -8
results/cultural_reasoning/few_shot/cn_eval.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.6
3
- Meta-Llama-3-8B,0.41904761904761906
4
- llama3-8b-cpt-sea-lionv2-base,0.4095238095238095
5
- Meta-Llama-3.1-8B,0.4857142857142857
 
1
  Model,Accuracy
 
 
 
 
results/cultural_reasoning/few_shot/ph_eval.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.68
3
- Meta-Llama-3-8B,0.54
4
- llama3-8b-cpt-sea-lionv2-base,0.52
5
- Meta-Llama-3.1-8B,0.51
 
1
  Model,Accuracy
 
 
 
 
results/cultural_reasoning/few_shot/sg_eval_v2_open.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ Model,Accuracy
results/cultural_reasoning/few_shot/us_eval.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.8785046728971962
3
- Meta-Llama-3-8B,0.6915887850467289
4
- llama3-8b-cpt-sea-lionv2-base,0.719626168224299
5
- Meta-Llama-3.1-8B,0.6728971962616822
 
1
  Model,Accuracy
 
 
 
 
results/cultural_reasoning/zero_shot/cn_eval.csv CHANGED
@@ -1,10 +1,7 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.8095238095238095
3
- Meta-Llama-3.1-8B-Instruct,0.42857142857142855
4
- Qwen2-72B-Instruct,0.8571428571428571
5
- Meta-Llama-3-8B-Instruct,0.37142857142857144
6
- SeaLLMs-v3-7B-Chat,0.8095238095238095
7
- gemma-2-9b-it,0.6190476190476191
8
- Meta-Llama-3-70B-Instruct,0.5142857142857142
9
- gemma-2-2b-it,0.4095238095238095
10
- llama3-8b-cpt-sea-lionv2-instruct,0.47619047619047616
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.4857142857142857
3
+ Meta-Llama-3.1-70B-Instruct,0.5428571428571428
4
+ gemma-2-9b-it,0.580952380952381
5
+ Meta-Llama-3-70B-Instruct,0.5333333333333333
6
+ sg_llama3_70b_inst,0.5523809523809524
7
+ GPT4o_0513,0.8095238095238095
 
 
 
results/cultural_reasoning/zero_shot/ph_eval.csv CHANGED
@@ -1,10 +1,7 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.51
3
- Meta-Llama-3.1-8B-Instruct,0.56
4
- Qwen2-72B-Instruct,0.63
5
- Meta-Llama-3-8B-Instruct,0.54
6
- SeaLLMs-v3-7B-Chat,0.5
7
- gemma-2-9b-it,0.61
8
  Meta-Llama-3-70B-Instruct,0.63
9
- gemma-2-2b-it,0.39
10
- llama3-8b-cpt-sea-lionv2-instruct,0.53
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.6
3
+ Meta-Llama-3.1-70B-Instruct,0.68
4
+ gemma-2-9b-it,0.58
 
 
 
5
  Meta-Llama-3-70B-Instruct,0.63
6
+ sg_llama3_70b_inst,0.69
7
+ GPT4o_0513,0.77
results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Model,Accuracy
2
+ Meta-Llama-3-70B-Instruct,50.599999999999994
3
+ sg_llama3_70b_inst,51.959999999999994
results/cultural_reasoning/zero_shot/us_eval.csv CHANGED
@@ -1,10 +1,7 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.719626168224299
3
- Meta-Llama-3.1-8B-Instruct,0.6448598130841121
4
- Qwen2-72B-Instruct,0.8504672897196262
5
- Meta-Llama-3-8B-Instruct,0.6448598130841121
6
- SeaLLMs-v3-7B-Chat,0.7009345794392523
7
- gemma-2-9b-it,0.8317757009345794
8
  Meta-Llama-3-70B-Instruct,0.8691588785046729
9
- gemma-2-2b-it,0.7102803738317757
10
- llama3-8b-cpt-sea-lionv2-instruct,0.6542056074766355
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.7289719626168224
3
+ Meta-Llama-3.1-70B-Instruct,0.8411214953271028
4
+ gemma-2-9b-it,0.8130841121495327
 
 
 
5
  Meta-Llama-3-70B-Instruct,0.8691588785046729
6
+ sg_llama3_70b_inst,0.8598130841121495
7
+ GPT4o_0513,0.8691588785046729
results/dialogue/few_shot/dream.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.9510044096031357
3
- Meta-Llama-3-8B,0.8250857422831945
4
- llama3-8b-cpt-sea-lionv2-base,0.8515433610975012
5
- Meta-Llama-3.1-8B,0.8530132288094071
 
1
  Model,Accuracy
 
 
 
 
results/dialogue/zero_shot/dream.csv CHANGED
@@ -1,10 +1,5 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.9338559529642332
3
- Meta-Llama-3.1-8B-Instruct,0.8858402743753062
4
- Qwen2-72B-Instruct,0.9603135717785399
5
- Meta-Llama-3-8B-Instruct,0.5433610975012249
6
- SeaLLMs-v3-7B-Chat,0.9211170994610485
7
- gemma-2-9b-it,0.9397354238118569
8
  Meta-Llama-3-70B-Instruct,0.9480646741793238
9
- gemma-2-2b-it,0.8486036256736894
10
- llama3-8b-cpt-sea-lionv2-instruct,0.7555120039196472
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.9039686428221461
 
 
 
 
 
3
  Meta-Llama-3-70B-Instruct,0.9480646741793238
4
+ sg_llama3_70b_inst,0.9524742773150416
5
+ GPT4o_0513,0.9583537481626654
results/emotion/few_shot/ind_emotion.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.7159090909090909
3
- Meta-Llama-3-8B,0.4636363636363636
4
- llama3-8b-cpt-sea-lionv2-base,0.525
5
- Meta-Llama-3.1-8B,0.5136363636363637
 
1
  Model,Accuracy
 
 
 
 
results/emotion/few_shot/sst2.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.9002293577981652
3
- Meta-Llama-3-8B,0.6697247706422018
4
- llama3-8b-cpt-sea-lionv2-base,0.75
5
- Meta-Llama-3.1-8B,0.8405963302752294
 
1
  Model,Accuracy
 
 
 
 
results/emotion/zero_shot/ind_emotion.csv CHANGED
@@ -1,10 +1,5 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.6386363636363637
3
- Meta-Llama-3.1-8B-Instruct,0.6295454545454545
4
- Qwen2-72B-Instruct,0.675
5
- Meta-Llama-3-8B-Instruct,0.6522727272727272
6
- SeaLLMs-v3-7B-Chat,0.34545454545454546
7
- gemma-2-9b-it,0.7431818181818182
8
  Meta-Llama-3-70B-Instruct,0.6909090909090909
9
- gemma-2-2b-it,0.625
10
- llama3-8b-cpt-sea-lionv2-instruct,0.6272727272727273
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.6545454545454545
 
 
 
 
 
3
  Meta-Llama-3-70B-Instruct,0.6909090909090909
4
+ sg_llama3_70b_inst,0.7
5
+ GPT4o_0513,0.7068181818181818
results/emotion/zero_shot/sst2.csv CHANGED
@@ -1,10 +1,5 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.9231651376146789
3
- Meta-Llama-3.1-8B-Instruct,0.8784403669724771
4
- Qwen2-72B-Instruct,0.9369266055045872
5
- Meta-Llama-3-8B-Instruct,0.8669724770642202
6
- SeaLLMs-v3-7B-Chat,0.9346330275229358
7
- gemma-2-9b-it,0.9311926605504587
8
  Meta-Llama-3-70B-Instruct,0.9495412844036697
9
- gemma-2-2b-it,0.9208715596330275
10
- llama3-8b-cpt-sea-lionv2-instruct,0.9162844036697247
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.8646788990825688
 
 
 
 
 
3
  Meta-Llama-3-70B-Instruct,0.9495412844036697
4
+ sg_llama3_70b_inst,0.9334862385321101
5
+ GPT4o_0513,0.9415137614678899
results/fundamental_nlp_tasks/few_shot/c3.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.9390426327599103
3
- Meta-Llama-3-8B,0.7703814510097232
4
- llama3-8b-cpt-sea-lionv2-base,0.7913238593866866
5
- Meta-Llama-3.1-8B,0.8208676140613314
 
1
  Model,Accuracy
 
 
 
 
results/fundamental_nlp_tasks/few_shot/cola.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.7171620325982742
3
- Meta-Llama-3-8B,0.6596356663470757
4
- llama3-8b-cpt-sea-lionv2-base,0.6021093000958773
5
- Meta-Llama-3.1-8B,0.6222435282837967
 
1
  Model,Accuracy
 
 
 
 
results/fundamental_nlp_tasks/few_shot/mnli.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.7505
3
- Meta-Llama-3-8B,0.46174988547869905
4
- llama3-8b-cpt-sea-lionv2-base,0.472
5
- Meta-Llama-3.1-8B,0.48506133251895966
 
1
  Model,Accuracy
 
 
 
 
results/fundamental_nlp_tasks/few_shot/mrpc.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.6764705882352942
3
- Meta-Llama-3-8B,0.5906862745098039
4
- llama3-8b-cpt-sea-lionv2-base,0.6078431372549019
5
- Meta-Llama-3.1-8B,0.5661764705882353
 
1
  Model,Accuracy
 
 
 
 
results/fundamental_nlp_tasks/few_shot/ocnli.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.6840677966101695
3
- Meta-Llama-3-8B,0.3935593220338983
4
- llama3-8b-cpt-sea-lionv2-base,0.3840677966101695
5
- Meta-Llama-3.1-8B,0.411864406779661
 
1
  Model,Accuracy
 
 
 
 
results/fundamental_nlp_tasks/few_shot/qnli.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.572
3
- Meta-Llama-3-8B,0.5059491122094087
4
- llama3-8b-cpt-sea-lionv2-base,0.49716273110012815
5
- Meta-Llama-3.1-8B,0.5081457074867289
 
1
  Model,Accuracy
 
 
 
 
results/fundamental_nlp_tasks/few_shot/qqp.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.7215
3
- Meta-Llama-3-8B,0.551
4
- llama3-8b-cpt-sea-lionv2-base,0.519
5
- Meta-Llama-3.1-8B,0.5565
 
1
  Model,Accuracy
 
 
 
 
results/fundamental_nlp_tasks/few_shot/rte.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.776173285198556
3
- Meta-Llama-3-8B,0.5487364620938628
4
- llama3-8b-cpt-sea-lionv2-base,0.6462093862815884
5
- Meta-Llama-3.1-8B,0.6137184115523465
 
1
  Model,Accuracy
 
 
 
 
results/fundamental_nlp_tasks/few_shot/wnli.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.8169014084507042
3
- Meta-Llama-3-8B,0.4647887323943662
4
- llama3-8b-cpt-sea-lionv2-base,0.5915492957746479
5
- Meta-Llama-3.1-8B,0.5211267605633803
 
1
  Model,Accuracy
 
 
 
 
results/fundamental_nlp_tasks/zero_shot/c3.csv CHANGED
@@ -1,10 +1,5 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.9233358264771877
3
- Meta-Llama-3.1-8B-Instruct,0.7984293193717278
4
- Qwen2-72B-Instruct,0.9599850411368736
5
- Meta-Llama-3-8B-Instruct,0.8515332834704562
6
- SeaLLMs-v3-7B-Chat,0.912490650710546
7
- gemma-2-9b-it,0.9210919970082274
8
  Meta-Llama-3-70B-Instruct,0.9521316379955124
9
- gemma-2-2b-it,0.7703814510097232
10
- llama3-8b-cpt-sea-lionv2-instruct,0.675392670157068
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.8672400897531788
 
 
 
 
 
3
  Meta-Llama-3-70B-Instruct,0.9521316379955124
4
+ sg_llama3_70b_inst,0.9289454001495886
5
+ GPT4o_0513,0.9648466716529543
results/fundamental_nlp_tasks/zero_shot/cola.csv CHANGED
@@ -1,10 +1,5 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.7861936720997124
3
- Meta-Llama-3.1-8B-Instruct,0.7046979865771812
4
- Qwen2-72B-Instruct,0.8360498561840843
5
- Meta-Llama-3-8B-Instruct,0.6481303930968361
6
- SeaLLMs-v3-7B-Chat,0.7890699904122723
7
- gemma-2-9b-it,0.7967401725790988
8
  Meta-Llama-3-70B-Instruct,0.835091083413231
9
- gemma-2-2b-it,0.6711409395973155
10
- llama3-8b-cpt-sea-lionv2-instruct,0.5915627996164909
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.6673058485139022
 
 
 
 
 
3
  Meta-Llama-3-70B-Instruct,0.835091083413231
4
+ sg_llama3_70b_inst,0.8696069031639502
5
+ GPT4o_0513,0.8398849472674976
results/fundamental_nlp_tasks/zero_shot/mnli.csv CHANGED
@@ -1,10 +1,5 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.7341578867002596
3
- Meta-Llama-3.1-8B-Instruct,0.4603756298671553
4
- Qwen2-72B-Instruct,0.7979844251030692
5
- Meta-Llama-3-8B-Instruct,0.5296991907161399
6
- SeaLLMs-v3-7B-Chat,0.638
7
- gemma-2-9b-it,0.707
8
  Meta-Llama-3-70B-Instruct,0.6709421285692472
9
- gemma-2-2b-it,0.612
10
- llama3-8b-cpt-sea-lionv2-instruct,0.5276123581208327
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.4825
 
 
 
 
 
3
  Meta-Llama-3-70B-Instruct,0.6709421285692472
4
+ sg_llama3_70b_inst,0.7685
5
+ GPT4o_0513,0.8335
results/fundamental_nlp_tasks/zero_shot/mrpc.csv CHANGED
@@ -1,10 +1,4 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.7745098039215687
3
  Meta-Llama-3.1-8B-Instruct,0.6740196078431373
4
- Qwen2-72B-Instruct,0.7941176470588235
5
- Meta-Llama-3-8B-Instruct,0.6764705882352942
6
- SeaLLMs-v3-7B-Chat,0.7475490196078431
7
- gemma-2-9b-it,0.7450980392156863
8
  Meta-Llama-3-70B-Instruct,0.7598039215686274
9
- gemma-2-2b-it,0.7132352941176471
10
- llama3-8b-cpt-sea-lionv2-instruct,0.49264705882352944
 
1
  Model,Accuracy
 
2
  Meta-Llama-3.1-8B-Instruct,0.6740196078431373
 
 
 
 
3
  Meta-Llama-3-70B-Instruct,0.7598039215686274
4
+ sg_llama3_70b_inst,0.7892156862745098
 
results/fundamental_nlp_tasks/zero_shot/ocnli.csv CHANGED
@@ -1,10 +1,5 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.6474576271186441
3
- Meta-Llama-3.1-8B-Instruct,0.42135593220338985
4
- Qwen2-72B-Instruct,0.7874576271186441
5
- Meta-Llama-3-8B-Instruct,0.4322033898305085
6
- SeaLLMs-v3-7B-Chat,0.5613559322033899
7
- gemma-2-9b-it,0.6183050847457627
8
  Meta-Llama-3-70B-Instruct,0.5928813559322034
9
- gemma-2-2b-it,0.4335593220338983
10
- llama3-8b-cpt-sea-lionv2-instruct,0.4135593220338983
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.40983050847457625
 
 
 
 
 
3
  Meta-Llama-3-70B-Instruct,0.5928813559322034
4
+ sg_llama3_70b_inst,0.6420338983050847
5
+ GPT4o_0513,0.7308474576271187
results/fundamental_nlp_tasks/zero_shot/qnli.csv CHANGED
@@ -1,10 +1,4 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.8169503935566539
3
- Meta-Llama-3.1-8B-Instruct,0.6027823540179389
4
- Qwen2-72B-Instruct,0.8894380377082189
5
- Meta-Llama-3-8B-Instruct,0.5689181768259198
6
- SeaLLMs-v3-7B-Chat,0.7181036060772469
7
- gemma-2-9b-it,0.90481420464946
8
  Meta-Llama-3-70B-Instruct,0.876807614863628
9
- gemma-2-2b-it,0.779974373055098
10
- llama3-8b-cpt-sea-lionv2-instruct,0.5652571846970529
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.5777045579352005
 
 
 
 
 
3
  Meta-Llama-3-70B-Instruct,0.876807614863628
4
+ sg_llama3_70b_inst,0.9004210140948197
 
results/fundamental_nlp_tasks/zero_shot/qqp.csv CHANGED
@@ -1,10 +1,5 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.7771209497897601
3
- Meta-Llama-3.1-8B-Instruct,0.5058125154588177
4
- Qwen2-72B-Instruct,0.7992332426416028
5
- Meta-Llama-3-8B-Instruct,0.5512490724709375
6
- SeaLLMs-v3-7B-Chat,0.757
7
- gemma-2-9b-it,0.761
8
  Meta-Llama-3-70B-Instruct,0.7876082117239673
9
- gemma-2-2b-it,0.771
10
- llama3-8b-cpt-sea-lionv2-instruct,0.585
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.5645
 
 
 
 
 
3
  Meta-Llama-3-70B-Instruct,0.7876082117239673
4
+ sg_llama3_70b_inst,0.804
5
+ GPT4o_0513,0.8085
results/fundamental_nlp_tasks/zero_shot/rte.csv CHANGED
@@ -1,10 +1,4 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.8411552346570397
3
- Meta-Llama-3.1-8B-Instruct,0.6895306859205776
4
- Qwen2-72B-Instruct,0.8592057761732852
5
- Meta-Llama-3-8B-Instruct,0.6028880866425993
6
- SeaLLMs-v3-7B-Chat,0.7870036101083032
7
- gemma-2-9b-it,0.7472924187725631
8
  Meta-Llama-3-70B-Instruct,0.8086642599277978
9
- gemma-2-2b-it,0.7003610108303249
10
- llama3-8b-cpt-sea-lionv2-instruct,0.6209386281588448
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.6750902527075813
 
 
 
 
 
3
  Meta-Llama-3-70B-Instruct,0.8086642599277978
4
+ sg_llama3_70b_inst,0.8916967509025271
 
results/fundamental_nlp_tasks/zero_shot/wnli.csv CHANGED
@@ -1,10 +1,4 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.647887323943662
3
- Meta-Llama-3.1-8B-Instruct,0.4507042253521127
4
- Qwen2-72B-Instruct,0.9014084507042254
5
- Meta-Llama-3-8B-Instruct,0.4507042253521127
6
- SeaLLMs-v3-7B-Chat,0.6619718309859155
7
- gemma-2-9b-it,0.7464788732394366
8
  Meta-Llama-3-70B-Instruct,0.7887323943661971
9
- gemma-2-2b-it,0.43661971830985913
10
- llama3-8b-cpt-sea-lionv2-instruct,0.4788732394366197
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.49295774647887325
 
 
 
 
 
3
  Meta-Llama-3-70B-Instruct,0.7887323943661971
4
+ sg_llama3_70b_inst,0.8309859154929577
 
results/general_reasoning/few_shot/c_eval.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.6183063511830635
3
- Meta-Llama-3-8B,0.43773349937733497
4
- llama3-8b-cpt-sea-lionv2-base,0.42092154420921546
5
- Meta-Llama-3.1-8B,0.44458281444582815
 
1
  Model,Accuracy
 
 
 
 
results/general_reasoning/few_shot/cmmlu.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.652650664824728
3
- Meta-Llama-3-8B,0.4308409601105163
4
- llama3-8b-cpt-sea-lionv2-base,0.4389570022448627
5
- Meta-Llama-3.1-8B,0.4556207908824037
 
1
  Model,Accuracy
 
 
 
 
results/general_reasoning/few_shot/indommlu.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.6355564456906335
3
- Meta-Llama-3-8B,0.4500300420588824
4
- llama3-8b-cpt-sea-lionv2-base,0.5077775552440082
5
- Meta-Llama-3.1-8B,0.4644502303224514
 
1
  Model,Accuracy
 
 
 
 
results/general_reasoning/few_shot/mmlu.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.7509474436896675
3
- Meta-Llama-3-8B,0.5651054701465856
4
- llama3-8b-cpt-sea-lionv2-base,0.5598140865212728
5
- Meta-Llama-3.1-8B,0.5749731855559528
 
1
  Model,Accuracy
 
 
 
 
results/general_reasoning/few_shot/zbench.csv CHANGED
@@ -1,5 +1 @@
1
  Model,Accuracy
2
- Meta-Llama-3-70B,0.5151515151515151
3
- Meta-Llama-3-8B,0.2727272727272727
4
- llama3-8b-cpt-sea-lionv2-base,0.3333333333333333
5
- Meta-Llama-3.1-8B,0.3939393939393939
 
1
  Model,Accuracy
 
 
 
 
results/general_reasoning/zero_shot/c_eval.csv CHANGED
@@ -1,10 +1,7 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.7546699875466999
3
- Meta-Llama-3.1-8B-Instruct,0.3493150684931507
4
- Qwen2-72B-Instruct,0.823785803237858
5
- Meta-Llama-3-8B-Instruct,0.4533001245330012
6
- SeaLLMs-v3-7B-Chat,0.7440846824408468
7
- gemma-2-9b-it,0.547945205479452
8
- Meta-Llama-3-70B-Instruct,0.6046077210460772
9
- gemma-2-2b-it,0.4153175591531756
10
- llama3-8b-cpt-sea-lionv2-instruct,0.398505603985056
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.5149439601494396
3
+ Meta-Llama-3.1-70B-Instruct,0.6612702366127023
4
+ gemma-2-9b-it,0.5523038605230386
5
+ Meta-Llama-3-70B-Instruct,0.6220423412204235
6
+ sg_llama3_70b_inst,0.5722291407222914
7
+ GPT4o_0513,0.7073474470734745
 
 
 
results/general_reasoning/zero_shot/cmmlu.csv CHANGED
@@ -1,10 +1,7 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.7656708685891901
3
- Meta-Llama-3.1-8B-Instruct,0.38240372992574684
4
- Qwen2-72B-Instruct,0.8240372992574685
5
- Meta-Llama-3-8B-Instruct,0.4679675358314626
6
- SeaLLMs-v3-7B-Chat,0.7718010706268348
7
- gemma-2-9b-it,0.5721809704714211
8
- Meta-Llama-3-70B-Instruct,0.6195821101709549
9
- gemma-2-2b-it,0.4336902089449145
10
- llama3-8b-cpt-sea-lionv2-instruct,0.4105508547746503
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.5246934898981178
3
+ Meta-Llama-3.1-70B-Instruct,0.6814885166637886
4
+ gemma-2-9b-it,0.5700224486271801
5
+ Meta-Llama-3-70B-Instruct,0.6494560524952513
6
+ sg_llama3_70b_inst,0.6044724572612675
7
+ GPT4o_0513,0.7414954239336902
 
 
 
results/general_reasoning/zero_shot/indommlu.csv CHANGED
@@ -1,10 +1,5 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.53027571934041
3
- Meta-Llama-3.1-8B-Instruct,0.4701916015755391
4
- Qwen2-72B-Instruct,0.6356232058214835
5
- Meta-Llama-3-8B-Instruct,0.5115161225716003
6
- SeaLLMs-v3-7B-Chat,0.42826623940182923
7
- gemma-2-9b-it,0.5599839775685961
8
  Meta-Llama-3-70B-Instruct,0.6323519594098405
9
- gemma-2-2b-it,0.43447493157086586
10
- llama3-8b-cpt-sea-lionv2-instruct,0.4962280526069831
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.5252687095266707
 
 
 
 
 
3
  Meta-Llama-3-70B-Instruct,0.6323519594098405
4
+ sg_llama3_70b_inst,0.6394285332799252
5
+ GPT4o_0513,0.7584618465852193
results/general_reasoning/zero_shot/mmlu.csv CHANGED
@@ -1,10 +1,7 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.6654272434751519
3
- Meta-Llama-3.1-8B-Instruct,0.5518770110833036
4
- Qwen2-72B-Instruct,0.7935645334286736
5
- Meta-Llama-3-8B-Instruct,0.508044333214158
6
- SeaLLMs-v3-7B-Chat,0.6637826242402575
7
- gemma-2-9b-it,0.7107615302109402
8
- Meta-Llama-3-70B-Instruct,0.7607436539149088
9
- gemma-2-2b-it,0.5676081515909903
10
- llama3-8b-cpt-sea-lionv2-instruct,0.5619592420450482
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.6037182695745441
3
+ Meta-Llama-3.1-70B-Instruct,0.8058634250983197
4
+ gemma-2-9b-it,0.7100464783696818
5
+ Meta-Llama-3-70B-Instruct,0.7649624597783339
6
+ sg_llama3_70b_inst,0.7407937075437969
7
+ GPT4o_0513,0.8308187343582409
 
 
 
results/general_reasoning/zero_shot/zbench.csv CHANGED
@@ -1,10 +1,6 @@
1
  Model,Accuracy
2
- Qwen2-7B-Instruct,0.696969696969697
3
- Meta-Llama-3.1-8B-Instruct,0.45454545454545453
4
- Qwen2-72B-Instruct,0.5757575757575758
5
- Meta-Llama-3-8B-Instruct,0.30303030303030304
6
- SeaLLMs-v3-7B-Chat,0.5151515151515151
7
  gemma-2-9b-it,0.48484848484848486
8
- Meta-Llama-3-70B-Instruct,0.45454545454545453
9
- gemma-2-2b-it,0.21212121212121213
10
- llama3-8b-cpt-sea-lionv2-instruct,0.09090909090909091
 
1
  Model,Accuracy
2
+ Meta-Llama-3.1-8B-Instruct,0.42424242424242425
3
+ Meta-Llama-3.1-70B-Instruct,0.48484848484848486
 
 
 
4
  gemma-2-9b-it,0.48484848484848486
5
+ Meta-Llama-3-70B-Instruct,0.5151515151515151
6
+ sg_llama3_70b_inst,0.42424242424242425