Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- results/cultural_reasoning/few_shot/cn_eval.csv +0 -4
- results/cultural_reasoning/few_shot/ph_eval.csv +0 -4
- results/cultural_reasoning/few_shot/sg_eval_v2_open.csv +1 -0
- results/cultural_reasoning/few_shot/us_eval.csv +0 -4
- results/cultural_reasoning/zero_shot/cn_eval.csv +6 -9
- results/cultural_reasoning/zero_shot/ph_eval.csv +5 -8
- results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv +3 -0
- results/cultural_reasoning/zero_shot/us_eval.csv +5 -8
- results/dialogue/few_shot/dream.csv +0 -4
- results/dialogue/zero_shot/dream.csv +3 -8
- results/emotion/few_shot/ind_emotion.csv +0 -4
- results/emotion/few_shot/sst2.csv +0 -4
- results/emotion/zero_shot/ind_emotion.csv +3 -8
- results/emotion/zero_shot/sst2.csv +3 -8
- results/fundamental_nlp_tasks/few_shot/c3.csv +0 -4
- results/fundamental_nlp_tasks/few_shot/cola.csv +0 -4
- results/fundamental_nlp_tasks/few_shot/mnli.csv +0 -4
- results/fundamental_nlp_tasks/few_shot/mrpc.csv +0 -4
- results/fundamental_nlp_tasks/few_shot/ocnli.csv +0 -4
- results/fundamental_nlp_tasks/few_shot/qnli.csv +0 -4
- results/fundamental_nlp_tasks/few_shot/qqp.csv +0 -4
- results/fundamental_nlp_tasks/few_shot/rte.csv +0 -4
- results/fundamental_nlp_tasks/few_shot/wnli.csv +0 -4
- results/fundamental_nlp_tasks/zero_shot/c3.csv +3 -8
- results/fundamental_nlp_tasks/zero_shot/cola.csv +3 -8
- results/fundamental_nlp_tasks/zero_shot/mnli.csv +3 -8
- results/fundamental_nlp_tasks/zero_shot/mrpc.csv +1 -7
- results/fundamental_nlp_tasks/zero_shot/ocnli.csv +3 -8
- results/fundamental_nlp_tasks/zero_shot/qnli.csv +2 -8
- results/fundamental_nlp_tasks/zero_shot/qqp.csv +3 -8
- results/fundamental_nlp_tasks/zero_shot/rte.csv +2 -8
- results/fundamental_nlp_tasks/zero_shot/wnli.csv +2 -8
- results/general_reasoning/few_shot/c_eval.csv +0 -4
- results/general_reasoning/few_shot/cmmlu.csv +0 -4
- results/general_reasoning/few_shot/indommlu.csv +0 -4
- results/general_reasoning/few_shot/mmlu.csv +0 -4
- results/general_reasoning/few_shot/zbench.csv +0 -4
- results/general_reasoning/zero_shot/c_eval.csv +6 -9
- results/general_reasoning/zero_shot/cmmlu.csv +6 -9
- results/general_reasoning/zero_shot/indommlu.csv +3 -8
- results/general_reasoning/zero_shot/mmlu.csv +6 -9
- results/general_reasoning/zero_shot/zbench.csv +4 -8
results/cultural_reasoning/few_shot/cn_eval.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.6
|
3 |
-
Meta-Llama-3-8B,0.41904761904761906
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.4095238095238095
|
5 |
-
Meta-Llama-3.1-8B,0.4857142857142857
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/cultural_reasoning/few_shot/ph_eval.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.68
|
3 |
-
Meta-Llama-3-8B,0.54
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.52
|
5 |
-
Meta-Llama-3.1-8B,0.51
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/cultural_reasoning/few_shot/sg_eval_v2_open.csv
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Model,Accuracy
|
results/cultural_reasoning/few_shot/us_eval.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.8785046728971962
|
3 |
-
Meta-Llama-3-8B,0.6915887850467289
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.719626168224299
|
5 |
-
Meta-Llama-3.1-8B,0.6728971962616822
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/cultural_reasoning/zero_shot/cn_eval.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-
|
4 |
-
|
5 |
-
Meta-Llama-3-
|
6 |
-
|
7 |
-
|
8 |
-
Meta-Llama-3-70B-Instruct,0.5142857142857142
|
9 |
-
gemma-2-2b-it,0.4095238095238095
|
10 |
-
llama3-8b-cpt-sea-lionv2-instruct,0.47619047619047616
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.4857142857142857
|
3 |
+
Meta-Llama-3.1-70B-Instruct,0.5428571428571428
|
4 |
+
gemma-2-9b-it,0.580952380952381
|
5 |
+
Meta-Llama-3-70B-Instruct,0.5333333333333333
|
6 |
+
sg_llama3_70b_inst,0.5523809523809524
|
7 |
+
GPT4o_0513,0.8095238095238095
|
|
|
|
|
|
results/cultural_reasoning/zero_shot/ph_eval.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-
|
4 |
-
|
5 |
-
Meta-Llama-3-8B-Instruct,0.54
|
6 |
-
SeaLLMs-v3-7B-Chat,0.5
|
7 |
-
gemma-2-9b-it,0.61
|
8 |
Meta-Llama-3-70B-Instruct,0.63
|
9 |
-
|
10 |
-
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.6
|
3 |
+
Meta-Llama-3.1-70B-Instruct,0.68
|
4 |
+
gemma-2-9b-it,0.58
|
|
|
|
|
|
|
5 |
Meta-Llama-3-70B-Instruct,0.63
|
6 |
+
sg_llama3_70b_inst,0.69
|
7 |
+
GPT4o_0513,0.77
|
results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
Model,Accuracy
|
2 |
+
Meta-Llama-3-70B-Instruct,50.599999999999994
|
3 |
+
sg_llama3_70b_inst,51.959999999999994
|
results/cultural_reasoning/zero_shot/us_eval.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-
|
4 |
-
|
5 |
-
Meta-Llama-3-8B-Instruct,0.6448598130841121
|
6 |
-
SeaLLMs-v3-7B-Chat,0.7009345794392523
|
7 |
-
gemma-2-9b-it,0.8317757009345794
|
8 |
Meta-Llama-3-70B-Instruct,0.8691588785046729
|
9 |
-
|
10 |
-
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.7289719626168224
|
3 |
+
Meta-Llama-3.1-70B-Instruct,0.8411214953271028
|
4 |
+
gemma-2-9b-it,0.8130841121495327
|
|
|
|
|
|
|
5 |
Meta-Llama-3-70B-Instruct,0.8691588785046729
|
6 |
+
sg_llama3_70b_inst,0.8598130841121495
|
7 |
+
GPT4o_0513,0.8691588785046729
|
results/dialogue/few_shot/dream.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.9510044096031357
|
3 |
-
Meta-Llama-3-8B,0.8250857422831945
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.8515433610975012
|
5 |
-
Meta-Llama-3.1-8B,0.8530132288094071
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/dialogue/zero_shot/dream.csv
CHANGED
@@ -1,10 +1,5 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-8B-Instruct,0.8858402743753062
|
4 |
-
Qwen2-72B-Instruct,0.9603135717785399
|
5 |
-
Meta-Llama-3-8B-Instruct,0.5433610975012249
|
6 |
-
SeaLLMs-v3-7B-Chat,0.9211170994610485
|
7 |
-
gemma-2-9b-it,0.9397354238118569
|
8 |
Meta-Llama-3-70B-Instruct,0.9480646741793238
|
9 |
-
|
10 |
-
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.9039686428221461
|
|
|
|
|
|
|
|
|
|
|
3 |
Meta-Llama-3-70B-Instruct,0.9480646741793238
|
4 |
+
sg_llama3_70b_inst,0.9524742773150416
|
5 |
+
GPT4o_0513,0.9583537481626654
|
results/emotion/few_shot/ind_emotion.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.7159090909090909
|
3 |
-
Meta-Llama-3-8B,0.4636363636363636
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.525
|
5 |
-
Meta-Llama-3.1-8B,0.5136363636363637
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/emotion/few_shot/sst2.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.9002293577981652
|
3 |
-
Meta-Llama-3-8B,0.6697247706422018
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.75
|
5 |
-
Meta-Llama-3.1-8B,0.8405963302752294
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/emotion/zero_shot/ind_emotion.csv
CHANGED
@@ -1,10 +1,5 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-8B-Instruct,0.6295454545454545
|
4 |
-
Qwen2-72B-Instruct,0.675
|
5 |
-
Meta-Llama-3-8B-Instruct,0.6522727272727272
|
6 |
-
SeaLLMs-v3-7B-Chat,0.34545454545454546
|
7 |
-
gemma-2-9b-it,0.7431818181818182
|
8 |
Meta-Llama-3-70B-Instruct,0.6909090909090909
|
9 |
-
|
10 |
-
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.6545454545454545
|
|
|
|
|
|
|
|
|
|
|
3 |
Meta-Llama-3-70B-Instruct,0.6909090909090909
|
4 |
+
sg_llama3_70b_inst,0.7
|
5 |
+
GPT4o_0513,0.7068181818181818
|
results/emotion/zero_shot/sst2.csv
CHANGED
@@ -1,10 +1,5 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-8B-Instruct,0.8784403669724771
|
4 |
-
Qwen2-72B-Instruct,0.9369266055045872
|
5 |
-
Meta-Llama-3-8B-Instruct,0.8669724770642202
|
6 |
-
SeaLLMs-v3-7B-Chat,0.9346330275229358
|
7 |
-
gemma-2-9b-it,0.9311926605504587
|
8 |
Meta-Llama-3-70B-Instruct,0.9495412844036697
|
9 |
-
|
10 |
-
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.8646788990825688
|
|
|
|
|
|
|
|
|
|
|
3 |
Meta-Llama-3-70B-Instruct,0.9495412844036697
|
4 |
+
sg_llama3_70b_inst,0.9334862385321101
|
5 |
+
GPT4o_0513,0.9415137614678899
|
results/fundamental_nlp_tasks/few_shot/c3.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.9390426327599103
|
3 |
-
Meta-Llama-3-8B,0.7703814510097232
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.7913238593866866
|
5 |
-
Meta-Llama-3.1-8B,0.8208676140613314
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/fundamental_nlp_tasks/few_shot/cola.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.7171620325982742
|
3 |
-
Meta-Llama-3-8B,0.6596356663470757
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.6021093000958773
|
5 |
-
Meta-Llama-3.1-8B,0.6222435282837967
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/fundamental_nlp_tasks/few_shot/mnli.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.7505
|
3 |
-
Meta-Llama-3-8B,0.46174988547869905
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.472
|
5 |
-
Meta-Llama-3.1-8B,0.48506133251895966
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/fundamental_nlp_tasks/few_shot/mrpc.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.6764705882352942
|
3 |
-
Meta-Llama-3-8B,0.5906862745098039
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.6078431372549019
|
5 |
-
Meta-Llama-3.1-8B,0.5661764705882353
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/fundamental_nlp_tasks/few_shot/ocnli.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.6840677966101695
|
3 |
-
Meta-Llama-3-8B,0.3935593220338983
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.3840677966101695
|
5 |
-
Meta-Llama-3.1-8B,0.411864406779661
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/fundamental_nlp_tasks/few_shot/qnli.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.572
|
3 |
-
Meta-Llama-3-8B,0.5059491122094087
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.49716273110012815
|
5 |
-
Meta-Llama-3.1-8B,0.5081457074867289
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/fundamental_nlp_tasks/few_shot/qqp.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.7215
|
3 |
-
Meta-Llama-3-8B,0.551
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.519
|
5 |
-
Meta-Llama-3.1-8B,0.5565
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/fundamental_nlp_tasks/few_shot/rte.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.776173285198556
|
3 |
-
Meta-Llama-3-8B,0.5487364620938628
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.6462093862815884
|
5 |
-
Meta-Llama-3.1-8B,0.6137184115523465
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/fundamental_nlp_tasks/few_shot/wnli.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.8169014084507042
|
3 |
-
Meta-Llama-3-8B,0.4647887323943662
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.5915492957746479
|
5 |
-
Meta-Llama-3.1-8B,0.5211267605633803
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/fundamental_nlp_tasks/zero_shot/c3.csv
CHANGED
@@ -1,10 +1,5 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-8B-Instruct,0.7984293193717278
|
4 |
-
Qwen2-72B-Instruct,0.9599850411368736
|
5 |
-
Meta-Llama-3-8B-Instruct,0.8515332834704562
|
6 |
-
SeaLLMs-v3-7B-Chat,0.912490650710546
|
7 |
-
gemma-2-9b-it,0.9210919970082274
|
8 |
Meta-Llama-3-70B-Instruct,0.9521316379955124
|
9 |
-
|
10 |
-
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.8672400897531788
|
|
|
|
|
|
|
|
|
|
|
3 |
Meta-Llama-3-70B-Instruct,0.9521316379955124
|
4 |
+
sg_llama3_70b_inst,0.9289454001495886
|
5 |
+
GPT4o_0513,0.9648466716529543
|
results/fundamental_nlp_tasks/zero_shot/cola.csv
CHANGED
@@ -1,10 +1,5 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-8B-Instruct,0.7046979865771812
|
4 |
-
Qwen2-72B-Instruct,0.8360498561840843
|
5 |
-
Meta-Llama-3-8B-Instruct,0.6481303930968361
|
6 |
-
SeaLLMs-v3-7B-Chat,0.7890699904122723
|
7 |
-
gemma-2-9b-it,0.7967401725790988
|
8 |
Meta-Llama-3-70B-Instruct,0.835091083413231
|
9 |
-
|
10 |
-
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.6673058485139022
|
|
|
|
|
|
|
|
|
|
|
3 |
Meta-Llama-3-70B-Instruct,0.835091083413231
|
4 |
+
sg_llama3_70b_inst,0.8696069031639502
|
5 |
+
GPT4o_0513,0.8398849472674976
|
results/fundamental_nlp_tasks/zero_shot/mnli.csv
CHANGED
@@ -1,10 +1,5 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-8B-Instruct,0.4603756298671553
|
4 |
-
Qwen2-72B-Instruct,0.7979844251030692
|
5 |
-
Meta-Llama-3-8B-Instruct,0.5296991907161399
|
6 |
-
SeaLLMs-v3-7B-Chat,0.638
|
7 |
-
gemma-2-9b-it,0.707
|
8 |
Meta-Llama-3-70B-Instruct,0.6709421285692472
|
9 |
-
|
10 |
-
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.4825
|
|
|
|
|
|
|
|
|
|
|
3 |
Meta-Llama-3-70B-Instruct,0.6709421285692472
|
4 |
+
sg_llama3_70b_inst,0.7685
|
5 |
+
GPT4o_0513,0.8335
|
results/fundamental_nlp_tasks/zero_shot/mrpc.csv
CHANGED
@@ -1,10 +1,4 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Qwen2-7B-Instruct,0.7745098039215687
|
3 |
Meta-Llama-3.1-8B-Instruct,0.6740196078431373
|
4 |
-
Qwen2-72B-Instruct,0.7941176470588235
|
5 |
-
Meta-Llama-3-8B-Instruct,0.6764705882352942
|
6 |
-
SeaLLMs-v3-7B-Chat,0.7475490196078431
|
7 |
-
gemma-2-9b-it,0.7450980392156863
|
8 |
Meta-Llama-3-70B-Instruct,0.7598039215686274
|
9 |
-
|
10 |
-
llama3-8b-cpt-sea-lionv2-instruct,0.49264705882352944
|
|
|
1 |
Model,Accuracy
|
|
|
2 |
Meta-Llama-3.1-8B-Instruct,0.6740196078431373
|
|
|
|
|
|
|
|
|
3 |
Meta-Llama-3-70B-Instruct,0.7598039215686274
|
4 |
+
sg_llama3_70b_inst,0.7892156862745098
|
|
results/fundamental_nlp_tasks/zero_shot/ocnli.csv
CHANGED
@@ -1,10 +1,5 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-8B-Instruct,0.42135593220338985
|
4 |
-
Qwen2-72B-Instruct,0.7874576271186441
|
5 |
-
Meta-Llama-3-8B-Instruct,0.4322033898305085
|
6 |
-
SeaLLMs-v3-7B-Chat,0.5613559322033899
|
7 |
-
gemma-2-9b-it,0.6183050847457627
|
8 |
Meta-Llama-3-70B-Instruct,0.5928813559322034
|
9 |
-
|
10 |
-
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.40983050847457625
|
|
|
|
|
|
|
|
|
|
|
3 |
Meta-Llama-3-70B-Instruct,0.5928813559322034
|
4 |
+
sg_llama3_70b_inst,0.6420338983050847
|
5 |
+
GPT4o_0513,0.7308474576271187
|
results/fundamental_nlp_tasks/zero_shot/qnli.csv
CHANGED
@@ -1,10 +1,4 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-8B-Instruct,0.6027823540179389
|
4 |
-
Qwen2-72B-Instruct,0.8894380377082189
|
5 |
-
Meta-Llama-3-8B-Instruct,0.5689181768259198
|
6 |
-
SeaLLMs-v3-7B-Chat,0.7181036060772469
|
7 |
-
gemma-2-9b-it,0.90481420464946
|
8 |
Meta-Llama-3-70B-Instruct,0.876807614863628
|
9 |
-
|
10 |
-
llama3-8b-cpt-sea-lionv2-instruct,0.5652571846970529
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.5777045579352005
|
|
|
|
|
|
|
|
|
|
|
3 |
Meta-Llama-3-70B-Instruct,0.876807614863628
|
4 |
+
sg_llama3_70b_inst,0.9004210140948197
|
|
results/fundamental_nlp_tasks/zero_shot/qqp.csv
CHANGED
@@ -1,10 +1,5 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-8B-Instruct,0.5058125154588177
|
4 |
-
Qwen2-72B-Instruct,0.7992332426416028
|
5 |
-
Meta-Llama-3-8B-Instruct,0.5512490724709375
|
6 |
-
SeaLLMs-v3-7B-Chat,0.757
|
7 |
-
gemma-2-9b-it,0.761
|
8 |
Meta-Llama-3-70B-Instruct,0.7876082117239673
|
9 |
-
|
10 |
-
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.5645
|
|
|
|
|
|
|
|
|
|
|
3 |
Meta-Llama-3-70B-Instruct,0.7876082117239673
|
4 |
+
sg_llama3_70b_inst,0.804
|
5 |
+
GPT4o_0513,0.8085
|
results/fundamental_nlp_tasks/zero_shot/rte.csv
CHANGED
@@ -1,10 +1,4 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-8B-Instruct,0.6895306859205776
|
4 |
-
Qwen2-72B-Instruct,0.8592057761732852
|
5 |
-
Meta-Llama-3-8B-Instruct,0.6028880866425993
|
6 |
-
SeaLLMs-v3-7B-Chat,0.7870036101083032
|
7 |
-
gemma-2-9b-it,0.7472924187725631
|
8 |
Meta-Llama-3-70B-Instruct,0.8086642599277978
|
9 |
-
|
10 |
-
llama3-8b-cpt-sea-lionv2-instruct,0.6209386281588448
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.6750902527075813
|
|
|
|
|
|
|
|
|
|
|
3 |
Meta-Llama-3-70B-Instruct,0.8086642599277978
|
4 |
+
sg_llama3_70b_inst,0.8916967509025271
|
|
results/fundamental_nlp_tasks/zero_shot/wnli.csv
CHANGED
@@ -1,10 +1,4 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-8B-Instruct,0.4507042253521127
|
4 |
-
Qwen2-72B-Instruct,0.9014084507042254
|
5 |
-
Meta-Llama-3-8B-Instruct,0.4507042253521127
|
6 |
-
SeaLLMs-v3-7B-Chat,0.6619718309859155
|
7 |
-
gemma-2-9b-it,0.7464788732394366
|
8 |
Meta-Llama-3-70B-Instruct,0.7887323943661971
|
9 |
-
|
10 |
-
llama3-8b-cpt-sea-lionv2-instruct,0.4788732394366197
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.49295774647887325
|
|
|
|
|
|
|
|
|
|
|
3 |
Meta-Llama-3-70B-Instruct,0.7887323943661971
|
4 |
+
sg_llama3_70b_inst,0.8309859154929577
|
|
results/general_reasoning/few_shot/c_eval.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.6183063511830635
|
3 |
-
Meta-Llama-3-8B,0.43773349937733497
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.42092154420921546
|
5 |
-
Meta-Llama-3.1-8B,0.44458281444582815
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/general_reasoning/few_shot/cmmlu.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.652650664824728
|
3 |
-
Meta-Llama-3-8B,0.4308409601105163
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.4389570022448627
|
5 |
-
Meta-Llama-3.1-8B,0.4556207908824037
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/general_reasoning/few_shot/indommlu.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.6355564456906335
|
3 |
-
Meta-Llama-3-8B,0.4500300420588824
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.5077775552440082
|
5 |
-
Meta-Llama-3.1-8B,0.4644502303224514
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/general_reasoning/few_shot/mmlu.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.7509474436896675
|
3 |
-
Meta-Llama-3-8B,0.5651054701465856
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.5598140865212728
|
5 |
-
Meta-Llama-3.1-8B,0.5749731855559528
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/general_reasoning/few_shot/zbench.csv
CHANGED
@@ -1,5 +1 @@
|
|
1 |
Model,Accuracy
|
2 |
-
Meta-Llama-3-70B,0.5151515151515151
|
3 |
-
Meta-Llama-3-8B,0.2727272727272727
|
4 |
-
llama3-8b-cpt-sea-lionv2-base,0.3333333333333333
|
5 |
-
Meta-Llama-3.1-8B,0.3939393939393939
|
|
|
1 |
Model,Accuracy
|
|
|
|
|
|
|
|
results/general_reasoning/zero_shot/c_eval.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-
|
4 |
-
|
5 |
-
Meta-Llama-3-
|
6 |
-
|
7 |
-
|
8 |
-
Meta-Llama-3-70B-Instruct,0.6046077210460772
|
9 |
-
gemma-2-2b-it,0.4153175591531756
|
10 |
-
llama3-8b-cpt-sea-lionv2-instruct,0.398505603985056
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.5149439601494396
|
3 |
+
Meta-Llama-3.1-70B-Instruct,0.6612702366127023
|
4 |
+
gemma-2-9b-it,0.5523038605230386
|
5 |
+
Meta-Llama-3-70B-Instruct,0.6220423412204235
|
6 |
+
sg_llama3_70b_inst,0.5722291407222914
|
7 |
+
GPT4o_0513,0.7073474470734745
|
|
|
|
|
|
results/general_reasoning/zero_shot/cmmlu.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-
|
4 |
-
|
5 |
-
Meta-Llama-3-
|
6 |
-
|
7 |
-
|
8 |
-
Meta-Llama-3-70B-Instruct,0.6195821101709549
|
9 |
-
gemma-2-2b-it,0.4336902089449145
|
10 |
-
llama3-8b-cpt-sea-lionv2-instruct,0.4105508547746503
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.5246934898981178
|
3 |
+
Meta-Llama-3.1-70B-Instruct,0.6814885166637886
|
4 |
+
gemma-2-9b-it,0.5700224486271801
|
5 |
+
Meta-Llama-3-70B-Instruct,0.6494560524952513
|
6 |
+
sg_llama3_70b_inst,0.6044724572612675
|
7 |
+
GPT4o_0513,0.7414954239336902
|
|
|
|
|
|
results/general_reasoning/zero_shot/indommlu.csv
CHANGED
@@ -1,10 +1,5 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-8B-Instruct,0.4701916015755391
|
4 |
-
Qwen2-72B-Instruct,0.6356232058214835
|
5 |
-
Meta-Llama-3-8B-Instruct,0.5115161225716003
|
6 |
-
SeaLLMs-v3-7B-Chat,0.42826623940182923
|
7 |
-
gemma-2-9b-it,0.5599839775685961
|
8 |
Meta-Llama-3-70B-Instruct,0.6323519594098405
|
9 |
-
|
10 |
-
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.5252687095266707
|
|
|
|
|
|
|
|
|
|
|
3 |
Meta-Llama-3-70B-Instruct,0.6323519594098405
|
4 |
+
sg_llama3_70b_inst,0.6394285332799252
|
5 |
+
GPT4o_0513,0.7584618465852193
|
results/general_reasoning/zero_shot/mmlu.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-
|
4 |
-
|
5 |
-
Meta-Llama-3-
|
6 |
-
|
7 |
-
|
8 |
-
Meta-Llama-3-70B-Instruct,0.7607436539149088
|
9 |
-
gemma-2-2b-it,0.5676081515909903
|
10 |
-
llama3-8b-cpt-sea-lionv2-instruct,0.5619592420450482
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.6037182695745441
|
3 |
+
Meta-Llama-3.1-70B-Instruct,0.8058634250983197
|
4 |
+
gemma-2-9b-it,0.7100464783696818
|
5 |
+
Meta-Llama-3-70B-Instruct,0.7649624597783339
|
6 |
+
sg_llama3_70b_inst,0.7407937075437969
|
7 |
+
GPT4o_0513,0.8308187343582409
|
|
|
|
|
|
results/general_reasoning/zero_shot/zbench.csv
CHANGED
@@ -1,10 +1,6 @@
|
|
1 |
Model,Accuracy
|
2 |
-
|
3 |
-
Meta-Llama-3.1-
|
4 |
-
Qwen2-72B-Instruct,0.5757575757575758
|
5 |
-
Meta-Llama-3-8B-Instruct,0.30303030303030304
|
6 |
-
SeaLLMs-v3-7B-Chat,0.5151515151515151
|
7 |
gemma-2-9b-it,0.48484848484848486
|
8 |
-
Meta-Llama-3-70B-Instruct,0.
|
9 |
-
|
10 |
-
llama3-8b-cpt-sea-lionv2-instruct,0.09090909090909091
|
|
|
1 |
Model,Accuracy
|
2 |
+
Meta-Llama-3.1-8B-Instruct,0.42424242424242425
|
3 |
+
Meta-Llama-3.1-70B-Instruct,0.48484848484848486
|
|
|
|
|
|
|
4 |
gemma-2-9b-it,0.48484848484848486
|
5 |
+
Meta-Llama-3-70B-Instruct,0.5151515151515151
|
6 |
+
sg_llama3_70b_inst,0.42424242424242425
|
|