abrek commited on
Commit
ddd4644
·
verified ·
1 Parent(s): 6aca973

Upload 18 files

Browse files

Add new models to leaderboard for zero-shot experiments

results/zero-shot/CerebrumTech__cere-llama-3-8b-tr.json ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "dtype": "auto",
4
+ "parallelize": "True",
5
+ "device_map": "balanced",
6
+ "model": "CerebrumTech/cere-llama-3-8b-tr",
7
+ "api": "hf",
8
+ "architecture": "LlamaForCausalLM",
9
+ "type": "instruction-tuned",
10
+ "num_parameters": "8b"
11
+ },
12
+ "results": [
13
+ {
14
+ "name": "xlsum_tr",
15
+ "task": "summarization",
16
+ "rouge1": 0.0148042679964553,
17
+ "rouge2": 0.006337012269480576,
18
+ "rougeL": 0.011798434065329946
19
+ },
20
+ {
21
+ "name": "wmt-tr-en-prompt",
22
+ "task": "machine_translation",
23
+ "wer": 0.9177312951556903,
24
+ "bleu": 0.0010336244771491927
25
+ },
26
+ {
27
+ "name": "wiki_lingua_tr",
28
+ "task": "summarization",
29
+ "rouge1": 0.07039446024412022,
30
+ "rouge2": 0.02125810875804613,
31
+ "rougeL": 0.05181069185122056
32
+ },
33
+ {
34
+ "name": "tr-wikihow-summ",
35
+ "task": "summarization",
36
+ "rouge1": 0.17657152314685107,
37
+ "rouge2": 0.05191014365298107,
38
+ "rougeL": 0.1243539526593285
39
+ },
40
+ {
41
+ "name": "mlsum_tr",
42
+ "task": "summarization",
43
+ "rouge1": 0.09056852256508315,
44
+ "rouge2": 0.05971047138214301,
45
+ "rougeL": 0.07758457056947823
46
+ },
47
+ {
48
+ "name": "gecturk_generation",
49
+ "task": "grammatical_error_correction",
50
+ "exact_match": 0.013385333911117531
51
+ },
52
+ {
53
+ "name": "xquad_tr",
54
+ "task": "extractive_question_answering",
55
+ "exact_match": 0.21176470588235294,
56
+ "f1": 0.4427003624698854
57
+ },
58
+ {
59
+ "name": "xcopa_tr",
60
+ "task": "multiple_choice",
61
+ "acc": 0.602,
62
+ "acc_norm": 0.602
63
+ },
64
+ {
65
+ "name": "turkish_plu",
66
+ "task": "multiple_choice",
67
+ "acc": 0.48672,
68
+ "acc_norm": 0.53664
69
+ },
70
+ {
71
+ "name": "turkish_plu_goal_inference",
72
+ "task": "multiple_choice",
73
+ "acc": 0.41816009557945044,
74
+ "acc_norm": 0.42771804062126645
75
+ },
76
+ {
77
+ "name": "turkish_plu_next_event_prediction",
78
+ "task": "multiple_choice",
79
+ "acc": 0.4687022900763359,
80
+ "acc_norm": 0.5572519083969466
81
+ },
82
+ {
83
+ "name": "turkish_plu_step_inference",
84
+ "task": "multiple_choice",
85
+ "acc": 0.35947712418300654,
86
+ "acc_norm": 0.5065359477124183
87
+ },
88
+ {
89
+ "name": "turkish_plu_step_ordering",
90
+ "task": "multiple_choice",
91
+ "acc": 0.6307541625857003,
92
+ "acc_norm": 0.6307541625857003
93
+ },
94
+ {
95
+ "name": "check_worthiness",
96
+ "task": "multiple_choice",
97
+ "acc": 0.3756855575868373,
98
+ "acc_norm": 0.37705667276051186
99
+ },
100
+ {
101
+ "name": "relevance_judgment",
102
+ "task": "multiple_choice",
103
+ "acc": 0.4725776965265082,
104
+ "acc_norm": 0.5425045703839122
105
+ },
106
+ {
107
+ "name": "tquad",
108
+ "task": "extractive_question_answering",
109
+ "exact_match": 0.492152466367713,
110
+ "f1": 0.7031663569609045
111
+ },
112
+ {
113
+ "name": "sts_tr",
114
+ "task": "text_classification",
115
+ "acc": 0.22117476432197244,
116
+ "acc_norm": 0.20087019579405366
117
+ },
118
+ {
119
+ "name": "offenseval_tr",
120
+ "task": "text_classification",
121
+ "acc": 0.3401360544217687,
122
+ "acc_norm": 0.6964285714285714
123
+ },
124
+ {
125
+ "name": "mnli_tr",
126
+ "task": "natural_language_inference",
127
+ "acc": 0.3208,
128
+ "acc_norm": 0.3151
129
+ },
130
+ {
131
+ "name": "snli_tr",
132
+ "task": "natural_language_inference",
133
+ "acc": 0.3238,
134
+ "acc_norm": 0.3203
135
+ },
136
+ {
137
+ "name": "xnli_tr",
138
+ "task": "natural_language_inference",
139
+ "acc": 0.3339321357285429,
140
+ "acc_norm": 0.32934131736526945
141
+ },
142
+ {
143
+ "name": "news_cat",
144
+ "task": "text_classification",
145
+ "acc": 0.684,
146
+ "acc_norm": 0.656
147
+ },
148
+ {
149
+ "name": "mkqa_tr",
150
+ "task": "extractive_question_answering",
151
+ "exact_match": 0.02219591595146493,
152
+ "f1": 0.08533792503078427
153
+ },
154
+ {
155
+ "name": "ironytr",
156
+ "task": "text_classification",
157
+ "acc": 0.5016666666666667,
158
+ "acc_norm": 0.5483333333333333
159
+ },
160
+ {
161
+ "name": "exams_tr",
162
+ "task": "multiple_choice",
163
+ "acc": 0.27989821882951654,
164
+ "acc_norm": 0.3231552162849873
165
+ },
166
+ {
167
+ "name": "belebele_tr",
168
+ "task": "multiple_choice",
169
+ "acc": 0.5144444444444445,
170
+ "acc_norm": 0.5144444444444445
171
+ }
172
+ ]
173
+ }
results/zero-shot/Llama-3.3-70B-Instruct.json ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "pretrained": "meta-llama/Llama-3.3-70B-Instruct",
4
+ "dtype": "bfloat16",
5
+ "parallelize": "True",
6
+ "device_map": "balanced",
7
+ "model": "meta-llama/Llama-3.3-70B-Instruct",
8
+ "api": "hf",
9
+ "max_length": "131072",
10
+ "type": "instruction-tuned",
11
+ "num_parameters": "70b",
12
+ "architecture": "LlamaForCausalLM"
13
+ },
14
+ "results": [
15
+ {
16
+ "name": "xquad_tr",
17
+ "task": "extractive_question_answering",
18
+ "exact_match": 0.1453781512605042,
19
+ "f1": 0.4189643669994899
20
+ },
21
+ {
22
+ "name": "xcopa_tr",
23
+ "task": "multiple_choice",
24
+ "acc": 0.65,
25
+ "acc_norm": 0.65
26
+ },
27
+ {
28
+ "name": "turkish_plu",
29
+ "task": "multiple_choice",
30
+ "acc": 0.5424,
31
+ "acc_norm": 0.58528
32
+ },
33
+ {
34
+ "name": "turkish_plu_goal_inference",
35
+ "task": "multiple_choice",
36
+ "acc": 0.4910394265232975,
37
+ "acc_norm": 0.5197132616487455
38
+ },
39
+ {
40
+ "name": "turkish_plu_next_event_prediction",
41
+ "task": "multiple_choice",
42
+ "acc": 0.5801526717557252,
43
+ "acc_norm": 0.6305343511450382
44
+ },
45
+ {
46
+ "name": "turkish_plu_step_inference",
47
+ "task": "multiple_choice",
48
+ "acc": 0.39052287581699346,
49
+ "acc_norm": 0.5163398692810458
50
+ },
51
+ {
52
+ "name": "turkish_plu_step_ordering",
53
+ "task": "multiple_choice",
54
+ "acc": 0.6513222331047992,
55
+ "acc_norm": 0.6513222331047992
56
+ },
57
+ {
58
+ "name": "check_worthiness",
59
+ "task": "multiple_choice",
60
+ "acc": 0.45521023765996343,
61
+ "acc_norm": 0.5018281535648994
62
+ },
63
+ {
64
+ "name": "relevance_judgment",
65
+ "task": "multiple_choice",
66
+ "acc": 0.42230347349177333,
67
+ "acc_norm": 0.42230347349177333
68
+ },
69
+ {
70
+ "name": "tquad",
71
+ "task": "extractive_question_answering",
72
+ "exact_match": 0.17376681614349773,
73
+ "f1": 0.5089287967649171
74
+ },
75
+ {
76
+ "name": "sts_tr",
77
+ "task": "text_classification",
78
+ "acc": 0.12907904278462654,
79
+ "acc_norm": 0.1406816533720087
80
+ },
81
+ {
82
+ "name": "offenseval_tr",
83
+ "task": "text_classification",
84
+ "acc": 0.8313492063492064,
85
+ "acc_norm": 0.8214285714285714
86
+ },
87
+ {
88
+ "name": "mnli_tr",
89
+ "task": "natural_language_inference",
90
+ "acc": 0.348,
91
+ "acc_norm": 0.3479
92
+ },
93
+ {
94
+ "name": "snli_tr",
95
+ "task": "natural_language_inference",
96
+ "acc": 0.3381,
97
+ "acc_norm": 0.337
98
+ },
99
+ {
100
+ "name": "xnli_tr",
101
+ "task": "natural_language_inference",
102
+ "acc": 0.46947791164658637,
103
+ "acc_norm": 0.46947791164658637
104
+ },
105
+ {
106
+ "name": "news_cat",
107
+ "task": "text_classification",
108
+ "acc": 0.78,
109
+ "acc_norm": 0.56
110
+ },
111
+ {
112
+ "name": "mkqa_tr",
113
+ "task": "extractive_question_answering",
114
+ "exact_match": 0.1633619414027819,
115
+ "f1": 0.24259971658697452
116
+ },
117
+ {
118
+ "name": "ironytr",
119
+ "task": "text_classification",
120
+ "acc": 0.5816666666666667,
121
+ "acc_norm": 0.6366666666666667
122
+ },
123
+ {
124
+ "name": "exams_tr",
125
+ "task": "multiple_choice",
126
+ "acc": 0.39185750636132316,
127
+ "acc_norm": 0.4071246819338422
128
+ },
129
+ {
130
+ "name": "belebele_tr",
131
+ "task": "multiple_choice",
132
+ "acc": 0.8677777777777778,
133
+ "acc_norm": 0.8677777777777778
134
+ },
135
+ {
136
+ "name": "wmt-tr-en-prompt",
137
+ "task": "machine_translation",
138
+ "wer": 1.0598921957580294,
139
+ "bleu": 0.1362810237287205
140
+ },
141
+ {
142
+ "name": "wiki_lingua_tr",
143
+ "task": "summarization",
144
+ "rouge1": 0.2814281701473272,
145
+ "rouge2": 0.11996704827558094,
146
+ "rougeL": 0.22703795465582283
147
+ },
148
+ {
149
+ "name": "xlsum_tr",
150
+ "task": "summarization",
151
+ "rouge1": 0.32304395528585916,
152
+ "rouge2": 0.16251841619434318,
153
+ "rougeL": 0.25187368390587817
154
+ },
155
+ {
156
+ "name": "tr-wikihow-summ",
157
+ "task": "summarization",
158
+ "rouge1": 0.23052711078813495,
159
+ "rouge2": 0.08492969364417007,
160
+ "rougeL": 0.16696416806934444
161
+ },
162
+ {
163
+ "name": "gecturk_generation",
164
+ "task": "grammatical_error_correction",
165
+ "exact_match": 0.0052482064615532766
166
+ }
167
+ ]
168
+ }
results/zero-shot/Ministral-8B-Instruct.json ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "model": "mistralai/Ministral-8B-Instruct-2410",
4
+ "api": "hf",
5
+ "architecture": "MistralForCausalLM",
6
+ "dtype": "bfloat16",
7
+ "max_length": "32768",
8
+ "type": "instruction-tuned",
9
+ "num_parameters": "8b"
10
+ },
11
+ "results": [
12
+ {
13
+ "name": "xquad_tr",
14
+ "task": "extractive_question_answering",
15
+ "exact_match": 0.2285714285714286,
16
+ "f1": 0.4361183332526061
17
+ },
18
+ {
19
+ "name": "xcopa_tr",
20
+ "task": "multiple_choice",
21
+ "acc": 0.574,
22
+ "acc_norm": 0.574
23
+ },
24
+ {
25
+ "name": "turkish_plu",
26
+ "task": "multiple_choice",
27
+ "acc": 0.45344,
28
+ "acc_norm": 0.50752
29
+ },
30
+ {
31
+ "name": "turkish_plu_goal_inference",
32
+ "task": "multiple_choice",
33
+ "acc": 0.3751493428912784,
34
+ "acc_norm": 0.4050179211469534
35
+ },
36
+ {
37
+ "name": "turkish_plu_next_event_prediction",
38
+ "task": "multiple_choice",
39
+ "acc": 0.44122137404580153,
40
+ "acc_norm": 0.5267175572519084
41
+ },
42
+ {
43
+ "name": "turkish_plu_step_inference",
44
+ "task": "multiple_choice",
45
+ "acc": 0.31862745098039214,
46
+ "acc_norm": 0.4624183006535948
47
+ },
48
+ {
49
+ "name": "turkish_plu_step_ordering",
50
+ "task": "multiple_choice",
51
+ "acc": 0.6062683643486778,
52
+ "acc_norm": 0.6062683643486778
53
+ },
54
+ {
55
+ "name": "check_worthiness",
56
+ "task": "multiple_choice",
57
+ "acc": 0.3756855575868373,
58
+ "acc_norm": 0.39716636197440586
59
+ },
60
+ {
61
+ "name": "relevance_judgment",
62
+ "task": "multiple_choice",
63
+ "acc": 0.4218464351005484,
64
+ "acc_norm": 0.42321755027422303
65
+ },
66
+ {
67
+ "name": "tquad",
68
+ "task": "extractive_question_answering",
69
+ "exact_match": 0.17600896860986548,
70
+ "f1": 0.476865826654479
71
+ },
72
+ {
73
+ "name": "sts_tr",
74
+ "task": "text_classification",
75
+ "acc": 0.2139231327048586,
76
+ "acc_norm": 0.2189992748368383
77
+ },
78
+ {
79
+ "name": "offenseval_tr",
80
+ "task": "text_classification",
81
+ "acc": 0.20294784580498867,
82
+ "acc_norm": 0.2032312925170068
83
+ },
84
+ {
85
+ "name": "mnli_tr",
86
+ "task": "natural_language_inference",
87
+ "acc": 0.3215,
88
+ "acc_norm": 0.3297
89
+ },
90
+ {
91
+ "name": "snli_tr",
92
+ "task": "natural_language_inference",
93
+ "acc": 0.3233,
94
+ "acc_norm": 0.247
95
+ },
96
+ {
97
+ "name": "xnli_tr",
98
+ "task": "natural_language_inference",
99
+ "acc": 0.43815261044176707,
100
+ "acc_norm": 0.43815261044176707
101
+ },
102
+ {
103
+ "name": "news_cat",
104
+ "task": "text_classification",
105
+ "acc": 0.604,
106
+ "acc_norm": 0.54
107
+ },
108
+ {
109
+ "name": "mkqa_tr",
110
+ "task": "extractive_question_answering",
111
+ "exact_match": 0.021899970405445397,
112
+ "f1": 0.0845409956587106
113
+ },
114
+ {
115
+ "name": "ironytr",
116
+ "task": "text_classification",
117
+ "acc": 0.5,
118
+ "acc_norm": 0.5
119
+ },
120
+ {
121
+ "name": "exams_tr",
122
+ "task": "multiple_choice",
123
+ "acc": 0.31297709923664124,
124
+ "acc_norm": 0.3104325699745547
125
+ },
126
+ {
127
+ "name": "belebele_tr",
128
+ "task": "multiple_choice",
129
+ "acc": 0.6088888888888889,
130
+ "acc_norm": 0.6088888888888889
131
+ },
132
+ {
133
+ "name": "xlsum_tr",
134
+ "task": "summarization",
135
+ "rouge1": 0.2855519135003678,
136
+ "rouge2": 0.13803314536720374,
137
+ "rougeL": 0.23701549520837864
138
+ },
139
+ {
140
+ "name": "wmt-tr-en-prompt",
141
+ "task": "machine_translation",
142
+ "wer": 0.8606226070325712,
143
+ "bleu": 0.1123514947775922
144
+ },
145
+ {
146
+ "name": "wiki_lingua_tr",
147
+ "task": "summarization",
148
+ "rouge1": 0.2056155296354833,
149
+ "rouge2": 0.0689798652163523,
150
+ "rougeL": 0.15747045045694055
151
+ },
152
+ {
153
+ "name": "tr-wikihow-summ",
154
+ "task": "summarization",
155
+ "rouge1": 0.21626530316392872,
156
+ "rouge2": 0.06686924197911567,
157
+ "rougeL": 0.1590961585989622
158
+ },
159
+ {
160
+ "name": "mlsum_tr",
161
+ "task": "summarization",
162
+ "rouge1": 0.37278237032977407,
163
+ "rouge2": 0.2397900252623464,
164
+ "rougeL": 0.31375168224626315
165
+ },
166
+ {
167
+ "name": "gecturk_generation",
168
+ "task": "grammatical_error_correction",
169
+ "exact_match": 0.009003803745967548
170
+ }
171
+ ]
172
+ }
results/zero-shot/Mistral-7B-Instruct-v0.3.json ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "model": "mistralai/Mistral-7B-Instruct-v0.3",
4
+ "api": "hf",
5
+ "architecture": "MistralForCausalLM",
6
+ "dtype": "bfloat16",
7
+ "max_length": "32768",
8
+ "type": "instruction-tuned",
9
+ "num_parameters": "7b"
10
+ },
11
+ "results": [
12
+ {
13
+ "name": "xquad_tr",
14
+ "task": "extractive_question_answering",
15
+ "exact_match": 0.1142857142857143,
16
+ "f1": 0.3231327219793003
17
+ },
18
+ {
19
+ "name": "xcopa_tr",
20
+ "task": "multiple_choice",
21
+ "acc": 0.572,
22
+ "acc_norm": 0.572
23
+ },
24
+ {
25
+ "name": "turkish_plu",
26
+ "task": "multiple_choice",
27
+ "acc": 0.42976,
28
+ "acc_norm": 0.48
29
+ },
30
+ {
31
+ "name": "turkish_plu_goal_inference",
32
+ "task": "multiple_choice",
33
+ "acc": 0.4121863799283154,
34
+ "acc_norm": 0.43966547192353644
35
+ },
36
+ {
37
+ "name": "turkish_plu_next_event_prediction",
38
+ "task": "multiple_choice",
39
+ "acc": 0.40916030534351144,
40
+ "acc_norm": 0.47480916030534354
41
+ },
42
+ {
43
+ "name": "turkish_plu_step_inference",
44
+ "task": "multiple_choice",
45
+ "acc": 0.2696078431372549,
46
+ "acc_norm": 0.41830065359477125
47
+ },
48
+ {
49
+ "name": "turkish_plu_step_ordering",
50
+ "task": "multiple_choice",
51
+ "acc": 0.5533790401567091,
52
+ "acc_norm": 0.5533790401567091
53
+ },
54
+ {
55
+ "name": "check_worthiness",
56
+ "task": "multiple_choice",
57
+ "acc": 0.37751371115173676,
58
+ "acc_norm": 0.3788848263254113
59
+ },
60
+ {
61
+ "name": "relevance_judgment",
62
+ "task": "multiple_choice",
63
+ "acc": 0.4547531992687386,
64
+ "acc_norm": 0.5342778793418648
65
+ },
66
+ {
67
+ "name": "tquad",
68
+ "task": "extractive_question_answering",
69
+ "exact_match": 0.0952914798206278,
70
+ "f1": 0.4079551297911521
71
+ },
72
+ {
73
+ "name": "sts_tr",
74
+ "task": "text_classification",
75
+ "acc": 0.12907904278462654,
76
+ "acc_norm": 0.17041334300217548
77
+ },
78
+ {
79
+ "name": "offenseval_tr",
80
+ "task": "text_classification",
81
+ "acc": 0.45209750566893425,
82
+ "acc_norm": 0.7831632653061225
83
+ },
84
+ {
85
+ "name": "mnli_tr",
86
+ "task": "natural_language_inference",
87
+ "acc": 0.3,
88
+ "acc_norm": 0.3128
89
+ },
90
+ {
91
+ "name": "snli_tr",
92
+ "task": "natural_language_inference",
93
+ "acc": 0.3227,
94
+ "acc_norm": 0.323
95
+ },
96
+ {
97
+ "name": "xnli_tr",
98
+ "task": "natural_language_inference",
99
+ "acc": 0.42650602409638555,
100
+ "acc_norm": 0.42650602409638555
101
+ },
102
+ {
103
+ "name": "news_cat",
104
+ "task": "text_classification",
105
+ "acc": 0.612,
106
+ "acc_norm": 0.452
107
+ },
108
+ {
109
+ "name": "mkqa_tr",
110
+ "task": "extractive_question_answering",
111
+ "exact_match": 0.050162770050310744,
112
+ "f1": 0.1016139540064362
113
+ },
114
+ {
115
+ "name": "ironytr",
116
+ "task": "text_classification",
117
+ "acc": 0.5066666666666667,
118
+ "acc_norm": 0.5983333333333334
119
+ },
120
+ {
121
+ "name": "exams_tr",
122
+ "task": "multiple_choice",
123
+ "acc": 0.2544529262086514,
124
+ "acc_norm": 0.3231552162849873
125
+ },
126
+ {
127
+ "name": "belebele_tr",
128
+ "task": "multiple_choice",
129
+ "acc": 0.46111111111111114,
130
+ "acc_norm": 0.46111111111111114
131
+ },
132
+ {
133
+ "name": "xlsum_tr",
134
+ "task": "summarization",
135
+ "rouge1": 0.25708723026741176,
136
+ "rouge2": 0.10899686780471457,
137
+ "rougeL": 0.2008610051989006
138
+ },
139
+ {
140
+ "name": "wmt-tr-en-prompt",
141
+ "task": "machine_translation",
142
+ "wer": 0.9249121578209252,
143
+ "bleu": 0.05933138324020342
144
+ },
145
+ {
146
+ "name": "wiki_lingua_tr",
147
+ "task": "summarization",
148
+ "rouge1": 0.19938444086541007,
149
+ "rouge2": 0.06226393773142071,
150
+ "rougeL": 0.1489360974379546
151
+ },
152
+ {
153
+ "name": "tr-wikihow-summ",
154
+ "task": "summarization",
155
+ "rouge1": 0.1147060386471999,
156
+ "rouge2": 0.036767323776569986,
157
+ "rougeL": 0.08789125135731646
158
+ },
159
+ {
160
+ "name": "mlsum_tr",
161
+ "task": "summarization",
162
+ "rouge1": 0.370968688296404,
163
+ "rouge2": 0.24117168042677828,
164
+ "rougeL": 0.3151307659048477
165
+ },
166
+ {
167
+ "name": "gecturk_generation",
168
+ "task": "grammatical_error_correction",
169
+ "exact_match": 0.009677885309836777
170
+ }
171
+ ]
172
+ }
results/zero-shot/Mistral-7B-v0.3.json ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "model": "mistralai/Mistral-7B-v0.3",
4
+ "api": "hf",
5
+ "architecture": "MistralForCausalLM",
6
+ "dtype": "bfloat16",
7
+ "max_length": "32768",
8
+ "type": "pretrained",
9
+ "num_parameters": "7b"
10
+ },
11
+ "results": [
12
+ {
13
+ "name": "xlsum_tr",
14
+ "task": "summarization",
15
+ "rouge1": 0.149790294242554,
16
+ "rouge2": 0.06308205974922562,
17
+ "rougeL": 0.1209340855634673
18
+ },
19
+ {
20
+ "name": "wmt-tr-en-prompt",
21
+ "task": "machine_translation",
22
+ "wer": 1.164558847576675,
23
+ "bleu": 0.03750412480486115
24
+ },
25
+ {
26
+ "name": "wiki_lingua_tr",
27
+ "task": "summarization",
28
+ "rouge1": 0.18942128157251567,
29
+ "rouge2": 0.05970997875583092,
30
+ "rougeL": 0.14635645315684281
31
+ },
32
+ {
33
+ "name": "tr-wikihow-summ",
34
+ "task": "summarization",
35
+ "rouge1": 0.05788766682352111,
36
+ "rouge2": 0.01861117631687374,
37
+ "rougeL": 0.044379969364608036
38
+ },
39
+ {
40
+ "name": "mlsum_tr",
41
+ "task": "summarization",
42
+ "rouge1": 0.26517424879332546,
43
+ "rouge2": 0.17304768736710063,
44
+ "rougeL": 0.230212437287503
45
+ },
46
+ {
47
+ "name": "gecturk_generation",
48
+ "task": "grammatical_error_correction",
49
+ "exact_match": 0.23511001974096007
50
+ },
51
+ {
52
+ "name": "xquad_tr",
53
+ "task": "extractive_question_answering",
54
+ "exact_match": 0.17058823529411765,
55
+ "f1": 0.3376787884560269
56
+ },
57
+ {
58
+ "name": "xcopa_tr",
59
+ "task": "multiple_choice",
60
+ "acc": 0.584,
61
+ "acc_norm": 0.584
62
+ },
63
+ {
64
+ "name": "turkish_plu",
65
+ "task": "multiple_choice",
66
+ "acc": 0.4336,
67
+ "acc_norm": 0.50048
68
+ },
69
+ {
70
+ "name": "turkish_plu_goal_inference",
71
+ "task": "multiple_choice",
72
+ "acc": 0.40979689366786143,
73
+ "acc_norm": 0.45639187574671447
74
+ },
75
+ {
76
+ "name": "turkish_plu_next_event_prediction",
77
+ "task": "multiple_choice",
78
+ "acc": 0.38625954198473283,
79
+ "acc_norm": 0.4946564885496183
80
+ },
81
+ {
82
+ "name": "turkish_plu_step_inference",
83
+ "task": "multiple_choice",
84
+ "acc": 0.26633986928104575,
85
+ "acc_norm": 0.42810457516339867
86
+ },
87
+ {
88
+ "name": "turkish_plu_step_ordering",
89
+ "task": "multiple_choice",
90
+ "acc": 0.5837414299706171,
91
+ "acc_norm": 0.5837414299706171
92
+ },
93
+ {
94
+ "name": "check_worthiness",
95
+ "task": "multiple_choice",
96
+ "acc": 0.37614259597806216,
97
+ "acc_norm": 0.4789762340036563
98
+ },
99
+ {
100
+ "name": "relevance_judgment",
101
+ "task": "multiple_choice",
102
+ "acc": 0.4218464351005484,
103
+ "acc_norm": 0.42138939670932357
104
+ },
105
+ {
106
+ "name": "tquad",
107
+ "task": "extractive_question_answering",
108
+ "exact_match": 0.218609865470852,
109
+ "f1": 0.49261818596816426
110
+ },
111
+ {
112
+ "name": "sts_tr",
113
+ "task": "text_classification",
114
+ "acc": 0.14213197969543148,
115
+ "acc_norm": 0.19796954314720813
116
+ },
117
+ {
118
+ "name": "offenseval_tr",
119
+ "task": "text_classification",
120
+ "acc": 0.20691609977324263,
121
+ "acc_norm": 0.45691609977324266
122
+ },
123
+ {
124
+ "name": "mnli_tr",
125
+ "task": "natural_language_inference",
126
+ "acc": 0.3233,
127
+ "acc_norm": 0.3227
128
+ },
129
+ {
130
+ "name": "snli_tr",
131
+ "task": "natural_language_inference",
132
+ "acc": 0.3208,
133
+ "acc_norm": 0.317
134
+ },
135
+ {
136
+ "name": "xnli_tr",
137
+ "task": "natural_language_inference",
138
+ "acc": 0.41365461847389556,
139
+ "acc_norm": 0.41365461847389556
140
+ },
141
+ {
142
+ "name": "news_cat",
143
+ "task": "text_classification",
144
+ "acc": 0.66,
145
+ "acc_norm": 0.448
146
+ },
147
+ {
148
+ "name": "mkqa_tr",
149
+ "task": "extractive_question_answering",
150
+ "exact_match": 0.11453092630955904,
151
+ "f1": 0.15435166430563946
152
+ },
153
+ {
154
+ "name": "ironytr",
155
+ "task": "text_classification",
156
+ "acc": 0.49833333333333335,
157
+ "acc_norm": 0.52
158
+ },
159
+ {
160
+ "name": "exams_tr",
161
+ "task": "multiple_choice",
162
+ "acc": 0.24173027989821882,
163
+ "acc_norm": 0.30279898218829515
164
+ },
165
+ {
166
+ "name": "belebele_tr",
167
+ "task": "multiple_choice",
168
+ "acc": 0.4111111111111111,
169
+ "acc_norm": 0.4111111111111111
170
+ }
171
+ ]
172
+ }
results/zero-shot/Mixtral-8x7B-Instruct-v0.1.json ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "dtype": "auto",
4
+ "parallelize": "True",
5
+ "device_map": "balanced",
6
+ "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
7
+ "api": "hf",
8
+ "architecture": "MixtralForCausalLM",
9
+ "dtype": "bfloat16",
10
+ "type": "instruction-tuned",
11
+ "num_parameters": "46b"
12
+ },
13
+ "results": [
14
+ {
15
+ "name": "xquad_tr",
16
+ "task": "extractive_question_answering",
17
+ "exact_match": 0.10672268907563025,
18
+ "f1": 0.31503337329539344
19
+ },
20
+ {
21
+ "name": "xcopa_tr",
22
+ "task": "multiple_choice",
23
+ "acc": 0.564,
24
+ "acc_norm": 0.564
25
+ },
26
+ {
27
+ "name": "turkish_plu",
28
+ "task": "multiple_choice",
29
+ "acc": 0.47104,
30
+ "acc_norm": 0.52544
31
+ },
32
+ {
33
+ "name": "turkish_plu_goal_inference",
34
+ "task": "multiple_choice",
35
+ "acc": 0.44563918757467147,
36
+ "acc_norm": 0.45758661887694146
37
+ },
38
+ {
39
+ "name": "turkish_plu_next_event_prediction",
40
+ "task": "multiple_choice",
41
+ "acc": 0.46106870229007635,
42
+ "acc_norm": 0.5694656488549619
43
+ },
44
+ {
45
+ "name": "turkish_plu_step_inference",
46
+ "task": "multiple_choice",
47
+ "acc": 0.31699346405228757,
48
+ "acc_norm": 0.4624183006535948
49
+ },
50
+ {
51
+ "name": "turkish_plu_step_ordering",
52
+ "task": "multiple_choice",
53
+ "acc": 0.5905974534769833,
54
+ "acc_norm": 0.5905974534769833
55
+ },
56
+ {
57
+ "name": "check_worthiness",
58
+ "task": "multiple_choice",
59
+ "acc": 0.37522851919561245,
60
+ "acc_norm": 0.3711151736745887
61
+ },
62
+ {
63
+ "name": "relevance_judgment",
64
+ "task": "multiple_choice",
65
+ "acc": 0.4287020109689214,
66
+ "acc_norm": 0.4890310786106033
67
+ },
68
+ {
69
+ "name": "tquad",
70
+ "task": "extractive_question_answering",
71
+ "exact_match": 0.09753363228699552,
72
+ "f1": 0.4107003915145391
73
+ },
74
+ {
75
+ "name": "sts_tr",
76
+ "task": "text_classification",
77
+ "acc": 0.12980420594633793,
78
+ "acc_norm": 0.18564176939811458
79
+ },
80
+ {
81
+ "name": "offenseval_tr",
82
+ "task": "text_classification",
83
+ "acc": 0.6292517006802721,
84
+ "acc_norm": 0.7859977324263039
85
+ },
86
+ {
87
+ "name": "mnli_tr",
88
+ "task": "natural_language_inference",
89
+ "acc": 0.2757,
90
+ "acc_norm": 0.3115
91
+ },
92
+ {
93
+ "name": "snli_tr",
94
+ "task": "natural_language_inference",
95
+ "acc": 0.3078,
96
+ "acc_norm": 0.3217
97
+ },
98
+ {
99
+ "name": "xnli_tr",
100
+ "task": "natural_language_inference",
101
+ "acc": 0.4506024096385542,
102
+ "acc_norm": 0.4506024096385542
103
+ },
104
+ {
105
+ "name": "news_cat",
106
+ "task": "text_classification",
107
+ "acc": 0.54,
108
+ "acc_norm": 0.34
109
+ },
110
+ {
111
+ "name": "mkqa_tr",
112
+ "task": "extractive_question_answering",
113
+ "exact_match": 0.12518496596626222,
114
+ "f1": 0.18805772867641507
115
+ },
116
+ {
117
+ "name": "ironytr",
118
+ "task": "text_classification",
119
+ "acc": 0.525,
120
+ "acc_norm": 0.6683333333333333
121
+ },
122
+ {
123
+ "name": "exams_tr",
124
+ "task": "multiple_choice",
125
+ "acc": 0.2926208651399491,
126
+ "acc_norm": 0.3460559796437659
127
+ },
128
+ {
129
+ "name": "belebele_tr",
130
+ "task": "multiple_choice",
131
+ "acc": 0.5855555555555556,
132
+ "acc_norm": 0.5855555555555556
133
+ },
134
+ {
135
+ "name": "xlsum_tr",
136
+ "task": "summarization",
137
+ "rouge1": 0.2431491335984058,
138
+ "rouge2": 0.10574384463000014,
139
+ "rougeL": 0.19188752602582665
140
+ },
141
+ {
142
+ "name": "wmt-tr-en-prompt",
143
+ "task": "machine_translation",
144
+ "wer": 0.873496978572932,
145
+ "bleu": 0.09482204368236244
146
+ },
147
+ {
148
+ "name": "wiki_lingua_tr",
149
+ "task": "summarization",
150
+ "rouge1": 0.1822644758431921,
151
+ "rouge2": 0.05997572295534047,
152
+ "rougeL": 0.14060742524010394
153
+ },
154
+ {
155
+ "name": "tr-wikihow-summ",
156
+ "task": "summarization",
157
+ "rouge1": 0.1586518753843558,
158
+ "rouge2": 0.04879306307120871,
159
+ "rougeL": 0.1191709081457354
160
+ },
161
+ {
162
+ "name": "mlsum_tr",
163
+ "task": "summarization",
164
+ "rouge1": 0.34741912725252516,
165
+ "rouge2": 0.22190156804649477,
166
+ "rougeL": 0.2915726415448087
167
+ },
168
+ {
169
+ "name": "gecturk_generation",
170
+ "task": "grammatical_error_correction",
171
+ "exact_match": 0.036255958399537776
172
+ }
173
+ ]
174
+ }
results/zero-shot/Qwen2.5-0.5B-Instruct.json ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "model": "Qwen/Qwen2.5-0.5B-Instruct",
4
+ "api": "hf",
5
+ "architecture": "Qwen2ForCausalLM",
6
+ "dtype": "bfloat16",
7
+ "max_length": "131072",
8
+ "type": "instruction-tuned",
9
+ "num_parameters": "0.5b"
10
+ },
11
+ "results": [
12
+ {
13
+ "name": "xlsum_tr",
14
+ "task": "summarization",
15
+ "rouge1": 0.23863563451736716,
16
+ "rouge2": 0.09013532339992156,
17
+ "rougeL": 0.17413374740786924
18
+ },
19
+ {
20
+ "name": "wmt-tr-en-prompt",
21
+ "task": "machine_translation",
22
+ "wer": 1.2982149372648228,
23
+ "bleu": 0.03132059223101698
24
+ },
25
+ {
26
+ "name": "wiki_lingua_tr",
27
+ "task": "summarization",
28
+ "rouge1": 0.17874342388633518,
29
+ "rouge2": 0.05016075064860983,
30
+ "rougeL": 0.13196976287126827
31
+ },
32
+ {
33
+ "name": "tr-wikihow-summ",
34
+ "task": "summarization",
35
+ "rouge1": 0.20346847563598916,
36
+ "rouge2": 0.057692528559452054,
37
+ "rougeL": 0.14650500990126503
38
+ },
39
+ {
40
+ "name": "mlsum_tr",
41
+ "task": "summarization",
42
+ "rouge1": 0.3611886967384703,
43
+ "rouge2": 0.22895911125049848,
44
+ "rougeL": 0.2970046611327582
45
+ },
46
+ {
47
+ "name": "gecturk_generation",
48
+ "task": "grammatical_error_correction",
49
+ "exact_match": 0.0005296355144686793
50
+ },
51
+ {
52
+ "name": "xquad_tr",
53
+ "task": "extractive_question_answering",
54
+ "exact_match": 0.13361344537815126,
55
+ "f1": 0.24680114628123545
56
+ },
57
+ {
58
+ "name": "xcopa_tr",
59
+ "task": "multiple_choice",
60
+ "acc": 0.536,
61
+ "acc_norm": 0.536
62
+ },
63
+ {
64
+ "name": "turkish_plu",
65
+ "task": "multiple_choice",
66
+ "acc": 0.41568,
67
+ "acc_norm": 0.45696
68
+ },
69
+ {
70
+ "name": "turkish_plu_goal_inference",
71
+ "task": "multiple_choice",
72
+ "acc": 0.36798088410991636,
73
+ "acc_norm": 0.36678614097968937
74
+ },
75
+ {
76
+ "name": "turkish_plu_next_event_prediction",
77
+ "task": "multiple_choice",
78
+ "acc": 0.35572519083969467,
79
+ "acc_norm": 0.44122137404580153
80
+ },
81
+ {
82
+ "name": "turkish_plu_step_inference",
83
+ "task": "multiple_choice",
84
+ "acc": 0.28104575163398693,
85
+ "acc_norm": 0.4019607843137255
86
+ },
87
+ {
88
+ "name": "turkish_plu_step_ordering",
89
+ "task": "multiple_choice",
90
+ "acc": 0.5739471106758081,
91
+ "acc_norm": 0.5739471106758081
92
+ },
93
+ {
94
+ "name": "check_worthiness",
95
+ "task": "multiple_choice",
96
+ "acc": 0.3916819012797075,
97
+ "acc_norm": 0.6229433272394881
98
+ },
99
+ {
100
+ "name": "relevance_judgment",
101
+ "task": "multiple_choice",
102
+ "acc": 0.4218464351005484,
103
+ "acc_norm": 0.43007312614259596
104
+ },
105
+ {
106
+ "name": "tquad",
107
+ "task": "extractive_question_answering",
108
+ "exact_match": 0.1625560538116592,
109
+ "f1": 0.3002481362714293
110
+ },
111
+ {
112
+ "name": "sts_tr",
113
+ "task": "text_classification",
114
+ "acc": 0.1305293691080493,
115
+ "acc_norm": 0.1986947063089195
116
+ },
117
+ {
118
+ "name": "offenseval_tr",
119
+ "task": "text_classification",
120
+ "acc": 0.21428571428571427,
121
+ "acc_norm": 0.41950113378684806
122
+ },
123
+ {
124
+ "name": "mnli_tr",
125
+ "task": "natural_language_inference",
126
+ "acc": 0.3211,
127
+ "acc_norm": 0.3212
128
+ },
129
+ {
130
+ "name": "snli_tr",
131
+ "task": "natural_language_inference",
132
+ "acc": 0.3239,
133
+ "acc_norm": 0.3237
134
+ },
135
+ {
136
+ "name": "xnli_tr",
137
+ "task": "natural_language_inference",
138
+ "acc": 0.36626506024096384,
139
+ "acc_norm": 0.36626506024096384
140
+ },
141
+ {
142
+ "name": "news_cat",
143
+ "task": "text_classification",
144
+ "acc": 0.292,
145
+ "acc_norm": 0.272
146
+ },
147
+ {
148
+ "name": "mkqa_tr",
149
+ "task": "extractive_question_answering",
150
+ "exact_match": 0.007102693104468778,
151
+ "f1": 0.019193813490945396
152
+ },
153
+ {
154
+ "name": "ironytr",
155
+ "task": "text_classification",
156
+ "acc": 0.47333333333333333,
157
+ "acc_norm": 0.49333333333333335
158
+ },
159
+ {
160
+ "name": "exams_tr",
161
+ "task": "multiple_choice",
162
+ "acc": 0.2366412213740458,
163
+ "acc_norm": 0.26208651399491095
164
+ },
165
+ {
166
+ "name": "belebele_tr",
167
+ "task": "multiple_choice",
168
+ "acc": 0.3,
169
+ "acc_norm": 0.3
170
+ }
171
+ ]
172
+ }
results/zero-shot/Qwen2.5-0.5B.json ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "model": "Qwen/Qwen2.5-0.5B",
4
+ "api": "hf",
5
+ "architecture": "Qwen2ForCausalLM",
6
+ "dtype": "bfloat16",
7
+ "max_length": "131072",
8
+ "type": "pretrained",
9
+ "num_parameters": "0.5b"
10
+ },
11
+ "results": [
12
+ {
13
+ "name": "xlsum_tr",
14
+ "task": "summarization",
15
+ "rouge1": 0.24486744839512292,
16
+ "rouge2": 0.09223336406082884,
17
+ "rougeL": 0.17919357452932383
18
+ },
19
+ {
20
+ "name": "wmt-tr-en-prompt",
21
+ "task": "machine_translation",
22
+ "wer": 2.5245853761833037,
23
+ "bleu": 0.014391606379183295
24
+ },
25
+ {
26
+ "name": "wiki_lingua_tr",
27
+ "task": "summarization",
28
+ "rouge1": 0.19235726612644397,
29
+ "rouge2": 0.05618807633412984,
30
+ "rougeL": 0.138761647221388
31
+ },
32
+ {
33
+ "name": "tr-wikihow-summ",
34
+ "task": "summarization",
35
+ "rouge1": 0.21150588695760153,
36
+ "rouge2": 0.060555129464851025,
37
+ "rougeL": 0.15026722717354687
38
+ },
39
+ {
40
+ "name": "mlsum_tr",
41
+ "task": "summarization",
42
+ "rouge1": 0.37608183260351713,
43
+ "rouge2": 0.24317811162325445,
44
+ "rougeL": 0.3109581125782851
45
+ },
46
+ {
47
+ "name": "gecturk_generation",
48
+ "task": "grammatical_error_correction",
49
+ "exact_match": 0.008859357696566999
50
+ },
51
+ {
52
+ "name": "xquad_tr",
53
+ "task": "extractive_question_answering",
54
+ "exact_match": 0.040336134453781515,
55
+ "f1": 0.1415668185953022
56
+ },
57
+ {
58
+ "name": "xcopa_tr",
59
+ "task": "multiple_choice",
60
+ "acc": 0.548,
61
+ "acc_norm": 0.548
62
+ },
63
+ {
64
+ "name": "turkish_plu",
65
+ "task": "multiple_choice",
66
+ "acc": 0.40832,
67
+ "acc_norm": 0.45184
68
+ },
69
+ {
70
+ "name": "turkish_plu_goal_inference",
71
+ "task": "multiple_choice",
72
+ "acc": 0.36200716845878134,
73
+ "acc_norm": 0.3536439665471924
74
+ },
75
+ {
76
+ "name": "turkish_plu_next_event_prediction",
77
+ "task": "multiple_choice",
78
+ "acc": 0.35725190839694654,
79
+ "acc_norm": 0.4305343511450382
80
+ },
81
+ {
82
+ "name": "turkish_plu_step_inference",
83
+ "task": "multiple_choice",
84
+ "acc": 0.2647058823529412,
85
+ "acc_norm": 0.4199346405228758
86
+ },
87
+ {
88
+ "name": "turkish_plu_step_ordering",
89
+ "task": "multiple_choice",
90
+ "acc": 0.56513222331048,
91
+ "acc_norm": 0.56513222331048
92
+ },
93
+ {
94
+ "name": "check_worthiness",
95
+ "task": "multiple_choice",
96
+ "acc": 0.620201096892139,
97
+ "acc_norm": 0.6220292504570384
98
+ },
99
+ {
100
+ "name": "relevance_judgment",
101
+ "task": "multiple_choice",
102
+ "acc": 0.5521023765996343,
103
+ "acc_norm": 0.5763254113345521
104
+ },
105
+ {
106
+ "name": "tquad",
107
+ "task": "extractive_question_answering",
108
+ "exact_match": 0.07174887892376682,
109
+ "f1": 0.2361024569228557
110
+ },
111
+ {
112
+ "name": "sts_tr",
113
+ "task": "text_classification",
114
+ "acc": 0.12907904278462654,
115
+ "acc_norm": 0.14720812182741116
116
+ },
117
+ {
118
+ "name": "offenseval_tr",
119
+ "task": "text_classification",
120
+ "acc": 0.3373015873015873,
121
+ "acc_norm": 0.7845804988662132
122
+ },
123
+ {
124
+ "name": "mnli_tr",
125
+ "task": "natural_language_inference",
126
+ "acc": 0.3208,
127
+ "acc_norm": 0.3211
128
+ },
129
+ {
130
+ "name": "snli_tr",
131
+ "task": "natural_language_inference",
132
+ "acc": 0.3244,
133
+ "acc_norm": 0.3237
134
+ },
135
+ {
136
+ "name": "xnli_tr",
137
+ "task": "natural_language_inference",
138
+ "acc": 0.3614457831325301,
139
+ "acc_norm": 0.3614457831325301
140
+ },
141
+ {
142
+ "name": "news_cat",
143
+ "task": "text_classification",
144
+ "acc": 0.268,
145
+ "acc_norm": 0.232
146
+ },
147
+ {
148
+ "name": "mkqa_tr",
149
+ "task": "extractive_question_answering",
150
+ "exact_match": 0.0011837821840781297,
151
+ "f1": 0.008064623072727376
152
+ },
153
+ {
154
+ "name": "ironytr",
155
+ "task": "text_classification",
156
+ "acc": 0.49333333333333335,
157
+ "acc_norm": 0.505
158
+ },
159
+ {
160
+ "name": "exams_tr",
161
+ "task": "multiple_choice",
162
+ "acc": 0.21119592875318066,
163
+ "acc_norm": 0.26208651399491095
164
+ },
165
+ {
166
+ "name": "belebele_tr",
167
+ "task": "multiple_choice",
168
+ "acc": 0.29888888888888887,
169
+ "acc_norm": 0.29888888888888887
170
+ }
171
+ ]
172
+ }
results/zero-shot/Qwen2.5-1.5B-Instruct.json ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "model": "Qwen/Qwen2.5-1.5B-Instruct",
4
+ "api": "hf",
5
+ "architecture": "Qwen2ForCausalLM",
6
+ "dtype": "bfloat16",
7
+ "max_length": "131072",
8
+ "type": "instruction-tuned",
9
+ "num_parameters": "1.5b"
10
+ },
11
+ "results": [
12
+ {
13
+ "name": "xlsum_tr",
14
+ "task": "summarization",
15
+ "rouge1": 0.2623073705824601,
16
+ "rouge2": 0.11026827068108053,
17
+ "rougeL": 0.19910145473422672
18
+ },
19
+ {
20
+ "name": "wmt-tr-en-prompt",
21
+ "task": "machine_translation",
22
+ "wer": 1.2141147884224404,
23
+ "bleu": 0.046509658480976955
24
+ },
25
+ {
26
+ "name": "wiki_lingua_tr",
27
+ "task": "summarization",
28
+ "rouge1": 0.20401876230987648,
29
+ "rouge2": 0.06137162235274428,
30
+ "rougeL": 0.14973642357201794
31
+ },
32
+ {
33
+ "name": "tr-wikihow-summ",
34
+ "task": "summarization",
35
+ "rouge1": 0.213601462920457,
36
+ "rouge2": 0.0630097851113338,
37
+ "rougeL": 0.1537348275034766
38
+ },
39
+ {
40
+ "name": "mlsum_tr",
41
+ "task": "summarization",
42
+ "rouge1": 0.38237711642823147,
43
+ "rouge2": 0.24586898786864927,
44
+ "rougeL": 0.314102032096956
45
+ },
46
+ {
47
+ "name": "gecturk_generation",
48
+ "task": "grammatical_error_correction",
49
+ "exact_match": 0.0012037170783379075
50
+ },
51
+ {
52
+ "name": "xquad_tr",
53
+ "task": "extractive_question_answering",
54
+ "exact_match": 0.16470588235294117,
55
+ "f1": 0.3054485466158489
56
+ },
57
+ {
58
+ "name": "xcopa_tr",
59
+ "task": "multiple_choice",
60
+ "acc": 0.546,
61
+ "acc_norm": 0.546
62
+ },
63
+ {
64
+ "name": "turkish_plu",
65
+ "task": "multiple_choice",
66
+ "acc": 0.42528,
67
+ "acc_norm": 0.49536
68
+ },
69
+ {
70
+ "name": "turkish_plu_goal_inference",
71
+ "task": "multiple_choice",
72
+ "acc": 0.35722819593787336,
73
+ "acc_norm": 0.42771804062126645
74
+ },
75
+ {
76
+ "name": "turkish_plu_next_event_prediction",
77
+ "task": "multiple_choice",
78
+ "acc": 0.4030534351145038,
79
+ "acc_norm": 0.48854961832061067
80
+ },
81
+ {
82
+ "name": "turkish_plu_step_inference",
83
+ "task": "multiple_choice",
84
+ "acc": 0.28104575163398693,
85
+ "acc_norm": 0.45098039215686275
86
+ },
87
+ {
88
+ "name": "turkish_plu_step_ordering",
89
+ "task": "multiple_choice",
90
+ "acc": 0.5817825661116552,
91
+ "acc_norm": 0.5817825661116552
92
+ },
93
+ {
94
+ "name": "check_worthiness",
95
+ "task": "multiple_choice",
96
+ "acc": 0.6142595978062158,
97
+ "acc_norm": 0.6238574040219378
98
+ },
99
+ {
100
+ "name": "relevance_judgment",
101
+ "task": "multiple_choice",
102
+ "acc": 0.6005484460694699,
103
+ "acc_norm": 0.5781535648994516
104
+ },
105
+ {
106
+ "name": "tquad",
107
+ "task": "extractive_question_answering",
108
+ "exact_match": 0.1468609865470852,
109
+ "f1": 0.3275513362731245
110
+ },
111
+ {
112
+ "name": "sts_tr",
113
+ "task": "text_classification",
114
+ "acc": 0.12907904278462654,
115
+ "acc_norm": 0.12980420594633793
116
+ },
117
+ {
118
+ "name": "offenseval_tr",
119
+ "task": "text_classification",
120
+ "acc": 0.20748299319727892,
121
+ "acc_norm": 0.2568027210884354
122
+ },
123
+ {
124
+ "name": "mnli_tr",
125
+ "task": "natural_language_inference",
126
+ "acc": 0.3213,
127
+ "acc_norm": 0.3213
128
+ },
129
+ {
130
+ "name": "snli_tr",
131
+ "task": "natural_language_inference",
132
+ "acc": 0.3237,
133
+ "acc_norm": 0.324
134
+ },
135
+ {
136
+ "name": "xnli_tr",
137
+ "task": "natural_language_inference",
138
+ "acc": 0.38835341365461845,
139
+ "acc_norm": 0.38835341365461845
140
+ },
141
+ {
142
+ "name": "news_cat",
143
+ "task": "text_classification",
144
+ "acc": 0.488,
145
+ "acc_norm": 0.328
146
+ },
147
+ {
148
+ "name": "mkqa_tr",
149
+ "task": "extractive_question_answering",
150
+ "exact_match": 0.0017756732761171944,
151
+ "f1": 0.014047009643700398
152
+ },
153
+ {
154
+ "name": "ironytr",
155
+ "task": "text_classification",
156
+ "acc": 0.49666666666666665,
157
+ "acc_norm": 0.5283333333333333
158
+ },
159
+ {
160
+ "name": "exams_tr",
161
+ "task": "multiple_choice",
162
+ "acc": 0.22900763358778625,
163
+ "acc_norm": 0.26463104325699743
164
+ },
165
+ {
166
+ "name": "belebele_tr",
167
+ "task": "multiple_choice",
168
+ "acc": 0.5344444444444445,
169
+ "acc_norm": 0.5344444444444445
170
+ }
171
+ ]
172
+ }
results/zero-shot/Qwen2.5-1.5B.json ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "model": "Qwen/Qwen2.5-1.5B",
4
+ "api": "hf",
5
+ "architecture": "Qwen2ForCausalLM",
6
+ "dtype": "bfloat16",
7
+ "max_length": "131072",
8
+ "type": "pretrained",
9
+ "num_parameters": "1.5b"
10
+ },
11
+ "results": [
12
+ {
13
+ "name": "xlsum_tr",
14
+ "task": "summarization",
15
+ "rouge1": 0.26530686725683517,
16
+ "rouge2": 0.1186340395264098,
17
+ "rougeL": 0.21072366214436372
18
+ },
19
+ {
20
+ "name": "wmt-tr-en-prompt",
21
+ "task": "machine_translation",
22
+ "wer": 1.3627024164432318,
23
+ "bleu": 0.04065669768703689
24
+ },
25
+ {
26
+ "name": "wiki_lingua_tr",
27
+ "task": "summarization",
28
+ "rouge1": 0.1989672547304563,
29
+ "rouge2": 0.05893942882571811,
30
+ "rougeL": 0.14295071989157748
31
+ },
32
+ {
33
+ "name": "tr-wikihow-summ",
34
+ "task": "summarization",
35
+ "rouge1": 0.21551714657943752,
36
+ "rouge2": 0.06276605057309345,
37
+ "rougeL": 0.1530489363520035
38
+ },
39
+ {
40
+ "name": "mlsum_tr",
41
+ "task": "summarization",
42
+ "rouge1": 0.3720197986496941,
43
+ "rouge2": 0.24001941620807693,
44
+ "rougeL": 0.30891873779373347
45
+ },
46
+ {
47
+ "name": "gecturk_generation",
48
+ "task": "grammatical_error_correction",
49
+ "exact_match": 0.004188935432615918
50
+ },
51
+ {
52
+ "name": "xquad_tr",
53
+ "task": "extractive_question_answering",
54
+ "exact_match": 0.3184873949579832,
55
+ "f1": 0.4728187788037503
56
+ },
57
+ {
58
+ "name": "xcopa_tr",
59
+ "task": "multiple_choice",
60
+ "acc": 0.542,
61
+ "acc_norm": 0.542
62
+ },
63
+ {
64
+ "name": "turkish_plu",
65
+ "task": "multiple_choice",
66
+ "acc": 0.4208,
67
+ "acc_norm": 0.48704
68
+ },
69
+ {
70
+ "name": "turkish_plu_goal_inference",
71
+ "task": "multiple_choice",
72
+ "acc": 0.35842293906810035,
73
+ "acc_norm": 0.4169653524492234
74
+ },
75
+ {
76
+ "name": "turkish_plu_next_event_prediction",
77
+ "task": "multiple_choice",
78
+ "acc": 0.3969465648854962,
79
+ "acc_norm": 0.4854961832061069
80
+ },
81
+ {
82
+ "name": "turkish_plu_step_inference",
83
+ "task": "multiple_choice",
84
+ "acc": 0.272875816993464,
85
+ "acc_norm": 0.4362745098039216
86
+ },
87
+ {
88
+ "name": "turkish_plu_step_ordering",
89
+ "task": "multiple_choice",
90
+ "acc": 0.5759059745347699,
91
+ "acc_norm": 0.5759059745347699
92
+ },
93
+ {
94
+ "name": "check_worthiness",
95
+ "task": "multiple_choice",
96
+ "acc": 0.6229433272394881,
97
+ "acc_norm": 0.6238574040219378
98
+ },
99
+ {
100
+ "name": "relevance_judgment",
101
+ "task": "multiple_choice",
102
+ "acc": 0.4346435100548446,
103
+ "acc_norm": 0.5868372943327239
104
+ },
105
+ {
106
+ "name": "tquad",
107
+ "task": "extractive_question_answering",
108
+ "exact_match": 0.3430493273542601,
109
+ "f1": 0.5584198786751099
110
+ },
111
+ {
112
+ "name": "sts_tr",
113
+ "task": "text_classification",
114
+ "acc": 0.12907904278462654,
115
+ "acc_norm": 0.12907904278462654
116
+ },
117
+ {
118
+ "name": "offenseval_tr",
119
+ "task": "text_classification",
120
+ "acc": 0.27380952380952384,
121
+ "acc_norm": 0.6590136054421769
122
+ },
123
+ {
124
+ "name": "mnli_tr",
125
+ "task": "natural_language_inference",
126
+ "acc": 0.3212,
127
+ "acc_norm": 0.3208
128
+ },
129
+ {
130
+ "name": "snli_tr",
131
+ "task": "natural_language_inference",
132
+ "acc": 0.3237,
133
+ "acc_norm": 0.3238
134
+ },
135
+ {
136
+ "name": "xnli_tr",
137
+ "task": "natural_language_inference",
138
+ "acc": 0.4108433734939759,
139
+ "acc_norm": 0.4108433734939759
140
+ },
141
+ {
142
+ "name": "news_cat",
143
+ "task": "text_classification",
144
+ "acc": 0.484,
145
+ "acc_norm": 0.312
146
+ },
147
+ {
148
+ "name": "mkqa_tr",
149
+ "task": "extractive_question_answering",
150
+ "exact_match": 0.0025155371411660255,
151
+ "f1": 0.021308629203477533
152
+ },
153
+ {
154
+ "name": "ironytr",
155
+ "task": "text_classification",
156
+ "acc": 0.5233333333333333,
157
+ "acc_norm": 0.52
158
+ },
159
+ {
160
+ "name": "exams_tr",
161
+ "task": "multiple_choice",
162
+ "acc": 0.21628498727735368,
163
+ "acc_norm": 0.2544529262086514
164
+ },
165
+ {
166
+ "name": "belebele_tr",
167
+ "task": "multiple_choice",
168
+ "acc": 0.4666666666666667,
169
+ "acc_norm": 0.4666666666666667
170
+ }
171
+ ]
172
+ }
results/zero-shot/Qwen2.5-14B-Instruct.json ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "max_length": "131072",
4
+ "dtype": "bfloat16",
5
+ "model": "Qwen/Qwen2.5-14B-Instruct",
6
+ "api": "hf",
7
+ "architecture": "Qwen2ForCausalLM",
8
+ "dtype": "bfloat16",
9
+ "max_length": "131072",
10
+ "type": "instruction-tuned",
11
+ "num_parameters": "7b"
12
+ },
13
+ "results": [
14
+ {
15
+ "name": "xquad_tr",
16
+ "task": "extractive_question_answering",
17
+ "exact_match": 0.009243697478991597,
18
+ "f1": 0.1621475839693222
19
+ },
20
+ {
21
+ "name": "xcopa_tr",
22
+ "task": "multiple_choice",
23
+ "acc": 0.666,
24
+ "acc_norm": 0.666
25
+ },
26
+ {
27
+ "name": "turkish_plu",
28
+ "task": "multiple_choice",
29
+ "acc": 0.48512,
30
+ "acc_norm": 0.53216
31
+ },
32
+ {
33
+ "name": "turkish_plu_goal_inference",
34
+ "task": "multiple_choice",
35
+ "acc": 0.4062126642771804,
36
+ "acc_norm": 0.41935483870967744
37
+ },
38
+ {
39
+ "name": "turkish_plu_next_event_prediction",
40
+ "task": "multiple_choice",
41
+ "acc": 0.49770992366412214,
42
+ "acc_norm": 0.5725190839694656
43
+ },
44
+ {
45
+ "name": "turkish_plu_step_inference",
46
+ "task": "multiple_choice",
47
+ "acc": 0.35130718954248363,
48
+ "acc_norm": 0.4934640522875817
49
+ },
50
+ {
51
+ "name": "turkish_plu_step_ordering",
52
+ "task": "multiple_choice",
53
+ "acc": 0.6219392752203722,
54
+ "acc_norm": 0.6219392752203722
55
+ },
56
+ {
57
+ "name": "check_worthiness",
58
+ "task": "multiple_choice",
59
+ "acc": 0.38848263254113347,
60
+ "acc_norm": 0.45749542961608775
61
+ },
62
+ {
63
+ "name": "relevance_judgment",
64
+ "task": "multiple_choice",
65
+ "acc": 0.7458866544789763,
66
+ "acc_norm": 0.7842778793418648
67
+ },
68
+ {
69
+ "name": "tquad",
70
+ "task": "extractive_question_answering",
71
+ "exact_match": 0.0033632286995515697,
72
+ "f1": 0.2073964222096445
73
+ },
74
+ {
75
+ "name": "sts_tr",
76
+ "task": "text_classification",
77
+ "acc": 0.24873096446700507,
78
+ "acc_norm": 0.224075416968818
79
+ },
80
+ {
81
+ "name": "offenseval_tr",
82
+ "task": "text_classification",
83
+ "acc": 0.5473356009070295,
84
+ "acc_norm": 0.6992630385487528
85
+ },
86
+ {
87
+ "name": "mnli_tr",
88
+ "task": "natural_language_inference",
89
+ "acc": 0.2993,
90
+ "acc_norm": 0.4052
91
+ },
92
+ {
93
+ "name": "snli_tr",
94
+ "task": "natural_language_inference",
95
+ "acc": 0.249,
96
+ "acc_norm": 0.4158
97
+ },
98
+ {
99
+ "name": "xnli_tr",
100
+ "task": "natural_language_inference",
101
+ "acc": 0.4108433734939759,
102
+ "acc_norm": 0.4108433734939759
103
+ },
104
+ {
105
+ "name": "news_cat",
106
+ "task": "text_classification",
107
+ "acc": 0.324,
108
+ "acc_norm": 0.372
109
+ },
110
+ {
111
+ "name": "mkqa_tr",
112
+ "task": "extractive_question_answering",
113
+ "exact_match": 0.0001479727730097662,
114
+ "f1": 0.032689256412897535
115
+ },
116
+ {
117
+ "name": "ironytr",
118
+ "task": "text_classification",
119
+ "acc": 0.6133333333333333,
120
+ "acc_norm": 0.68
121
+ },
122
+ {
123
+ "name": "exams_tr",
124
+ "task": "multiple_choice",
125
+ "acc": 0.29770992366412213,
126
+ "acc_norm": 0.32061068702290074
127
+ },
128
+ {
129
+ "name": "belebele_tr",
130
+ "task": "multiple_choice",
131
+ "acc": 0.8466666666666667,
132
+ "acc_norm": 0.8466666666666667
133
+ },
134
+ {
135
+ "name": "xlsum_tr",
136
+ "task": "summarization",
137
+ "rouge1": 0.32286587644778963,
138
+ "rouge2": 0.15718603235490425,
139
+ "rougeL": 0.2513469242124575
140
+ },
141
+ {
142
+ "name": "wmt-tr-en-prompt",
143
+ "task": "machine_translation",
144
+ "wer": 0.876353734204216,
145
+ "bleu": 0.12408567637656073
146
+ },
147
+ {
148
+ "name": "wiki_lingua_tr",
149
+ "task": "summarization",
150
+ "rouge1": 0.21257634641569856,
151
+ "rouge2": 0.07113576521772344,
152
+ "rougeL": 0.16327508915103117
153
+ },
154
+ {
155
+ "name": "tr-wikihow-summ",
156
+ "task": "summarization",
157
+ "rouge1": 0.15933964568392708,
158
+ "rouge2": 0.046667426668942254,
159
+ "rougeL": 0.12246246131371726
160
+ },
161
+ {
162
+ "name": "mlsum_tr",
163
+ "task": "summarization",
164
+ "rouge1": 0.3941083980026566,
165
+ "rouge2": 0.2370970171442021,
166
+ "rougeL": 0.3180069634000636
167
+ },
168
+ {
169
+ "name": "gecturk_generation",
170
+ "task": "grammatical_error_correction",
171
+ "exact_match": 0.00130001444460494
172
+ }
173
+ ]
174
+ }
results/zero-shot/Qwen2.5-14B.json ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "max_length": "131072",
4
+ "dtype": "bfloat16",
5
+ "model": "Qwen/Qwen2.5-14B",
6
+ "api": "hf",
7
+ "architecture": "Qwen2ForCausalLM",
8
+ "dtype": "bfloat16",
9
+ "max_length": "131072",
10
+ "type": "pretrained",
11
+ "num_parameters": "14b"
12
+ },
13
+ "results": [
14
+ {
15
+ "name": "xquad_tr",
16
+ "task": "extractive_question_answering",
17
+ "exact_match": 0.40252100840336136,
18
+ "f1": 0.6176467678580342
19
+ },
20
+ {
21
+ "name": "xcopa_tr",
22
+ "task": "multiple_choice",
23
+ "acc": 0.646,
24
+ "acc_norm": 0.646
25
+ },
26
+ {
27
+ "name": "turkish_plu",
28
+ "task": "multiple_choice",
29
+ "acc": 0.48736,
30
+ "acc_norm": 0.5392
31
+ },
32
+ {
33
+ "name": "turkish_plu_goal_inference",
34
+ "task": "multiple_choice",
35
+ "acc": 0.4133811230585424,
36
+ "acc_norm": 0.4324970131421744
37
+ },
38
+ {
39
+ "name": "turkish_plu_next_event_prediction",
40
+ "task": "multiple_choice",
41
+ "acc": 0.4870229007633588,
42
+ "acc_norm": 0.5816793893129771
43
+ },
44
+ {
45
+ "name": "turkish_plu_step_inference",
46
+ "task": "multiple_choice",
47
+ "acc": 0.35294117647058826,
48
+ "acc_norm": 0.49019607843137253
49
+ },
50
+ {
51
+ "name": "turkish_plu_step_ordering",
52
+ "task": "multiple_choice",
53
+ "acc": 0.6287952987267384,
54
+ "acc_norm": 0.6287952987267384
55
+ },
56
+ {
57
+ "name": "check_worthiness",
58
+ "task": "multiple_choice",
59
+ "acc": 0.37614259597806216,
60
+ "acc_norm": 0.3756855575868373
61
+ },
62
+ {
63
+ "name": "relevance_judgment",
64
+ "task": "multiple_choice",
65
+ "acc": 0.4506398537477148,
66
+ "acc_norm": 0.5708409506398537
67
+ },
68
+ {
69
+ "name": "tquad",
70
+ "task": "extractive_question_answering",
71
+ "exact_match": 0.34753363228699546,
72
+ "f1": 0.614345609122
73
+ },
74
+ {
75
+ "name": "sts_tr",
76
+ "task": "text_classification",
77
+ "acc": 0.2037708484408992,
78
+ "acc_norm": 0.2610587382160986
79
+ },
80
+ {
81
+ "name": "offenseval_tr",
82
+ "task": "text_classification",
83
+ "acc": 0.22023809523809523,
84
+ "acc_norm": 0.2962018140589569
85
+ },
86
+ {
87
+ "name": "mnli_tr",
88
+ "task": "natural_language_inference",
89
+ "acc": 0.3202,
90
+ "acc_norm": 0.3281
91
+ },
92
+ {
93
+ "name": "snli_tr",
94
+ "task": "natural_language_inference",
95
+ "acc": 0.3227,
96
+ "acc_norm": 0.3329
97
+ },
98
+ {
99
+ "name": "xnli_tr",
100
+ "task": "natural_language_inference",
101
+ "acc": 0.46546184738955826,
102
+ "acc_norm": 0.46546184738955826
103
+ },
104
+ {
105
+ "name": "news_cat",
106
+ "task": "text_classification",
107
+ "acc": 0.524,
108
+ "acc_norm": 0.348
109
+ },
110
+ {
111
+ "name": "mkqa_tr",
112
+ "task": "extractive_question_answering",
113
+ "exact_match": 0.050014797277300974,
114
+ "f1": 0.11195620922043903
115
+ },
116
+ {
117
+ "name": "ironytr",
118
+ "task": "text_classification",
119
+ "acc": 0.5616666666666666,
120
+ "acc_norm": 0.6183333333333333
121
+ },
122
+ {
123
+ "name": "exams_tr",
124
+ "task": "multiple_choice",
125
+ "acc": 0.33078880407124683,
126
+ "acc_norm": 0.35877862595419846
127
+ },
128
+ {
129
+ "name": "belebele_tr",
130
+ "task": "multiple_choice",
131
+ "acc": 0.8122222222222222,
132
+ "acc_norm": 0.8122222222222222
133
+ },
134
+ {
135
+ "name": "xlsum_tr",
136
+ "task": "summarization",
137
+ "rouge1": 0.2866278776668776,
138
+ "rouge2": 0.1308383753682692,
139
+ "rougeL": 0.22217070278595147
140
+ },
141
+ {
142
+ "name": "wmt-tr-en-prompt",
143
+ "task": "machine_translation",
144
+ "wer": 1.6546507240124098,
145
+ "bleu": 0.08096461200991427
146
+ },
147
+ {
148
+ "name": "wiki_lingua_tr",
149
+ "task": "summarization",
150
+ "rouge1": 0.20802332507327073,
151
+ "rouge2": 0.06755910819968403,
152
+ "rougeL": 0.15425156655216665
153
+ },
154
+ {
155
+ "name": "tr-wikihow-summ",
156
+ "task": "summarization",
157
+ "rouge1": 0.22012543165161014,
158
+ "rouge2": 0.06567086903148794,
159
+ "rougeL": 0.15604855476586732
160
+ },
161
+ {
162
+ "name": "mlsum_tr",
163
+ "task": "summarization",
164
+ "rouge1": 0.3928051448993858,
165
+ "rouge2": 0.25674608200884674,
166
+ "rougeL": 0.3276023476233169
167
+ },
168
+ {
169
+ "name": "gecturk_generation",
170
+ "task": "grammatical_error_correction",
171
+ "exact_match": 0.003707448601280755
172
+ }
173
+ ]
174
+ }
results/zero-shot/Qwen2.5-3B-Instruct.json ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ {
+ "model": {
+ "model": "Qwen/Qwen2.5-3B-Instruct",
+ "api": "hf",
+ "architecture": "Qwen2ForCausalLM",
+ "dtype": "bfloat16",
+ "max_length": "131072",
+ "type": "instruction-tuned",
+ "num_parameters": "3b"
+ },
+ "results": [
+ {
+ "name": "xlsum_tr",
+ "task": "summarization",
+ "rouge1": 0.2717423560904909,
+ "rouge2": 0.1203805256265841,
+ "rougeL": 0.20835793423392474
+ },
+ {
+ "name": "wmt-tr-en-prompt",
+ "task": "machine_translation",
+ "wer": 1.3083152705118002,
+ "bleu": 0.06557652285165357
+ },
+ {
+ "name": "wiki_lingua_tr",
+ "task": "summarization",
+ "rouge1": 0.20972310903888913,
+ "rouge2": 0.06624065006707994,
+ "rougeL": 0.15663818204368896
+ },
+ {
+ "name": "tr-wikihow-summ",
+ "task": "summarization",
+ "rouge1": 0.21539354708256803,
+ "rouge2": 0.06750207152961056,
+ "rougeL": 0.15713052980260883
+ },
+ {
+ "name": "mlsum_tr",
+ "task": "summarization",
+ "rouge1": 0.36923031529839273,
+ "rouge2": 0.22733869486812047,
+ "rougeL": 0.30137527399984854
+ },
+ {
+ "name": "gecturk_generation",
+ "task": "grammatical_error_correction",
+ "exact_match": 0.0032741104530791083
+ },
+ {
+ "name": "xquad_tr",
+ "task": "extractive_question_answering",
+ "exact_match": 0.06050420168067227,
+ "f1": 0.17614501216061587
+ },
+ {
+ "name": "xcopa_tr",
+ "task": "multiple_choice",
+ "acc": 0.562,
+ "acc_norm": 0.562
+ },
+ {
+ "name": "turkish_plu",
+ "task": "multiple_choice",
+ "acc": 0.44832,
+ "acc_norm": 0.4976
+ },
+ {
+ "name": "turkish_plu_goal_inference",
+ "task": "multiple_choice",
+ "acc": 0.38948626045400236,
+ "acc_norm": 0.4074074074074074
+ },
+ {
+ "name": "turkish_plu_next_event_prediction",
+ "task": "multiple_choice",
+ "acc": 0.4198473282442748,
+ "acc_norm": 0.4916030534351145
+ },
+ {
+ "name": "turkish_plu_step_inference",
+ "task": "multiple_choice",
+ "acc": 0.32189542483660133,
+ "acc_norm": 0.4722222222222222
+ },
+ {
+ "name": "turkish_plu_step_ordering",
+ "task": "multiple_choice",
+ "acc": 0.5905974534769833,
+ "acc_norm": 0.5905974534769833
+ },
+ {
+ "name": "check_worthiness",
+ "task": "multiple_choice",
+ "acc": 0.37614259597806216,
+ "acc_norm": 0.41910420475319926
+ },
+ {
+ "name": "relevance_judgment",
+ "task": "multiple_choice",
+ "acc": 0.4218464351005484,
+ "acc_norm": 0.5863802559414991
+ },
+ {
+ "name": "tquad",
+ "task": "extractive_question_answering",
+ "exact_match": 0.032511210762331835,
+ "f1": 0.17915922696126974
+ },
+ {
+ "name": "sts_tr",
+ "task": "text_classification",
+ "acc": 0.16823785351704135,
+ "acc_norm": 0.21102248005801305
+ },
+ {
+ "name": "offenseval_tr",
+ "task": "text_classification",
+ "acc": 0.3764172335600907,
+ "acc_norm": 0.6312358276643991
+ },
+ {
+ "name": "mnli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.3202,
+ "acc_norm": 0.3166
+ },
+ {
+ "name": "snli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.324,
+ "acc_norm": 0.3233
+ },
+ {
+ "name": "xnli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.3899598393574297,
+ "acc_norm": 0.3899598393574297
+ },
+ {
+ "name": "news_cat",
+ "task": "text_classification",
+ "acc": 0.372,
+ "acc_norm": 0.316
+ },
+ {
+ "name": "mkqa_tr",
+ "task": "extractive_question_answering",
+ "exact_match": 0.0013317549570878958,
+ "f1": 0.011368014565970922
+ },
+ {
+ "name": "ironytr",
+ "task": "text_classification",
+ "acc": 0.5133333333333333,
+ "acc_norm": 0.5
+ },
+ {
+ "name": "exams_tr",
+ "task": "multiple_choice",
+ "acc": 0.2748091603053435,
+ "acc_norm": 0.2748091603053435
+ },
+ {
+ "name": "belebele_tr",
+ "task": "multiple_choice",
+ "acc": 0.6744444444444444,
+ "acc_norm": 0.6744444444444444
+ }
+ ]
+ }
results/zero-shot/Qwen2.5-3B.json ADDED
@@ -0,0 +1,172 @@
+ {
+ "model": {
+ "model": "Qwen/Qwen2.5-3B",
+ "api": "hf",
+ "architecture": "Qwen2ForCausalLM",
+ "dtype": "bfloat16",
+ "max_length": "131072",
+ "type": "pretrained",
+ "num_parameters": "3b"
+ },
+ "results": [
+ {
+ "name": "xlsum_tr",
+ "task": "summarization",
+ "rouge1": 0.2626105174898534,
+ "rouge2": 0.11378189592008409,
+ "rougeL": 0.20776464247370657
+ },
+ {
+ "name": "wmt-tr-en-prompt",
+ "task": "machine_translation",
+ "wer": 2.715338611222304,
+ "bleu": 0.04073294466582842
+ },
+ {
+ "name": "wiki_lingua_tr",
+ "task": "summarization",
+ "rouge1": 0.2070375365034586,
+ "rouge2": 0.06545062813959457,
+ "rougeL": 0.15181208318674888
+ },
+ {
+ "name": "tr-wikihow-summ",
+ "task": "summarization",
+ "rouge1": 0.2166032592490747,
+ "rouge2": 0.06488692040082837,
+ "rougeL": 0.15493867817520438
+ },
+ {
+ "name": "mlsum_tr",
+ "task": "summarization",
+ "rouge1": 0.37920771518464447,
+ "rouge2": 0.2485854189993293,
+ "rougeL": 0.3183591683826359
+ },
+ {
+ "name": "gecturk_generation",
+ "task": "grammatical_error_correction",
+ "exact_match": 0.01261495498098127
+ },
+ {
+ "name": "xquad_tr",
+ "task": "extractive_question_answering",
+ "exact_match": 0.3226890756302521,
+ "f1": 0.4922457700639336
+ },
+ {
+ "name": "xcopa_tr",
+ "task": "multiple_choice",
+ "acc": 0.552,
+ "acc_norm": 0.552
+ },
+ {
+ "name": "turkish_plu",
+ "task": "multiple_choice",
+ "acc": 0.43936,
+ "acc_norm": 0.49536
+ },
+ {
+ "name": "turkish_plu_goal_inference",
+ "task": "multiple_choice",
+ "acc": 0.37992831541218636,
+ "acc_norm": 0.4109916367980884
+ },
+ {
+ "name": "turkish_plu_next_event_prediction",
+ "task": "multiple_choice",
+ "acc": 0.40610687022900765,
+ "acc_norm": 0.4900763358778626
+ },
+ {
+ "name": "turkish_plu_step_inference",
+ "task": "multiple_choice",
+ "acc": 0.2973856209150327,
+ "acc_norm": 0.45098039215686275
+ },
+ {
+ "name": "turkish_plu_step_ordering",
+ "task": "multiple_choice",
+ "acc": 0.594515181194907,
+ "acc_norm": 0.594515181194907
+ },
+ {
+ "name": "check_worthiness",
+ "task": "multiple_choice",
+ "acc": 0.37614259597806216,
+ "acc_norm": 0.37614259597806216
+ },
+ {
+ "name": "relevance_judgment",
+ "task": "multiple_choice",
+ "acc": 0.4218464351005484,
+ "acc_norm": 0.42230347349177333
+ },
+ {
+ "name": "tquad",
+ "task": "extractive_question_answering",
+ "exact_match": 0.2679372197309417,
+ "f1": 0.5013276144111743
+ },
+ {
+ "name": "sts_tr",
+ "task": "text_classification",
+ "acc": 0.12907904278462654,
+ "acc_norm": 0.1319796954314721
+ },
+ {
+ "name": "offenseval_tr",
+ "task": "text_classification",
+ "acc": 0.483843537414966,
+ "acc_norm": 0.7939342403628118
+ },
+ {
+ "name": "mnli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.3212,
+ "acc_norm": 0.3212
+ },
+ {
+ "name": "snli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.3219,
+ "acc_norm": 0.31
+ },
+ {
+ "name": "xnli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.3887550200803213,
+ "acc_norm": 0.3887550200803213
+ },
+ {
+ "name": "news_cat",
+ "task": "text_classification",
+ "acc": 0.448,
+ "acc_norm": 0.336
+ },
+ {
+ "name": "mkqa_tr",
+ "task": "extractive_question_answering",
+ "exact_match": 0.0011837821840781297,
+ "f1": 0.02167535190151917
+ },
+ {
+ "name": "ironytr",
+ "task": "text_classification",
+ "acc": 0.5466666666666666,
+ "acc_norm": 0.505
+ },
+ {
+ "name": "exams_tr",
+ "task": "multiple_choice",
+ "acc": 0.26463104325699743,
+ "acc_norm": 0.272264631043257
+ },
+ {
+ "name": "belebele_tr",
+ "task": "multiple_choice",
+ "acc": 0.6188888888888889,
+ "acc_norm": 0.6188888888888889
+ }
+ ]
+ }
results/zero-shot/Qwen2.5-7B-Instruct.json ADDED
@@ -0,0 +1,172 @@
+ {
+ "model": {
+ "model": "Qwen/Qwen2.5-7B-Instruct",
+ "api": "hf",
+ "architecture": "Qwen2ForCausalLM",
+ "dtype": "bfloat16",
+ "max_length": "131072",
+ "type": "instruction-tuned",
+ "num_parameters": "7b"
+ },
+ "results": [
+ {
+ "name": "xlsum_tr",
+ "task": "summarization",
+ "rouge1": 0.2469558574263228,
+ "rouge2": 0.11595092662162905,
+ "rougeL": 0.19088347093150124
+ },
+ {
+ "name": "wmt-tr-en-prompt",
+ "task": "machine_translation",
+ "wer": 2.665304473413403,
+ "bleu": 0.05378866280156646
+ },
+ {
+ "name": "wiki_lingua_tr",
+ "task": "summarization",
+ "rouge1": 0.19651060875148446,
+ "rouge2": 0.06277513772426871,
+ "rougeL": 0.15024685156698064
+ },
+ {
+ "name": "tr-wikihow-summ",
+ "task": "summarization",
+ "rouge1": 0.18795422261380992,
+ "rouge2": 0.057607529002163975,
+ "rougeL": 0.1399141590028576
+ },
+ {
+ "name": "mlsum_tr",
+ "task": "summarization",
+ "rouge1": 0.33348239796227963,
+ "rouge2": 0.20530295055546918,
+ "rougeL": 0.2702778828157603
+ },
+ {
+ "name": "gecturk_generation",
+ "task": "grammatical_error_correction",
+ "exact_match": 0.0007222302470027445
+ },
+ {
+ "name": "xquad_tr",
+ "task": "extractive_question_answering",
+ "exact_match": 0.009243697478991597,
+ "f1": 0.20658958502282965
+ },
+ {
+ "name": "xcopa_tr",
+ "task": "multiple_choice",
+ "acc": 0.618,
+ "acc_norm": 0.618
+ },
+ {
+ "name": "turkish_plu",
+ "task": "multiple_choice",
+ "acc": 0.47136,
+ "acc_norm": 0.5168
+ },
+ {
+ "name": "turkish_plu_goal_inference",
+ "task": "multiple_choice",
+ "acc": 0.4109916367980884,
+ "acc_norm": 0.4324970131421744
+ },
+ {
+ "name": "turkish_plu_next_event_prediction",
+ "task": "multiple_choice",
+ "acc": 0.467175572519084,
+ "acc_norm": 0.5251908396946565
+ },
+ {
+ "name": "turkish_plu_step_inference",
+ "task": "multiple_choice",
+ "acc": 0.32189542483660133,
+ "acc_norm": 0.4624183006535948
+ },
+ {
+ "name": "turkish_plu_step_ordering",
+ "task": "multiple_choice",
+ "acc": 0.6131243878550441,
+ "acc_norm": 0.6131243878550441
+ },
+ {
+ "name": "check_worthiness",
+ "task": "multiple_choice",
+ "acc": 0.37614259597806216,
+ "acc_norm": 0.37705667276051186
+ },
+ {
+ "name": "relevance_judgment",
+ "task": "multiple_choice",
+ "acc": 0.5635283363802559,
+ "acc_norm": 0.649908592321755
+ },
+ {
+ "name": "tquad",
+ "task": "extractive_question_answering",
+ "exact_match": 0.005605381165919282,
+ "f1": 0.2515091110747535
+ },
+ {
+ "name": "sts_tr",
+ "task": "text_classification",
+ "acc": 0.18274111675126903,
+ "acc_norm": 0.20449601160261058
+ },
+ {
+ "name": "offenseval_tr",
+ "task": "text_classification",
+ "acc": 0.8027210884353742,
+ "acc_norm": 0.7996031746031746
+ },
+ {
+ "name": "mnli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.3128,
+ "acc_norm": 0.3443
+ },
+ {
+ "name": "snli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.3019,
+ "acc_norm": 0.3201
+ },
+ {
+ "name": "xnli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.41044176706827307,
+ "acc_norm": 0.41044176706827307
+ },
+ {
+ "name": "news_cat",
+ "task": "text_classification",
+ "acc": 0.4,
+ "acc_norm": 0.244
+ },
+ {
+ "name": "mkqa_tr",
+ "task": "extractive_question_answering",
+ "exact_match": 0.0,
+ "f1": 0.02283069752218492
+ },
+ {
+ "name": "ironytr",
+ "task": "text_classification",
+ "acc": 0.55,
+ "acc_norm": 0.6
+ },
+ {
+ "name": "exams_tr",
+ "task": "multiple_choice",
+ "acc": 0.30279898218829515,
+ "acc_norm": 0.3435114503816794
+ },
+ {
+ "name": "belebele_tr",
+ "task": "multiple_choice",
+ "acc": 0.7344444444444445,
+ "acc_norm": 0.7344444444444445
+ }
+ ]
+ }
results/zero-shot/Qwen2.5-7B.json ADDED
@@ -0,0 +1,172 @@
+ {
+ "model": {
+ "model": "Qwen/Qwen2.5-7B",
+ "api": "hf",
+ "architecture": "Qwen2ForCausalLM",
+ "dtype": "bfloat16",
+ "max_length": "131072",
+ "type": "pretrained",
+ "num_parameters": "7b"
+ },
+ "results": [
+ {
+ "name": "xlsum_tr",
+ "task": "summarization",
+ "rouge1": 0.2771480541706062,
+ "rouge2": 0.12265578549173298,
+ "rougeL": 0.21527848396268273
+ },
+ {
+ "name": "wmt-tr-en-prompt",
+ "task": "machine_translation",
+ "wer": 2.758041226669275,
+ "bleu": 0.05541606336453955
+ },
+ {
+ "name": "wiki_lingua_tr",
+ "task": "summarization",
+ "rouge1": 0.2142341169252447,
+ "rouge2": 0.069300770560285,
+ "rougeL": 0.15734638541997004
+ },
+ {
+ "name": "tr-wikihow-summ",
+ "task": "summarization",
+ "rouge1": 0.2205921941830655,
+ "rouge2": 0.06633246639608908,
+ "rougeL": 0.1577913671817974
+ },
+ {
+ "name": "mlsum_tr",
+ "task": "summarization",
+ "rouge1": 0.38481724984581955,
+ "rouge2": 0.25317842640354704,
+ "rougeL": 0.3226132671645973
+ },
+ {
+ "name": "gecturk_generation",
+ "task": "grammatical_error_correction",
+ "exact_match": 0.005537098560354375
+ },
+ {
+ "name": "xquad_tr",
+ "task": "extractive_question_answering",
+ "exact_match": 0.319327731092437,
+ "f1": 0.5120848322696311
+ },
+ {
+ "name": "xcopa_tr",
+ "task": "multiple_choice",
+ "acc": 0.598,
+ "acc_norm": 0.598
+ },
+ {
+ "name": "turkish_plu",
+ "task": "multiple_choice",
+ "acc": 0.48288,
+ "acc_norm": 0.53376
+ },
+ {
+ "name": "turkish_plu_goal_inference",
+ "task": "multiple_choice",
+ "acc": 0.4253285543608124,
+ "acc_norm": 0.44683393070489846
+ },
+ {
+ "name": "turkish_plu_next_event_prediction",
+ "task": "multiple_choice",
+ "acc": 0.4717557251908397,
+ "acc_norm": 0.549618320610687
+ },
+ {
+ "name": "turkish_plu_step_inference",
+ "task": "multiple_choice",
+ "acc": 0.32189542483660133,
+ "acc_norm": 0.46895424836601307
+ },
+ {
+ "name": "turkish_plu_step_ordering",
+ "task": "multiple_choice",
+ "acc": 0.633692458374143,
+ "acc_norm": 0.633692458374143
+ },
+ {
+ "name": "check_worthiness",
+ "task": "multiple_choice",
+ "acc": 0.399908592321755,
+ "acc_norm": 0.5361060329067642
+ },
+ {
+ "name": "relevance_judgment",
+ "task": "multiple_choice",
+ "acc": 0.4259597806215722,
+ "acc_norm": 0.4437842778793419
+ },
+ {
+ "name": "tquad",
+ "task": "extractive_question_answering",
+ "exact_match": 0.2802690582959641,
+ "f1": 0.5504499810832788
+ },
+ {
+ "name": "sts_tr",
+ "task": "text_classification",
+ "acc": 0.1696881798404641,
+ "acc_norm": 0.18201595358955766
+ },
+ {
+ "name": "offenseval_tr",
+ "task": "text_classification",
+ "acc": 0.7738095238095238,
+ "acc_norm": 0.7956349206349206
+ },
+ {
+ "name": "mnli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.3204,
+ "acc_norm": 0.3466
+ },
+ {
+ "name": "snli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.3236,
+ "acc_norm": 0.3272
+ },
+ {
+ "name": "xnli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.41847389558232934,
+ "acc_norm": 0.41847389558232934
+ },
+ {
+ "name": "news_cat",
+ "task": "text_classification",
+ "acc": 0.548,
+ "acc_norm": 0.336
+ },
+ {
+ "name": "mkqa_tr",
+ "task": "extractive_question_answering",
+ "exact_match": 0.013909440662918023,
+ "f1": 0.05323007126210795
+ },
+ {
+ "name": "ironytr",
+ "task": "text_classification",
+ "acc": 0.5733333333333334,
+ "acc_norm": 0.54
+ },
+ {
+ "name": "exams_tr",
+ "task": "multiple_choice",
+ "acc": 0.2951653944020356,
+ "acc_norm": 0.356234096692112
+ },
+ {
+ "name": "belebele_tr",
+ "task": "multiple_choice",
+ "acc": 0.7388888888888889,
+ "acc_norm": 0.7388888888888889
+ }
+ ]
+ }
results/zero-shot/aya-23-35B.json ADDED
@@ -0,0 +1,174 @@
+ {
+ "model": {
+ "dtype": "auto",
+ "parallelize": "True",
+ "device_map": "balanced",
+ "model": "CohereForAI/aya-23-35B",
+ "api": "hf",
+ "architecture": "CohereForCausalLM",
+ "dtype": "float16",
+ "type": "instruction-tuned",
+ "num_parameters": "35b"
+ },
+ "results": [
+ {
+ "name": "xquad_tr",
+ "task": "extractive_question_answering",
+ "exact_match": 0.3092436974789916,
+ "f1": 0.4925851410138433
+ },
+ {
+ "name": "xcopa_tr",
+ "task": "multiple_choice",
+ "acc": 0.604,
+ "acc_norm": 0.604
+ },
+ {
+ "name": "turkish_plu",
+ "task": "multiple_choice",
+ "acc": 0.48832,
+ "acc_norm": 0.51744
+ },
+ {
+ "name": "turkish_plu_goal_inference",
+ "task": "multiple_choice",
+ "acc": 0.43010752688172044,
+ "acc_norm": 0.40860215053763443
+ },
+ {
+ "name": "turkish_plu_next_event_prediction",
+ "task": "multiple_choice",
+ "acc": 0.5206106870229008,
+ "acc_norm": 0.5908396946564886
+ },
+ {
+ "name": "turkish_plu_step_inference",
+ "task": "multiple_choice",
+ "acc": 0.35130718954248363,
+ "acc_norm": 0.4542483660130719
+ },
+ {
+ "name": "turkish_plu_step_ordering",
+ "task": "multiple_choice",
+ "acc": 0.5974534769833496,
+ "acc_norm": 0.5974534769833496
+ },
+ {
+ "name": "check_worthiness",
+ "task": "multiple_choice",
+ "acc": 0.37614259597806216,
+ "acc_norm": 0.37614259597806216
+ },
+ {
+ "name": "relevance_judgment",
+ "task": "multiple_choice",
+ "acc": 0.46983546617915906,
+ "acc_norm": 0.5553016453382084
+ },
+ {
+ "name": "tquad",
+ "task": "extractive_question_answering",
+ "exact_match": 0.2062780269058296,
+ "f1": 0.4775440049958143
+ },
+ {
+ "name": "sts_tr",
+ "task": "text_classification",
+ "acc": 0.25380710659898476,
+ "acc_norm": 0.11965192168237854
+ },
+ {
+ "name": "offenseval_tr",
+ "task": "text_classification",
+ "acc": 0.2100340136054422,
+ "acc_norm": 0.23922902494331066
+ },
+ {
+ "name": "mnli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.2957,
+ "acc_norm": 0.3475
+ },
+ {
+ "name": "snli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.2881,
+ "acc_norm": 0.3364
+ },
+ {
+ "name": "xnli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.5060240963855421,
+ "acc_norm": 0.5060240963855421
+ },
+ {
+ "name": "news_cat",
+ "task": "text_classification",
+ "acc": 0.556,
+ "acc_norm": 0.356
+ },
+ {
+ "name": "mkqa_tr",
+ "task": "extractive_question_answering",
+ "exact_match": 0.19428825096182303,
+ "f1": 0.2786006074753464
+ },
+ {
+ "name": "ironytr",
+ "task": "text_classification",
+ "acc": 0.5016666666666667,
+ "acc_norm": 0.49166666666666664
+ },
+ {
+ "name": "exams_tr",
+ "task": "multiple_choice",
+ "acc": 0.29770992366412213,
+ "acc_norm": 0.3231552162849873
+ },
+ {
+ "name": "belebele_tr",
+ "task": "multiple_choice",
+ "acc": 0.7288888888888889,
+ "acc_norm": 0.7288888888888889
+ },
+ {
+ "name": "gecturk_generation",
+ "task": "grammatical_error_correction",
+ "exact_match": 0.009437141894169195
+ },
+ {
+ "name": "xlsum_tr",
+ "task": "summarization",
+ "rouge1": 0.26107330055626987,
+ "rouge2": 0.13324102083895656,
+ "rougeL": 0.214838816984684
+ },
+ {
+ "name": "wmt-tr-en-prompt",
+ "task": "machine_translation",
+ "wer": 0.7343050156418351,
+ "bleu": 0.18474168394967388
+ },
+ {
+ "name": "wiki_lingua_tr",
+ "task": "summarization",
+ "rouge1": 0.3521005179613347,
+ "rouge2": 0.1839412116950937,
+ "rougeL": 0.3093989984717051
+ },
+ {
+ "name": "tr-wikihow-summ",
+ "task": "summarization",
+ "rouge1": 0.2767758420029493,
+ "rouge2": 0.1271603930418029,
+ "rougeL": 0.23279989970428439
+ },
+ {
+ "name": "mlsum_tr",
+ "task": "summarization",
+ "rouge1": 0.3920969453077054,
+ "rouge2": 0.25937196554017156,
+ "rougeL": 0.33144850765201345
+ }
+ ]
+ }
results/zero-shot/aya-expanse-32b.json ADDED
@@ -0,0 +1,173 @@
+ {
+ "model": {
+ "parallelize": "True",
+ "device_map": "balanced",
+ "model": "CohereForAI/aya-expanse-32b",
+ "api": "hf",
+ "architecture": "CohereForCausalLM",
+ "dtype": "float16",
+ "type": "instruction-tuned",
+ "num_parameters": "32b"
+ },
+ "results": [
+ {
+ "name": "xquad_tr",
+ "task": "extractive_question_answering",
+ "exact_match": 0.319327731092437,
+ "f1": 0.5392716511089678
+ },
+ {
+ "name": "xcopa_tr",
+ "task": "multiple_choice",
+ "acc": 0.592,
+ "acc_norm": 0.592
+ },
+ {
+ "name": "turkish_plu",
+ "task": "multiple_choice",
+ "acc": 0.51808,
+ "acc_norm": 0.55616
+ },
+ {
+ "name": "turkish_plu_goal_inference",
+ "task": "multiple_choice",
+ "acc": 0.44802867383512546,
+ "acc_norm": 0.45639187574671447
+ },
+ {
+ "name": "turkish_plu_next_event_prediction",
+ "task": "multiple_choice",
+ "acc": 0.5511450381679389,
+ "acc_norm": 0.6106870229007634
+ },
+ {
+ "name": "turkish_plu_step_inference",
+ "task": "multiple_choice",
+ "acc": 0.39215686274509803,
+ "acc_norm": 0.511437908496732
+ },
+ {
+ "name": "turkish_plu_step_ordering",
+ "task": "multiple_choice",
+ "acc": 0.6297747306562194,
+ "acc_norm": 0.6297747306562194
+ },
+ {
+ "name": "check_worthiness",
+ "task": "multiple_choice",
+ "acc": 0.37751371115173676,
+ "acc_norm": 0.3793418647166362
+ },
+ {
+ "name": "relevance_judgment",
+ "task": "multiple_choice",
+ "acc": 0.6937842778793418,
+ "acc_norm": 0.7408592321755028
+ },
+ {
+ "name": "tquad",
+ "task": "extractive_question_answering",
+ "exact_match": 0.3015695067264574,
+ "f1": 0.5825292681833019
+ },
+ {
+ "name": "sts_tr",
+ "task": "text_classification",
+ "acc": 0.21464829586656997,
+ "acc_norm": 0.22987672226250908
+ },
+ {
+ "name": "offenseval_tr",
+ "task": "text_classification",
+ "acc": 0.6706349206349206,
+ "acc_norm": 0.7936507936507936
+ },
+ {
+ "name": "mnli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.2444,
+ "acc_norm": 0.3458
+ },
+ {
+ "name": "snli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.1896,
+ "acc_norm": 0.3355
+ },
+ {
+ "name": "xnli_tr",
+ "task": "natural_language_inference",
+ "acc": 0.5056224899598394,
+ "acc_norm": 0.5056224899598394
+ },
+ {
+ "name": "news_cat",
+ "task": "text_classification",
+ "acc": 0.828,
+ "acc_norm": 0.68
+ },
+ {
+ "name": "mkqa_tr",
+ "task": "extractive_question_answering",
+ "exact_match": 0.16424977804084048,
+ "f1": 0.25720974268367947
+ },
+ {
+ "name": "ironytr",
+ "task": "text_classification",
+ "acc": 0.5,
+ "acc_norm": 0.5566666666666666
+ },
+ {
+ "name": "exams_tr",
+ "task": "multiple_choice",
+ "acc": 0.36895674300254455,
+ "acc_norm": 0.39185750636132316
+ },
+ {
+ "name": "belebele_tr",
+ "task": "multiple_choice",
+ "acc": 0.8344444444444444,
+ "acc_norm": 0.8344444444444444
+ },
+ {
+ "name": "xlsum_tr",
+ "task": "summarization",
+ "rouge1": 0.36837282882318917,
+ "rouge2": 0.2144125271579892,
+ "rougeL": 0.3114288520291558
+ },
+ {
+ "name": "wmt-tr-en-prompt",
+ "task": "machine_translation",
+ "wer": 0.721465283605015,
+ "bleu": 0.2010197464685068
+ },
+ {
+ "name": "wiki_lingua_tr",
+ "task": "summarization",
+ "rouge1": 0.3958195144552331,
+ "rouge2": 0.2145940709808375,
+ "rougeL": 0.34455596224977914
+ },
+ {
+ "name": "tr-wikihow-summ",
+ "task": "summarization",
+ "rouge1": 0.34294866079774666,
+ "rouge2": 0.16631660541703744,
+ "rougeL": 0.2853574006828194
+ },
+ {
+ "name": "mlsum_tr",
+ "task": "summarization",
+ "rouge1": 0.43617843344099383,
+ "rouge2": 0.301267876751885,
+ "rougeL": 0.3690068926127347
+ },
+ {
+ "name": "gecturk_generation",
+ "task": "grammatical_error_correction",
+ "exact_match": 0
+ }
+ ]
+ }