Upload 18 files
Add new models to leaderboard for zero-shot experiments
- results/zero-shot/CerebrumTech__cere-llama-3-8b-tr.json +173 -0
- results/zero-shot/Llama-3.3-70B-Instruct.json +168 -0
- results/zero-shot/Ministral-8B-Instruct.json +172 -0
- results/zero-shot/Mistral-7B-Instruct-v0.3.json +172 -0
- results/zero-shot/Mistral-7B-v0.3.json +172 -0
- results/zero-shot/Mixtral-8x7B-Instruct-v0.1.json +174 -0
- results/zero-shot/Qwen2.5-0.5B-Instruct.json +172 -0
- results/zero-shot/Qwen2.5-0.5B.json +172 -0
- results/zero-shot/Qwen2.5-1.5B-Instruct.json +172 -0
- results/zero-shot/Qwen2.5-1.5B.json +172 -0
- results/zero-shot/Qwen2.5-14B-Instruct.json +174 -0
- results/zero-shot/Qwen2.5-14B.json +174 -0
- results/zero-shot/Qwen2.5-3B-Instruct.json +172 -0
- results/zero-shot/Qwen2.5-3B.json +172 -0
- results/zero-shot/Qwen2.5-7B-Instruct.json +172 -0
- results/zero-shot/Qwen2.5-7B.json +172 -0
- results/zero-shot/aya-23-35B.json +174 -0
- results/zero-shot/aya-expanse-32b.json +173 -0
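All 18 files share the same schema: a "model" block with the Hugging Face model ID and run configuration, and a "results" list with per-dataset metrics. The sketch below is illustrative only; the directory layout is taken from the paths above, but the helper name and the flattening logic are assumptions, not part of the leaderboard code.

import json
from pathlib import Path

# Minimal sketch (assumed helper, not repository code): flatten every zero-shot
# result file into (model, dataset, task, metric, value) rows for a leaderboard table.
def load_zero_shot_results(results_dir="results/zero-shot"):
    rows = []
    for path in sorted(Path(results_dir).glob("*.json")):
        data = json.loads(path.read_text())
        model_name = data["model"]["model"]  # e.g. "CerebrumTech/cere-llama-3-8b-tr"
        for entry in data["results"]:
            for metric, value in entry.items():
                if metric in ("name", "task"):
                    continue  # keep only the numeric metric fields
                rows.append({
                    "model": model_name,
                    "dataset": entry["name"],
                    "task": entry["task"],
                    "metric": metric,
                    "value": value,
                })
    return rows

if __name__ == "__main__":
    print(len(load_zero_shot_results()), "metric values loaded")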
results/zero-shot/CerebrumTech__cere-llama-3-8b-tr.json
ADDED
@@ -0,0 +1,173 @@
{
  "model": {
    "dtype": "auto",
    "parallelize": "True",
    "device_map": "balanced",
    "model": "CerebrumTech/cere-llama-3-8b-tr",
    "api": "hf",
    "architecture": "LlamaForCausalLM",
    "type": "instruction-tuned",
    "num_parameters": "8b"
  },
  "results": [
    {"name": "xlsum_tr", "task": "summarization", "rouge1": 0.0148042679964553, "rouge2": 0.006337012269480576, "rougeL": 0.011798434065329946},
    {"name": "wmt-tr-en-prompt", "task": "machine_translation", "wer": 0.9177312951556903, "bleu": 0.0010336244771491927},
    {"name": "wiki_lingua_tr", "task": "summarization", "rouge1": 0.07039446024412022, "rouge2": 0.02125810875804613, "rougeL": 0.05181069185122056},
    {"name": "tr-wikihow-summ", "task": "summarization", "rouge1": 0.17657152314685107, "rouge2": 0.05191014365298107, "rougeL": 0.1243539526593285},
    {"name": "mlsum_tr", "task": "summarization", "rouge1": 0.09056852256508315, "rouge2": 0.05971047138214301, "rougeL": 0.07758457056947823},
    {"name": "gecturk_generation", "task": "grammatical_error_correction", "exact_match": 0.013385333911117531},
    {"name": "xquad_tr", "task": "extractive_question_answering", "exact_match": 0.21176470588235294, "f1": 0.4427003624698854},
    {"name": "xcopa_tr", "task": "multiple_choice", "acc": 0.602, "acc_norm": 0.602},
    {"name": "turkish_plu", "task": "multiple_choice", "acc": 0.48672, "acc_norm": 0.53664},
    {"name": "turkish_plu_goal_inference", "task": "multiple_choice", "acc": 0.41816009557945044, "acc_norm": 0.42771804062126645},
    {"name": "turkish_plu_next_event_prediction", "task": "multiple_choice", "acc": 0.4687022900763359, "acc_norm": 0.5572519083969466},
    {"name": "turkish_plu_step_inference", "task": "multiple_choice", "acc": 0.35947712418300654, "acc_norm": 0.5065359477124183},
    {"name": "turkish_plu_step_ordering", "task": "multiple_choice", "acc": 0.6307541625857003, "acc_norm": 0.6307541625857003},
    {"name": "check_worthiness", "task": "multiple_choice", "acc": 0.3756855575868373, "acc_norm": 0.37705667276051186},
    {"name": "relevance_judgment", "task": "multiple_choice", "acc": 0.4725776965265082, "acc_norm": 0.5425045703839122},
    {"name": "tquad", "task": "extractive_question_answering", "exact_match": 0.492152466367713, "f1": 0.7031663569609045},
    {"name": "sts_tr", "task": "text_classification", "acc": 0.22117476432197244, "acc_norm": 0.20087019579405366},
    {"name": "offenseval_tr", "task": "text_classification", "acc": 0.3401360544217687, "acc_norm": 0.6964285714285714},
    {"name": "mnli_tr", "task": "natural_language_inference", "acc": 0.3208, "acc_norm": 0.3151},
    {"name": "snli_tr", "task": "natural_language_inference", "acc": 0.3238, "acc_norm": 0.3203},
    {"name": "xnli_tr", "task": "natural_language_inference", "acc": 0.3339321357285429, "acc_norm": 0.32934131736526945},
    {"name": "news_cat", "task": "text_classification", "acc": 0.684, "acc_norm": 0.656},
    {"name": "mkqa_tr", "task": "extractive_question_answering", "exact_match": 0.02219591595146493, "f1": 0.08533792503078427},
    {"name": "ironytr", "task": "text_classification", "acc": 0.5016666666666667, "acc_norm": 0.5483333333333333},
    {"name": "exams_tr", "task": "multiple_choice", "acc": 0.27989821882951654, "acc_norm": 0.3231552162849873},
    {"name": "belebele_tr", "task": "multiple_choice", "acc": 0.5144444444444445, "acc_norm": 0.5144444444444445}
  ]
}
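The file above gives the full per-task breakdown for one model. If a single headline number per model is wanted, one possible aggregation (an assumption for illustration, not the leaderboard's documented scoring rule) is to average one primary metric per result entry; the metric choices in PRIMARY_METRIC below are likewise assumptions.

import json

# Illustrative aggregation (assumed, not the leaderboard's scoring): average one
# primary metric per result entry, keyed by task type.
PRIMARY_METRIC = {
    "summarization": "rougeL",
    "machine_translation": "bleu",
    "extractive_question_answering": "f1",
    "grammatical_error_correction": "exact_match",
    "multiple_choice": "acc",
    "text_classification": "acc",
    "natural_language_inference": "acc",
}

def aggregate_score(path):
    with open(path) as f:
        data = json.load(f)
    values = [entry[PRIMARY_METRIC[entry["task"]]] for entry in data["results"]]
    return sum(values) / len(values)

# Example with the file above (path as listed in this commit):
# print(aggregate_score("results/zero-shot/CerebrumTech__cere-llama-3-8b-tr.json"))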
results/zero-shot/Llama-3.3-70B-Instruct.json
ADDED
@@ -0,0 +1,168 @@
{
  "model": {
    "pretrained": "meta-llama/Llama-3.3-70B-Instruct",
    "dtype": "bfloat16",
    "parallelize": "True",
    "device_map": "balanced",
    "model": "meta-llama/Llama-3.3-70B-Instruct",
    "api": "hf",
    "max_length": "131072",
    "type": "instruction-tuned",
    "num_parameters": "70b",
    "architecture": "LlamaForCausalLM"
  },
  "results": [
    {"name": "xquad_tr", "task": "extractive_question_answering", "exact_match": 0.1453781512605042, "f1": 0.4189643669994899},
    {"name": "xcopa_tr", "task": "multiple_choice", "acc": 0.65, "acc_norm": 0.65},
    {"name": "turkish_plu", "task": "multiple_choice", "acc": 0.5424, "acc_norm": 0.58528},
    {"name": "turkish_plu_goal_inference", "task": "multiple_choice", "acc": 0.4910394265232975, "acc_norm": 0.5197132616487455},
    {"name": "turkish_plu_next_event_prediction", "task": "multiple_choice", "acc": 0.5801526717557252, "acc_norm": 0.6305343511450382},
    {"name": "turkish_plu_step_inference", "task": "multiple_choice", "acc": 0.39052287581699346, "acc_norm": 0.5163398692810458},
    {"name": "turkish_plu_step_ordering", "task": "multiple_choice", "acc": 0.6513222331047992, "acc_norm": 0.6513222331047992},
    {"name": "check_worthiness", "task": "multiple_choice", "acc": 0.45521023765996343, "acc_norm": 0.5018281535648994},
    {"name": "relevance_judgment", "task": "multiple_choice", "acc": 0.42230347349177333, "acc_norm": 0.42230347349177333},
    {"name": "tquad", "task": "extractive_question_answering", "exact_match": 0.17376681614349773, "f1": 0.5089287967649171},
    {"name": "sts_tr", "task": "text_classification", "acc": 0.12907904278462654, "acc_norm": 0.1406816533720087},
    {"name": "offenseval_tr", "task": "text_classification", "acc": 0.8313492063492064, "acc_norm": 0.8214285714285714},
    {"name": "mnli_tr", "task": "natural_language_inference", "acc": 0.348, "acc_norm": 0.3479},
    {"name": "snli_tr", "task": "natural_language_inference", "acc": 0.3381, "acc_norm": 0.337},
    {"name": "xnli_tr", "task": "natural_language_inference", "acc": 0.46947791164658637, "acc_norm": 0.46947791164658637},
    {"name": "news_cat", "task": "text_classification", "acc": 0.78, "acc_norm": 0.56},
    {"name": "mkqa_tr", "task": "extractive_question_answering", "exact_match": 0.1633619414027819, "f1": 0.24259971658697452},
    {"name": "ironytr", "task": "text_classification", "acc": 0.5816666666666667, "acc_norm": 0.6366666666666667},
    {"name": "exams_tr", "task": "multiple_choice", "acc": 0.39185750636132316, "acc_norm": 0.4071246819338422},
    {"name": "belebele_tr", "task": "multiple_choice", "acc": 0.8677777777777778, "acc_norm": 0.8677777777777778},
    {"name": "wmt-tr-en-prompt", "task": "machine_translation", "wer": 1.0598921957580294, "bleu": 0.1362810237287205},
    {"name": "wiki_lingua_tr", "task": "summarization", "rouge1": 0.2814281701473272, "rouge2": 0.11996704827558094, "rougeL": 0.22703795465582283},
    {"name": "xlsum_tr", "task": "summarization", "rouge1": 0.32304395528585916, "rouge2": 0.16251841619434318, "rougeL": 0.25187368390587817},
    {"name": "tr-wikihow-summ", "task": "summarization", "rouge1": 0.23052711078813495, "rouge2": 0.08492969364417007, "rougeL": 0.16696416806934444},
    {"name": "gecturk_generation", "task": "grammatical_error_correction", "exact_match": 0.0052482064615532766}
  ]
}
results/zero-shot/Ministral-8B-Instruct.json
ADDED
@@ -0,0 +1,172 @@
{
  "model": {
    "model": "mistralai/Ministral-8B-Instruct-2410",
    "api": "hf",
    "architecture": "MistralForCausalLM",
    "dtype": "bfloat16",
    "max_length": "32768",
    "type": "instruction-tuned",
    "num_parameters": "8b"
  },
  "results": [
    {"name": "xquad_tr", "task": "extractive_question_answering", "exact_match": 0.2285714285714286, "f1": 0.4361183332526061},
    {"name": "xcopa_tr", "task": "multiple_choice", "acc": 0.574, "acc_norm": 0.574},
    {"name": "turkish_plu", "task": "multiple_choice", "acc": 0.45344, "acc_norm": 0.50752},
    {"name": "turkish_plu_goal_inference", "task": "multiple_choice", "acc": 0.3751493428912784, "acc_norm": 0.4050179211469534},
    {"name": "turkish_plu_next_event_prediction", "task": "multiple_choice", "acc": 0.44122137404580153, "acc_norm": 0.5267175572519084},
    {"name": "turkish_plu_step_inference", "task": "multiple_choice", "acc": 0.31862745098039214, "acc_norm": 0.4624183006535948},
    {"name": "turkish_plu_step_ordering", "task": "multiple_choice", "acc": 0.6062683643486778, "acc_norm": 0.6062683643486778},
    {"name": "check_worthiness", "task": "multiple_choice", "acc": 0.3756855575868373, "acc_norm": 0.39716636197440586},
    {"name": "relevance_judgment", "task": "multiple_choice", "acc": 0.4218464351005484, "acc_norm": 0.42321755027422303},
    {"name": "tquad", "task": "extractive_question_answering", "exact_match": 0.17600896860986548, "f1": 0.476865826654479},
    {"name": "sts_tr", "task": "text_classification", "acc": 0.2139231327048586, "acc_norm": 0.2189992748368383},
    {"name": "offenseval_tr", "task": "text_classification", "acc": 0.20294784580498867, "acc_norm": 0.2032312925170068},
    {"name": "mnli_tr", "task": "natural_language_inference", "acc": 0.3215, "acc_norm": 0.3297},
    {"name": "snli_tr", "task": "natural_language_inference", "acc": 0.3233, "acc_norm": 0.247},
    {"name": "xnli_tr", "task": "natural_language_inference", "acc": 0.43815261044176707, "acc_norm": 0.43815261044176707},
    {"name": "news_cat", "task": "text_classification", "acc": 0.604, "acc_norm": 0.54},
    {"name": "mkqa_tr", "task": "extractive_question_answering", "exact_match": 0.021899970405445397, "f1": 0.0845409956587106},
    {"name": "ironytr", "task": "text_classification", "acc": 0.5, "acc_norm": 0.5},
    {"name": "exams_tr", "task": "multiple_choice", "acc": 0.31297709923664124, "acc_norm": 0.3104325699745547},
    {"name": "belebele_tr", "task": "multiple_choice", "acc": 0.6088888888888889, "acc_norm": 0.6088888888888889},
    {"name": "xlsum_tr", "task": "summarization", "rouge1": 0.2855519135003678, "rouge2": 0.13803314536720374, "rougeL": 0.23701549520837864},
    {"name": "wmt-tr-en-prompt", "task": "machine_translation", "wer": 0.8606226070325712, "bleu": 0.1123514947775922},
    {"name": "wiki_lingua_tr", "task": "summarization", "rouge1": 0.2056155296354833, "rouge2": 0.0689798652163523, "rougeL": 0.15747045045694055},
    {"name": "tr-wikihow-summ", "task": "summarization", "rouge1": 0.21626530316392872, "rouge2": 0.06686924197911567, "rougeL": 0.1590961585989622},
    {"name": "mlsum_tr", "task": "summarization", "rouge1": 0.37278237032977407, "rouge2": 0.2397900252623464, "rougeL": 0.31375168224626315},
    {"name": "gecturk_generation", "task": "grammatical_error_correction", "exact_match": 0.009003803745967548}
  ]
}
results/zero-shot/Mistral-7B-Instruct-v0.3.json
ADDED
@@ -0,0 +1,172 @@
{
  "model": {
    "model": "mistralai/Mistral-7B-Instruct-v0.3",
    "api": "hf",
    "architecture": "MistralForCausalLM",
    "dtype": "bfloat16",
    "max_length": "32768",
    "type": "instruction-tuned",
    "num_parameters": "7b"
  },
  "results": [
    {"name": "xquad_tr", "task": "extractive_question_answering", "exact_match": 0.1142857142857143, "f1": 0.3231327219793003},
    {"name": "xcopa_tr", "task": "multiple_choice", "acc": 0.572, "acc_norm": 0.572},
    {"name": "turkish_plu", "task": "multiple_choice", "acc": 0.42976, "acc_norm": 0.48},
    {"name": "turkish_plu_goal_inference", "task": "multiple_choice", "acc": 0.4121863799283154, "acc_norm": 0.43966547192353644},
    {"name": "turkish_plu_next_event_prediction", "task": "multiple_choice", "acc": 0.40916030534351144, "acc_norm": 0.47480916030534354},
    {"name": "turkish_plu_step_inference", "task": "multiple_choice", "acc": 0.2696078431372549, "acc_norm": 0.41830065359477125},
    {"name": "turkish_plu_step_ordering", "task": "multiple_choice", "acc": 0.5533790401567091, "acc_norm": 0.5533790401567091},
    {"name": "check_worthiness", "task": "multiple_choice", "acc": 0.37751371115173676, "acc_norm": 0.3788848263254113},
    {"name": "relevance_judgment", "task": "multiple_choice", "acc": 0.4547531992687386, "acc_norm": 0.5342778793418648},
    {"name": "tquad", "task": "extractive_question_answering", "exact_match": 0.0952914798206278, "f1": 0.4079551297911521},
    {"name": "sts_tr", "task": "text_classification", "acc": 0.12907904278462654, "acc_norm": 0.17041334300217548},
    {"name": "offenseval_tr", "task": "text_classification", "acc": 0.45209750566893425, "acc_norm": 0.7831632653061225},
    {"name": "mnli_tr", "task": "natural_language_inference", "acc": 0.3, "acc_norm": 0.3128},
    {"name": "snli_tr", "task": "natural_language_inference", "acc": 0.3227, "acc_norm": 0.323},
    {"name": "xnli_tr", "task": "natural_language_inference", "acc": 0.42650602409638555, "acc_norm": 0.42650602409638555},
    {"name": "news_cat", "task": "text_classification", "acc": 0.612, "acc_norm": 0.452},
    {"name": "mkqa_tr", "task": "extractive_question_answering", "exact_match": 0.050162770050310744, "f1": 0.1016139540064362},
    {"name": "ironytr", "task": "text_classification", "acc": 0.5066666666666667, "acc_norm": 0.5983333333333334},
    {"name": "exams_tr", "task": "multiple_choice", "acc": 0.2544529262086514, "acc_norm": 0.3231552162849873},
    {"name": "belebele_tr", "task": "multiple_choice", "acc": 0.46111111111111114, "acc_norm": 0.46111111111111114},
    {"name": "xlsum_tr", "task": "summarization", "rouge1": 0.25708723026741176, "rouge2": 0.10899686780471457, "rougeL": 0.2008610051989006},
    {"name": "wmt-tr-en-prompt", "task": "machine_translation", "wer": 0.9249121578209252, "bleu": 0.05933138324020342},
    {"name": "wiki_lingua_tr", "task": "summarization", "rouge1": 0.19938444086541007, "rouge2": 0.06226393773142071, "rougeL": 0.1489360974379546},
    {"name": "tr-wikihow-summ", "task": "summarization", "rouge1": 0.1147060386471999, "rouge2": 0.036767323776569986, "rougeL": 0.08789125135731646},
    {"name": "mlsum_tr", "task": "summarization", "rouge1": 0.370968688296404, "rouge2": 0.24117168042677828, "rougeL": 0.3151307659048477},
    {"name": "gecturk_generation", "task": "grammatical_error_correction", "exact_match": 0.009677885309836777}
  ]
}
results/zero-shot/Mistral-7B-v0.3.json
ADDED
@@ -0,0 +1,172 @@
{
  "model": {
    "model": "mistralai/Mistral-7B-v0.3",
    "api": "hf",
    "architecture": "MistralForCausalLM",
    "dtype": "bfloat16",
    "max_length": "32768",
    "type": "pretrained",
    "num_parameters": "7b"
  },
  "results": [
    {"name": "xlsum_tr", "task": "summarization", "rouge1": 0.149790294242554, "rouge2": 0.06308205974922562, "rougeL": 0.1209340855634673},
    {"name": "wmt-tr-en-prompt", "task": "machine_translation", "wer": 1.164558847576675, "bleu": 0.03750412480486115},
    {"name": "wiki_lingua_tr", "task": "summarization", "rouge1": 0.18942128157251567, "rouge2": 0.05970997875583092, "rougeL": 0.14635645315684281},
    {"name": "tr-wikihow-summ", "task": "summarization", "rouge1": 0.05788766682352111, "rouge2": 0.01861117631687374, "rougeL": 0.044379969364608036},
    {"name": "mlsum_tr", "task": "summarization", "rouge1": 0.26517424879332546, "rouge2": 0.17304768736710063, "rougeL": 0.230212437287503},
    {"name": "gecturk_generation", "task": "grammatical_error_correction", "exact_match": 0.23511001974096007},
    {"name": "xquad_tr", "task": "extractive_question_answering", "exact_match": 0.17058823529411765, "f1": 0.3376787884560269},
    {"name": "xcopa_tr", "task": "multiple_choice", "acc": 0.584, "acc_norm": 0.584},
    {"name": "turkish_plu", "task": "multiple_choice", "acc": 0.4336, "acc_norm": 0.50048},
    {"name": "turkish_plu_goal_inference", "task": "multiple_choice", "acc": 0.40979689366786143, "acc_norm": 0.45639187574671447},
    {"name": "turkish_plu_next_event_prediction", "task": "multiple_choice", "acc": 0.38625954198473283, "acc_norm": 0.4946564885496183},
    {"name": "turkish_plu_step_inference", "task": "multiple_choice", "acc": 0.26633986928104575, "acc_norm": 0.42810457516339867},
    {"name": "turkish_plu_step_ordering", "task": "multiple_choice", "acc": 0.5837414299706171, "acc_norm": 0.5837414299706171},
    {"name": "check_worthiness", "task": "multiple_choice", "acc": 0.37614259597806216, "acc_norm": 0.4789762340036563},
    {"name": "relevance_judgment", "task": "multiple_choice", "acc": 0.4218464351005484, "acc_norm": 0.42138939670932357},
    {"name": "tquad", "task": "extractive_question_answering", "exact_match": 0.218609865470852, "f1": 0.49261818596816426},
    {"name": "sts_tr", "task": "text_classification", "acc": 0.14213197969543148, "acc_norm": 0.19796954314720813},
    {"name": "offenseval_tr", "task": "text_classification", "acc": 0.20691609977324263, "acc_norm": 0.45691609977324266},
    {"name": "mnli_tr", "task": "natural_language_inference", "acc": 0.3233, "acc_norm": 0.3227},
    {"name": "snli_tr", "task": "natural_language_inference", "acc": 0.3208, "acc_norm": 0.317},
    {"name": "xnli_tr", "task": "natural_language_inference", "acc": 0.41365461847389556, "acc_norm": 0.41365461847389556},
    {"name": "news_cat", "task": "text_classification", "acc": 0.66, "acc_norm": 0.448},
    {"name": "mkqa_tr", "task": "extractive_question_answering", "exact_match": 0.11453092630955904, "f1": 0.15435166430563946},
    {"name": "ironytr", "task": "text_classification", "acc": 0.49833333333333335, "acc_norm": 0.52},
    {"name": "exams_tr", "task": "multiple_choice", "acc": 0.24173027989821882, "acc_norm": 0.30279898218829515},
    {"name": "belebele_tr", "task": "multiple_choice", "acc": 0.4111111111111111, "acc_norm": 0.4111111111111111}
  ]
}
results/zero-shot/Mixtral-8x7B-Instruct-v0.1.json
ADDED
@@ -0,0 +1,174 @@
{
  "model": {
    "dtype": "auto",
    "parallelize": "True",
    "device_map": "balanced",
    "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "api": "hf",
    "architecture": "MixtralForCausalLM",
    "dtype": "bfloat16",
    "type": "instruction-tuned",
    "num_parameters": "46b"
  },
  "results": [
    {"name": "xquad_tr", "task": "extractive_question_answering", "exact_match": 0.10672268907563025, "f1": 0.31503337329539344},
    {"name": "xcopa_tr", "task": "multiple_choice", "acc": 0.564, "acc_norm": 0.564},
    {"name": "turkish_plu", "task": "multiple_choice", "acc": 0.47104, "acc_norm": 0.52544},
    {"name": "turkish_plu_goal_inference", "task": "multiple_choice", "acc": 0.44563918757467147, "acc_norm": 0.45758661887694146},
    {"name": "turkish_plu_next_event_prediction", "task": "multiple_choice", "acc": 0.46106870229007635, "acc_norm": 0.5694656488549619},
    {"name": "turkish_plu_step_inference", "task": "multiple_choice", "acc": 0.31699346405228757, "acc_norm": 0.4624183006535948},
    {"name": "turkish_plu_step_ordering", "task": "multiple_choice", "acc": 0.5905974534769833, "acc_norm": 0.5905974534769833},
    {"name": "check_worthiness", "task": "multiple_choice", "acc": 0.37522851919561245, "acc_norm": 0.3711151736745887},
    {"name": "relevance_judgment", "task": "multiple_choice", "acc": 0.4287020109689214, "acc_norm": 0.4890310786106033},
    {"name": "tquad", "task": "extractive_question_answering", "exact_match": 0.09753363228699552, "f1": 0.4107003915145391},
    {"name": "sts_tr", "task": "text_classification", "acc": 0.12980420594633793, "acc_norm": 0.18564176939811458},
    {"name": "offenseval_tr", "task": "text_classification", "acc": 0.6292517006802721, "acc_norm": 0.7859977324263039},
    {"name": "mnli_tr", "task": "natural_language_inference", "acc": 0.2757, "acc_norm": 0.3115},
    {"name": "snli_tr", "task": "natural_language_inference", "acc": 0.3078, "acc_norm": 0.3217},
    {"name": "xnli_tr", "task": "natural_language_inference", "acc": 0.4506024096385542, "acc_norm": 0.4506024096385542},
    {"name": "news_cat", "task": "text_classification", "acc": 0.54, "acc_norm": 0.34},
    {"name": "mkqa_tr", "task": "extractive_question_answering", "exact_match": 0.12518496596626222, "f1": 0.18805772867641507},
    {"name": "ironytr", "task": "text_classification", "acc": 0.525, "acc_norm": 0.6683333333333333},
    {"name": "exams_tr", "task": "multiple_choice", "acc": 0.2926208651399491, "acc_norm": 0.3460559796437659},
    {"name": "belebele_tr", "task": "multiple_choice", "acc": 0.5855555555555556, "acc_norm": 0.5855555555555556},
    {"name": "xlsum_tr", "task": "summarization", "rouge1": 0.2431491335984058, "rouge2": 0.10574384463000014, "rougeL": 0.19188752602582665},
    {"name": "wmt-tr-en-prompt", "task": "machine_translation", "wer": 0.873496978572932, "bleu": 0.09482204368236244},
    {"name": "wiki_lingua_tr", "task": "summarization", "rouge1": 0.1822644758431921, "rouge2": 0.05997572295534047, "rougeL": 0.14060742524010394},
    {"name": "tr-wikihow-summ", "task": "summarization", "rouge1": 0.1586518753843558, "rouge2": 0.04879306307120871, "rougeL": 0.1191709081457354},
    {"name": "mlsum_tr", "task": "summarization", "rouge1": 0.34741912725252516, "rouge2": 0.22190156804649477, "rougeL": 0.2915726415448087},
    {"name": "gecturk_generation", "task": "grammatical_error_correction", "exact_match": 0.036255958399537776}
  ]
}
results/zero-shot/Qwen2.5-0.5B-Instruct.json
ADDED
@@ -0,0 +1,172 @@
{
  "model": {
    "model": "Qwen/Qwen2.5-0.5B-Instruct",
    "api": "hf",
    "architecture": "Qwen2ForCausalLM",
    "dtype": "bfloat16",
    "max_length": "131072",
    "type": "instruction-tuned",
    "num_parameters": "0.5b"
  },
  "results": [
    {"name": "xlsum_tr", "task": "summarization", "rouge1": 0.23863563451736716, "rouge2": 0.09013532339992156, "rougeL": 0.17413374740786924},
    {"name": "wmt-tr-en-prompt", "task": "machine_translation", "wer": 1.2982149372648228, "bleu": 0.03132059223101698},
    {"name": "wiki_lingua_tr", "task": "summarization", "rouge1": 0.17874342388633518, "rouge2": 0.05016075064860983, "rougeL": 0.13196976287126827},
    {"name": "tr-wikihow-summ", "task": "summarization", "rouge1": 0.20346847563598916, "rouge2": 0.057692528559452054, "rougeL": 0.14650500990126503},
    {"name": "mlsum_tr", "task": "summarization", "rouge1": 0.3611886967384703, "rouge2": 0.22895911125049848, "rougeL": 0.2970046611327582},
    {"name": "gecturk_generation", "task": "grammatical_error_correction", "exact_match": 0.0005296355144686793},
    {"name": "xquad_tr", "task": "extractive_question_answering", "exact_match": 0.13361344537815126, "f1": 0.24680114628123545},
    {"name": "xcopa_tr", "task": "multiple_choice", "acc": 0.536, "acc_norm": 0.536},
    {"name": "turkish_plu", "task": "multiple_choice", "acc": 0.41568, "acc_norm": 0.45696},
    {"name": "turkish_plu_goal_inference", "task": "multiple_choice", "acc": 0.36798088410991636, "acc_norm": 0.36678614097968937},
    {"name": "turkish_plu_next_event_prediction", "task": "multiple_choice", "acc": 0.35572519083969467, "acc_norm": 0.44122137404580153},
    {"name": "turkish_plu_step_inference", "task": "multiple_choice", "acc": 0.28104575163398693, "acc_norm": 0.4019607843137255},
    {"name": "turkish_plu_step_ordering", "task": "multiple_choice", "acc": 0.5739471106758081, "acc_norm": 0.5739471106758081},
    {"name": "check_worthiness", "task": "multiple_choice", "acc": 0.3916819012797075, "acc_norm": 0.6229433272394881},
    {"name": "relevance_judgment", "task": "multiple_choice", "acc": 0.4218464351005484, "acc_norm": 0.43007312614259596},
    {"name": "tquad", "task": "extractive_question_answering", "exact_match": 0.1625560538116592, "f1": 0.3002481362714293},
    {"name": "sts_tr", "task": "text_classification", "acc": 0.1305293691080493, "acc_norm": 0.1986947063089195},
    {"name": "offenseval_tr", "task": "text_classification", "acc": 0.21428571428571427, "acc_norm": 0.41950113378684806},
    {"name": "mnli_tr", "task": "natural_language_inference", "acc": 0.3211, "acc_norm": 0.3212},
    {"name": "snli_tr", "task": "natural_language_inference", "acc": 0.3239, "acc_norm": 0.3237},
    {"name": "xnli_tr", "task": "natural_language_inference", "acc": 0.36626506024096384, "acc_norm": 0.36626506024096384},
    {"name": "news_cat", "task": "text_classification", "acc": 0.292, "acc_norm": 0.272},
    {"name": "mkqa_tr", "task": "extractive_question_answering", "exact_match": 0.007102693104468778, "f1": 0.019193813490945396},
    {"name": "ironytr", "task": "text_classification", "acc": 0.47333333333333333, "acc_norm": 0.49333333333333335},
    {"name": "exams_tr", "task": "multiple_choice", "acc": 0.2366412213740458, "acc_norm": 0.26208651399491095},
    {"name": "belebele_tr", "task": "multiple_choice", "acc": 0.3, "acc_norm": 0.3}
  ]
}
results/zero-shot/Qwen2.5-0.5B.json
ADDED
@@ -0,0 +1,172 @@
{
  "model": {
    "model": "Qwen/Qwen2.5-0.5B",
    "api": "hf",
    "architecture": "Qwen2ForCausalLM",
    "dtype": "bfloat16",
    "max_length": "131072",
    "type": "pretrained",
    "num_parameters": "0.5b"
  },
  "results": [
    {"name": "xlsum_tr", "task": "summarization", "rouge1": 0.24486744839512292, "rouge2": 0.09223336406082884, "rougeL": 0.17919357452932383},
    {"name": "wmt-tr-en-prompt", "task": "machine_translation", "wer": 2.5245853761833037, "bleu": 0.014391606379183295},
    {"name": "wiki_lingua_tr", "task": "summarization", "rouge1": 0.19235726612644397, "rouge2": 0.05618807633412984, "rougeL": 0.138761647221388},
    {"name": "tr-wikihow-summ", "task": "summarization", "rouge1": 0.21150588695760153, "rouge2": 0.060555129464851025, "rougeL": 0.15026722717354687},
    {"name": "mlsum_tr", "task": "summarization", "rouge1": 0.37608183260351713, "rouge2": 0.24317811162325445, "rougeL": 0.3109581125782851},
    {"name": "gecturk_generation", "task": "grammatical_error_correction", "exact_match": 0.008859357696566999},
    {"name": "xquad_tr", "task": "extractive_question_answering", "exact_match": 0.040336134453781515, "f1": 0.1415668185953022},
    {"name": "xcopa_tr", "task": "multiple_choice", "acc": 0.548, "acc_norm": 0.548},
    {"name": "turkish_plu", "task": "multiple_choice", "acc": 0.40832, "acc_norm": 0.45184},
    {"name": "turkish_plu_goal_inference", "task": "multiple_choice", "acc": 0.36200716845878134, "acc_norm": 0.3536439665471924},
    {"name": "turkish_plu_next_event_prediction", "task": "multiple_choice", "acc": 0.35725190839694654, "acc_norm": 0.4305343511450382},
    {"name": "turkish_plu_step_inference", "task": "multiple_choice", "acc": 0.2647058823529412, "acc_norm": 0.4199346405228758},
    {"name": "turkish_plu_step_ordering", "task": "multiple_choice", "acc": 0.56513222331048, "acc_norm": 0.56513222331048},
    {"name": "check_worthiness", "task": "multiple_choice", "acc": 0.620201096892139, "acc_norm": 0.6220292504570384},
    {"name": "relevance_judgment", "task": "multiple_choice", "acc": 0.5521023765996343, "acc_norm": 0.5763254113345521},
    {"name": "tquad", "task": "extractive_question_answering", "exact_match": 0.07174887892376682, "f1": 0.2361024569228557},
    {"name": "sts_tr", "task": "text_classification", "acc": 0.12907904278462654, "acc_norm": 0.14720812182741116},
    {"name": "offenseval_tr", "task": "text_classification", "acc": 0.3373015873015873, "acc_norm": 0.7845804988662132},
    {"name": "mnli_tr", "task": "natural_language_inference", "acc": 0.3208, "acc_norm": 0.3211},
    {"name": "snli_tr", "task": "natural_language_inference", "acc": 0.3244, "acc_norm": 0.3237},
    {"name": "xnli_tr", "task": "natural_language_inference", "acc": 0.3614457831325301, "acc_norm": 0.3614457831325301},
    {"name": "news_cat", "task": "text_classification", "acc": 0.268, "acc_norm": 0.232},
    {"name": "mkqa_tr", "task": "extractive_question_answering", "exact_match": 0.0011837821840781297, "f1": 0.008064623072727376},
    {"name": "ironytr", "task": "text_classification", "acc": 0.49333333333333335, "acc_norm": 0.505},
    {"name": "exams_tr", "task": "multiple_choice", "acc": 0.21119592875318066, "acc_norm": 0.26208651399491095},
    {"name": "belebele_tr", "task": "multiple_choice", "acc": 0.29888888888888887, "acc_norm": 0.29888888888888887}
  ]
}
results/zero-shot/Qwen2.5-1.5B-Instruct.json
ADDED
@@ -0,0 +1,172 @@
{
  "model": {
    "model": "Qwen/Qwen2.5-1.5B-Instruct",
    "api": "hf",
    "architecture": "Qwen2ForCausalLM",
    "dtype": "bfloat16",
    "max_length": "131072",
    "type": "instruction-tuned",
    "num_parameters": "1.5b"
  },
  "results": [
    {"name": "xlsum_tr", "task": "summarization", "rouge1": 0.2623073705824601, "rouge2": 0.11026827068108053, "rougeL": 0.19910145473422672},
    {"name": "wmt-tr-en-prompt", "task": "machine_translation", "wer": 1.2141147884224404, "bleu": 0.046509658480976955},
    {"name": "wiki_lingua_tr", "task": "summarization", "rouge1": 0.20401876230987648, "rouge2": 0.06137162235274428, "rougeL": 0.14973642357201794},
    {"name": "tr-wikihow-summ", "task": "summarization", "rouge1": 0.213601462920457, "rouge2": 0.0630097851113338, "rougeL": 0.1537348275034766},
    {"name": "mlsum_tr", "task": "summarization", "rouge1": 0.38237711642823147, "rouge2": 0.24586898786864927, "rougeL": 0.314102032096956},
    {"name": "gecturk_generation", "task": "grammatical_error_correction", "exact_match": 0.0012037170783379075},
    {"name": "xquad_tr", "task": "extractive_question_answering", "exact_match": 0.16470588235294117, "f1": 0.3054485466158489},
    {"name": "xcopa_tr", "task": "multiple_choice", "acc": 0.546, "acc_norm": 0.546},
    {"name": "turkish_plu", "task": "multiple_choice", "acc": 0.42528, "acc_norm": 0.49536},
    {"name": "turkish_plu_goal_inference", "task": "multiple_choice", "acc": 0.35722819593787336, "acc_norm": 0.42771804062126645},
    {"name": "turkish_plu_next_event_prediction", "task": "multiple_choice", "acc": 0.4030534351145038, "acc_norm": 0.48854961832061067},
    {"name": "turkish_plu_step_inference", "task": "multiple_choice", "acc": 0.28104575163398693, "acc_norm": 0.45098039215686275},
    {"name": "turkish_plu_step_ordering", "task": "multiple_choice", "acc": 0.5817825661116552, "acc_norm": 0.5817825661116552},
    {"name": "check_worthiness", "task": "multiple_choice", "acc": 0.6142595978062158, "acc_norm": 0.6238574040219378},
    {"name": "relevance_judgment", "task": "multiple_choice", "acc": 0.6005484460694699, "acc_norm": 0.5781535648994516},
    {"name": "tquad", "task": "extractive_question_answering", "exact_match": 0.1468609865470852, "f1": 0.3275513362731245},
    {"name": "sts_tr", "task": "text_classification", "acc": 0.12907904278462654, "acc_norm": 0.12980420594633793},
    {"name": "offenseval_tr", "task": "text_classification", "acc": 0.20748299319727892, "acc_norm": 0.2568027210884354},
    {"name": "mnli_tr", "task": "natural_language_inference", "acc": 0.3213, "acc_norm": 0.3213},
    {"name": "snli_tr", "task": "natural_language_inference", "acc": 0.3237, "acc_norm": 0.324},
    {"name": "xnli_tr", "task": "natural_language_inference", "acc": 0.38835341365461845, "acc_norm": 0.38835341365461845},
    {"name": "news_cat", "task": "text_classification", "acc": 0.488, "acc_norm": 0.328},
    {"name": "mkqa_tr", "task": "extractive_question_answering", "exact_match": 0.0017756732761171944, "f1": 0.014047009643700398},
    {"name": "ironytr", "task": "text_classification", "acc": 0.49666666666666665, "acc_norm": 0.5283333333333333},
    {"name": "exams_tr", "task": "multiple_choice", "acc": 0.22900763358778625, "acc_norm": 0.26463104325699743},
    {"name": "belebele_tr", "task": "multiple_choice", "acc": 0.5344444444444445, "acc_norm": 0.5344444444444445}
  ]
}
results/zero-shot/Qwen2.5-1.5B.json
ADDED
@@ -0,0 +1,172 @@
{
  "model": {
    "model": "Qwen/Qwen2.5-1.5B",
    "api": "hf",
    "architecture": "Qwen2ForCausalLM",
    "dtype": "bfloat16",
    "max_length": "131072",
    "type": "pretrained",
    "num_parameters": "1.5b"
  },
  "results": [
    {"name": "xlsum_tr", "task": "summarization", "rouge1": 0.26530686725683517, "rouge2": 0.1186340395264098, "rougeL": 0.21072366214436372},
    {"name": "wmt-tr-en-prompt", "task": "machine_translation", "wer": 1.3627024164432318, "bleu": 0.04065669768703689},
    {"name": "wiki_lingua_tr", "task": "summarization", "rouge1": 0.1989672547304563, "rouge2": 0.05893942882571811, "rougeL": 0.14295071989157748},
    {"name": "tr-wikihow-summ", "task": "summarization", "rouge1": 0.21551714657943752, "rouge2": 0.06276605057309345, "rougeL": 0.1530489363520035},
    {"name": "mlsum_tr", "task": "summarization", "rouge1": 0.3720197986496941, "rouge2": 0.24001941620807693, "rougeL": 0.30891873779373347},
    {"name": "gecturk_generation", "task": "grammatical_error_correction", "exact_match": 0.004188935432615918},
    {"name": "xquad_tr", "task": "extractive_question_answering", "exact_match": 0.3184873949579832, "f1": 0.4728187788037503},
    {"name": "xcopa_tr", "task": "multiple_choice", "acc": 0.542, "acc_norm": 0.542},
    {"name": "turkish_plu", "task": "multiple_choice", "acc": 0.4208, "acc_norm": 0.48704},
    {"name": "turkish_plu_goal_inference", "task": "multiple_choice", "acc": 0.35842293906810035, "acc_norm": 0.4169653524492234},
    {"name": "turkish_plu_next_event_prediction", "task": "multiple_choice", "acc": 0.3969465648854962, "acc_norm": 0.4854961832061069},
    {"name": "turkish_plu_step_inference", "task": "multiple_choice", "acc": 0.272875816993464, "acc_norm": 0.4362745098039216},
    {"name": "turkish_plu_step_ordering", "task": "multiple_choice", "acc": 0.5759059745347699, "acc_norm": 0.5759059745347699},
    {"name": "check_worthiness", "task": "multiple_choice", "acc": 0.6229433272394881, "acc_norm": 0.6238574040219378},
    {"name": "relevance_judgment", "task": "multiple_choice", "acc": 0.4346435100548446, "acc_norm": 0.5868372943327239},
    {"name": "tquad", "task": "extractive_question_answering", "exact_match": 0.3430493273542601, "f1": 0.5584198786751099},
    {"name": "sts_tr", "task": "text_classification", "acc": 0.12907904278462654, "acc_norm": 0.12907904278462654},
    {"name": "offenseval_tr", "task": "text_classification", "acc": 0.27380952380952384, "acc_norm": 0.6590136054421769},
    {"name": "mnli_tr", "task": "natural_language_inference", "acc": 0.3212, "acc_norm": 0.3208},
    {"name": "snli_tr", "task": "natural_language_inference", "acc": 0.3237, "acc_norm": 0.3238},
    {"name": "xnli_tr", "task": "natural_language_inference", "acc": 0.4108433734939759, "acc_norm": 0.4108433734939759},
    {"name": "news_cat", "task": "text_classification", "acc": 0.484, "acc_norm": 0.312},
    {"name": "mkqa_tr", "task": "extractive_question_answering", "exact_match": 0.0025155371411660255, "f1": 0.021308629203477533},
    {"name": "ironytr", "task": "text_classification", "acc": 0.5233333333333333, "acc_norm": 0.52},
    {"name": "exams_tr", "task": "multiple_choice", "acc": 0.21628498727735368, "acc_norm": 0.2544529262086514},
    {"name": "belebele_tr", "task": "multiple_choice", "acc": 0.4666666666666667, "acc_norm": 0.4666666666666667}
  ]
}
results/zero-shot/Qwen2.5-14B-Instruct.json
ADDED
@@ -0,0 +1,174 @@
{
  "model": {
    "max_length": "131072",
    "dtype": "bfloat16",
    "model": "Qwen/Qwen2.5-14B-Instruct",
    "api": "hf",
    "architecture": "Qwen2ForCausalLM",
    "dtype": "bfloat16",
    "max_length": "131072",
    "type": "instruction-tuned",
    "num_parameters": "7b"
  },
  "results": [
    {
      "name": "xquad_tr",
      "task": "extractive_question_answering",
      "exact_match": 0.009243697478991597,
      "f1": 0.1621475839693222
    },
    {
      "name": "xcopa_tr",
      "task": "multiple_choice",
      "acc": 0.666,
      "acc_norm": 0.666
    },
    {
      "name": "turkish_plu",
      "task": "multiple_choice",
      "acc": 0.48512,
      "acc_norm": 0.53216
    },
    {
      "name": "turkish_plu_goal_inference",
      "task": "multiple_choice",
      "acc": 0.4062126642771804,
      "acc_norm": 0.41935483870967744
    },
    {
      "name": "turkish_plu_next_event_prediction",
      "task": "multiple_choice",
      "acc": 0.49770992366412214,
      "acc_norm": 0.5725190839694656
    },
    {
      "name": "turkish_plu_step_inference",
      "task": "multiple_choice",
      "acc": 0.35130718954248363,
      "acc_norm": 0.4934640522875817
    },
    {
      "name": "turkish_plu_step_ordering",
      "task": "multiple_choice",
      "acc": 0.6219392752203722,
      "acc_norm": 0.6219392752203722
    },
    {
      "name": "check_worthiness",
      "task": "multiple_choice",
      "acc": 0.38848263254113347,
      "acc_norm": 0.45749542961608775
    },
    {
      "name": "relevance_judgment",
      "task": "multiple_choice",
      "acc": 0.7458866544789763,
      "acc_norm": 0.7842778793418648
    },
    {
      "name": "tquad",
      "task": "extractive_question_answering",
      "exact_match": 0.0033632286995515697,
      "f1": 0.2073964222096445
    },
    {
      "name": "sts_tr",
      "task": "text_classification",
      "acc": 0.24873096446700507,
      "acc_norm": 0.224075416968818
    },
    {
      "name": "offenseval_tr",
      "task": "text_classification",
      "acc": 0.5473356009070295,
      "acc_norm": 0.6992630385487528
    },
    {
      "name": "mnli_tr",
      "task": "natural_language_inference",
      "acc": 0.2993,
      "acc_norm": 0.4052
    },
    {
      "name": "snli_tr",
      "task": "natural_language_inference",
      "acc": 0.249,
      "acc_norm": 0.4158
    },
    {
      "name": "xnli_tr",
      "task": "natural_language_inference",
      "acc": 0.4108433734939759,
      "acc_norm": 0.4108433734939759
    },
    {
      "name": "news_cat",
      "task": "text_classification",
      "acc": 0.324,
      "acc_norm": 0.372
    },
    {
      "name": "mkqa_tr",
      "task": "extractive_question_answering",
      "exact_match": 0.0001479727730097662,
      "f1": 0.032689256412897535
    },
    {
      "name": "ironytr",
      "task": "text_classification",
      "acc": 0.6133333333333333,
      "acc_norm": 0.68
    },
    {
      "name": "exams_tr",
      "task": "multiple_choice",
      "acc": 0.29770992366412213,
      "acc_norm": 0.32061068702290074
    },
    {
      "name": "belebele_tr",
      "task": "multiple_choice",
      "acc": 0.8466666666666667,
      "acc_norm": 0.8466666666666667
    },
    {
      "name": "xlsum_tr",
      "task": "summarization",
      "rouge1": 0.32286587644778963,
      "rouge2": 0.15718603235490425,
      "rougeL": 0.2513469242124575
    },
    {
      "name": "wmt-tr-en-prompt",
      "task": "machine_translation",
      "wer": 0.876353734204216,
      "bleu": 0.12408567637656073
    },
    {
      "name": "wiki_lingua_tr",
      "task": "summarization",
      "rouge1": 0.21257634641569856,
      "rouge2": 0.07113576521772344,
      "rougeL": 0.16327508915103117
    },
    {
      "name": "tr-wikihow-summ",
      "task": "summarization",
      "rouge1": 0.15933964568392708,
      "rouge2": 0.046667426668942254,
      "rougeL": 0.12246246131371726
    },
    {
      "name": "mlsum_tr",
      "task": "summarization",
      "rouge1": 0.3941083980026566,
      "rouge2": 0.2370970171442021,
      "rougeL": 0.3180069634000636
    },
    {
      "name": "gecturk_generation",
      "task": "grammatical_error_correction",
      "exact_match": 0.00130001444460494
    }
  ]
}
results/zero-shot/Qwen2.5-14B.json
ADDED
@@ -0,0 +1,174 @@
{
  "model": {
    "max_length": "131072",
    "dtype": "bfloat16",
    "model": "Qwen/Qwen2.5-14B",
    "api": "hf",
    "architecture": "Qwen2ForCausalLM",
    "dtype": "bfloat16",
    "max_length": "131072",
    "type": "pretrained",
    "num_parameters": "14b"
  },
  "results": [
    {
      "name": "xquad_tr",
      "task": "extractive_question_answering",
      "exact_match": 0.40252100840336136,
      "f1": 0.6176467678580342
    },
    {
      "name": "xcopa_tr",
      "task": "multiple_choice",
      "acc": 0.646,
      "acc_norm": 0.646
    },
    {
      "name": "turkish_plu",
      "task": "multiple_choice",
      "acc": 0.48736,
      "acc_norm": 0.5392
    },
    {
      "name": "turkish_plu_goal_inference",
      "task": "multiple_choice",
      "acc": 0.4133811230585424,
      "acc_norm": 0.4324970131421744
    },
    {
      "name": "turkish_plu_next_event_prediction",
      "task": "multiple_choice",
      "acc": 0.4870229007633588,
      "acc_norm": 0.5816793893129771
    },
    {
      "name": "turkish_plu_step_inference",
      "task": "multiple_choice",
      "acc": 0.35294117647058826,
      "acc_norm": 0.49019607843137253
    },
    {
      "name": "turkish_plu_step_ordering",
      "task": "multiple_choice",
      "acc": 0.6287952987267384,
      "acc_norm": 0.6287952987267384
    },
    {
      "name": "check_worthiness",
      "task": "multiple_choice",
      "acc": 0.37614259597806216,
      "acc_norm": 0.3756855575868373
    },
    {
      "name": "relevance_judgment",
      "task": "multiple_choice",
      "acc": 0.4506398537477148,
      "acc_norm": 0.5708409506398537
    },
    {
      "name": "tquad",
      "task": "extractive_question_answering",
      "exact_match": 0.34753363228699546,
      "f1": 0.614345609122
    },
    {
      "name": "sts_tr",
      "task": "text_classification",
      "acc": 0.2037708484408992,
      "acc_norm": 0.2610587382160986
    },
    {
      "name": "offenseval_tr",
      "task": "text_classification",
      "acc": 0.22023809523809523,
      "acc_norm": 0.2962018140589569
    },
    {
      "name": "mnli_tr",
      "task": "natural_language_inference",
      "acc": 0.3202,
      "acc_norm": 0.3281
    },
    {
      "name": "snli_tr",
      "task": "natural_language_inference",
      "acc": 0.3227,
      "acc_norm": 0.3329
    },
    {
      "name": "xnli_tr",
      "task": "natural_language_inference",
      "acc": 0.46546184738955826,
      "acc_norm": 0.46546184738955826
    },
    {
      "name": "news_cat",
      "task": "text_classification",
      "acc": 0.524,
      "acc_norm": 0.348
    },
    {
      "name": "mkqa_tr",
      "task": "extractive_question_answering",
      "exact_match": 0.050014797277300974,
      "f1": 0.11195620922043903
    },
    {
      "name": "ironytr",
      "task": "text_classification",
      "acc": 0.5616666666666666,
      "acc_norm": 0.6183333333333333
    },
    {
      "name": "exams_tr",
      "task": "multiple_choice",
      "acc": 0.33078880407124683,
      "acc_norm": 0.35877862595419846
    },
    {
      "name": "belebele_tr",
      "task": "multiple_choice",
      "acc": 0.8122222222222222,
      "acc_norm": 0.8122222222222222
    },
    {
      "name": "xlsum_tr",
      "task": "summarization",
      "rouge1": 0.2866278776668776,
      "rouge2": 0.1308383753682692,
      "rougeL": 0.22217070278595147
    },
    {
      "name": "wmt-tr-en-prompt",
      "task": "machine_translation",
      "wer": 1.6546507240124098,
      "bleu": 0.08096461200991427
    },
    {
      "name": "wiki_lingua_tr",
      "task": "summarization",
      "rouge1": 0.20802332507327073,
      "rouge2": 0.06755910819968403,
      "rougeL": 0.15425156655216665
    },
    {
      "name": "tr-wikihow-summ",
      "task": "summarization",
      "rouge1": 0.22012543165161014,
      "rouge2": 0.06567086903148794,
      "rougeL": 0.15604855476586732
    },
    {
      "name": "mlsum_tr",
      "task": "summarization",
      "rouge1": 0.3928051448993858,
      "rouge2": 0.25674608200884674,
      "rougeL": 0.3276023476233169
    },
    {
      "name": "gecturk_generation",
      "task": "grammatical_error_correction",
      "exact_match": 0.003707448601280755
    }
  ]
}
results/zero-shot/Qwen2.5-3B-Instruct.json
ADDED
@@ -0,0 +1,172 @@
{
  "model": {
    "model": "Qwen/Qwen2.5-3B-Instruct",
    "api": "hf",
    "architecture": "Qwen2ForCausalLM",
    "dtype": "bfloat16",
    "max_length": "131072",
    "type": "instruction-tuned",
    "num_parameters": "3b"
  },
  "results": [
    {
      "name": "xlsum_tr",
      "task": "summarization",
      "rouge1": 0.2717423560904909,
      "rouge2": 0.1203805256265841,
      "rougeL": 0.20835793423392474
    },
    {
      "name": "wmt-tr-en-prompt",
      "task": "machine_translation",
      "wer": 1.3083152705118002,
      "bleu": 0.06557652285165357
    },
    {
      "name": "wiki_lingua_tr",
      "task": "summarization",
      "rouge1": 0.20972310903888913,
      "rouge2": 0.06624065006707994,
      "rougeL": 0.15663818204368896
    },
    {
      "name": "tr-wikihow-summ",
      "task": "summarization",
      "rouge1": 0.21539354708256803,
      "rouge2": 0.06750207152961056,
      "rougeL": 0.15713052980260883
    },
    {
      "name": "mlsum_tr",
      "task": "summarization",
      "rouge1": 0.36923031529839273,
      "rouge2": 0.22733869486812047,
      "rougeL": 0.30137527399984854
    },
    {
      "name": "gecturk_generation",
      "task": "grammatical_error_correction",
      "exact_match": 0.0032741104530791083
    },
    {
      "name": "xquad_tr",
      "task": "extractive_question_answering",
      "exact_match": 0.06050420168067227,
      "f1": 0.17614501216061587
    },
    {
      "name": "xcopa_tr",
      "task": "multiple_choice",
      "acc": 0.562,
      "acc_norm": 0.562
    },
    {
      "name": "turkish_plu",
      "task": "multiple_choice",
      "acc": 0.44832,
      "acc_norm": 0.4976
    },
    {
      "name": "turkish_plu_goal_inference",
      "task": "multiple_choice",
      "acc": 0.38948626045400236,
      "acc_norm": 0.4074074074074074
    },
    {
      "name": "turkish_plu_next_event_prediction",
      "task": "multiple_choice",
      "acc": 0.4198473282442748,
      "acc_norm": 0.4916030534351145
    },
    {
      "name": "turkish_plu_step_inference",
      "task": "multiple_choice",
      "acc": 0.32189542483660133,
      "acc_norm": 0.4722222222222222
    },
    {
      "name": "turkish_plu_step_ordering",
      "task": "multiple_choice",
      "acc": 0.5905974534769833,
      "acc_norm": 0.5905974534769833
    },
    {
      "name": "check_worthiness",
      "task": "multiple_choice",
      "acc": 0.37614259597806216,
      "acc_norm": 0.41910420475319926
    },
    {
      "name": "relevance_judgment",
      "task": "multiple_choice",
      "acc": 0.4218464351005484,
      "acc_norm": 0.5863802559414991
    },
    {
      "name": "tquad",
      "task": "extractive_question_answering",
      "exact_match": 0.032511210762331835,
      "f1": 0.17915922696126974
    },
    {
      "name": "sts_tr",
      "task": "text_classification",
      "acc": 0.16823785351704135,
      "acc_norm": 0.21102248005801305
    },
    {
      "name": "offenseval_tr",
      "task": "text_classification",
      "acc": 0.3764172335600907,
      "acc_norm": 0.6312358276643991
    },
    {
      "name": "mnli_tr",
      "task": "natural_language_inference",
      "acc": 0.3202,
      "acc_norm": 0.3166
    },
    {
      "name": "snli_tr",
      "task": "natural_language_inference",
      "acc": 0.324,
      "acc_norm": 0.3233
    },
    {
      "name": "xnli_tr",
      "task": "natural_language_inference",
      "acc": 0.3899598393574297,
      "acc_norm": 0.3899598393574297
    },
    {
      "name": "news_cat",
      "task": "text_classification",
      "acc": 0.372,
      "acc_norm": 0.316
    },
    {
      "name": "mkqa_tr",
      "task": "extractive_question_answering",
      "exact_match": 0.0013317549570878958,
      "f1": 0.011368014565970922
    },
    {
      "name": "ironytr",
      "task": "text_classification",
      "acc": 0.5133333333333333,
      "acc_norm": 0.5
    },
    {
      "name": "exams_tr",
      "task": "multiple_choice",
      "acc": 0.2748091603053435,
      "acc_norm": 0.2748091603053435
    },
    {
      "name": "belebele_tr",
      "task": "multiple_choice",
      "acc": 0.6744444444444444,
      "acc_norm": 0.6744444444444444
    }
  ]
}
results/zero-shot/Qwen2.5-3B.json
ADDED
@@ -0,0 +1,172 @@
{
  "model": {
    "model": "Qwen/Qwen2.5-3B",
    "api": "hf",
    "architecture": "Qwen2ForCausalLM",
    "dtype": "bfloat16",
    "max_length": "131072",
    "type": "pretrained",
    "num_parameters": "3b"
  },
  "results": [
    {
      "name": "xlsum_tr",
      "task": "summarization",
      "rouge1": 0.2626105174898534,
      "rouge2": 0.11378189592008409,
      "rougeL": 0.20776464247370657
    },
    {
      "name": "wmt-tr-en-prompt",
      "task": "machine_translation",
      "wer": 2.715338611222304,
      "bleu": 0.04073294466582842
    },
    {
      "name": "wiki_lingua_tr",
      "task": "summarization",
      "rouge1": 0.2070375365034586,
      "rouge2": 0.06545062813959457,
      "rougeL": 0.15181208318674888
    },
    {
      "name": "tr-wikihow-summ",
      "task": "summarization",
      "rouge1": 0.2166032592490747,
      "rouge2": 0.06488692040082837,
      "rougeL": 0.15493867817520438
    },
    {
      "name": "mlsum_tr",
      "task": "summarization",
      "rouge1": 0.37920771518464447,
      "rouge2": 0.2485854189993293,
      "rougeL": 0.3183591683826359
    },
    {
      "name": "gecturk_generation",
      "task": "grammatical_error_correction",
      "exact_match": 0.01261495498098127
    },
    {
      "name": "xquad_tr",
      "task": "extractive_question_answering",
      "exact_match": 0.3226890756302521,
      "f1": 0.4922457700639336
    },
    {
      "name": "xcopa_tr",
      "task": "multiple_choice",
      "acc": 0.552,
      "acc_norm": 0.552
    },
    {
      "name": "turkish_plu",
      "task": "multiple_choice",
      "acc": 0.43936,
      "acc_norm": 0.49536
    },
    {
      "name": "turkish_plu_goal_inference",
      "task": "multiple_choice",
      "acc": 0.37992831541218636,
      "acc_norm": 0.4109916367980884
    },
    {
      "name": "turkish_plu_next_event_prediction",
      "task": "multiple_choice",
      "acc": 0.40610687022900765,
      "acc_norm": 0.4900763358778626
    },
    {
      "name": "turkish_plu_step_inference",
      "task": "multiple_choice",
      "acc": 0.2973856209150327,
      "acc_norm": 0.45098039215686275
    },
    {
      "name": "turkish_plu_step_ordering",
      "task": "multiple_choice",
      "acc": 0.594515181194907,
      "acc_norm": 0.594515181194907
    },
    {
      "name": "check_worthiness",
      "task": "multiple_choice",
      "acc": 0.37614259597806216,
      "acc_norm": 0.37614259597806216
    },
    {
      "name": "relevance_judgment",
      "task": "multiple_choice",
      "acc": 0.4218464351005484,
      "acc_norm": 0.42230347349177333
    },
    {
      "name": "tquad",
      "task": "extractive_question_answering",
      "exact_match": 0.2679372197309417,
      "f1": 0.5013276144111743
    },
    {
      "name": "sts_tr",
      "task": "text_classification",
      "acc": 0.12907904278462654,
      "acc_norm": 0.1319796954314721
    },
    {
      "name": "offenseval_tr",
      "task": "text_classification",
      "acc": 0.483843537414966,
      "acc_norm": 0.7939342403628118
    },
    {
      "name": "mnli_tr",
      "task": "natural_language_inference",
      "acc": 0.3212,
      "acc_norm": 0.3212
    },
    {
      "name": "snli_tr",
      "task": "natural_language_inference",
      "acc": 0.3219,
      "acc_norm": 0.31
    },
    {
      "name": "xnli_tr",
      "task": "natural_language_inference",
      "acc": 0.3887550200803213,
      "acc_norm": 0.3887550200803213
    },
    {
      "name": "news_cat",
      "task": "text_classification",
      "acc": 0.448,
      "acc_norm": 0.336
    },
    {
      "name": "mkqa_tr",
      "task": "extractive_question_answering",
      "exact_match": 0.0011837821840781297,
      "f1": 0.02167535190151917
    },
    {
      "name": "ironytr",
      "task": "text_classification",
      "acc": 0.5466666666666666,
      "acc_norm": 0.505
    },
    {
      "name": "exams_tr",
      "task": "multiple_choice",
      "acc": 0.26463104325699743,
      "acc_norm": 0.272264631043257
    },
    {
      "name": "belebele_tr",
      "task": "multiple_choice",
      "acc": 0.6188888888888889,
      "acc_norm": 0.6188888888888889
    }
  ]
}
results/zero-shot/Qwen2.5-7B-Instruct.json
ADDED
@@ -0,0 +1,172 @@
{
  "model": {
    "model": "Qwen/Qwen2.5-7B-Instruct",
    "api": "hf",
    "architecture": "Qwen2ForCausalLM",
    "dtype": "bfloat16",
    "max_length": "131072",
    "type": "instruction-tuned",
    "num_parameters": "7b"
  },
  "results": [
    {
      "name": "xlsum_tr",
      "task": "summarization",
      "rouge1": 0.2469558574263228,
      "rouge2": 0.11595092662162905,
      "rougeL": 0.19088347093150124
    },
    {
      "name": "wmt-tr-en-prompt",
      "task": "machine_translation",
      "wer": 2.665304473413403,
      "bleu": 0.05378866280156646
    },
    {
      "name": "wiki_lingua_tr",
      "task": "summarization",
      "rouge1": 0.19651060875148446,
      "rouge2": 0.06277513772426871,
      "rougeL": 0.15024685156698064
    },
    {
      "name": "tr-wikihow-summ",
      "task": "summarization",
      "rouge1": 0.18795422261380992,
      "rouge2": 0.057607529002163975,
      "rougeL": 0.1399141590028576
    },
    {
      "name": "mlsum_tr",
      "task": "summarization",
      "rouge1": 0.33348239796227963,
      "rouge2": 0.20530295055546918,
      "rougeL": 0.2702778828157603
    },
    {
      "name": "gecturk_generation",
      "task": "grammatical_error_correction",
      "exact_match": 0.0007222302470027445
    },
    {
      "name": "xquad_tr",
      "task": "extractive_question_answering",
      "exact_match": 0.009243697478991597,
      "f1": 0.20658958502282965
    },
    {
      "name": "xcopa_tr",
      "task": "multiple_choice",
      "acc": 0.618,
      "acc_norm": 0.618
    },
    {
      "name": "turkish_plu",
      "task": "multiple_choice",
      "acc": 0.47136,
      "acc_norm": 0.5168
    },
    {
      "name": "turkish_plu_goal_inference",
      "task": "multiple_choice",
      "acc": 0.4109916367980884,
      "acc_norm": 0.4324970131421744
    },
    {
      "name": "turkish_plu_next_event_prediction",
      "task": "multiple_choice",
      "acc": 0.467175572519084,
      "acc_norm": 0.5251908396946565
    },
    {
      "name": "turkish_plu_step_inference",
      "task": "multiple_choice",
      "acc": 0.32189542483660133,
      "acc_norm": 0.4624183006535948
    },
    {
      "name": "turkish_plu_step_ordering",
      "task": "multiple_choice",
      "acc": 0.6131243878550441,
      "acc_norm": 0.6131243878550441
    },
    {
      "name": "check_worthiness",
      "task": "multiple_choice",
      "acc": 0.37614259597806216,
      "acc_norm": 0.37705667276051186
    },
    {
      "name": "relevance_judgment",
      "task": "multiple_choice",
      "acc": 0.5635283363802559,
      "acc_norm": 0.649908592321755
    },
    {
      "name": "tquad",
      "task": "extractive_question_answering",
      "exact_match": 0.005605381165919282,
      "f1": 0.2515091110747535
    },
    {
      "name": "sts_tr",
      "task": "text_classification",
      "acc": 0.18274111675126903,
      "acc_norm": 0.20449601160261058
    },
    {
      "name": "offenseval_tr",
      "task": "text_classification",
      "acc": 0.8027210884353742,
      "acc_norm": 0.7996031746031746
    },
    {
      "name": "mnli_tr",
      "task": "natural_language_inference",
      "acc": 0.3128,
      "acc_norm": 0.3443
    },
    {
      "name": "snli_tr",
      "task": "natural_language_inference",
      "acc": 0.3019,
      "acc_norm": 0.3201
    },
    {
      "name": "xnli_tr",
      "task": "natural_language_inference",
      "acc": 0.41044176706827307,
      "acc_norm": 0.41044176706827307
    },
    {
      "name": "news_cat",
      "task": "text_classification",
      "acc": 0.4,
      "acc_norm": 0.244
    },
    {
      "name": "mkqa_tr",
      "task": "extractive_question_answering",
      "exact_match": 0.0,
      "f1": 0.02283069752218492
    },
    {
      "name": "ironytr",
      "task": "text_classification",
      "acc": 0.55,
      "acc_norm": 0.6
    },
    {
      "name": "exams_tr",
      "task": "multiple_choice",
      "acc": 0.30279898218829515,
      "acc_norm": 0.3435114503816794
    },
    {
      "name": "belebele_tr",
      "task": "multiple_choice",
      "acc": 0.7344444444444445,
      "acc_norm": 0.7344444444444445
    }
  ]
}
results/zero-shot/Qwen2.5-7B.json
ADDED
@@ -0,0 +1,172 @@
{
  "model": {
    "model": "Qwen/Qwen2.5-7B",
    "api": "hf",
    "architecture": "Qwen2ForCausalLM",
    "dtype": "bfloat16",
    "max_length": "131072",
    "type": "pretrained",
    "num_parameters": "7b"
  },
  "results": [
    {
      "name": "xlsum_tr",
      "task": "summarization",
      "rouge1": 0.2771480541706062,
      "rouge2": 0.12265578549173298,
      "rougeL": 0.21527848396268273
    },
    {
      "name": "wmt-tr-en-prompt",
      "task": "machine_translation",
      "wer": 2.758041226669275,
      "bleu": 0.05541606336453955
    },
    {
      "name": "wiki_lingua_tr",
      "task": "summarization",
      "rouge1": 0.2142341169252447,
      "rouge2": 0.069300770560285,
      "rougeL": 0.15734638541997004
    },
    {
      "name": "tr-wikihow-summ",
      "task": "summarization",
      "rouge1": 0.2205921941830655,
      "rouge2": 0.06633246639608908,
      "rougeL": 0.1577913671817974
    },
    {
      "name": "mlsum_tr",
      "task": "summarization",
      "rouge1": 0.38481724984581955,
      "rouge2": 0.25317842640354704,
      "rougeL": 0.3226132671645973
    },
    {
      "name": "gecturk_generation",
      "task": "grammatical_error_correction",
      "exact_match": 0.005537098560354375
    },
    {
      "name": "xquad_tr",
      "task": "extractive_question_answering",
      "exact_match": 0.319327731092437,
      "f1": 0.5120848322696311
    },
    {
      "name": "xcopa_tr",
      "task": "multiple_choice",
      "acc": 0.598,
      "acc_norm": 0.598
    },
    {
      "name": "turkish_plu",
      "task": "multiple_choice",
      "acc": 0.48288,
      "acc_norm": 0.53376
    },
    {
      "name": "turkish_plu_goal_inference",
      "task": "multiple_choice",
      "acc": 0.4253285543608124,
      "acc_norm": 0.44683393070489846
    },
    {
      "name": "turkish_plu_next_event_prediction",
      "task": "multiple_choice",
      "acc": 0.4717557251908397,
      "acc_norm": 0.549618320610687
    },
    {
      "name": "turkish_plu_step_inference",
      "task": "multiple_choice",
      "acc": 0.32189542483660133,
      "acc_norm": 0.46895424836601307
    },
    {
      "name": "turkish_plu_step_ordering",
      "task": "multiple_choice",
      "acc": 0.633692458374143,
      "acc_norm": 0.633692458374143
    },
    {
      "name": "check_worthiness",
      "task": "multiple_choice",
      "acc": 0.399908592321755,
      "acc_norm": 0.5361060329067642
    },
    {
      "name": "relevance_judgment",
      "task": "multiple_choice",
      "acc": 0.4259597806215722,
      "acc_norm": 0.4437842778793419
    },
    {
      "name": "tquad",
      "task": "extractive_question_answering",
      "exact_match": 0.2802690582959641,
      "f1": 0.5504499810832788
    },
    {
      "name": "sts_tr",
      "task": "text_classification",
      "acc": 0.1696881798404641,
      "acc_norm": 0.18201595358955766
    },
    {
      "name": "offenseval_tr",
      "task": "text_classification",
      "acc": 0.7738095238095238,
      "acc_norm": 0.7956349206349206
    },
    {
      "name": "mnli_tr",
      "task": "natural_language_inference",
      "acc": 0.3204,
      "acc_norm": 0.3466
    },
    {
      "name": "snli_tr",
      "task": "natural_language_inference",
      "acc": 0.3236,
      "acc_norm": 0.3272
    },
    {
      "name": "xnli_tr",
      "task": "natural_language_inference",
      "acc": 0.41847389558232934,
      "acc_norm": 0.41847389558232934
    },
    {
      "name": "news_cat",
      "task": "text_classification",
      "acc": 0.548,
      "acc_norm": 0.336
    },
    {
      "name": "mkqa_tr",
      "task": "extractive_question_answering",
      "exact_match": 0.013909440662918023,
      "f1": 0.05323007126210795
    },
    {
      "name": "ironytr",
      "task": "text_classification",
      "acc": 0.5733333333333334,
      "acc_norm": 0.54
    },
    {
      "name": "exams_tr",
      "task": "multiple_choice",
      "acc": 0.2951653944020356,
      "acc_norm": 0.356234096692112
    },
    {
      "name": "belebele_tr",
      "task": "multiple_choice",
      "acc": 0.7388888888888889,
      "acc_norm": 0.7388888888888889
    }
  ]
}
results/zero-shot/aya-23-35B.json
ADDED
@@ -0,0 +1,174 @@
{
  "model": {
    "dtype": "auto",
    "parallelize": "True",
    "device_map": "balanced",
    "model": "CohereForAI/aya-23-35B",
    "api": "hf",
    "architecture": "CohereForCausalLM",
    "dtype": "float16",
    "type": "instruction-tuned",
    "num_parameters": "35b"
  },
  "results": [
    {
      "name": "xquad_tr",
      "task": "extractive_question_answering",
      "exact_match": 0.3092436974789916,
      "f1": 0.4925851410138433
    },
    {
      "name": "xcopa_tr",
      "task": "multiple_choice",
      "acc": 0.604,
      "acc_norm": 0.604
    },
    {
      "name": "turkish_plu",
      "task": "multiple_choice",
      "acc": 0.48832,
      "acc_norm": 0.51744
    },
    {
      "name": "turkish_plu_goal_inference",
      "task": "multiple_choice",
      "acc": 0.43010752688172044,
      "acc_norm": 0.40860215053763443
    },
    {
      "name": "turkish_plu_next_event_prediction",
      "task": "multiple_choice",
      "acc": 0.5206106870229008,
      "acc_norm": 0.5908396946564886
    },
    {
      "name": "turkish_plu_step_inference",
      "task": "multiple_choice",
      "acc": 0.35130718954248363,
      "acc_norm": 0.4542483660130719
    },
    {
      "name": "turkish_plu_step_ordering",
      "task": "multiple_choice",
      "acc": 0.5974534769833496,
      "acc_norm": 0.5974534769833496
    },
    {
      "name": "check_worthiness",
      "task": "multiple_choice",
      "acc": 0.37614259597806216,
      "acc_norm": 0.37614259597806216
    },
    {
      "name": "relevance_judgment",
      "task": "multiple_choice",
      "acc": 0.46983546617915906,
      "acc_norm": 0.5553016453382084
    },
    {
      "name": "tquad",
      "task": "extractive_question_answering",
      "exact_match": 0.2062780269058296,
      "f1": 0.4775440049958143
    },
    {
      "name": "sts_tr",
      "task": "text_classification",
      "acc": 0.25380710659898476,
      "acc_norm": 0.11965192168237854
    },
    {
      "name": "offenseval_tr",
      "task": "text_classification",
      "acc": 0.2100340136054422,
      "acc_norm": 0.23922902494331066
    },
    {
      "name": "mnli_tr",
      "task": "natural_language_inference",
      "acc": 0.2957,
      "acc_norm": 0.3475
    },
    {
      "name": "snli_tr",
      "task": "natural_language_inference",
      "acc": 0.2881,
      "acc_norm": 0.3364
    },
    {
      "name": "xnli_tr",
      "task": "natural_language_inference",
      "acc": 0.5060240963855421,
      "acc_norm": 0.5060240963855421
    },
    {
      "name": "news_cat",
      "task": "text_classification",
      "acc": 0.556,
      "acc_norm": 0.356
    },
    {
      "name": "mkqa_tr",
      "task": "extractive_question_answering",
      "exact_match": 0.19428825096182303,
      "f1": 0.2786006074753464
    },
    {
      "name": "ironytr",
      "task": "text_classification",
      "acc": 0.5016666666666667,
      "acc_norm": 0.49166666666666664
    },
    {
      "name": "exams_tr",
      "task": "multiple_choice",
      "acc": 0.29770992366412213,
      "acc_norm": 0.3231552162849873
    },
    {
      "name": "belebele_tr",
      "task": "multiple_choice",
      "acc": 0.7288888888888889,
      "acc_norm": 0.7288888888888889
    },
    {
      "name": "gecturk_generation",
      "task": "grammatical_error_correction",
      "exact_match": 0.009437141894169195
    },
    {
      "name": "xlsum_tr",
      "task": "summarization",
      "rouge1": 0.26107330055626987,
      "rouge2": 0.13324102083895656,
      "rougeL": 0.214838816984684
    },
    {
      "name": "wmt-tr-en-prompt",
      "task": "machine_translation",
      "wer": 0.7343050156418351,
      "bleu": 0.18474168394967388
    },
    {
      "name": "wiki_lingua_tr",
      "task": "summarization",
      "rouge1": 0.3521005179613347,
      "rouge2": 0.1839412116950937,
      "rougeL": 0.3093989984717051
    },
    {
      "name": "tr-wikihow-summ",
      "task": "summarization",
      "rouge1": 0.2767758420029493,
      "rouge2": 0.1271603930418029,
      "rougeL": 0.23279989970428439
    },
    {
      "name": "mlsum_tr",
      "task": "summarization",
      "rouge1": 0.3920969453077054,
      "rouge2": 0.25937196554017156,
      "rougeL": 0.33144850765201345
    }
  ]
}
results/zero-shot/aya-expanse-32b.json
ADDED
@@ -0,0 +1,173 @@
{
  "model": {
    "parallelize": "True",
    "device_map": "balanced",
    "model": "CohereForAI/aya-expanse-32b",
    "api": "hf",
    "architecture": "CohereForCausalLM",
    "dtype": "float16",
    "type": "instruction-tuned",
    "num_parameters": "32b"
  },
  "results": [
    {
      "name": "xquad_tr",
      "task": "extractive_question_answering",
      "exact_match": 0.319327731092437,
      "f1": 0.5392716511089678
    },
    {
      "name": "xcopa_tr",
      "task": "multiple_choice",
      "acc": 0.592,
      "acc_norm": 0.592
    },
    {
      "name": "turkish_plu",
      "task": "multiple_choice",
      "acc": 0.51808,
      "acc_norm": 0.55616
    },
    {
      "name": "turkish_plu_goal_inference",
      "task": "multiple_choice",
      "acc": 0.44802867383512546,
      "acc_norm": 0.45639187574671447
    },
    {
      "name": "turkish_plu_next_event_prediction",
      "task": "multiple_choice",
      "acc": 0.5511450381679389,
      "acc_norm": 0.6106870229007634
    },
    {
      "name": "turkish_plu_step_inference",
      "task": "multiple_choice",
      "acc": 0.39215686274509803,
      "acc_norm": 0.511437908496732
    },
    {
      "name": "turkish_plu_step_ordering",
      "task": "multiple_choice",
      "acc": 0.6297747306562194,
      "acc_norm": 0.6297747306562194
    },
    {
      "name": "check_worthiness",
      "task": "multiple_choice",
      "acc": 0.37751371115173676,
      "acc_norm": 0.3793418647166362
    },
    {
      "name": "relevance_judgment",
      "task": "multiple_choice",
      "acc": 0.6937842778793418,
      "acc_norm": 0.7408592321755028
    },
    {
      "name": "tquad",
      "task": "extractive_question_answering",
      "exact_match": 0.3015695067264574,
      "f1": 0.5825292681833019
    },
    {
      "name": "sts_tr",
      "task": "text_classification",
      "acc": 0.21464829586656997,
      "acc_norm": 0.22987672226250908
    },
    {
      "name": "offenseval_tr",
      "task": "text_classification",
      "acc": 0.6706349206349206,
      "acc_norm": 0.7936507936507936
    },
    {
      "name": "mnli_tr",
      "task": "natural_language_inference",
      "acc": 0.2444,
      "acc_norm": 0.3458
    },
    {
      "name": "snli_tr",
      "task": "natural_language_inference",
      "acc": 0.1896,
      "acc_norm": 0.3355
    },
    {
      "name": "xnli_tr",
      "task": "natural_language_inference",
      "acc": 0.5056224899598394,
      "acc_norm": 0.5056224899598394
    },
    {
      "name": "news_cat",
      "task": "text_classification",
      "acc": 0.828,
      "acc_norm": 0.68
    },
    {
      "name": "mkqa_tr",
      "task": "extractive_question_answering",
      "exact_match": 0.16424977804084048,
      "f1": 0.25720974268367947
    },
    {
      "name": "ironytr",
      "task": "text_classification",
      "acc": 0.5,
      "acc_norm": 0.5566666666666666
    },
    {
      "name": "exams_tr",
      "task": "multiple_choice",
      "acc": 0.36895674300254455,
      "acc_norm": 0.39185750636132316
    },
    {
      "name": "belebele_tr",
      "task": "multiple_choice",
      "acc": 0.8344444444444444,
      "acc_norm": 0.8344444444444444
    },
    {
      "name": "xlsum_tr",
      "task": "summarization",
      "rouge1": 0.36837282882318917,
      "rouge2": 0.2144125271579892,
      "rougeL": 0.3114288520291558
    },
    {
      "name": "wmt-tr-en-prompt",
      "task": "machine_translation",
      "wer": 0.721465283605015,
      "bleu": 0.2010197464685068
    },
    {
      "name": "wiki_lingua_tr",
      "task": "summarization",
      "rouge1": 0.3958195144552331,
      "rouge2": 0.2145940709808375,
      "rougeL": 0.34455596224977914
    },
    {
      "name": "tr-wikihow-summ",
      "task": "summarization",
      "rouge1": 0.34294866079774666,
      "rouge2": 0.16631660541703744,
      "rougeL": 0.2853574006828194
    },
    {
      "name": "mlsum_tr",
      "task": "summarization",
      "rouge1": 0.43617843344099383,
      "rouge2": 0.301267876751885,
      "rougeL": 0.3690068926127347
    },
    {
      "name": "gecturk_generation",
      "task": "grammatical_error_correction",
      "exact_match": 0
    }
  ]
}
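Each result file added in this commit follows the same schema: a "model" block with loading metadata and a "results" list with one entry per task. As a minimal sketch of how these files could be consumed (assuming Python with only the standard library, and the `results/zero-shot/` layout used here), the snippet below loads every file and prints a rough mean accuracy over the tasks that report an "acc" field; this aggregation is only illustrative and is not the leaderboard's own scoring.

```python
import json
from pathlib import Path
from statistics import mean

# Assumed layout from this commit: results/zero-shot/<model>.json
results_dir = Path("results/zero-shot")

for path in sorted(results_dir.glob("*.json")):
    with path.open() as f:
        data = json.load(f)

    model_id = data["model"]["model"]  # e.g. "Qwen/Qwen2.5-14B-Instruct"
    # Keep only tasks that report a plain accuracy; ROUGE/BLEU/WER tasks are skipped.
    accs = [entry["acc"] for entry in data["results"] if "acc" in entry]
    if accs:
        print(f"{model_id}: mean acc over {len(accs)} tasks = {mean(accs):.4f}")
```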