Ilker Kesen committed
Commit · 6aca973
1 Parent(s): 280375b

include llama-3.2-3b in the leaderboard
results/zero-shot/llama-3.2-3b.json ADDED
@@ -0,0 +1,160 @@
+{
+    "model": {
+        "model": "meta-llama/Llama-3.2-3B",
+        "api": "hf",
+        "dtype": "bfloat16",
+        "max_length": 131072,
+        "architecture": "LlamaForCausalLM",
+        "type": "pretrained",
+        "num_parameters": "3b"
+    },
+    "results": [
+        {
+            "name": "belebele_tr",
+            "task": "multiple_choice",
+            "acc": 0.47333333333333333,
+            "acc_norm": 0.47333333333333333
+        },
+        {
+            "name": "check_worthiness",
+            "task": "multiple_choice",
+            "acc": 0.37614259597806216,
+            "acc_norm": 0.42458866544789764
+        },
+        {
+            "name": "ironytr",
+            "task": "text_classification",
+            "acc": 0.5,
+            "acc_norm": 0.5116666666666667
+        },
+        {
+            "name": "mkqa_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.06007694584196508,
+            "f1": 0.10654344736882515
+        },
+        {
+            "name": "mnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.3488,
+            "acc_norm": 0.3478
+        },
+        {
+            "name": "news_cat",
+            "task": "text_classification",
+            "acc": 0.664,
+            "acc_norm": 0.544
+        },
+        {
+            "name": "offenseval_tr",
+            "task": "text_classification",
+            "acc": 0.25396825396825395,
+            "acc_norm": 0.677437641723356
+        },
+        {
+            "name": "relevance_judgment",
+            "task": "multiple_choice",
+            "acc": 0.42230347349177333,
+            "acc_norm": 0.4227605118829982
+        },
+        {
+            "name": "snli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.336,
+            "acc_norm": 0.3368
+        },
+        {
+            "name": "sts_tr",
+            "task": "text_classification",
+            "acc": 0.1319796954314721,
+            "acc_norm": 0.20667150108774474
+        },
+        {
+            "name": "tquad",
+            "task": "extractive_question_answering",
+            "exact_match": 0.21188340807174888,
+            "f1": 0.4583574684635767
+        },
+        {
+            "name": "turkish_plu_goal_inference",
+            "task": "multiple_choice",
+            "acc": 0.3906810035842294,
+            "acc_norm": 0.3906810035842294
+        },
+        {
+            "name": "turkish_plu_next_event_prediction",
+            "task": "multiple_choice",
+            "acc": 0.4122137404580153,
+            "acc_norm": 0.5389312977099237
+        },
+        {
+            "name": "turkish_plu_step_inference",
+            "task": "multiple_choice",
+            "acc": 0.30718954248366015,
+            "acc_norm": 0.4493464052287582
+        },
+        {
+            "name": "turkish_plu_step_ordering",
+            "task": "multiple_choice",
+            "acc": 0.5974534769833496,
+            "acc_norm": 0.5974534769833496
+        },
+        {
+            "name": "xcopa_tr",
+            "task": "multiple_choice",
+            "acc": 0.57,
+            "acc_norm": 0.57
+        },
+        {
+            "name": "xnli_tr",
+            "task": "natural_language_inference",
+            "acc": 0.457429718875502,
+            "acc_norm": 0.457429718875502
+        },
+        {
+            "name": "xquad_tr",
+            "task": "extractive_question_answering",
+            "exact_match": 0.15546218487394958,
+            "f1": 0.27803412264497246
+        },
+        {
+            "name": "gecturk_generation",
+            "task": "grammatical_error_correction",
+            "exact_match": 0.004670422263951081
+        },
+        {
+            "name": "mlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.365518560170642,
+            "rouge2": 0.23287817844657932,
+            "rougeL": 0.3078744009567239
+        },
+        {
+            "name": "tr-wikihow-summ",
+            "task": "summarization",
+            "rouge1": 0.21593529633420477,
+            "rouge2": 0.06503735021906278,
+            "rougeL": 0.15632749796055023
+        },
+        {
+            "name": "wiki_lingua_tr",
+            "task": "summarization",
+            "rouge1": 0.2043150922082644,
+            "rouge2": 0.06428071184104861,
+            "rougeL": 0.1502017339873826
+        },
+        {
+            "name": "wmt-tr-en-prompt",
+            "task": "machine_translation",
+            "wer": 0.9944581931250852,
+            "bleu": 0.07893246404648234
+        },
+        {
+            "name": "xlsum_tr",
+            "task": "summarization",
+            "rouge1": 0.2718989612690207,
+            "rouge2": 0.12031087767355472,
+            "rougeL": 0.21504760362022454
+        }
+    ]
+}
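For context, the added file pairs a model descriptor with one entry per dataset, where each entry carries the metrics its task type produces (acc/acc_norm, exact_match/f1, ROUGE, WER/BLEU). A minimal sketch of how such a file could be consumed for a leaderboard follows; the per-task primary-metric choices and the flat averaging are illustrative assumptions, not the leaderboard's actual aggregation code.

import json

# Assumed primary metric per task type, inferred from the fields
# present in this results file (illustrative, not the repo's rule).
PRIMARY_METRIC = {
    "multiple_choice": "acc_norm",
    "text_classification": "acc_norm",
    "natural_language_inference": "acc_norm",
    "extractive_question_answering": "f1",
    "grammatical_error_correction": "exact_match",
    "summarization": "rouge1",
    "machine_translation": "bleu",
}

def load_scores(path):
    """Read one zero-shot results file and return {dataset_name: score}."""
    with open(path) as f:
        data = json.load(f)
    return {
        entry["name"]: entry[PRIMARY_METRIC[entry["task"]]]
        for entry in data["results"]
    }

scores = load_scores("results/zero-shot/llama-3.2-3b.json")
print(f"{len(scores)} datasets, mean score {sum(scores.values()) / len(scores):.4f}")

Note that an unweighted mean over datasets is only one possible aggregation; a leaderboard may instead average within each task type first so that heavily represented tasks (e.g. the five multiple_choice PLU-related datasets) do not dominate the overall score.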