Ilker Kesen committed on
Commit 6aca973 · 1 Parent(s): 280375b

include llama-3.2-3b in the leaderboard

Files changed (1)
  1. results/zero-shot/llama-3.2-3b.json +160 -0
results/zero-shot/llama-3.2-3b.json ADDED
@@ -0,0 +1,160 @@
+ {
+   "model": {
+     "model": "meta-llama/Llama-3.2-3B",
+     "api": "hf",
+     "dtype": "bfloat16",
+     "max_length": 131072,
+     "architecture": "LlamaForCausalLM",
+     "type": "pretrained",
+     "num_parameters": "3b"
+   },
+   "results": [
+     {
+       "name": "belebele_tr",
+       "task": "multiple_choice",
+       "acc": 0.47333333333333333,
+       "acc_norm": 0.47333333333333333
+     },
+     {
+       "name": "check_worthiness",
+       "task": "multiple_choice",
+       "acc": 0.37614259597806216,
+       "acc_norm": 0.42458866544789764
+     },
+     {
+       "name": "ironytr",
+       "task": "text_classification",
+       "acc": 0.5,
+       "acc_norm": 0.5116666666666667
+     },
+     {
+       "name": "mkqa_tr",
+       "task": "extractive_question_answering",
+       "exact_match": 0.06007694584196508,
+       "f1": 0.10654344736882515
+     },
+     {
+       "name": "mnli_tr",
+       "task": "natural_language_inference",
+       "acc": 0.3488,
+       "acc_norm": 0.3478
+     },
+     {
+       "name": "news_cat",
+       "task": "text_classification",
+       "acc": 0.664,
+       "acc_norm": 0.544
+     },
+     {
+       "name": "offenseval_tr",
+       "task": "text_classification",
+       "acc": 0.25396825396825395,
+       "acc_norm": 0.677437641723356
+     },
+     {
+       "name": "relevance_judgment",
+       "task": "multiple_choice",
+       "acc": 0.42230347349177333,
+       "acc_norm": 0.4227605118829982
+     },
+     {
+       "name": "snli_tr",
+       "task": "natural_language_inference",
+       "acc": 0.336,
+       "acc_norm": 0.3368
+     },
+     {
+       "name": "sts_tr",
+       "task": "text_classification",
+       "acc": 0.1319796954314721,
+       "acc_norm": 0.20667150108774474
+     },
+     {
+       "name": "tquad",
+       "task": "extractive_question_answering",
+       "exact_match": 0.21188340807174888,
+       "f1": 0.4583574684635767
+     },
+     {
+       "name": "turkish_plu_goal_inference",
+       "task": "multiple_choice",
+       "acc": 0.3906810035842294,
+       "acc_norm": 0.3906810035842294
+     },
+     {
+       "name": "turkish_plu_next_event_prediction",
+       "task": "multiple_choice",
+       "acc": 0.4122137404580153,
+       "acc_norm": 0.5389312977099237
+     },
+     {
+       "name": "turkish_plu_step_inference",
+       "task": "multiple_choice",
+       "acc": 0.30718954248366015,
+       "acc_norm": 0.4493464052287582
+     },
+     {
+       "name": "turkish_plu_step_ordering",
+       "task": "multiple_choice",
+       "acc": 0.5974534769833496,
+       "acc_norm": 0.5974534769833496
+     },
+     {
+       "name": "xcopa_tr",
+       "task": "multiple_choice",
+       "acc": 0.57,
+       "acc_norm": 0.57
+     },
+     {
+       "name": "xnli_tr",
+       "task": "natural_language_inference",
+       "acc": 0.457429718875502,
+       "acc_norm": 0.457429718875502
+     },
+     {
+       "name": "xquad_tr",
+       "task": "extractive_question_answering",
+       "exact_match": 0.15546218487394958,
+       "f1": 0.27803412264497246
+     },
+     {
+       "name": "gecturk_generation",
+       "task": "grammatical_error_correction",
+       "exact_match": 0.004670422263951081
+     },
+     {
+       "name": "mlsum_tr",
+       "task": "summarization",
+       "rouge1": 0.365518560170642,
+       "rouge2": 0.23287817844657932,
+       "rougeL": 0.3078744009567239
+     },
+     {
+       "name": "tr-wikihow-summ",
+       "task": "summarization",
+       "rouge1": 0.21593529633420477,
+       "rouge2": 0.06503735021906278,
+       "rougeL": 0.15632749796055023
+     },
+     {
+       "name": "wiki_lingua_tr",
+       "task": "summarization",
+       "rouge1": 0.2043150922082644,
+       "rouge2": 0.06428071184104861,
+       "rougeL": 0.1502017339873826
+     },
+     {
+       "name": "wmt-tr-en-prompt",
+       "task": "machine_translation",
+       "wer": 0.9944581931250852,
+       "bleu": 0.07893246404648234
+     },
+     {
+       "name": "xlsum_tr",
+       "task": "summarization",
+       "rouge1": 0.2718989612690207,
+       "rouge2": 0.12031087767355472,
+       "rougeL": 0.21504760362022454
+     }
+   ]
+ }
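
Note (not part of the committed file): a result file like this pairs one "model" block with a list of per-dataset entries, where each entry reports only the metrics relevant to its task. The sketch below is a minimal, illustrative way to load and print such a file for a quick sanity check; the metric preference order, the summarize helper name, and the hard-coded path are assumptions, not the leaderboard's actual aggregation code.

import json
from pathlib import Path

# Illustrative helper, not part of this commit or the leaderboard codebase.
# Loads one zero-shot result file and prints a per-dataset summary line.
# The metric preference below is an assumption made for display purposes only.
METRIC_PRIORITY = ("acc_norm", "acc", "f1", "exact_match", "rougeL", "bleu")

def summarize(path: str) -> None:
    data = json.loads(Path(path).read_text())
    print(f"model: {data['model']['model']} ({data['model']['num_parameters']})")
    for entry in data["results"]:
        # Pick the first metric that this dataset actually reports.
        metric = next(m for m in METRIC_PRIORITY if m in entry)
        print(f"{entry['name']:<36} {entry['task']:<34} {metric}={entry[metric]:.4f}")

if __name__ == "__main__":
    # Path as added by this commit.
    summarize("results/zero-shot/llama-3.2-3b.json")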