lastrucci01 committed on
Commit
1e73ccc
1 Parent(s): 3557e25

update README & removed bad files

Browse files
README.md CHANGED
@@ -15,7 +15,7 @@ tags:
15
  ---
16
 
17
 
18
- # [en-ss] Siswati to English Translation Model
19
 
20
  Based on M2M100 model using The ZA-gov-multilingual and Vuk'uzenzele South African multilingual corpora, and the SADiLaR Bilingual English-Siswati Corpus.
21
 
 
15
  ---
16
 
17
 
18
+ # [ss-en] Siswati to English Translation Model
19
 
20
  Based on M2M100 model using The ZA-gov-multilingual and Vuk'uzenzele South African multilingual corpora, and the SADiLaR Bilingual English-Siswati Corpus.
21
 
added_tokens.json DELETED
@@ -1,102 +0,0 @@
1
- {
2
- "__af__": 128004,
3
- "__am__": 128005,
4
- "__ar__": 128006,
5
- "__ast__": 128007,
6
- "__az__": 128008,
7
- "__ba__": 128009,
8
- "__be__": 128010,
9
- "__bg__": 128011,
10
- "__bn__": 128012,
11
- "__br__": 128013,
12
- "__bs__": 128014,
13
- "__ca__": 128015,
14
- "__ceb__": 128016,
15
- "__cs__": 128017,
16
- "__cy__": 128018,
17
- "__da__": 128019,
18
- "__de__": 128020,
19
- "__el__": 128021,
20
- "__en__": 128022,
21
- "__es__": 128023,
22
- "__et__": 128024,
23
- "__fa__": 128025,
24
- "__ff__": 128026,
25
- "__fi__": 128027,
26
- "__fr__": 128028,
27
- "__fy__": 128029,
28
- "__ga__": 128030,
29
- "__gd__": 128031,
30
- "__gl__": 128032,
31
- "__gu__": 128033,
32
- "__ha__": 128034,
33
- "__he__": 128035,
34
- "__hi__": 128036,
35
- "__hr__": 128037,
36
- "__ht__": 128038,
37
- "__hu__": 128039,
38
- "__hy__": 128040,
39
- "__id__": 128041,
40
- "__ig__": 128042,
41
- "__ilo__": 128043,
42
- "__is__": 128044,
43
- "__it__": 128045,
44
- "__ja__": 128046,
45
- "__jv__": 128047,
46
- "__ka__": 128048,
47
- "__kk__": 128049,
48
- "__km__": 128050,
49
- "__kn__": 128051,
50
- "__ko__": 128052,
51
- "__lb__": 128053,
52
- "__lg__": 128054,
53
- "__ln__": 128055,
54
- "__lo__": 128056,
55
- "__lt__": 128057,
56
- "__lv__": 128058,
57
- "__mg__": 128059,
58
- "__mk__": 128060,
59
- "__ml__": 128061,
60
- "__mn__": 128062,
61
- "__mr__": 128063,
62
- "__ms__": 128064,
63
- "__my__": 128065,
64
- "__ne__": 128066,
65
- "__nl__": 128067,
66
- "__no__": 128068,
67
- "__ns__": 128069,
68
- "__oc__": 128070,
69
- "__or__": 128071,
70
- "__pa__": 128072,
71
- "__pl__": 128073,
72
- "__ps__": 128074,
73
- "__pt__": 128075,
74
- "__ro__": 128076,
75
- "__ru__": 128077,
76
- "__sd__": 128078,
77
- "__si__": 128079,
78
- "__sk__": 128080,
79
- "__sl__": 128081,
80
- "__so__": 128082,
81
- "__sq__": 128083,
82
- "__sr__": 128084,
83
- "__ss__": 128085,
84
- "__su__": 128086,
85
- "__sv__": 128087,
86
- "__sw__": 128088,
87
- "__ta__": 128089,
88
- "__th__": 128090,
89
- "__tl__": 128091,
90
- "__tn__": 128092,
91
- "__tr__": 128093,
92
- "__uk__": 128094,
93
- "__ur__": 128095,
94
- "__uz__": 128096,
95
- "__vi__": 128097,
96
- "__wo__": 128098,
97
- "__xh__": 128099,
98
- "__yi__": 128100,
99
- "__yo__": 128101,
100
- "__zh__": 128102,
101
- "__zu__": 128103
102
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
all_results.json DELETED
@@ -1,22 +0,0 @@
1
- {
2
- "epoch": 3.0,
3
- "eval_bleu": 16.3977,
4
- "eval_gen_len": 42.3408,
5
- "eval_loss": 1.1407965421676636,
6
- "eval_runtime": 1872.385,
7
- "eval_samples": 13308,
8
- "eval_samples_per_second": 7.108,
9
- "eval_steps_per_second": 0.889,
10
- "predict_bleu": 16.4443,
11
- "predict_gen_len": 42.1159,
12
- "predict_loss": 1.1480013132095337,
13
- "predict_runtime": 1868.3522,
14
- "predict_samples": 13307,
15
- "predict_samples_per_second": 7.122,
16
- "predict_steps_per_second": 0.891,
17
- "train_loss": 1.3682204318087179,
18
- "train_runtime": 30390.702,
19
- "train_samples": 106459,
20
- "train_samples_per_second": 10.509,
21
- "train_steps_per_second": 1.314
22
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.json DELETED
@@ -1,38 +0,0 @@
1
- {
2
- "_name_or_path": "facebook/m2m100_418M",
3
- "activation_dropout": 0.0,
4
- "activation_function": "relu",
5
- "architectures": [
6
- "M2M100ForConditionalGeneration"
7
- ],
8
- "attention_dropout": 0.1,
9
- "bos_token_id": 0,
10
- "d_model": 1024,
11
- "decoder_attention_heads": 16,
12
- "decoder_ffn_dim": 4096,
13
- "decoder_layerdrop": 0.05,
14
- "decoder_layers": 12,
15
- "decoder_start_token_id": 2,
16
- "dropout": 0.1,
17
- "early_stopping": true,
18
- "encoder_attention_heads": 16,
19
- "encoder_ffn_dim": 4096,
20
- "encoder_layerdrop": 0.05,
21
- "encoder_layers": 12,
22
- "eos_token_id": 2,
23
- "forced_bos_token_id": 128088,
24
- "gradient_checkpointing": false,
25
- "init_std": 0.02,
26
- "is_encoder_decoder": true,
27
- "max_length": 200,
28
- "max_position_embeddings": 1024,
29
- "model_type": "m2m_100",
30
- "num_beams": 5,
31
- "num_hidden_layers": 12,
32
- "pad_token_id": 1,
33
- "scale_embedding": true,
34
- "torch_dtype": "float32",
35
- "transformers_version": "4.36.2",
36
- "use_cache": true,
37
- "vocab_size": 128104
38
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval_results.json DELETED
@@ -1,10 +0,0 @@
1
- {
2
- "epoch": 3.0,
3
- "eval_bleu": 16.3977,
4
- "eval_gen_len": 42.3408,
5
- "eval_loss": 1.1407965421676636,
6
- "eval_runtime": 1872.385,
7
- "eval_samples": 13308,
8
- "eval_samples_per_second": 7.108,
9
- "eval_steps_per_second": 0.889
10
- }
 
 
 
 
 
 
 
 
 
 
 
generated_predictions.txt DELETED
The diff for this file is too large to render. See raw diff
 
generation_config.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "bos_token_id": 0,
4
- "decoder_start_token_id": 2,
5
- "early_stopping": true,
6
- "eos_token_id": 2,
7
- "max_length": 200,
8
- "num_beams": 5,
9
- "pad_token_id": 1,
10
- "transformers_version": "4.36.2"
11
- }
 
 
 
 
 
 
 
 
 
 
 
 
model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:93370cac65e0da4791512aa2a94084afd015faa38e479a4c8bd59de1a03c0fd3
3
- size 1935649120
 
 
 
 
predict_results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "predict_bleu": 16.4443,
3
- "predict_gen_len": 42.1159,
4
- "predict_loss": 1.1480013132095337,
5
- "predict_runtime": 1868.3522,
6
- "predict_samples": 13307,
7
- "predict_samples_per_second": 7.122,
8
- "predict_steps_per_second": 0.891
9
- }
 
 
 
 
 
 
 
 
 
 
sentencepiece.bpe.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8f7c76ed2a5e0822be39f0a4f95a55eb19c78f4593ce609e2edbc2aea4d380a
3
- size 2423393
 
 
 
 
special_tokens_map.json DELETED
@@ -1,109 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "__af__",
4
- "__am__",
5
- "__ar__",
6
- "__ast__",
7
- "__az__",
8
- "__ba__",
9
- "__be__",
10
- "__bg__",
11
- "__bn__",
12
- "__br__",
13
- "__bs__",
14
- "__ca__",
15
- "__ceb__",
16
- "__cs__",
17
- "__cy__",
18
- "__da__",
19
- "__de__",
20
- "__el__",
21
- "__en__",
22
- "__es__",
23
- "__et__",
24
- "__fa__",
25
- "__ff__",
26
- "__fi__",
27
- "__fr__",
28
- "__fy__",
29
- "__ga__",
30
- "__gd__",
31
- "__gl__",
32
- "__gu__",
33
- "__ha__",
34
- "__he__",
35
- "__hi__",
36
- "__hr__",
37
- "__ht__",
38
- "__hu__",
39
- "__hy__",
40
- "__id__",
41
- "__ig__",
42
- "__ilo__",
43
- "__is__",
44
- "__it__",
45
- "__ja__",
46
- "__jv__",
47
- "__ka__",
48
- "__kk__",
49
- "__km__",
50
- "__kn__",
51
- "__ko__",
52
- "__lb__",
53
- "__lg__",
54
- "__ln__",
55
- "__lo__",
56
- "__lt__",
57
- "__lv__",
58
- "__mg__",
59
- "__mk__",
60
- "__ml__",
61
- "__mn__",
62
- "__mr__",
63
- "__ms__",
64
- "__my__",
65
- "__ne__",
66
- "__nl__",
67
- "__no__",
68
- "__ns__",
69
- "__oc__",
70
- "__or__",
71
- "__pa__",
72
- "__pl__",
73
- "__ps__",
74
- "__pt__",
75
- "__ro__",
76
- "__ru__",
77
- "__sd__",
78
- "__si__",
79
- "__sk__",
80
- "__sl__",
81
- "__so__",
82
- "__sq__",
83
- "__sr__",
84
- "__ss__",
85
- "__su__",
86
- "__sv__",
87
- "__sw__",
88
- "__ta__",
89
- "__th__",
90
- "__tl__",
91
- "__tn__",
92
- "__tr__",
93
- "__uk__",
94
- "__ur__",
95
- "__uz__",
96
- "__vi__",
97
- "__wo__",
98
- "__xh__",
99
- "__yi__",
100
- "__yo__",
101
- "__zh__",
102
- "__zu__"
103
- ],
104
- "bos_token": "<s>",
105
- "eos_token": "</s>",
106
- "pad_token": "<pad>",
107
- "sep_token": "</s>",
108
- "unk_token": "<unk>"
109
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tokenizer_config.json DELETED
@@ -1,951 +0,0 @@
1
- {
2
- "added_tokens_decoder": {
3
- "0": {
4
- "content": "<s>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "1": {
12
- "content": "<pad>",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "2": {
20
- "content": "</s>",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "3": {
28
- "content": "<unk>",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "128004": {
36
- "content": "__af__",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- },
43
- "128005": {
44
- "content": "__am__",
45
- "lstrip": false,
46
- "normalized": false,
47
- "rstrip": false,
48
- "single_word": false,
49
- "special": true
50
- },
51
- "128006": {
52
- "content": "__ar__",
53
- "lstrip": false,
54
- "normalized": false,
55
- "rstrip": false,
56
- "single_word": false,
57
- "special": true
58
- },
59
- "128007": {
60
- "content": "__ast__",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false,
65
- "special": true
66
- },
67
- "128008": {
68
- "content": "__az__",
69
- "lstrip": false,
70
- "normalized": false,
71
- "rstrip": false,
72
- "single_word": false,
73
- "special": true
74
- },
75
- "128009": {
76
- "content": "__ba__",
77
- "lstrip": false,
78
- "normalized": false,
79
- "rstrip": false,
80
- "single_word": false,
81
- "special": true
82
- },
83
- "128010": {
84
- "content": "__be__",
85
- "lstrip": false,
86
- "normalized": false,
87
- "rstrip": false,
88
- "single_word": false,
89
- "special": true
90
- },
91
- "128011": {
92
- "content": "__bg__",
93
- "lstrip": false,
94
- "normalized": false,
95
- "rstrip": false,
96
- "single_word": false,
97
- "special": true
98
- },
99
- "128012": {
100
- "content": "__bn__",
101
- "lstrip": false,
102
- "normalized": false,
103
- "rstrip": false,
104
- "single_word": false,
105
- "special": true
106
- },
107
- "128013": {
108
- "content": "__br__",
109
- "lstrip": false,
110
- "normalized": false,
111
- "rstrip": false,
112
- "single_word": false,
113
- "special": true
114
- },
115
- "128014": {
116
- "content": "__bs__",
117
- "lstrip": false,
118
- "normalized": false,
119
- "rstrip": false,
120
- "single_word": false,
121
- "special": true
122
- },
123
- "128015": {
124
- "content": "__ca__",
125
- "lstrip": false,
126
- "normalized": false,
127
- "rstrip": false,
128
- "single_word": false,
129
- "special": true
130
- },
131
- "128016": {
132
- "content": "__ceb__",
133
- "lstrip": false,
134
- "normalized": false,
135
- "rstrip": false,
136
- "single_word": false,
137
- "special": true
138
- },
139
- "128017": {
140
- "content": "__cs__",
141
- "lstrip": false,
142
- "normalized": false,
143
- "rstrip": false,
144
- "single_word": false,
145
- "special": true
146
- },
147
- "128018": {
148
- "content": "__cy__",
149
- "lstrip": false,
150
- "normalized": false,
151
- "rstrip": false,
152
- "single_word": false,
153
- "special": true
154
- },
155
- "128019": {
156
- "content": "__da__",
157
- "lstrip": false,
158
- "normalized": false,
159
- "rstrip": false,
160
- "single_word": false,
161
- "special": true
162
- },
163
- "128020": {
164
- "content": "__de__",
165
- "lstrip": false,
166
- "normalized": false,
167
- "rstrip": false,
168
- "single_word": false,
169
- "special": true
170
- },
171
- "128021": {
172
- "content": "__el__",
173
- "lstrip": false,
174
- "normalized": false,
175
- "rstrip": false,
176
- "single_word": false,
177
- "special": true
178
- },
179
- "128022": {
180
- "content": "__en__",
181
- "lstrip": false,
182
- "normalized": false,
183
- "rstrip": false,
184
- "single_word": false,
185
- "special": true
186
- },
187
- "128023": {
188
- "content": "__es__",
189
- "lstrip": false,
190
- "normalized": false,
191
- "rstrip": false,
192
- "single_word": false,
193
- "special": true
194
- },
195
- "128024": {
196
- "content": "__et__",
197
- "lstrip": false,
198
- "normalized": false,
199
- "rstrip": false,
200
- "single_word": false,
201
- "special": true
202
- },
203
- "128025": {
204
- "content": "__fa__",
205
- "lstrip": false,
206
- "normalized": false,
207
- "rstrip": false,
208
- "single_word": false,
209
- "special": true
210
- },
211
- "128026": {
212
- "content": "__ff__",
213
- "lstrip": false,
214
- "normalized": false,
215
- "rstrip": false,
216
- "single_word": false,
217
- "special": true
218
- },
219
- "128027": {
220
- "content": "__fi__",
221
- "lstrip": false,
222
- "normalized": false,
223
- "rstrip": false,
224
- "single_word": false,
225
- "special": true
226
- },
227
- "128028": {
228
- "content": "__fr__",
229
- "lstrip": false,
230
- "normalized": false,
231
- "rstrip": false,
232
- "single_word": false,
233
- "special": true
234
- },
235
- "128029": {
236
- "content": "__fy__",
237
- "lstrip": false,
238
- "normalized": false,
239
- "rstrip": false,
240
- "single_word": false,
241
- "special": true
242
- },
243
- "128030": {
244
- "content": "__ga__",
245
- "lstrip": false,
246
- "normalized": false,
247
- "rstrip": false,
248
- "single_word": false,
249
- "special": true
250
- },
251
- "128031": {
252
- "content": "__gd__",
253
- "lstrip": false,
254
- "normalized": false,
255
- "rstrip": false,
256
- "single_word": false,
257
- "special": true
258
- },
259
- "128032": {
260
- "content": "__gl__",
261
- "lstrip": false,
262
- "normalized": false,
263
- "rstrip": false,
264
- "single_word": false,
265
- "special": true
266
- },
267
- "128033": {
268
- "content": "__gu__",
269
- "lstrip": false,
270
- "normalized": false,
271
- "rstrip": false,
272
- "single_word": false,
273
- "special": true
274
- },
275
- "128034": {
276
- "content": "__ha__",
277
- "lstrip": false,
278
- "normalized": false,
279
- "rstrip": false,
280
- "single_word": false,
281
- "special": true
282
- },
283
- "128035": {
284
- "content": "__he__",
285
- "lstrip": false,
286
- "normalized": false,
287
- "rstrip": false,
288
- "single_word": false,
289
- "special": true
290
- },
291
- "128036": {
292
- "content": "__hi__",
293
- "lstrip": false,
294
- "normalized": false,
295
- "rstrip": false,
296
- "single_word": false,
297
- "special": true
298
- },
299
- "128037": {
300
- "content": "__hr__",
301
- "lstrip": false,
302
- "normalized": false,
303
- "rstrip": false,
304
- "single_word": false,
305
- "special": true
306
- },
307
- "128038": {
308
- "content": "__ht__",
309
- "lstrip": false,
310
- "normalized": false,
311
- "rstrip": false,
312
- "single_word": false,
313
- "special": true
314
- },
315
- "128039": {
316
- "content": "__hu__",
317
- "lstrip": false,
318
- "normalized": false,
319
- "rstrip": false,
320
- "single_word": false,
321
- "special": true
322
- },
323
- "128040": {
324
- "content": "__hy__",
325
- "lstrip": false,
326
- "normalized": false,
327
- "rstrip": false,
328
- "single_word": false,
329
- "special": true
330
- },
331
- "128041": {
332
- "content": "__id__",
333
- "lstrip": false,
334
- "normalized": false,
335
- "rstrip": false,
336
- "single_word": false,
337
- "special": true
338
- },
339
- "128042": {
340
- "content": "__ig__",
341
- "lstrip": false,
342
- "normalized": false,
343
- "rstrip": false,
344
- "single_word": false,
345
- "special": true
346
- },
347
- "128043": {
348
- "content": "__ilo__",
349
- "lstrip": false,
350
- "normalized": false,
351
- "rstrip": false,
352
- "single_word": false,
353
- "special": true
354
- },
355
- "128044": {
356
- "content": "__is__",
357
- "lstrip": false,
358
- "normalized": false,
359
- "rstrip": false,
360
- "single_word": false,
361
- "special": true
362
- },
363
- "128045": {
364
- "content": "__it__",
365
- "lstrip": false,
366
- "normalized": false,
367
- "rstrip": false,
368
- "single_word": false,
369
- "special": true
370
- },
371
- "128046": {
372
- "content": "__ja__",
373
- "lstrip": false,
374
- "normalized": false,
375
- "rstrip": false,
376
- "single_word": false,
377
- "special": true
378
- },
379
- "128047": {
380
- "content": "__jv__",
381
- "lstrip": false,
382
- "normalized": false,
383
- "rstrip": false,
384
- "single_word": false,
385
- "special": true
386
- },
387
- "128048": {
388
- "content": "__ka__",
389
- "lstrip": false,
390
- "normalized": false,
391
- "rstrip": false,
392
- "single_word": false,
393
- "special": true
394
- },
395
- "128049": {
396
- "content": "__kk__",
397
- "lstrip": false,
398
- "normalized": false,
399
- "rstrip": false,
400
- "single_word": false,
401
- "special": true
402
- },
403
- "128050": {
404
- "content": "__km__",
405
- "lstrip": false,
406
- "normalized": false,
407
- "rstrip": false,
408
- "single_word": false,
409
- "special": true
410
- },
411
- "128051": {
412
- "content": "__kn__",
413
- "lstrip": false,
414
- "normalized": false,
415
- "rstrip": false,
416
- "single_word": false,
417
- "special": true
418
- },
419
- "128052": {
420
- "content": "__ko__",
421
- "lstrip": false,
422
- "normalized": false,
423
- "rstrip": false,
424
- "single_word": false,
425
- "special": true
426
- },
427
- "128053": {
428
- "content": "__lb__",
429
- "lstrip": false,
430
- "normalized": false,
431
- "rstrip": false,
432
- "single_word": false,
433
- "special": true
434
- },
435
- "128054": {
436
- "content": "__lg__",
437
- "lstrip": false,
438
- "normalized": false,
439
- "rstrip": false,
440
- "single_word": false,
441
- "special": true
442
- },
443
- "128055": {
444
- "content": "__ln__",
445
- "lstrip": false,
446
- "normalized": false,
447
- "rstrip": false,
448
- "single_word": false,
449
- "special": true
450
- },
451
- "128056": {
452
- "content": "__lo__",
453
- "lstrip": false,
454
- "normalized": false,
455
- "rstrip": false,
456
- "single_word": false,
457
- "special": true
458
- },
459
- "128057": {
460
- "content": "__lt__",
461
- "lstrip": false,
462
- "normalized": false,
463
- "rstrip": false,
464
- "single_word": false,
465
- "special": true
466
- },
467
- "128058": {
468
- "content": "__lv__",
469
- "lstrip": false,
470
- "normalized": false,
471
- "rstrip": false,
472
- "single_word": false,
473
- "special": true
474
- },
475
- "128059": {
476
- "content": "__mg__",
477
- "lstrip": false,
478
- "normalized": false,
479
- "rstrip": false,
480
- "single_word": false,
481
- "special": true
482
- },
483
- "128060": {
484
- "content": "__mk__",
485
- "lstrip": false,
486
- "normalized": false,
487
- "rstrip": false,
488
- "single_word": false,
489
- "special": true
490
- },
491
- "128061": {
492
- "content": "__ml__",
493
- "lstrip": false,
494
- "normalized": false,
495
- "rstrip": false,
496
- "single_word": false,
497
- "special": true
498
- },
499
- "128062": {
500
- "content": "__mn__",
501
- "lstrip": false,
502
- "normalized": false,
503
- "rstrip": false,
504
- "single_word": false,
505
- "special": true
506
- },
507
- "128063": {
508
- "content": "__mr__",
509
- "lstrip": false,
510
- "normalized": false,
511
- "rstrip": false,
512
- "single_word": false,
513
- "special": true
514
- },
515
- "128064": {
516
- "content": "__ms__",
517
- "lstrip": false,
518
- "normalized": false,
519
- "rstrip": false,
520
- "single_word": false,
521
- "special": true
522
- },
523
- "128065": {
524
- "content": "__my__",
525
- "lstrip": false,
526
- "normalized": false,
527
- "rstrip": false,
528
- "single_word": false,
529
- "special": true
530
- },
531
- "128066": {
532
- "content": "__ne__",
533
- "lstrip": false,
534
- "normalized": false,
535
- "rstrip": false,
536
- "single_word": false,
537
- "special": true
538
- },
539
- "128067": {
540
- "content": "__nl__",
541
- "lstrip": false,
542
- "normalized": false,
543
- "rstrip": false,
544
- "single_word": false,
545
- "special": true
546
- },
547
- "128068": {
548
- "content": "__no__",
549
- "lstrip": false,
550
- "normalized": false,
551
- "rstrip": false,
552
- "single_word": false,
553
- "special": true
554
- },
555
- "128069": {
556
- "content": "__ns__",
557
- "lstrip": false,
558
- "normalized": false,
559
- "rstrip": false,
560
- "single_word": false,
561
- "special": true
562
- },
563
- "128070": {
564
- "content": "__oc__",
565
- "lstrip": false,
566
- "normalized": false,
567
- "rstrip": false,
568
- "single_word": false,
569
- "special": true
570
- },
571
- "128071": {
572
- "content": "__or__",
573
- "lstrip": false,
574
- "normalized": false,
575
- "rstrip": false,
576
- "single_word": false,
577
- "special": true
578
- },
579
- "128072": {
580
- "content": "__pa__",
581
- "lstrip": false,
582
- "normalized": false,
583
- "rstrip": false,
584
- "single_word": false,
585
- "special": true
586
- },
587
- "128073": {
588
- "content": "__pl__",
589
- "lstrip": false,
590
- "normalized": false,
591
- "rstrip": false,
592
- "single_word": false,
593
- "special": true
594
- },
595
- "128074": {
596
- "content": "__ps__",
597
- "lstrip": false,
598
- "normalized": false,
599
- "rstrip": false,
600
- "single_word": false,
601
- "special": true
602
- },
603
- "128075": {
604
- "content": "__pt__",
605
- "lstrip": false,
606
- "normalized": false,
607
- "rstrip": false,
608
- "single_word": false,
609
- "special": true
610
- },
611
- "128076": {
612
- "content": "__ro__",
613
- "lstrip": false,
614
- "normalized": false,
615
- "rstrip": false,
616
- "single_word": false,
617
- "special": true
618
- },
619
- "128077": {
620
- "content": "__ru__",
621
- "lstrip": false,
622
- "normalized": false,
623
- "rstrip": false,
624
- "single_word": false,
625
- "special": true
626
- },
627
- "128078": {
628
- "content": "__sd__",
629
- "lstrip": false,
630
- "normalized": false,
631
- "rstrip": false,
632
- "single_word": false,
633
- "special": true
634
- },
635
- "128079": {
636
- "content": "__si__",
637
- "lstrip": false,
638
- "normalized": false,
639
- "rstrip": false,
640
- "single_word": false,
641
- "special": true
642
- },
643
- "128080": {
644
- "content": "__sk__",
645
- "lstrip": false,
646
- "normalized": false,
647
- "rstrip": false,
648
- "single_word": false,
649
- "special": true
650
- },
651
- "128081": {
652
- "content": "__sl__",
653
- "lstrip": false,
654
- "normalized": false,
655
- "rstrip": false,
656
- "single_word": false,
657
- "special": true
658
- },
659
- "128082": {
660
- "content": "__so__",
661
- "lstrip": false,
662
- "normalized": false,
663
- "rstrip": false,
664
- "single_word": false,
665
- "special": true
666
- },
667
- "128083": {
668
- "content": "__sq__",
669
- "lstrip": false,
670
- "normalized": false,
671
- "rstrip": false,
672
- "single_word": false,
673
- "special": true
674
- },
675
- "128084": {
676
- "content": "__sr__",
677
- "lstrip": false,
678
- "normalized": false,
679
- "rstrip": false,
680
- "single_word": false,
681
- "special": true
682
- },
683
- "128085": {
684
- "content": "__ss__",
685
- "lstrip": false,
686
- "normalized": false,
687
- "rstrip": false,
688
- "single_word": false,
689
- "special": true
690
- },
691
- "128086": {
692
- "content": "__su__",
693
- "lstrip": false,
694
- "normalized": false,
695
- "rstrip": false,
696
- "single_word": false,
697
- "special": true
698
- },
699
- "128087": {
700
- "content": "__sv__",
701
- "lstrip": false,
702
- "normalized": false,
703
- "rstrip": false,
704
- "single_word": false,
705
- "special": true
706
- },
707
- "128088": {
708
- "content": "__sw__",
709
- "lstrip": false,
710
- "normalized": false,
711
- "rstrip": false,
712
- "single_word": false,
713
- "special": true
714
- },
715
- "128089": {
716
- "content": "__ta__",
717
- "lstrip": false,
718
- "normalized": false,
719
- "rstrip": false,
720
- "single_word": false,
721
- "special": true
722
- },
723
- "128090": {
724
- "content": "__th__",
725
- "lstrip": false,
726
- "normalized": false,
727
- "rstrip": false,
728
- "single_word": false,
729
- "special": true
730
- },
731
- "128091": {
732
- "content": "__tl__",
733
- "lstrip": false,
734
- "normalized": false,
735
- "rstrip": false,
736
- "single_word": false,
737
- "special": true
738
- },
739
- "128092": {
740
- "content": "__tn__",
741
- "lstrip": false,
742
- "normalized": false,
743
- "rstrip": false,
744
- "single_word": false,
745
- "special": true
746
- },
747
- "128093": {
748
- "content": "__tr__",
749
- "lstrip": false,
750
- "normalized": false,
751
- "rstrip": false,
752
- "single_word": false,
753
- "special": true
754
- },
755
- "128094": {
756
- "content": "__uk__",
757
- "lstrip": false,
758
- "normalized": false,
759
- "rstrip": false,
760
- "single_word": false,
761
- "special": true
762
- },
763
- "128095": {
764
- "content": "__ur__",
765
- "lstrip": false,
766
- "normalized": false,
767
- "rstrip": false,
768
- "single_word": false,
769
- "special": true
770
- },
771
- "128096": {
772
- "content": "__uz__",
773
- "lstrip": false,
774
- "normalized": false,
775
- "rstrip": false,
776
- "single_word": false,
777
- "special": true
778
- },
779
- "128097": {
780
- "content": "__vi__",
781
- "lstrip": false,
782
- "normalized": false,
783
- "rstrip": false,
784
- "single_word": false,
785
- "special": true
786
- },
787
- "128098": {
788
- "content": "__wo__",
789
- "lstrip": false,
790
- "normalized": false,
791
- "rstrip": false,
792
- "single_word": false,
793
- "special": true
794
- },
795
- "128099": {
796
- "content": "__xh__",
797
- "lstrip": false,
798
- "normalized": false,
799
- "rstrip": false,
800
- "single_word": false,
801
- "special": true
802
- },
803
- "128100": {
804
- "content": "__yi__",
805
- "lstrip": false,
806
- "normalized": false,
807
- "rstrip": false,
808
- "single_word": false,
809
- "special": true
810
- },
811
- "128101": {
812
- "content": "__yo__",
813
- "lstrip": false,
814
- "normalized": false,
815
- "rstrip": false,
816
- "single_word": false,
817
- "special": true
818
- },
819
- "128102": {
820
- "content": "__zh__",
821
- "lstrip": false,
822
- "normalized": false,
823
- "rstrip": false,
824
- "single_word": false,
825
- "special": true
826
- },
827
- "128103": {
828
- "content": "__zu__",
829
- "lstrip": false,
830
- "normalized": false,
831
- "rstrip": false,
832
- "single_word": false,
833
- "special": true
834
- }
835
- },
836
- "additional_special_tokens": [
837
- "__af__",
838
- "__am__",
839
- "__ar__",
840
- "__ast__",
841
- "__az__",
842
- "__ba__",
843
- "__be__",
844
- "__bg__",
845
- "__bn__",
846
- "__br__",
847
- "__bs__",
848
- "__ca__",
849
- "__ceb__",
850
- "__cs__",
851
- "__cy__",
852
- "__da__",
853
- "__de__",
854
- "__el__",
855
- "__en__",
856
- "__es__",
857
- "__et__",
858
- "__fa__",
859
- "__ff__",
860
- "__fi__",
861
- "__fr__",
862
- "__fy__",
863
- "__ga__",
864
- "__gd__",
865
- "__gl__",
866
- "__gu__",
867
- "__ha__",
868
- "__he__",
869
- "__hi__",
870
- "__hr__",
871
- "__ht__",
872
- "__hu__",
873
- "__hy__",
874
- "__id__",
875
- "__ig__",
876
- "__ilo__",
877
- "__is__",
878
- "__it__",
879
- "__ja__",
880
- "__jv__",
881
- "__ka__",
882
- "__kk__",
883
- "__km__",
884
- "__kn__",
885
- "__ko__",
886
- "__lb__",
887
- "__lg__",
888
- "__ln__",
889
- "__lo__",
890
- "__lt__",
891
- "__lv__",
892
- "__mg__",
893
- "__mk__",
894
- "__ml__",
895
- "__mn__",
896
- "__mr__",
897
- "__ms__",
898
- "__my__",
899
- "__ne__",
900
- "__nl__",
901
- "__no__",
902
- "__ns__",
903
- "__oc__",
904
- "__or__",
905
- "__pa__",
906
- "__pl__",
907
- "__ps__",
908
- "__pt__",
909
- "__ro__",
910
- "__ru__",
911
- "__sd__",
912
- "__si__",
913
- "__sk__",
914
- "__sl__",
915
- "__so__",
916
- "__sq__",
917
- "__sr__",
918
- "__ss__",
919
- "__su__",
920
- "__sv__",
921
- "__sw__",
922
- "__ta__",
923
- "__th__",
924
- "__tl__",
925
- "__tn__",
926
- "__tr__",
927
- "__uk__",
928
- "__ur__",
929
- "__uz__",
930
- "__vi__",
931
- "__wo__",
932
- "__xh__",
933
- "__yi__",
934
- "__yo__",
935
- "__zh__",
936
- "__zu__"
937
- ],
938
- "bos_token": "<s>",
939
- "clean_up_tokenization_spaces": true,
940
- "eos_token": "</s>",
941
- "language_codes": "m2m100",
942
- "model_max_length": 1024,
943
- "num_madeup_words": 8,
944
- "pad_token": "<pad>",
945
- "sep_token": "</s>",
946
- "sp_model_kwargs": {},
947
- "src_lang": "en",
948
- "tgt_lang": "ss",
949
- "tokenizer_class": "M2M100Tokenizer",
950
- "unk_token": "<unk>"
951
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
train_results.json DELETED
@@ -1,8 +0,0 @@
1
- {
2
- "epoch": 3.0,
3
- "train_loss": 1.3682204318087179,
4
- "train_runtime": 30390.702,
5
- "train_samples": 106459,
6
- "train_samples_per_second": 10.509,
7
- "train_steps_per_second": 1.314
8
- }
 
 
 
 
 
 
 
 
 
trainer_state.json DELETED
@@ -1,504 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
- "eval_steps": 500,
6
- "global_step": 39924,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.04,
13
- "learning_rate": 4.9373810239454966e-05,
14
- "loss": 3.827,
15
- "step": 500
16
- },
17
- {
18
- "epoch": 0.08,
19
- "learning_rate": 4.874762047890993e-05,
20
- "loss": 2.8494,
21
- "step": 1000
22
- },
23
- {
24
- "epoch": 0.11,
25
- "learning_rate": 4.8121430718364894e-05,
26
- "loss": 2.4906,
27
- "step": 1500
28
- },
29
- {
30
- "epoch": 0.15,
31
- "learning_rate": 4.749524095781986e-05,
32
- "loss": 2.2834,
33
- "step": 2000
34
- },
35
- {
36
- "epoch": 0.19,
37
- "learning_rate": 4.686905119727483e-05,
38
- "loss": 2.1856,
39
- "step": 2500
40
- },
41
- {
42
- "epoch": 0.23,
43
- "learning_rate": 4.624286143672979e-05,
44
- "loss": 2.0726,
45
- "step": 3000
46
- },
47
- {
48
- "epoch": 0.26,
49
- "learning_rate": 4.5616671676184756e-05,
50
- "loss": 1.9825,
51
- "step": 3500
52
- },
53
- {
54
- "epoch": 0.3,
55
- "learning_rate": 4.499048191563971e-05,
56
- "loss": 1.9294,
57
- "step": 4000
58
- },
59
- {
60
- "epoch": 0.34,
61
- "learning_rate": 4.436429215509468e-05,
62
- "loss": 1.8662,
63
- "step": 4500
64
- },
65
- {
66
- "epoch": 0.38,
67
- "learning_rate": 4.373810239454965e-05,
68
- "loss": 1.808,
69
- "step": 5000
70
- },
71
- {
72
- "epoch": 0.41,
73
- "learning_rate": 4.311191263400461e-05,
74
- "loss": 1.7765,
75
- "step": 5500
76
- },
77
- {
78
- "epoch": 0.45,
79
- "learning_rate": 4.2485722873459575e-05,
80
- "loss": 1.7502,
81
- "step": 6000
82
- },
83
- {
84
- "epoch": 0.49,
85
- "learning_rate": 4.185953311291454e-05,
86
- "loss": 1.7174,
87
- "step": 6500
88
- },
89
- {
90
- "epoch": 0.53,
91
- "learning_rate": 4.12333433523695e-05,
92
- "loss": 1.7021,
93
- "step": 7000
94
- },
95
- {
96
- "epoch": 0.56,
97
- "learning_rate": 4.060715359182447e-05,
98
- "loss": 1.6772,
99
- "step": 7500
100
- },
101
- {
102
- "epoch": 0.6,
103
- "learning_rate": 3.9980963831279437e-05,
104
- "loss": 1.641,
105
- "step": 8000
106
- },
107
- {
108
- "epoch": 0.64,
109
- "learning_rate": 3.9354774070734394e-05,
110
- "loss": 1.6182,
111
- "step": 8500
112
- },
113
- {
114
- "epoch": 0.68,
115
- "learning_rate": 3.8728584310189364e-05,
116
- "loss": 1.5966,
117
- "step": 9000
118
- },
119
- {
120
- "epoch": 0.71,
121
- "learning_rate": 3.810239454964433e-05,
122
- "loss": 1.6164,
123
- "step": 9500
124
- },
125
- {
126
- "epoch": 0.75,
127
- "learning_rate": 3.747620478909929e-05,
128
- "loss": 1.5659,
129
- "step": 10000
130
- },
131
- {
132
- "epoch": 0.79,
133
- "learning_rate": 3.6850015028554255e-05,
134
- "loss": 1.5416,
135
- "step": 10500
136
- },
137
- {
138
- "epoch": 0.83,
139
- "learning_rate": 3.622382526800922e-05,
140
- "loss": 1.5337,
141
- "step": 11000
142
- },
143
- {
144
- "epoch": 0.86,
145
- "learning_rate": 3.559763550746418e-05,
146
- "loss": 1.5203,
147
- "step": 11500
148
- },
149
- {
150
- "epoch": 0.9,
151
- "learning_rate": 3.497144574691915e-05,
152
- "loss": 1.5121,
153
- "step": 12000
154
- },
155
- {
156
- "epoch": 0.94,
157
- "learning_rate": 3.434525598637412e-05,
158
- "loss": 1.4809,
159
- "step": 12500
160
- },
161
- {
162
- "epoch": 0.98,
163
- "learning_rate": 3.3719066225829074e-05,
164
- "loss": 1.462,
165
- "step": 13000
166
- },
167
- {
168
- "epoch": 1.01,
169
- "learning_rate": 3.309287646528404e-05,
170
- "loss": 1.4146,
171
- "step": 13500
172
- },
173
- {
174
- "epoch": 1.05,
175
- "learning_rate": 3.246668670473901e-05,
176
- "loss": 1.2905,
177
- "step": 14000
178
- },
179
- {
180
- "epoch": 1.09,
181
- "learning_rate": 3.184049694419397e-05,
182
- "loss": 1.306,
183
- "step": 14500
184
- },
185
- {
186
- "epoch": 1.13,
187
- "learning_rate": 3.1214307183648936e-05,
188
- "loss": 1.2827,
189
- "step": 15000
190
- },
191
- {
192
- "epoch": 1.16,
193
- "learning_rate": 3.05881174231039e-05,
194
- "loss": 1.2569,
195
- "step": 15500
196
- },
197
- {
198
- "epoch": 1.2,
199
- "learning_rate": 2.9961927662558864e-05,
200
- "loss": 1.2956,
201
- "step": 16000
202
- },
203
- {
204
- "epoch": 1.24,
205
- "learning_rate": 2.9335737902013828e-05,
206
- "loss": 1.2905,
207
- "step": 16500
208
- },
209
- {
210
- "epoch": 1.28,
211
- "learning_rate": 2.8709548141468788e-05,
212
- "loss": 1.2648,
213
- "step": 17000
214
- },
215
- {
216
- "epoch": 1.31,
217
- "learning_rate": 2.808335838092376e-05,
218
- "loss": 1.2648,
219
- "step": 17500
220
- },
221
- {
222
- "epoch": 1.35,
223
- "learning_rate": 2.745716862037872e-05,
224
- "loss": 1.2784,
225
- "step": 18000
226
- },
227
- {
228
- "epoch": 1.39,
229
- "learning_rate": 2.6830978859833683e-05,
230
- "loss": 1.249,
231
- "step": 18500
232
- },
233
- {
234
- "epoch": 1.43,
235
- "learning_rate": 2.620478909928865e-05,
236
- "loss": 1.2447,
237
- "step": 19000
238
- },
239
- {
240
- "epoch": 1.47,
241
- "learning_rate": 2.5578599338743614e-05,
242
- "loss": 1.2556,
243
- "step": 19500
244
- },
245
- {
246
- "epoch": 1.5,
247
- "learning_rate": 2.495240957819858e-05,
248
- "loss": 1.2455,
249
- "step": 20000
250
- },
251
- {
252
- "epoch": 1.54,
253
- "learning_rate": 2.432621981765354e-05,
254
- "loss": 1.2428,
255
- "step": 20500
256
- },
257
- {
258
- "epoch": 1.58,
259
- "learning_rate": 2.370003005710851e-05,
260
- "loss": 1.2478,
261
- "step": 21000
262
- },
263
- {
264
- "epoch": 1.62,
265
- "learning_rate": 2.307384029656347e-05,
266
- "loss": 1.2246,
267
- "step": 21500
268
- },
269
- {
270
- "epoch": 1.65,
271
- "learning_rate": 2.2447650536018436e-05,
272
- "loss": 1.2013,
273
- "step": 22000
274
- },
275
- {
276
- "epoch": 1.69,
277
- "learning_rate": 2.18214607754734e-05,
278
- "loss": 1.2323,
279
- "step": 22500
280
- },
281
- {
282
- "epoch": 1.73,
283
- "learning_rate": 2.1195271014928364e-05,
284
- "loss": 1.2043,
285
- "step": 23000
286
- },
287
- {
288
- "epoch": 1.77,
289
- "learning_rate": 2.056908125438333e-05,
290
- "loss": 1.1874,
291
- "step": 23500
292
- },
293
- {
294
- "epoch": 1.8,
295
- "learning_rate": 1.9942891493838295e-05,
296
- "loss": 1.2216,
297
- "step": 24000
298
- },
299
- {
300
- "epoch": 1.84,
301
- "learning_rate": 1.931670173329326e-05,
302
- "loss": 1.1723,
303
- "step": 24500
304
- },
305
- {
306
- "epoch": 1.88,
307
- "learning_rate": 1.8690511972748222e-05,
308
- "loss": 1.1971,
309
- "step": 25000
310
- },
311
- {
312
- "epoch": 1.92,
313
- "learning_rate": 1.8064322212203186e-05,
314
- "loss": 1.1829,
315
- "step": 25500
316
- },
317
- {
318
- "epoch": 1.95,
319
- "learning_rate": 1.743813245165815e-05,
320
- "loss": 1.183,
321
- "step": 26000
322
- },
323
- {
324
- "epoch": 1.99,
325
- "learning_rate": 1.6811942691113117e-05,
326
- "loss": 1.1776,
327
- "step": 26500
328
- },
329
- {
330
- "epoch": 2.03,
331
- "learning_rate": 1.618575293056808e-05,
332
- "loss": 1.0416,
333
- "step": 27000
334
- },
335
- {
336
- "epoch": 2.07,
337
- "learning_rate": 1.5559563170023044e-05,
338
- "loss": 1.0001,
339
- "step": 27500
340
- },
341
- {
342
- "epoch": 2.1,
343
- "learning_rate": 1.4933373409478008e-05,
344
- "loss": 1.0103,
345
- "step": 28000
346
- },
347
- {
348
- "epoch": 2.14,
349
- "learning_rate": 1.4307183648932974e-05,
350
- "loss": 1.015,
351
- "step": 28500
352
- },
353
- {
354
- "epoch": 2.18,
355
- "learning_rate": 1.368099388838794e-05,
356
- "loss": 0.9958,
357
- "step": 29000
358
- },
359
- {
360
- "epoch": 2.22,
361
- "learning_rate": 1.3054804127842901e-05,
362
- "loss": 1.0253,
363
- "step": 29500
364
- },
365
- {
366
- "epoch": 2.25,
367
- "learning_rate": 1.2428614367297867e-05,
368
- "loss": 1.0064,
369
- "step": 30000
370
- },
371
- {
372
- "epoch": 2.29,
373
- "learning_rate": 1.180242460675283e-05,
374
- "loss": 1.013,
375
- "step": 30500
376
- },
377
- {
378
- "epoch": 2.33,
379
- "learning_rate": 1.1176234846207794e-05,
380
- "loss": 0.9971,
381
- "step": 31000
382
- },
383
- {
384
- "epoch": 2.37,
385
- "learning_rate": 1.055004508566276e-05,
386
- "loss": 0.9968,
387
- "step": 31500
388
- },
389
- {
390
- "epoch": 2.4,
391
- "learning_rate": 9.923855325117724e-06,
392
- "loss": 1.0013,
393
- "step": 32000
394
- },
395
- {
396
- "epoch": 2.44,
397
- "learning_rate": 9.297665564572689e-06,
398
- "loss": 1.0178,
399
- "step": 32500
400
- },
401
- {
402
- "epoch": 2.48,
403
- "learning_rate": 8.671475804027653e-06,
404
- "loss": 0.9937,
405
- "step": 33000
406
- },
407
- {
408
- "epoch": 2.52,
409
- "learning_rate": 8.045286043482618e-06,
410
- "loss": 0.9857,
411
- "step": 33500
412
- },
413
- {
414
- "epoch": 2.55,
415
- "learning_rate": 7.419096282937582e-06,
416
- "loss": 0.9964,
417
- "step": 34000
418
- },
419
- {
420
- "epoch": 2.59,
421
- "learning_rate": 6.792906522392546e-06,
422
- "loss": 0.9831,
423
- "step": 34500
424
- },
425
- {
426
- "epoch": 2.63,
427
- "learning_rate": 6.1667167618475106e-06,
428
- "loss": 0.9746,
429
- "step": 35000
430
- },
431
- {
432
- "epoch": 2.67,
433
- "learning_rate": 5.540527001302475e-06,
434
- "loss": 0.9872,
435
- "step": 35500
436
- },
437
- {
438
- "epoch": 2.71,
439
- "learning_rate": 4.914337240757439e-06,
440
- "loss": 0.9678,
441
- "step": 36000
442
- },
443
- {
444
- "epoch": 2.74,
445
- "learning_rate": 4.288147480212404e-06,
446
- "loss": 0.9645,
447
- "step": 36500
448
- },
449
- {
450
- "epoch": 2.78,
451
- "learning_rate": 3.661957719667368e-06,
452
- "loss": 0.9716,
453
- "step": 37000
454
- },
455
- {
456
- "epoch": 2.82,
457
- "learning_rate": 3.035767959122333e-06,
458
- "loss": 0.9723,
459
- "step": 37500
460
- },
461
- {
462
- "epoch": 2.86,
463
- "learning_rate": 2.409578198577297e-06,
464
- "loss": 0.9694,
465
- "step": 38000
466
- },
467
- {
468
- "epoch": 2.89,
469
- "learning_rate": 1.7833884380322613e-06,
470
- "loss": 0.9739,
471
- "step": 38500
472
- },
473
- {
474
- "epoch": 2.93,
475
- "learning_rate": 1.157198677487226e-06,
476
- "loss": 0.9837,
477
- "step": 39000
478
- },
479
- {
480
- "epoch": 2.97,
481
- "learning_rate": 5.310089169421901e-07,
482
- "loss": 0.9514,
483
- "step": 39500
484
- },
485
- {
486
- "epoch": 3.0,
487
- "step": 39924,
488
- "total_flos": 3.493213683491635e+16,
489
- "train_loss": 1.3682204318087179,
490
- "train_runtime": 30390.702,
491
- "train_samples_per_second": 10.509,
492
- "train_steps_per_second": 1.314
493
- }
494
- ],
495
- "logging_steps": 500,
496
- "max_steps": 39924,
497
- "num_input_tokens_seen": 0,
498
- "num_train_epochs": 3,
499
- "save_steps": 10000,
500
- "total_flos": 3.493213683491635e+16,
501
- "train_batch_size": 8,
502
- "trial_name": null,
503
- "trial_params": null
504
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f18045caba5a3c7460fc6c8dec8693c82dbacd2c16e451c6d82a83228661c8a
3
- size 4856
 
 
 
 
vocab.json DELETED
The diff for this file is too large to render. See raw diff