Rahul-G committed on
Commit
d16d660
1 Parent(s): 7018c69

Training in progress, epoch 1

Browse files
config.json ADDED
@@ -0,0 +1,1446 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "xlm-mlm-100-1280",
3
+ "accumulate_gradients": 4,
4
+ "ae_steps": [],
5
+ "amp": 2,
6
+ "architectures": [
7
+ "XLMForSequenceClassification"
8
+ ],
9
+ "asm": false,
10
+ "attention_dropout": 0.1,
11
+ "batch_size": 16,
12
+ "beam_size": 1,
13
+ "bos_index": 0,
14
+ "bos_token_id": 0,
15
+ "bptt": 256,
16
+ "bt_src_langs": [],
17
+ "bt_steps": [],
18
+ "causal": false,
19
+ "clip_grad_norm": 1.0,
20
+ "clm_steps": [],
21
+ "command": "python /private/home/aconneau/workdir/xlm_17_100_big.3/2019_08_10_19_23_42/train.py --n_heads 16 --bt_steps '' --max_vocab 200000 --word_mask_keep_rand '0.8,0.1,0.1' --use_lang_emb false --data_path '/private/home/aconneau/projects/XLM/data/wiki/100/175k' --save_periodic 0 --max_len 200 --bptt 256 --ae_steps '' --fp16 true --share_inout_emb true --sinusoidal_embeddings false --word_shuffle 0 --tokens_per_batch '-1' --accumulate_gradients 4 --validation_metrics '_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl' --attention_dropout '0.1' --split_data true --max_epoch 100000 --stopping_criterion '_valid_zh_mlm_ppl,25' --dump_path '/checkpoint/aconneau/dumped' --epoch_size 200000 --word_blank 0 --gelu_activation true --n_layers 16 --optimizer 'adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001' --mlm_steps 'en,es,fr,de,zh,ru,pt,it,ar,ja,id,tr,nl,pl,simple,fa,vi,sv,ko,he,ro,no,hi,uk,cs,fi,hu,th,da,ca,el,bg,sr,ms,bn,hr,sl,zh_yue,az,sk,eo,ta,sh,lt,et,ml,la,bs,sq,arz,af,ka,mr,eu,tl,ang,gl,nn,ur,kk,be,hy,te,lv,mk,zh_classical,als,is,wuu,my,sco,mn,ceb,ast,cy,kn,br,an,gu,bar,uz,lb,ne,si,war,jv,ga,zh_min_nan,oc,ku,sw,nds,ckb,ia,yi,fy,scn,gan,tt,am' --eval_bleu false --dropout '0.1' --mt_steps '' --batch_size 16 --word_dropout 0 --reload_model '/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth' --min_count 0 --amp 2 --group_by_size true --asm false --sample_alpha '0.5' --word_pred '0.15' --clip_grad_norm 1 --emb_dim 1280 --encoder_only true --lgs 'en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am' --clm_steps '' --exp_name 'xlm_17_100_big.3' --lg_sampling_factor '0.7' --eval_only 
false --exp_id 16656234 --master_port 11363 --exp_id \"16656234\"",
22
+ "context_size": 0,
23
+ "data_path": "/private/home/aconneau/projects/XLM/data/wiki/100/175k",
24
+ "debug": false,
25
+ "debug_slurm": false,
26
+ "debug_train": false,
27
+ "dropout": 0.1,
28
+ "dump_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234",
29
+ "emb_dim": 1280,
30
+ "embed_init_std": 0.02209708691207961,
31
+ "encoder_only": true,
32
+ "end_n_top": 5,
33
+ "eos_index": 1,
34
+ "epoch_size": 200000,
35
+ "eval_bleu": false,
36
+ "eval_only": false,
37
+ "exp_id": "16656234",
38
+ "exp_name": "xlm_17_100_big.3",
39
+ "fp16": true,
40
+ "gelu_activation": true,
41
+ "global_rank": 0,
42
+ "group_by_size": true,
43
+ "hyp_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234/hypotheses",
44
+ "id2label": {
45
+ "0": "Negative",
46
+ "1": "Neutral",
47
+ "2": "Positive"
48
+ },
49
+ "id2lang": {
50
+ "0": "af",
51
+ "1": "als",
52
+ "10": "be",
53
+ "11": "bg",
54
+ "12": "bn",
55
+ "13": "br",
56
+ "14": "bs",
57
+ "15": "ca",
58
+ "16": "ceb",
59
+ "17": "ckb",
60
+ "18": "cs",
61
+ "19": "cy",
62
+ "2": "am",
63
+ "20": "da",
64
+ "21": "de",
65
+ "22": "el",
66
+ "23": "en",
67
+ "24": "eo",
68
+ "25": "es",
69
+ "26": "et",
70
+ "27": "eu",
71
+ "28": "fa",
72
+ "29": "fi",
73
+ "3": "an",
74
+ "30": "fr",
75
+ "31": "fy",
76
+ "32": "ga",
77
+ "33": "gan",
78
+ "34": "gl",
79
+ "35": "gu",
80
+ "36": "he",
81
+ "37": "hi",
82
+ "38": "hr",
83
+ "39": "hu",
84
+ "4": "ang",
85
+ "40": "hy",
86
+ "41": "ia",
87
+ "42": "id",
88
+ "43": "is",
89
+ "44": "it",
90
+ "45": "ja",
91
+ "46": "jv",
92
+ "47": "ka",
93
+ "48": "kk",
94
+ "49": "kn",
95
+ "5": "ar",
96
+ "50": "ko",
97
+ "51": "ku",
98
+ "52": "la",
99
+ "53": "lb",
100
+ "54": "lt",
101
+ "55": "lv",
102
+ "56": "mk",
103
+ "57": "ml",
104
+ "58": "mn",
105
+ "59": "mr",
106
+ "6": "arz",
107
+ "60": "ms",
108
+ "61": "my",
109
+ "62": "nds",
110
+ "63": "ne",
111
+ "64": "nl",
112
+ "65": "nn",
113
+ "66": "no",
114
+ "67": "oc",
115
+ "68": "pl",
116
+ "69": "pt",
117
+ "7": "ast",
118
+ "70": "ro",
119
+ "71": "ru",
120
+ "72": "scn",
121
+ "73": "sco",
122
+ "74": "sh",
123
+ "75": "si",
124
+ "76": "simple",
125
+ "77": "sk",
126
+ "78": "sl",
127
+ "79": "sq",
128
+ "8": "az",
129
+ "80": "sr",
130
+ "81": "sv",
131
+ "82": "sw",
132
+ "83": "ta",
133
+ "84": "te",
134
+ "85": "th",
135
+ "86": "tl",
136
+ "87": "tr",
137
+ "88": "tt",
138
+ "89": "uk",
139
+ "9": "bar",
140
+ "90": "ur",
141
+ "91": "uz",
142
+ "92": "vi",
143
+ "93": "war",
144
+ "94": "wuu",
145
+ "95": "yi",
146
+ "96": "zh",
147
+ "97": "zh_classical",
148
+ "98": "zh_min_nan",
149
+ "99": "zh_yue"
150
+ },
151
+ "init_std": 0.02,
152
+ "is_encoder": true,
153
+ "is_master": true,
154
+ "is_slurm_job": true,
155
+ "label2id": {
156
+ "Negative": 0,
157
+ "Neutral": 1,
158
+ "Positive": 2
159
+ },
160
+ "lambda_ae": 1.0,
161
+ "lambda_ae_config": null,
162
+ "lambda_bt": 1.0,
163
+ "lambda_bt_config": null,
164
+ "lambda_clm": 1.0,
165
+ "lambda_clm_config": null,
166
+ "lambda_mlm": 1.0,
167
+ "lambda_mlm_config": null,
168
+ "lambda_mt": 1.0,
169
+ "lambda_mt_config": null,
170
+ "lambda_pc": 1.0,
171
+ "lambda_pc_config": null,
172
+ "lang2id": {
173
+ "af": 0,
174
+ "als": 1,
175
+ "am": 2,
176
+ "an": 3,
177
+ "ang": 4,
178
+ "ar": 5,
179
+ "arz": 6,
180
+ "ast": 7,
181
+ "az": 8,
182
+ "bar": 9,
183
+ "be": 10,
184
+ "bg": 11,
185
+ "bn": 12,
186
+ "br": 13,
187
+ "bs": 14,
188
+ "ca": 15,
189
+ "ceb": 16,
190
+ "ckb": 17,
191
+ "cs": 18,
192
+ "cy": 19,
193
+ "da": 20,
194
+ "de": 21,
195
+ "el": 22,
196
+ "en": 23,
197
+ "eo": 24,
198
+ "es": 25,
199
+ "et": 26,
200
+ "eu": 27,
201
+ "fa": 28,
202
+ "fi": 29,
203
+ "fr": 30,
204
+ "fy": 31,
205
+ "ga": 32,
206
+ "gan": 33,
207
+ "gl": 34,
208
+ "gu": 35,
209
+ "he": 36,
210
+ "hi": 37,
211
+ "hr": 38,
212
+ "hu": 39,
213
+ "hy": 40,
214
+ "ia": 41,
215
+ "id": 42,
216
+ "is": 43,
217
+ "it": 44,
218
+ "ja": 45,
219
+ "jv": 46,
220
+ "ka": 47,
221
+ "kk": 48,
222
+ "kn": 49,
223
+ "ko": 50,
224
+ "ku": 51,
225
+ "la": 52,
226
+ "lb": 53,
227
+ "lt": 54,
228
+ "lv": 55,
229
+ "mk": 56,
230
+ "ml": 57,
231
+ "mn": 58,
232
+ "mr": 59,
233
+ "ms": 60,
234
+ "my": 61,
235
+ "nds": 62,
236
+ "ne": 63,
237
+ "nl": 64,
238
+ "nn": 65,
239
+ "no": 66,
240
+ "oc": 67,
241
+ "pl": 68,
242
+ "pt": 69,
243
+ "ro": 70,
244
+ "ru": 71,
245
+ "scn": 72,
246
+ "sco": 73,
247
+ "sh": 74,
248
+ "si": 75,
249
+ "simple": 76,
250
+ "sk": 77,
251
+ "sl": 78,
252
+ "sq": 79,
253
+ "sr": 80,
254
+ "sv": 81,
255
+ "sw": 82,
256
+ "ta": 83,
257
+ "te": 84,
258
+ "th": 85,
259
+ "tl": 86,
260
+ "tr": 87,
261
+ "tt": 88,
262
+ "uk": 89,
263
+ "ur": 90,
264
+ "uz": 91,
265
+ "vi": 92,
266
+ "war": 93,
267
+ "wuu": 94,
268
+ "yi": 95,
269
+ "zh": 96,
270
+ "zh_classical": 97,
271
+ "zh_min_nan": 98,
272
+ "zh_yue": 99
273
+ },
274
+ "lang_id": 0,
275
+ "langs": [
276
+ "en",
277
+ "es",
278
+ "fr",
279
+ "de",
280
+ "zh",
281
+ "ru",
282
+ "pt",
283
+ "it",
284
+ "ar",
285
+ "ja",
286
+ "id",
287
+ "tr",
288
+ "nl",
289
+ "pl",
290
+ "simple",
291
+ "fa",
292
+ "vi",
293
+ "sv",
294
+ "ko",
295
+ "he",
296
+ "ro",
297
+ "no",
298
+ "hi",
299
+ "uk",
300
+ "cs",
301
+ "fi",
302
+ "hu",
303
+ "th",
304
+ "da",
305
+ "ca",
306
+ "el",
307
+ "bg",
308
+ "sr",
309
+ "ms",
310
+ "bn",
311
+ "hr",
312
+ "sl",
313
+ "zh_yue",
314
+ "az",
315
+ "sk",
316
+ "eo",
317
+ "ta",
318
+ "sh",
319
+ "lt",
320
+ "et",
321
+ "ml",
322
+ "la",
323
+ "bs",
324
+ "sq",
325
+ "arz",
326
+ "af",
327
+ "ka",
328
+ "mr",
329
+ "eu",
330
+ "tl",
331
+ "ang",
332
+ "gl",
333
+ "nn",
334
+ "ur",
335
+ "kk",
336
+ "be",
337
+ "hy",
338
+ "te",
339
+ "lv",
340
+ "mk",
341
+ "zh_classical",
342
+ "als",
343
+ "is",
344
+ "wuu",
345
+ "my",
346
+ "sco",
347
+ "mn",
348
+ "ceb",
349
+ "ast",
350
+ "cy",
351
+ "kn",
352
+ "br",
353
+ "an",
354
+ "gu",
355
+ "bar",
356
+ "uz",
357
+ "lb",
358
+ "ne",
359
+ "si",
360
+ "war",
361
+ "jv",
362
+ "ga",
363
+ "zh_min_nan",
364
+ "oc",
365
+ "ku",
366
+ "sw",
367
+ "nds",
368
+ "ckb",
369
+ "ia",
370
+ "yi",
371
+ "fy",
372
+ "scn",
373
+ "gan",
374
+ "tt",
375
+ "am"
376
+ ],
377
+ "layer_norm_eps": 1e-12,
378
+ "lg_sampling_factor": 0.7,
379
+ "lgs": "en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am",
380
+ "local_rank": 0,
381
+ "mask_index": 5,
382
+ "mask_token_id": 0,
383
+ "master_addr": "learnfair0332",
384
+ "master_port": 11363,
385
+ "max_batch_size": 0,
386
+ "max_epoch": 100000,
387
+ "max_len": 200,
388
+ "max_position_embeddings": 512,
389
+ "max_vocab": 200000,
390
+ "min_count": 0,
391
+ "mlm_steps": [
392
+ [
393
+ "en",
394
+ null
395
+ ],
396
+ [
397
+ "es",
398
+ null
399
+ ],
400
+ [
401
+ "fr",
402
+ null
403
+ ],
404
+ [
405
+ "de",
406
+ null
407
+ ],
408
+ [
409
+ "zh",
410
+ null
411
+ ],
412
+ [
413
+ "ru",
414
+ null
415
+ ],
416
+ [
417
+ "pt",
418
+ null
419
+ ],
420
+ [
421
+ "it",
422
+ null
423
+ ],
424
+ [
425
+ "ar",
426
+ null
427
+ ],
428
+ [
429
+ "ja",
430
+ null
431
+ ],
432
+ [
433
+ "id",
434
+ null
435
+ ],
436
+ [
437
+ "tr",
438
+ null
439
+ ],
440
+ [
441
+ "nl",
442
+ null
443
+ ],
444
+ [
445
+ "pl",
446
+ null
447
+ ],
448
+ [
449
+ "simple",
450
+ null
451
+ ],
452
+ [
453
+ "fa",
454
+ null
455
+ ],
456
+ [
457
+ "vi",
458
+ null
459
+ ],
460
+ [
461
+ "sv",
462
+ null
463
+ ],
464
+ [
465
+ "ko",
466
+ null
467
+ ],
468
+ [
469
+ "he",
470
+ null
471
+ ],
472
+ [
473
+ "ro",
474
+ null
475
+ ],
476
+ [
477
+ "no",
478
+ null
479
+ ],
480
+ [
481
+ "hi",
482
+ null
483
+ ],
484
+ [
485
+ "uk",
486
+ null
487
+ ],
488
+ [
489
+ "cs",
490
+ null
491
+ ],
492
+ [
493
+ "fi",
494
+ null
495
+ ],
496
+ [
497
+ "hu",
498
+ null
499
+ ],
500
+ [
501
+ "th",
502
+ null
503
+ ],
504
+ [
505
+ "da",
506
+ null
507
+ ],
508
+ [
509
+ "ca",
510
+ null
511
+ ],
512
+ [
513
+ "el",
514
+ null
515
+ ],
516
+ [
517
+ "bg",
518
+ null
519
+ ],
520
+ [
521
+ "sr",
522
+ null
523
+ ],
524
+ [
525
+ "ms",
526
+ null
527
+ ],
528
+ [
529
+ "bn",
530
+ null
531
+ ],
532
+ [
533
+ "hr",
534
+ null
535
+ ],
536
+ [
537
+ "sl",
538
+ null
539
+ ],
540
+ [
541
+ "zh_yue",
542
+ null
543
+ ],
544
+ [
545
+ "az",
546
+ null
547
+ ],
548
+ [
549
+ "sk",
550
+ null
551
+ ],
552
+ [
553
+ "eo",
554
+ null
555
+ ],
556
+ [
557
+ "ta",
558
+ null
559
+ ],
560
+ [
561
+ "sh",
562
+ null
563
+ ],
564
+ [
565
+ "lt",
566
+ null
567
+ ],
568
+ [
569
+ "et",
570
+ null
571
+ ],
572
+ [
573
+ "ml",
574
+ null
575
+ ],
576
+ [
577
+ "la",
578
+ null
579
+ ],
580
+ [
581
+ "bs",
582
+ null
583
+ ],
584
+ [
585
+ "sq",
586
+ null
587
+ ],
588
+ [
589
+ "arz",
590
+ null
591
+ ],
592
+ [
593
+ "af",
594
+ null
595
+ ],
596
+ [
597
+ "ka",
598
+ null
599
+ ],
600
+ [
601
+ "mr",
602
+ null
603
+ ],
604
+ [
605
+ "eu",
606
+ null
607
+ ],
608
+ [
609
+ "tl",
610
+ null
611
+ ],
612
+ [
613
+ "ang",
614
+ null
615
+ ],
616
+ [
617
+ "gl",
618
+ null
619
+ ],
620
+ [
621
+ "nn",
622
+ null
623
+ ],
624
+ [
625
+ "ur",
626
+ null
627
+ ],
628
+ [
629
+ "kk",
630
+ null
631
+ ],
632
+ [
633
+ "be",
634
+ null
635
+ ],
636
+ [
637
+ "hy",
638
+ null
639
+ ],
640
+ [
641
+ "te",
642
+ null
643
+ ],
644
+ [
645
+ "lv",
646
+ null
647
+ ],
648
+ [
649
+ "mk",
650
+ null
651
+ ],
652
+ [
653
+ "zh_classical",
654
+ null
655
+ ],
656
+ [
657
+ "als",
658
+ null
659
+ ],
660
+ [
661
+ "is",
662
+ null
663
+ ],
664
+ [
665
+ "wuu",
666
+ null
667
+ ],
668
+ [
669
+ "my",
670
+ null
671
+ ],
672
+ [
673
+ "sco",
674
+ null
675
+ ],
676
+ [
677
+ "mn",
678
+ null
679
+ ],
680
+ [
681
+ "ceb",
682
+ null
683
+ ],
684
+ [
685
+ "ast",
686
+ null
687
+ ],
688
+ [
689
+ "cy",
690
+ null
691
+ ],
692
+ [
693
+ "kn",
694
+ null
695
+ ],
696
+ [
697
+ "br",
698
+ null
699
+ ],
700
+ [
701
+ "an",
702
+ null
703
+ ],
704
+ [
705
+ "gu",
706
+ null
707
+ ],
708
+ [
709
+ "bar",
710
+ null
711
+ ],
712
+ [
713
+ "uz",
714
+ null
715
+ ],
716
+ [
717
+ "lb",
718
+ null
719
+ ],
720
+ [
721
+ "ne",
722
+ null
723
+ ],
724
+ [
725
+ "si",
726
+ null
727
+ ],
728
+ [
729
+ "war",
730
+ null
731
+ ],
732
+ [
733
+ "jv",
734
+ null
735
+ ],
736
+ [
737
+ "ga",
738
+ null
739
+ ],
740
+ [
741
+ "zh_min_nan",
742
+ null
743
+ ],
744
+ [
745
+ "oc",
746
+ null
747
+ ],
748
+ [
749
+ "ku",
750
+ null
751
+ ],
752
+ [
753
+ "sw",
754
+ null
755
+ ],
756
+ [
757
+ "nds",
758
+ null
759
+ ],
760
+ [
761
+ "ckb",
762
+ null
763
+ ],
764
+ [
765
+ "ia",
766
+ null
767
+ ],
768
+ [
769
+ "yi",
770
+ null
771
+ ],
772
+ [
773
+ "fy",
774
+ null
775
+ ],
776
+ [
777
+ "scn",
778
+ null
779
+ ],
780
+ [
781
+ "gan",
782
+ null
783
+ ],
784
+ [
785
+ "tt",
786
+ null
787
+ ],
788
+ [
789
+ "am",
790
+ null
791
+ ]
792
+ ],
793
+ "model_type": "xlm",
794
+ "mono_dataset": {
795
+ "af": {
796
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.af.pth",
797
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.af.pth",
798
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.af.pth"
799
+ },
800
+ "als": {
801
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.als.pth",
802
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.als.pth",
803
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.als.pth"
804
+ },
805
+ "am": {
806
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.am.pth",
807
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.am.pth",
808
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.am.pth"
809
+ },
810
+ "an": {
811
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.an.pth",
812
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.an.pth",
813
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.an.pth"
814
+ },
815
+ "ang": {
816
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ang.pth",
817
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ang.pth",
818
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ang.pth"
819
+ },
820
+ "ar": {
821
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ar.pth",
822
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ar.pth",
823
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ar.pth"
824
+ },
825
+ "arz": {
826
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.arz.pth",
827
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.arz.pth",
828
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.arz.pth"
829
+ },
830
+ "ast": {
831
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ast.pth",
832
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ast.pth",
833
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ast.pth"
834
+ },
835
+ "az": {
836
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.az.pth",
837
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.az.pth",
838
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.az.pth"
839
+ },
840
+ "bar": {
841
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bar.pth",
842
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bar.pth",
843
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bar.pth"
844
+ },
845
+ "be": {
846
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.be.pth",
847
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.be.pth",
848
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.be.pth"
849
+ },
850
+ "bg": {
851
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bg.pth",
852
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bg.pth",
853
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bg.pth"
854
+ },
855
+ "bn": {
856
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bn.pth",
857
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bn.pth",
858
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bn.pth"
859
+ },
860
+ "br": {
861
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.br.pth",
862
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.br.pth",
863
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.br.pth"
864
+ },
865
+ "bs": {
866
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bs.pth",
867
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bs.pth",
868
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bs.pth"
869
+ },
870
+ "ca": {
871
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ca.pth",
872
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ca.pth",
873
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ca.pth"
874
+ },
875
+ "ceb": {
876
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ceb.pth",
877
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ceb.pth",
878
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ceb.pth"
879
+ },
880
+ "ckb": {
881
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ckb.pth",
882
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ckb.pth",
883
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ckb.pth"
884
+ },
885
+ "cs": {
886
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.cs.pth",
887
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.cs.pth",
888
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.cs.pth"
889
+ },
890
+ "cy": {
891
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.cy.pth",
892
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.cy.pth",
893
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.cy.pth"
894
+ },
895
+ "da": {
896
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.da.pth",
897
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.da.pth",
898
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.da.pth"
899
+ },
900
+ "de": {
901
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.de.pth",
902
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.de.pth",
903
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.de.pth"
904
+ },
905
+ "el": {
906
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.el.pth",
907
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.el.pth",
908
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.el.pth"
909
+ },
910
+ "en": {
911
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.en.pth",
912
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.en.pth",
913
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.en.pth"
914
+ },
915
+ "eo": {
916
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.eo.pth",
917
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.eo.pth",
918
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.eo.pth"
919
+ },
920
+ "es": {
921
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.es.pth",
922
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.es.pth",
923
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.es.pth"
924
+ },
925
+ "et": {
926
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.et.pth",
927
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.et.pth",
928
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.et.pth"
929
+ },
930
+ "eu": {
931
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.eu.pth",
932
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.eu.pth",
933
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.eu.pth"
934
+ },
935
+ "fa": {
936
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fa.pth",
937
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fa.pth",
938
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fa.pth"
939
+ },
940
+ "fi": {
941
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fi.pth",
942
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fi.pth",
943
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fi.pth"
944
+ },
945
+ "fr": {
946
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fr.pth",
947
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fr.pth",
948
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fr.pth"
949
+ },
950
+ "fy": {
951
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fy.pth",
952
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fy.pth",
953
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fy.pth"
954
+ },
955
+ "ga": {
956
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ga.pth",
957
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ga.pth",
958
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ga.pth"
959
+ },
960
+ "gan": {
961
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gan.pth",
962
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gan.pth",
963
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gan.pth"
964
+ },
965
+ "gl": {
966
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gl.pth",
967
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gl.pth",
968
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gl.pth"
969
+ },
970
+ "gu": {
971
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gu.pth",
972
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gu.pth",
973
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gu.pth"
974
+ },
975
+ "he": {
976
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.he.pth",
977
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.he.pth",
978
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.he.pth"
979
+ },
980
+ "hi": {
981
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hi.pth",
982
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hi.pth",
983
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hi.pth"
984
+ },
985
+ "hr": {
986
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hr.pth",
987
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hr.pth",
988
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hr.pth"
989
+ },
990
+ "hu": {
991
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hu.pth",
992
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hu.pth",
993
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hu.pth"
994
+ },
995
+ "hy": {
996
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hy.pth",
997
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hy.pth",
998
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hy.pth"
999
+ },
1000
+ "ia": {
1001
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ia.pth",
1002
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ia.pth",
1003
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ia.pth"
1004
+ },
1005
+ "id": {
1006
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.id.pth",
1007
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.id.pth",
1008
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.id.pth"
1009
+ },
1010
+ "is": {
1011
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.is.pth",
1012
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.is.pth",
1013
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.is.pth"
1014
+ },
1015
+ "it": {
1016
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.it.pth",
1017
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.it.pth",
1018
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.it.pth"
1019
+ },
1020
+ "ja": {
1021
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ja.pth",
1022
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ja.pth",
1023
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ja.pth"
1024
+ },
1025
+ "jv": {
1026
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.jv.pth",
1027
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.jv.pth",
1028
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.jv.pth"
1029
+ },
1030
+ "ka": {
1031
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ka.pth",
1032
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ka.pth",
1033
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ka.pth"
1034
+ },
1035
+ "kk": {
1036
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.kk.pth",
1037
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.kk.pth",
1038
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.kk.pth"
1039
+ },
1040
+ "kn": {
1041
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.kn.pth",
1042
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.kn.pth",
1043
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.kn.pth"
1044
+ },
1045
+ "ko": {
1046
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ko.pth",
1047
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ko.pth",
1048
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ko.pth"
1049
+ },
1050
+ "ku": {
1051
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ku.pth",
1052
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ku.pth",
1053
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ku.pth"
1054
+ },
1055
+ "la": {
1056
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.la.pth",
1057
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.la.pth",
1058
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.la.pth"
1059
+ },
1060
+ "lb": {
1061
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lb.pth",
1062
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lb.pth",
1063
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lb.pth"
1064
+ },
1065
+ "lt": {
1066
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lt.pth",
1067
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lt.pth",
1068
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lt.pth"
1069
+ },
1070
+ "lv": {
1071
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lv.pth",
1072
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lv.pth",
1073
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lv.pth"
1074
+ },
1075
+ "mk": {
1076
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mk.pth",
1077
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mk.pth",
1078
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mk.pth"
1079
+ },
1080
+ "ml": {
1081
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ml.pth",
1082
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ml.pth",
1083
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ml.pth"
1084
+ },
1085
+ "mn": {
1086
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mn.pth",
1087
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mn.pth",
1088
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mn.pth"
1089
+ },
1090
+ "mr": {
1091
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mr.pth",
1092
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mr.pth",
1093
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mr.pth"
1094
+ },
1095
+ "ms": {
1096
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ms.pth",
1097
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ms.pth",
1098
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ms.pth"
1099
+ },
1100
+ "my": {
1101
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.my.pth",
1102
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.my.pth",
1103
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.my.pth"
1104
+ },
1105
+ "nds": {
1106
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nds.pth",
1107
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nds.pth",
1108
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nds.pth"
1109
+ },
1110
+ "ne": {
1111
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ne.pth",
1112
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ne.pth",
1113
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ne.pth"
1114
+ },
1115
+ "nl": {
1116
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nl.pth",
1117
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nl.pth",
1118
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nl.pth"
1119
+ },
1120
+ "nn": {
1121
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nn.pth",
1122
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nn.pth",
1123
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nn.pth"
1124
+ },
1125
+ "no": {
1126
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.no.pth",
1127
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.no.pth",
1128
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.no.pth"
1129
+ },
1130
+ "oc": {
1131
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.oc.pth",
1132
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.oc.pth",
1133
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.oc.pth"
1134
+ },
1135
+ "pl": {
1136
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.pl.pth",
1137
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.pl.pth",
1138
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.pl.pth"
1139
+ },
1140
+ "pt": {
1141
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.pt.pth",
1142
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.pt.pth",
1143
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.pt.pth"
1144
+ },
1145
+ "ro": {
1146
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ro.pth",
1147
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ro.pth",
1148
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ro.pth"
1149
+ },
1150
+ "ru": {
1151
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ru.pth",
1152
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ru.pth",
1153
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ru.pth"
1154
+ },
1155
+ "scn": {
1156
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.scn.pth",
1157
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.scn.pth",
1158
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.scn.pth"
1159
+ },
1160
+ "sco": {
1161
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sco.pth",
1162
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sco.pth",
1163
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sco.pth"
1164
+ },
1165
+ "sh": {
1166
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sh.pth",
1167
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sh.pth",
1168
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sh.pth"
1169
+ },
1170
+ "si": {
1171
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.si.pth",
1172
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.si.pth",
1173
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.si.pth"
1174
+ },
1175
+ "simple": {
1176
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.simple.pth",
1177
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.simple.pth",
1178
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.simple.pth"
1179
+ },
1180
+ "sk": {
1181
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sk.pth",
1182
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sk.pth",
1183
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sk.pth"
1184
+ },
1185
+ "sl": {
1186
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sl.pth",
1187
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sl.pth",
1188
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sl.pth"
1189
+ },
1190
+ "sq": {
1191
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sq.pth",
1192
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sq.pth",
1193
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sq.pth"
1194
+ },
1195
+ "sr": {
1196
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sr.pth",
1197
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sr.pth",
1198
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sr.pth"
1199
+ },
1200
+ "sv": {
1201
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sv.pth",
1202
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sv.pth",
1203
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sv.pth"
1204
+ },
1205
+ "sw": {
1206
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sw.pth",
1207
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sw.pth",
1208
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sw.pth"
1209
+ },
1210
+ "ta": {
1211
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ta.pth",
1212
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ta.pth",
1213
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ta.pth"
1214
+ },
1215
+ "te": {
1216
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.te.pth",
1217
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.te.pth",
1218
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.te.pth"
1219
+ },
1220
+ "th": {
1221
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.th.pth",
1222
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.th.pth",
1223
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.th.pth"
1224
+ },
1225
+ "tl": {
1226
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tl.pth",
1227
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tl.pth",
1228
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tl.pth"
1229
+ },
1230
+ "tr": {
1231
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tr.pth",
1232
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tr.pth",
1233
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tr.pth"
1234
+ },
1235
+ "tt": {
1236
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tt.pth",
1237
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tt.pth",
1238
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tt.pth"
1239
+ },
1240
+ "uk": {
1241
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.uk.pth",
1242
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.uk.pth",
1243
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.uk.pth"
1244
+ },
1245
+ "ur": {
1246
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ur.pth",
1247
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ur.pth",
1248
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ur.pth"
1249
+ },
1250
+ "uz": {
1251
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.uz.pth",
1252
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.uz.pth",
1253
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.uz.pth"
1254
+ },
1255
+ "vi": {
1256
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.vi.pth",
1257
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.vi.pth",
1258
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.vi.pth"
1259
+ },
1260
+ "war": {
1261
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.war.pth",
1262
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.war.pth",
1263
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.war.pth"
1264
+ },
1265
+ "wuu": {
1266
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.wuu.pth",
1267
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.wuu.pth",
1268
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.wuu.pth"
1269
+ },
1270
+ "yi": {
1271
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.yi.pth",
1272
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.yi.pth",
1273
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.yi.pth"
1274
+ },
1275
+ "zh": {
1276
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh.pth",
1277
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh.pth",
1278
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh.pth"
1279
+ },
1280
+ "zh_classical": {
1281
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_classical.pth",
1282
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_classical.pth",
1283
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_classical.pth"
1284
+ },
1285
+ "zh_min_nan": {
1286
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_min_nan.pth",
1287
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_min_nan.pth",
1288
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_min_nan.pth"
1289
+ },
1290
+ "zh_yue": {
1291
+ "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_yue.pth",
1292
+ "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_yue.pth",
1293
+ "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_yue.pth"
1294
+ }
1295
+ },
1296
+ "mono_list": [
1297
+ "en",
1298
+ "es",
1299
+ "fr",
1300
+ "de",
1301
+ "zh",
1302
+ "ru",
1303
+ "pt",
1304
+ "it",
1305
+ "ar",
1306
+ "ja",
1307
+ "id",
1308
+ "tr",
1309
+ "nl",
1310
+ "pl",
1311
+ "simple",
1312
+ "fa",
1313
+ "vi",
1314
+ "sv",
1315
+ "ko",
1316
+ "he",
1317
+ "ro",
1318
+ "no",
1319
+ "hi",
1320
+ "uk",
1321
+ "cs",
1322
+ "fi",
1323
+ "hu",
1324
+ "th",
1325
+ "da",
1326
+ "ca",
1327
+ "el",
1328
+ "bg",
1329
+ "sr",
1330
+ "ms",
1331
+ "bn",
1332
+ "hr",
1333
+ "sl",
1334
+ "zh_yue",
1335
+ "az",
1336
+ "sk",
1337
+ "eo",
1338
+ "ta",
1339
+ "sh",
1340
+ "lt",
1341
+ "et",
1342
+ "ml",
1343
+ "la",
1344
+ "bs",
1345
+ "sq",
1346
+ "arz",
1347
+ "af",
1348
+ "ka",
1349
+ "mr",
1350
+ "eu",
1351
+ "tl",
1352
+ "ang",
1353
+ "gl",
1354
+ "nn",
1355
+ "ur",
1356
+ "kk",
1357
+ "be",
1358
+ "hy",
1359
+ "te",
1360
+ "lv",
1361
+ "mk",
1362
+ "zh_classical",
1363
+ "als",
1364
+ "is",
1365
+ "wuu",
1366
+ "my",
1367
+ "sco",
1368
+ "mn",
1369
+ "ceb",
1370
+ "ast",
1371
+ "cy",
1372
+ "kn",
1373
+ "br",
1374
+ "an",
1375
+ "gu",
1376
+ "bar",
1377
+ "uz",
1378
+ "lb",
1379
+ "ne",
1380
+ "si",
1381
+ "war",
1382
+ "jv",
1383
+ "ga",
1384
+ "zh_min_nan",
1385
+ "oc",
1386
+ "ku",
1387
+ "sw",
1388
+ "nds",
1389
+ "ckb",
1390
+ "ia",
1391
+ "yi",
1392
+ "fy",
1393
+ "scn",
1394
+ "gan",
1395
+ "tt",
1396
+ "am"
1397
+ ],
1398
+ "mt_steps": [],
1399
+ "multi_gpu": true,
1400
+ "multi_node": true,
1401
+ "n_gpu_per_node": 8,
1402
+ "n_heads": 16,
1403
+ "n_langs": 100,
1404
+ "n_layers": 16,
1405
+ "n_nodes": 4,
1406
+ "node_id": 0,
1407
+ "optimizer": "adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001",
1408
+ "pad_index": 2,
1409
+ "pad_token_id": 2,
1410
+ "para_dataset": {},
1411
+ "para_list": [],
1412
+ "pc_steps": [],
1413
+ "ref_paths": {},
1414
+ "reload_checkpoint": "",
1415
+ "reload_emb": "",
1416
+ "reload_model": "/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth",
1417
+ "sample_alpha": 0.5,
1418
+ "save_periodic": 0,
1419
+ "share_inout_emb": true,
1420
+ "sinusoidal_embeddings": false,
1421
+ "split_data": true,
1422
+ "start_n_top": 5,
1423
+ "stopping_criterion": "_valid_zh_mlm_ppl,25",
1424
+ "summary_activation": null,
1425
+ "summary_first_dropout": 0.1,
1426
+ "summary_proj_to_labels": true,
1427
+ "summary_type": "first",
1428
+ "summary_use_proj": true,
1429
+ "tokens_per_batch": -1,
1430
+ "torch_dtype": "float32",
1431
+ "transformers_version": "4.35.0.dev0",
1432
+ "unk_index": 3,
1433
+ "use_lang_emb": false,
1434
+ "use_memory": false,
1435
+ "validation_metrics": "_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl",
1436
+ "vocab_size": 200000,
1437
+ "word_blank": 0.0,
1438
+ "word_dropout": 0.0,
1439
+ "word_keep": 0.1,
1440
+ "word_mask": 0.8,
1441
+ "word_mask_keep_rand": "0.8,0.1,0.1",
1442
+ "word_pred": 0.15,
1443
+ "word_rand": 0.1,
1444
+ "word_shuffle": 0.0,
1445
+ "world_size": 32
1446
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae24d827afe44bd9be563bedae1f07bf0e0932c3b7bfafac387c08e754c678ed
3
+ size 2286089381
special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<special0>",
4
+ "<special1>",
5
+ "<special2>",
6
+ "<special3>",
7
+ "<special4>",
8
+ "<special5>",
9
+ "<special6>",
10
+ "<special7>",
11
+ "<special8>",
12
+ "<special9>"
13
+ ],
14
+ "bos_token": "<s>",
15
+ "cls_token": "</s>",
16
+ "mask_token": "<special1>",
17
+ "pad_token": "<pad>",
18
+ "sep_token": "</s>",
19
+ "unk_token": "<unk>"
20
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "<special0>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "<special1>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "6": {
52
+ "content": "<special2>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "7": {
60
+ "content": "<special3>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "8": {
68
+ "content": "<special4>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "9": {
76
+ "content": "<special5>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "10": {
84
+ "content": "<special6>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "11": {
92
+ "content": "<special7>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "12": {
100
+ "content": "<special8>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "13": {
108
+ "content": "<special9>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ }
115
+ },
116
+ "additional_special_tokens": [
117
+ "<special0>",
118
+ "<special1>",
119
+ "<special2>",
120
+ "<special3>",
121
+ "<special4>",
122
+ "<special5>",
123
+ "<special6>",
124
+ "<special7>",
125
+ "<special8>",
126
+ "<special9>"
127
+ ],
128
+ "bos_token": "<s>",
129
+ "clean_up_tokenization_spaces": true,
130
+ "cls_token": "</s>",
131
+ "do_lowercase_and_remove_accent": false,
132
+ "id2lang": {
133
+ "0": "af",
134
+ "1": "als",
135
+ "10": "be",
136
+ "11": "bg",
137
+ "12": "bn",
138
+ "13": "br",
139
+ "14": "bs",
140
+ "15": "ca",
141
+ "16": "ceb",
142
+ "17": "ckb",
143
+ "18": "cs",
144
+ "19": "cy",
145
+ "2": "am",
146
+ "20": "da",
147
+ "21": "de",
148
+ "22": "el",
149
+ "23": "en",
150
+ "24": "eo",
151
+ "25": "es",
152
+ "26": "et",
153
+ "27": "eu",
154
+ "28": "fa",
155
+ "29": "fi",
156
+ "3": "an",
157
+ "30": "fr",
158
+ "31": "fy",
159
+ "32": "ga",
160
+ "33": "gan",
161
+ "34": "gl",
162
+ "35": "gu",
163
+ "36": "he",
164
+ "37": "hi",
165
+ "38": "hr",
166
+ "39": "hu",
167
+ "4": "ang",
168
+ "40": "hy",
169
+ "41": "ia",
170
+ "42": "id",
171
+ "43": "is",
172
+ "44": "it",
173
+ "45": "ja",
174
+ "46": "jv",
175
+ "47": "ka",
176
+ "48": "kk",
177
+ "49": "kn",
178
+ "5": "ar",
179
+ "50": "ko",
180
+ "51": "ku",
181
+ "52": "la",
182
+ "53": "lb",
183
+ "54": "lt",
184
+ "55": "lv",
185
+ "56": "mk",
186
+ "57": "ml",
187
+ "58": "mn",
188
+ "59": "mr",
189
+ "6": "arz",
190
+ "60": "ms",
191
+ "61": "my",
192
+ "62": "nds",
193
+ "63": "ne",
194
+ "64": "nl",
195
+ "65": "nn",
196
+ "66": "no",
197
+ "67": "oc",
198
+ "68": "pl",
199
+ "69": "pt",
200
+ "7": "ast",
201
+ "70": "ro",
202
+ "71": "ru",
203
+ "72": "scn",
204
+ "73": "sco",
205
+ "74": "sh",
206
+ "75": "si",
207
+ "76": "simple",
208
+ "77": "sk",
209
+ "78": "sl",
210
+ "79": "sq",
211
+ "8": "az",
212
+ "80": "sr",
213
+ "81": "sv",
214
+ "82": "sw",
215
+ "83": "ta",
216
+ "84": "te",
217
+ "85": "th",
218
+ "86": "tl",
219
+ "87": "tr",
220
+ "88": "tt",
221
+ "89": "uk",
222
+ "9": "bar",
223
+ "90": "ur",
224
+ "91": "uz",
225
+ "92": "vi",
226
+ "93": "war",
227
+ "94": "wuu",
228
+ "95": "yi",
229
+ "96": "zh",
230
+ "97": "zh_classical",
231
+ "98": "zh_min_nan",
232
+ "99": "zh_yue"
233
+ },
234
+ "lang2id": {
235
+ "af": 0,
236
+ "als": 1,
237
+ "am": 2,
238
+ "an": 3,
239
+ "ang": 4,
240
+ "ar": 5,
241
+ "arz": 6,
242
+ "ast": 7,
243
+ "az": 8,
244
+ "bar": 9,
245
+ "be": 10,
246
+ "bg": 11,
247
+ "bn": 12,
248
+ "br": 13,
249
+ "bs": 14,
250
+ "ca": 15,
251
+ "ceb": 16,
252
+ "ckb": 17,
253
+ "cs": 18,
254
+ "cy": 19,
255
+ "da": 20,
256
+ "de": 21,
257
+ "el": 22,
258
+ "en": 23,
259
+ "eo": 24,
260
+ "es": 25,
261
+ "et": 26,
262
+ "eu": 27,
263
+ "fa": 28,
264
+ "fi": 29,
265
+ "fr": 30,
266
+ "fy": 31,
267
+ "ga": 32,
268
+ "gan": 33,
269
+ "gl": 34,
270
+ "gu": 35,
271
+ "he": 36,
272
+ "hi": 37,
273
+ "hr": 38,
274
+ "hu": 39,
275
+ "hy": 40,
276
+ "ia": 41,
277
+ "id": 42,
278
+ "is": 43,
279
+ "it": 44,
280
+ "ja": 45,
281
+ "jv": 46,
282
+ "ka": 47,
283
+ "kk": 48,
284
+ "kn": 49,
285
+ "ko": 50,
286
+ "ku": 51,
287
+ "la": 52,
288
+ "lb": 53,
289
+ "lt": 54,
290
+ "lv": 55,
291
+ "mk": 56,
292
+ "ml": 57,
293
+ "mn": 58,
294
+ "mr": 59,
295
+ "ms": 60,
296
+ "my": 61,
297
+ "nds": 62,
298
+ "ne": 63,
299
+ "nl": 64,
300
+ "nn": 65,
301
+ "no": 66,
302
+ "oc": 67,
303
+ "pl": 68,
304
+ "pt": 69,
305
+ "ro": 70,
306
+ "ru": 71,
307
+ "scn": 72,
308
+ "sco": 73,
309
+ "sh": 74,
310
+ "si": 75,
311
+ "simple": 76,
312
+ "sk": 77,
313
+ "sl": 78,
314
+ "sq": 79,
315
+ "sr": 80,
316
+ "sv": 81,
317
+ "sw": 82,
318
+ "ta": 83,
319
+ "te": 84,
320
+ "th": 85,
321
+ "tl": 86,
322
+ "tr": 87,
323
+ "tt": 88,
324
+ "uk": 89,
325
+ "ur": 90,
326
+ "uz": 91,
327
+ "vi": 92,
328
+ "war": 93,
329
+ "wuu": 94,
330
+ "yi": 95,
331
+ "zh": 96,
332
+ "zh_classical": 97,
333
+ "zh_min_nan": 98,
334
+ "zh_yue": 99
335
+ },
336
+ "mask_token": "<special1>",
337
+ "model_max_length": 512,
338
+ "pad_token": "<pad>",
339
+ "sep_token": "</s>",
340
+ "tokenizer_class": "XLMTokenizer",
341
+ "unk_token": "<unk>"
342
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20a64cc5860823f06b16233f851d41951f57016914299318d64215d5bbcfc93b
3
+ size 4091
vocab.json ADDED
The diff for this file is too large to render. See raw diff