Rahul-G's picture
Training in progress, epoch 1
d16d660
{
"_name_or_path": "xlm-mlm-100-1280",
"accumulate_gradients": 4,
"ae_steps": [],
"amp": 2,
"architectures": [
"XLMForSequenceClassification"
],
"asm": false,
"attention_dropout": 0.1,
"batch_size": 16,
"beam_size": 1,
"bos_index": 0,
"bos_token_id": 0,
"bptt": 256,
"bt_src_langs": [],
"bt_steps": [],
"causal": false,
"clip_grad_norm": 1.0,
"clm_steps": [],
"command": "python /private/home/aconneau/workdir/xlm_17_100_big.3/2019_08_10_19_23_42/train.py --n_heads 16 --bt_steps '' --max_vocab 200000 --word_mask_keep_rand '0.8,0.1,0.1' --use_lang_emb false --data_path '/private/home/aconneau/projects/XLM/data/wiki/100/175k' --save_periodic 0 --max_len 200 --bptt 256 --ae_steps '' --fp16 true --share_inout_emb true --sinusoidal_embeddings false --word_shuffle 0 --tokens_per_batch '-1' --accumulate_gradients 4 --validation_metrics '_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl' --attention_dropout '0.1' --split_data true --max_epoch 100000 --stopping_criterion '_valid_zh_mlm_ppl,25' --dump_path '/checkpoint/aconneau/dumped' --epoch_size 200000 --word_blank 0 --gelu_activation true --n_layers 16 --optimizer 'adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001' --mlm_steps 'en,es,fr,de,zh,ru,pt,it,ar,ja,id,tr,nl,pl,simple,fa,vi,sv,ko,he,ro,no,hi,uk,cs,fi,hu,th,da,ca,el,bg,sr,ms,bn,hr,sl,zh_yue,az,sk,eo,ta,sh,lt,et,ml,la,bs,sq,arz,af,ka,mr,eu,tl,ang,gl,nn,ur,kk,be,hy,te,lv,mk,zh_classical,als,is,wuu,my,sco,mn,ceb,ast,cy,kn,br,an,gu,bar,uz,lb,ne,si,war,jv,ga,zh_min_nan,oc,ku,sw,nds,ckb,ia,yi,fy,scn,gan,tt,am' --eval_bleu false --dropout '0.1' --mt_steps '' --batch_size 16 --word_dropout 0 --reload_model '/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth' --min_count 0 --amp 2 --group_by_size true --asm false --sample_alpha '0.5' --word_pred '0.15' --clip_grad_norm 1 --emb_dim 1280 --encoder_only true --lgs 'en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am' --clm_steps '' --exp_name 'xlm_17_100_big.3' --lg_sampling_factor '0.7' --eval_only false --exp_id 16656234 --master_port 11363 --exp_id \"16656234\"",
"context_size": 0,
"data_path": "/private/home/aconneau/projects/XLM/data/wiki/100/175k",
"debug": false,
"debug_slurm": false,
"debug_train": false,
"dropout": 0.1,
"dump_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234",
"emb_dim": 1280,
"embed_init_std": 0.02209708691207961,
"encoder_only": true,
"end_n_top": 5,
"eos_index": 1,
"epoch_size": 200000,
"eval_bleu": false,
"eval_only": false,
"exp_id": "16656234",
"exp_name": "xlm_17_100_big.3",
"fp16": true,
"gelu_activation": true,
"global_rank": 0,
"group_by_size": true,
"hyp_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234/hypotheses",
"id2label": {
"0": "Negative",
"1": "Neutral",
"2": "Positive"
},
"id2lang": {
"0": "af",
"1": "als",
"10": "be",
"11": "bg",
"12": "bn",
"13": "br",
"14": "bs",
"15": "ca",
"16": "ceb",
"17": "ckb",
"18": "cs",
"19": "cy",
"2": "am",
"20": "da",
"21": "de",
"22": "el",
"23": "en",
"24": "eo",
"25": "es",
"26": "et",
"27": "eu",
"28": "fa",
"29": "fi",
"3": "an",
"30": "fr",
"31": "fy",
"32": "ga",
"33": "gan",
"34": "gl",
"35": "gu",
"36": "he",
"37": "hi",
"38": "hr",
"39": "hu",
"4": "ang",
"40": "hy",
"41": "ia",
"42": "id",
"43": "is",
"44": "it",
"45": "ja",
"46": "jv",
"47": "ka",
"48": "kk",
"49": "kn",
"5": "ar",
"50": "ko",
"51": "ku",
"52": "la",
"53": "lb",
"54": "lt",
"55": "lv",
"56": "mk",
"57": "ml",
"58": "mn",
"59": "mr",
"6": "arz",
"60": "ms",
"61": "my",
"62": "nds",
"63": "ne",
"64": "nl",
"65": "nn",
"66": "no",
"67": "oc",
"68": "pl",
"69": "pt",
"7": "ast",
"70": "ro",
"71": "ru",
"72": "scn",
"73": "sco",
"74": "sh",
"75": "si",
"76": "simple",
"77": "sk",
"78": "sl",
"79": "sq",
"8": "az",
"80": "sr",
"81": "sv",
"82": "sw",
"83": "ta",
"84": "te",
"85": "th",
"86": "tl",
"87": "tr",
"88": "tt",
"89": "uk",
"9": "bar",
"90": "ur",
"91": "uz",
"92": "vi",
"93": "war",
"94": "wuu",
"95": "yi",
"96": "zh",
"97": "zh_classical",
"98": "zh_min_nan",
"99": "zh_yue"
},
"init_std": 0.02,
"is_encoder": true,
"is_master": true,
"is_slurm_job": true,
"label2id": {
"Negative": 0,
"Neutral": 1,
"Positive": 2
},
"lambda_ae": 1.0,
"lambda_ae_config": null,
"lambda_bt": 1.0,
"lambda_bt_config": null,
"lambda_clm": 1.0,
"lambda_clm_config": null,
"lambda_mlm": 1.0,
"lambda_mlm_config": null,
"lambda_mt": 1.0,
"lambda_mt_config": null,
"lambda_pc": 1.0,
"lambda_pc_config": null,
"lang2id": {
"af": 0,
"als": 1,
"am": 2,
"an": 3,
"ang": 4,
"ar": 5,
"arz": 6,
"ast": 7,
"az": 8,
"bar": 9,
"be": 10,
"bg": 11,
"bn": 12,
"br": 13,
"bs": 14,
"ca": 15,
"ceb": 16,
"ckb": 17,
"cs": 18,
"cy": 19,
"da": 20,
"de": 21,
"el": 22,
"en": 23,
"eo": 24,
"es": 25,
"et": 26,
"eu": 27,
"fa": 28,
"fi": 29,
"fr": 30,
"fy": 31,
"ga": 32,
"gan": 33,
"gl": 34,
"gu": 35,
"he": 36,
"hi": 37,
"hr": 38,
"hu": 39,
"hy": 40,
"ia": 41,
"id": 42,
"is": 43,
"it": 44,
"ja": 45,
"jv": 46,
"ka": 47,
"kk": 48,
"kn": 49,
"ko": 50,
"ku": 51,
"la": 52,
"lb": 53,
"lt": 54,
"lv": 55,
"mk": 56,
"ml": 57,
"mn": 58,
"mr": 59,
"ms": 60,
"my": 61,
"nds": 62,
"ne": 63,
"nl": 64,
"nn": 65,
"no": 66,
"oc": 67,
"pl": 68,
"pt": 69,
"ro": 70,
"ru": 71,
"scn": 72,
"sco": 73,
"sh": 74,
"si": 75,
"simple": 76,
"sk": 77,
"sl": 78,
"sq": 79,
"sr": 80,
"sv": 81,
"sw": 82,
"ta": 83,
"te": 84,
"th": 85,
"tl": 86,
"tr": 87,
"tt": 88,
"uk": 89,
"ur": 90,
"uz": 91,
"vi": 92,
"war": 93,
"wuu": 94,
"yi": 95,
"zh": 96,
"zh_classical": 97,
"zh_min_nan": 98,
"zh_yue": 99
},
"lang_id": 0,
"langs": [
"en",
"es",
"fr",
"de",
"zh",
"ru",
"pt",
"it",
"ar",
"ja",
"id",
"tr",
"nl",
"pl",
"simple",
"fa",
"vi",
"sv",
"ko",
"he",
"ro",
"no",
"hi",
"uk",
"cs",
"fi",
"hu",
"th",
"da",
"ca",
"el",
"bg",
"sr",
"ms",
"bn",
"hr",
"sl",
"zh_yue",
"az",
"sk",
"eo",
"ta",
"sh",
"lt",
"et",
"ml",
"la",
"bs",
"sq",
"arz",
"af",
"ka",
"mr",
"eu",
"tl",
"ang",
"gl",
"nn",
"ur",
"kk",
"be",
"hy",
"te",
"lv",
"mk",
"zh_classical",
"als",
"is",
"wuu",
"my",
"sco",
"mn",
"ceb",
"ast",
"cy",
"kn",
"br",
"an",
"gu",
"bar",
"uz",
"lb",
"ne",
"si",
"war",
"jv",
"ga",
"zh_min_nan",
"oc",
"ku",
"sw",
"nds",
"ckb",
"ia",
"yi",
"fy",
"scn",
"gan",
"tt",
"am"
],
"layer_norm_eps": 1e-12,
"lg_sampling_factor": 0.7,
"lgs": "en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am",
"local_rank": 0,
"mask_index": 5,
"mask_token_id": 0,
"master_addr": "learnfair0332",
"master_port": 11363,
"max_batch_size": 0,
"max_epoch": 100000,
"max_len": 200,
"max_position_embeddings": 512,
"max_vocab": 200000,
"min_count": 0,
"mlm_steps": [
[
"en",
null
],
[
"es",
null
],
[
"fr",
null
],
[
"de",
null
],
[
"zh",
null
],
[
"ru",
null
],
[
"pt",
null
],
[
"it",
null
],
[
"ar",
null
],
[
"ja",
null
],
[
"id",
null
],
[
"tr",
null
],
[
"nl",
null
],
[
"pl",
null
],
[
"simple",
null
],
[
"fa",
null
],
[
"vi",
null
],
[
"sv",
null
],
[
"ko",
null
],
[
"he",
null
],
[
"ro",
null
],
[
"no",
null
],
[
"hi",
null
],
[
"uk",
null
],
[
"cs",
null
],
[
"fi",
null
],
[
"hu",
null
],
[
"th",
null
],
[
"da",
null
],
[
"ca",
null
],
[
"el",
null
],
[
"bg",
null
],
[
"sr",
null
],
[
"ms",
null
],
[
"bn",
null
],
[
"hr",
null
],
[
"sl",
null
],
[
"zh_yue",
null
],
[
"az",
null
],
[
"sk",
null
],
[
"eo",
null
],
[
"ta",
null
],
[
"sh",
null
],
[
"lt",
null
],
[
"et",
null
],
[
"ml",
null
],
[
"la",
null
],
[
"bs",
null
],
[
"sq",
null
],
[
"arz",
null
],
[
"af",
null
],
[
"ka",
null
],
[
"mr",
null
],
[
"eu",
null
],
[
"tl",
null
],
[
"ang",
null
],
[
"gl",
null
],
[
"nn",
null
],
[
"ur",
null
],
[
"kk",
null
],
[
"be",
null
],
[
"hy",
null
],
[
"te",
null
],
[
"lv",
null
],
[
"mk",
null
],
[
"zh_classical",
null
],
[
"als",
null
],
[
"is",
null
],
[
"wuu",
null
],
[
"my",
null
],
[
"sco",
null
],
[
"mn",
null
],
[
"ceb",
null
],
[
"ast",
null
],
[
"cy",
null
],
[
"kn",
null
],
[
"br",
null
],
[
"an",
null
],
[
"gu",
null
],
[
"bar",
null
],
[
"uz",
null
],
[
"lb",
null
],
[
"ne",
null
],
[
"si",
null
],
[
"war",
null
],
[
"jv",
null
],
[
"ga",
null
],
[
"zh_min_nan",
null
],
[
"oc",
null
],
[
"ku",
null
],
[
"sw",
null
],
[
"nds",
null
],
[
"ckb",
null
],
[
"ia",
null
],
[
"yi",
null
],
[
"fy",
null
],
[
"scn",
null
],
[
"gan",
null
],
[
"tt",
null
],
[
"am",
null
]
],
"model_type": "xlm",
"mono_dataset": {
"af": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.af.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.af.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.af.pth"
},
"als": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.als.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.als.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.als.pth"
},
"am": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.am.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.am.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.am.pth"
},
"an": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.an.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.an.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.an.pth"
},
"ang": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ang.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ang.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ang.pth"
},
"ar": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ar.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ar.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ar.pth"
},
"arz": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.arz.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.arz.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.arz.pth"
},
"ast": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ast.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ast.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ast.pth"
},
"az": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.az.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.az.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.az.pth"
},
"bar": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bar.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bar.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bar.pth"
},
"be": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.be.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.be.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.be.pth"
},
"bg": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bg.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bg.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bg.pth"
},
"bn": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bn.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bn.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bn.pth"
},
"br": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.br.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.br.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.br.pth"
},
"bs": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bs.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bs.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bs.pth"
},
"ca": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ca.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ca.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ca.pth"
},
"ceb": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ceb.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ceb.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ceb.pth"
},
"ckb": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ckb.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ckb.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ckb.pth"
},
"cs": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.cs.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.cs.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.cs.pth"
},
"cy": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.cy.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.cy.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.cy.pth"
},
"da": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.da.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.da.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.da.pth"
},
"de": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.de.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.de.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.de.pth"
},
"el": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.el.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.el.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.el.pth"
},
"en": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.en.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.en.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.en.pth"
},
"eo": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.eo.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.eo.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.eo.pth"
},
"es": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.es.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.es.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.es.pth"
},
"et": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.et.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.et.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.et.pth"
},
"eu": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.eu.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.eu.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.eu.pth"
},
"fa": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fa.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fa.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fa.pth"
},
"fi": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fi.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fi.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fi.pth"
},
"fr": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fr.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fr.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fr.pth"
},
"fy": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fy.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fy.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fy.pth"
},
"ga": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ga.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ga.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ga.pth"
},
"gan": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gan.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gan.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gan.pth"
},
"gl": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gl.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gl.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gl.pth"
},
"gu": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gu.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gu.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gu.pth"
},
"he": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.he.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.he.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.he.pth"
},
"hi": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hi.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hi.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hi.pth"
},
"hr": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hr.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hr.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hr.pth"
},
"hu": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hu.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hu.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hu.pth"
},
"hy": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hy.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hy.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hy.pth"
},
"ia": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ia.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ia.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ia.pth"
},
"id": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.id.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.id.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.id.pth"
},
"is": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.is.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.is.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.is.pth"
},
"it": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.it.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.it.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.it.pth"
},
"ja": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ja.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ja.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ja.pth"
},
"jv": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.jv.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.jv.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.jv.pth"
},
"ka": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ka.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ka.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ka.pth"
},
"kk": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.kk.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.kk.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.kk.pth"
},
"kn": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.kn.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.kn.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.kn.pth"
},
"ko": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ko.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ko.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ko.pth"
},
"ku": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ku.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ku.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ku.pth"
},
"la": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.la.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.la.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.la.pth"
},
"lb": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lb.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lb.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lb.pth"
},
"lt": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lt.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lt.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lt.pth"
},
"lv": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lv.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lv.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lv.pth"
},
"mk": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mk.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mk.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mk.pth"
},
"ml": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ml.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ml.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ml.pth"
},
"mn": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mn.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mn.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mn.pth"
},
"mr": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mr.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mr.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mr.pth"
},
"ms": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ms.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ms.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ms.pth"
},
"my": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.my.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.my.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.my.pth"
},
"nds": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nds.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nds.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nds.pth"
},
"ne": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ne.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ne.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ne.pth"
},
"nl": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nl.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nl.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nl.pth"
},
"nn": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nn.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nn.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nn.pth"
},
"no": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.no.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.no.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.no.pth"
},
"oc": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.oc.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.oc.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.oc.pth"
},
"pl": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.pl.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.pl.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.pl.pth"
},
"pt": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.pt.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.pt.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.pt.pth"
},
"ro": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ro.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ro.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ro.pth"
},
"ru": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ru.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ru.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ru.pth"
},
"scn": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.scn.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.scn.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.scn.pth"
},
"sco": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sco.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sco.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sco.pth"
},
"sh": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sh.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sh.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sh.pth"
},
"si": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.si.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.si.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.si.pth"
},
"simple": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.simple.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.simple.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.simple.pth"
},
"sk": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sk.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sk.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sk.pth"
},
"sl": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sl.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sl.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sl.pth"
},
"sq": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sq.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sq.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sq.pth"
},
"sr": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sr.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sr.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sr.pth"
},
"sv": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sv.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sv.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sv.pth"
},
"sw": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sw.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sw.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sw.pth"
},
"ta": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ta.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ta.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ta.pth"
},
"te": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.te.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.te.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.te.pth"
},
"th": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.th.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.th.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.th.pth"
},
"tl": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tl.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tl.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tl.pth"
},
"tr": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tr.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tr.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tr.pth"
},
"tt": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tt.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tt.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tt.pth"
},
"uk": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.uk.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.uk.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.uk.pth"
},
"ur": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ur.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ur.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ur.pth"
},
"uz": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.uz.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.uz.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.uz.pth"
},
"vi": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.vi.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.vi.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.vi.pth"
},
"war": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.war.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.war.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.war.pth"
},
"wuu": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.wuu.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.wuu.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.wuu.pth"
},
"yi": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.yi.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.yi.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.yi.pth"
},
"zh": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh.pth"
},
"zh_classical": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_classical.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_classical.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_classical.pth"
},
"zh_min_nan": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_min_nan.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_min_nan.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_min_nan.pth"
},
"zh_yue": {
"test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_yue.pth",
"train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_yue.pth",
"valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_yue.pth"
}
},
"mono_list": [
"en",
"es",
"fr",
"de",
"zh",
"ru",
"pt",
"it",
"ar",
"ja",
"id",
"tr",
"nl",
"pl",
"simple",
"fa",
"vi",
"sv",
"ko",
"he",
"ro",
"no",
"hi",
"uk",
"cs",
"fi",
"hu",
"th",
"da",
"ca",
"el",
"bg",
"sr",
"ms",
"bn",
"hr",
"sl",
"zh_yue",
"az",
"sk",
"eo",
"ta",
"sh",
"lt",
"et",
"ml",
"la",
"bs",
"sq",
"arz",
"af",
"ka",
"mr",
"eu",
"tl",
"ang",
"gl",
"nn",
"ur",
"kk",
"be",
"hy",
"te",
"lv",
"mk",
"zh_classical",
"als",
"is",
"wuu",
"my",
"sco",
"mn",
"ceb",
"ast",
"cy",
"kn",
"br",
"an",
"gu",
"bar",
"uz",
"lb",
"ne",
"si",
"war",
"jv",
"ga",
"zh_min_nan",
"oc",
"ku",
"sw",
"nds",
"ckb",
"ia",
"yi",
"fy",
"scn",
"gan",
"tt",
"am"
],
"mt_steps": [],
"multi_gpu": true,
"multi_node": true,
"n_gpu_per_node": 8,
"n_heads": 16,
"n_langs": 100,
"n_layers": 16,
"n_nodes": 4,
"node_id": 0,
"optimizer": "adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001",
"pad_index": 2,
"pad_token_id": 2,
"para_dataset": {},
"para_list": [],
"pc_steps": [],
"ref_paths": {},
"reload_checkpoint": "",
"reload_emb": "",
"reload_model": "/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth",
"sample_alpha": 0.5,
"save_periodic": 0,
"share_inout_emb": true,
"sinusoidal_embeddings": false,
"split_data": true,
"start_n_top": 5,
"stopping_criterion": "_valid_zh_mlm_ppl,25",
"summary_activation": null,
"summary_first_dropout": 0.1,
"summary_proj_to_labels": true,
"summary_type": "first",
"summary_use_proj": true,
"tokens_per_batch": -1,
"torch_dtype": "float32",
"transformers_version": "4.35.0.dev0",
"unk_index": 3,
"use_lang_emb": false,
"use_memory": false,
"validation_metrics": "_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl",
"vocab_size": 200000,
"word_blank": 0.0,
"word_dropout": 0.0,
"word_keep": 0.1,
"word_mask": 0.8,
"word_mask_keep_rand": "0.8,0.1,0.1",
"word_pred": 0.15,
"word_rand": 0.1,
"word_shuffle": 0.0,
"world_size": 32
}