{ "_name_or_path": "xlm-mlm-100-1280", "accumulate_gradients": 4, "ae_steps": [], "amp": 2, "architectures": [ "XLMForSequenceClassification" ], "asm": false, "attention_dropout": 0.1, "batch_size": 16, "beam_size": 1, "bos_index": 0, "bos_token_id": 0, "bptt": 256, "bt_src_langs": [], "bt_steps": [], "causal": false, "clip_grad_norm": 1.0, "clm_steps": [], "command": "python /private/home/aconneau/workdir/xlm_17_100_big.3/2019_08_10_19_23_42/train.py --n_heads 16 --bt_steps '' --max_vocab 200000 --word_mask_keep_rand '0.8,0.1,0.1' --use_lang_emb false --data_path '/private/home/aconneau/projects/XLM/data/wiki/100/175k' --save_periodic 0 --max_len 200 --bptt 256 --ae_steps '' --fp16 true --share_inout_emb true --sinusoidal_embeddings false --word_shuffle 0 --tokens_per_batch '-1' --accumulate_gradients 4 --validation_metrics '_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl' --attention_dropout '0.1' --split_data true --max_epoch 100000 --stopping_criterion '_valid_zh_mlm_ppl,25' --dump_path '/checkpoint/aconneau/dumped' --epoch_size 200000 --word_blank 0 --gelu_activation true --n_layers 16 --optimizer 'adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001' --mlm_steps 'en,es,fr,de,zh,ru,pt,it,ar,ja,id,tr,nl,pl,simple,fa,vi,sv,ko,he,ro,no,hi,uk,cs,fi,hu,th,da,ca,el,bg,sr,ms,bn,hr,sl,zh_yue,az,sk,eo,ta,sh,lt,et,ml,la,bs,sq,arz,af,ka,mr,eu,tl,ang,gl,nn,ur,kk,be,hy,te,lv,mk,zh_classical,als,is,wuu,my,sco,mn,ceb,ast,cy,kn,br,an,gu,bar,uz,lb,ne,si,war,jv,ga,zh_min_nan,oc,ku,sw,nds,ckb,ia,yi,fy,scn,gan,tt,am' --eval_bleu false --dropout '0.1' --mt_steps '' --batch_size 16 --word_dropout 0 --reload_model '/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth' --min_count 0 --amp 2 --group_by_size true --asm false --sample_alpha '0.5' --word_pred '0.15' --clip_grad_norm 1 --emb_dim 1280 --encoder_only true --lgs 
'en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am' --clm_steps '' --exp_name 'xlm_17_100_big.3' --lg_sampling_factor '0.7' --eval_only false --exp_id 16656234 --master_port 11363 --exp_id \"16656234\"", "context_size": 0, "data_path": "/private/home/aconneau/projects/XLM/data/wiki/100/175k", "debug": false, "debug_slurm": false, "debug_train": false, "dropout": 0.1, "dump_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234", "emb_dim": 1280, "embed_init_std": 0.02209708691207961, "encoder_only": true, "end_n_top": 5, "eos_index": 1, "epoch_size": 200000, "eval_bleu": false, "eval_only": false, "exp_id": "16656234", "exp_name": "xlm_17_100_big.3", "fp16": true, "gelu_activation": true, "global_rank": 0, "group_by_size": true, "hyp_path": "/checkpoint/aconneau/dumped/xlm_17_100_big.3/16656234/hypotheses", "id2label": { "0": "NEGATIVE", "1": "NEUTRAL", "2": "POSITIVE" }, "id2lang": { "0": "af", "1": "als", "10": "be", "11": "bg", "12": "bn", "13": "br", "14": "bs", "15": "ca", "16": "ceb", "17": "ckb", "18": "cs", "19": "cy", "2": "am", "20": "da", "21": "de", "22": "el", "23": "en", "24": "eo", "25": "es", "26": "et", "27": "eu", "28": "fa", "29": "fi", "3": "an", "30": "fr", "31": "fy", "32": "ga", "33": "gan", "34": "gl", "35": "gu", "36": "he", "37": "hi", "38": "hr", "39": "hu", "4": "ang", "40": "hy", "41": "ia", "42": "id", "43": "is", "44": "it", "45": "ja", "46": "jv", "47": "ka", "48": "kk", "49": "kn", "5": "ar", "50": "ko", "51": "ku", "52": "la", "53": "lb", "54": "lt", "55": "lv", "56": "mk", "57": "ml", "58": "mn", "59": "mr", "6": "arz", "60": "ms", "61": "my", "62": "nds", "63": "ne", "64": "nl", "65": "nn", "66": "no", "67": "oc", "68": "pl", "69": "pt", "7": 
"ast", "70": "ro", "71": "ru", "72": "scn", "73": "sco", "74": "sh", "75": "si", "76": "simple", "77": "sk", "78": "sl", "79": "sq", "8": "az", "80": "sr", "81": "sv", "82": "sw", "83": "ta", "84": "te", "85": "th", "86": "tl", "87": "tr", "88": "tt", "89": "uk", "9": "bar", "90": "ur", "91": "uz", "92": "vi", "93": "war", "94": "wuu", "95": "yi", "96": "zh", "97": "zh_classical", "98": "zh_min_nan", "99": "zh_yue" }, "init_std": 0.02, "is_encoder": true, "is_master": true, "is_slurm_job": true, "label2id": { "NEGATIVE": 0, "NEUTRAL": 1, "POSITIVE": 2 }, "lambda_ae": 1.0, "lambda_ae_config": null, "lambda_bt": 1.0, "lambda_bt_config": null, "lambda_clm": 1.0, "lambda_clm_config": null, "lambda_mlm": 1.0, "lambda_mlm_config": null, "lambda_mt": 1.0, "lambda_mt_config": null, "lambda_pc": 1.0, "lambda_pc_config": null, "lang2id": { "af": 0, "als": 1, "am": 2, "an": 3, "ang": 4, "ar": 5, "arz": 6, "ast": 7, "az": 8, "bar": 9, "be": 10, "bg": 11, "bn": 12, "br": 13, "bs": 14, "ca": 15, "ceb": 16, "ckb": 17, "cs": 18, "cy": 19, "da": 20, "de": 21, "el": 22, "en": 23, "eo": 24, "es": 25, "et": 26, "eu": 27, "fa": 28, "fi": 29, "fr": 30, "fy": 31, "ga": 32, "gan": 33, "gl": 34, "gu": 35, "he": 36, "hi": 37, "hr": 38, "hu": 39, "hy": 40, "ia": 41, "id": 42, "is": 43, "it": 44, "ja": 45, "jv": 46, "ka": 47, "kk": 48, "kn": 49, "ko": 50, "ku": 51, "la": 52, "lb": 53, "lt": 54, "lv": 55, "mk": 56, "ml": 57, "mn": 58, "mr": 59, "ms": 60, "my": 61, "nds": 62, "ne": 63, "nl": 64, "nn": 65, "no": 66, "oc": 67, "pl": 68, "pt": 69, "ro": 70, "ru": 71, "scn": 72, "sco": 73, "sh": 74, "si": 75, "simple": 76, "sk": 77, "sl": 78, "sq": 79, "sr": 80, "sv": 81, "sw": 82, "ta": 83, "te": 84, "th": 85, "tl": 86, "tr": 87, "tt": 88, "uk": 89, "ur": 90, "uz": 91, "vi": 92, "war": 93, "wuu": 94, "yi": 95, "zh": 96, "zh_classical": 97, "zh_min_nan": 98, "zh_yue": 99 }, "lang_id": 0, "langs": [ "en", "es", "fr", "de", "zh", "ru", "pt", "it", "ar", "ja", "id", "tr", "nl", "pl", "simple", "fa", 
"vi", "sv", "ko", "he", "ro", "no", "hi", "uk", "cs", "fi", "hu", "th", "da", "ca", "el", "bg", "sr", "ms", "bn", "hr", "sl", "zh_yue", "az", "sk", "eo", "ta", "sh", "lt", "et", "ml", "la", "bs", "sq", "arz", "af", "ka", "mr", "eu", "tl", "ang", "gl", "nn", "ur", "kk", "be", "hy", "te", "lv", "mk", "zh_classical", "als", "is", "wuu", "my", "sco", "mn", "ceb", "ast", "cy", "kn", "br", "an", "gu", "bar", "uz", "lb", "ne", "si", "war", "jv", "ga", "zh_min_nan", "oc", "ku", "sw", "nds", "ckb", "ia", "yi", "fy", "scn", "gan", "tt", "am" ], "layer_norm_eps": 1e-12, "lg_sampling_factor": 0.7, "lgs": "en-es-fr-de-zh-ru-pt-it-ar-ja-id-tr-nl-pl-simple-fa-vi-sv-ko-he-ro-no-hi-uk-cs-fi-hu-th-da-ca-el-bg-sr-ms-bn-hr-sl-zh_yue-az-sk-eo-ta-sh-lt-et-ml-la-bs-sq-arz-af-ka-mr-eu-tl-ang-gl-nn-ur-kk-be-hy-te-lv-mk-zh_classical-als-is-wuu-my-sco-mn-ceb-ast-cy-kn-br-an-gu-bar-uz-lb-ne-si-war-jv-ga-zh_min_nan-oc-ku-sw-nds-ckb-ia-yi-fy-scn-gan-tt-am", "local_rank": 0, "mask_index": 5, "mask_token_id": 0, "master_addr": "learnfair0332", "master_port": 11363, "max_batch_size": 0, "max_epoch": 100000, "max_len": 200, "max_position_embeddings": 512, "max_vocab": 200000, "min_count": 0, "mlm_steps": [ [ "en", null ], [ "es", null ], [ "fr", null ], [ "de", null ], [ "zh", null ], [ "ru", null ], [ "pt", null ], [ "it", null ], [ "ar", null ], [ "ja", null ], [ "id", null ], [ "tr", null ], [ "nl", null ], [ "pl", null ], [ "simple", null ], [ "fa", null ], [ "vi", null ], [ "sv", null ], [ "ko", null ], [ "he", null ], [ "ro", null ], [ "no", null ], [ "hi", null ], [ "uk", null ], [ "cs", null ], [ "fi", null ], [ "hu", null ], [ "th", null ], [ "da", null ], [ "ca", null ], [ "el", null ], [ "bg", null ], [ "sr", null ], [ "ms", null ], [ "bn", null ], [ "hr", null ], [ "sl", null ], [ "zh_yue", null ], [ "az", null ], [ "sk", null ], [ "eo", null ], [ "ta", null ], [ "sh", null ], [ "lt", null ], [ "et", null ], [ "ml", null ], [ "la", null ], [ "bs", null ], [ "sq", null ], [ "arz", null 
], [ "af", null ], [ "ka", null ], [ "mr", null ], [ "eu", null ], [ "tl", null ], [ "ang", null ], [ "gl", null ], [ "nn", null ], [ "ur", null ], [ "kk", null ], [ "be", null ], [ "hy", null ], [ "te", null ], [ "lv", null ], [ "mk", null ], [ "zh_classical", null ], [ "als", null ], [ "is", null ], [ "wuu", null ], [ "my", null ], [ "sco", null ], [ "mn", null ], [ "ceb", null ], [ "ast", null ], [ "cy", null ], [ "kn", null ], [ "br", null ], [ "an", null ], [ "gu", null ], [ "bar", null ], [ "uz", null ], [ "lb", null ], [ "ne", null ], [ "si", null ], [ "war", null ], [ "jv", null ], [ "ga", null ], [ "zh_min_nan", null ], [ "oc", null ], [ "ku", null ], [ "sw", null ], [ "nds", null ], [ "ckb", null ], [ "ia", null ], [ "yi", null ], [ "fy", null ], [ "scn", null ], [ "gan", null ], [ "tt", null ], [ "am", null ] ], "model_type": "xlm", "mono_dataset": { "af": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.af.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.af.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.af.pth" }, "als": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.als.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.als.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.als.pth" }, "am": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.am.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.am.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.am.pth" }, "an": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.an.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.an.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.an.pth" }, "ang": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ang.pth", "train": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ang.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ang.pth" }, "ar": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ar.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ar.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ar.pth" }, "arz": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.arz.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.arz.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.arz.pth" }, "ast": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ast.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ast.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ast.pth" }, "az": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.az.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.az.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.az.pth" }, "bar": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bar.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bar.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bar.pth" }, "be": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.be.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.be.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.be.pth" }, "bg": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bg.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bg.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bg.pth" }, "bn": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bn.pth", "train": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bn.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bn.pth" }, "br": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.br.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.br.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.br.pth" }, "bs": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.bs.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.bs.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.bs.pth" }, "ca": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ca.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ca.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ca.pth" }, "ceb": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ceb.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ceb.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ceb.pth" }, "ckb": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ckb.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ckb.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ckb.pth" }, "cs": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.cs.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.cs.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.cs.pth" }, "cy": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.cy.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.cy.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.cy.pth" }, "da": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.da.pth", "train": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.da.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.da.pth" }, "de": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.de.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.de.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.de.pth" }, "el": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.el.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.el.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.el.pth" }, "en": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.en.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.en.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.en.pth" }, "eo": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.eo.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.eo.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.eo.pth" }, "es": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.es.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.es.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.es.pth" }, "et": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.et.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.et.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.et.pth" }, "eu": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.eu.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.eu.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.eu.pth" }, "fa": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fa.pth", "train": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fa.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fa.pth" }, "fi": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fi.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fi.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fi.pth" }, "fr": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fr.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fr.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fr.pth" }, "fy": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.fy.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.fy.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.fy.pth" }, "ga": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ga.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ga.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ga.pth" }, "gan": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gan.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gan.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gan.pth" }, "gl": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gl.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gl.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gl.pth" }, "gu": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.gu.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.gu.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.gu.pth" }, "he": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.he.pth", "train": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.he.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.he.pth" }, "hi": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hi.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hi.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hi.pth" }, "hr": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hr.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hr.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hr.pth" }, "hu": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hu.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hu.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hu.pth" }, "hy": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.hy.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.hy.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.hy.pth" }, "ia": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ia.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ia.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ia.pth" }, "id": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.id.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.id.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.id.pth" }, "is": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.is.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.is.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.is.pth" }, "it": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.it.pth", "train": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.it.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.it.pth" }, "ja": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ja.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ja.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ja.pth" }, "jv": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.jv.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.jv.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.jv.pth" }, "ka": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ka.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ka.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ka.pth" }, "kk": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.kk.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.kk.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.kk.pth" }, "kn": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.kn.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.kn.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.kn.pth" }, "ko": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ko.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ko.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ko.pth" }, "ku": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ku.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ku.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ku.pth" }, "la": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.la.pth", "train": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.la.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.la.pth" }, "lb": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lb.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lb.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lb.pth" }, "lt": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lt.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lt.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lt.pth" }, "lv": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.lv.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.lv.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.lv.pth" }, "mk": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mk.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mk.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mk.pth" }, "ml": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ml.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ml.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ml.pth" }, "mn": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mn.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mn.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mn.pth" }, "mr": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.mr.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.mr.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.mr.pth" }, "ms": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ms.pth", "train": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ms.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ms.pth" }, "my": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.my.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.my.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.my.pth" }, "nds": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nds.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nds.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nds.pth" }, "ne": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ne.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ne.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ne.pth" }, "nl": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nl.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nl.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nl.pth" }, "nn": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.nn.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.nn.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.nn.pth" }, "no": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.no.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.no.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.no.pth" }, "oc": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.oc.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.oc.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.oc.pth" }, "pl": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.pl.pth", "train": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.pl.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.pl.pth" }, "pt": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.pt.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.pt.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.pt.pth" }, "ro": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ro.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ro.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ro.pth" }, "ru": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ru.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ru.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ru.pth" }, "scn": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.scn.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.scn.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.scn.pth" }, "sco": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sco.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sco.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sco.pth" }, "sh": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sh.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sh.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sh.pth" }, "si": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.si.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.si.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.si.pth" }, "simple": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.simple.pth", "train": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.simple.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.simple.pth" }, "sk": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sk.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sk.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sk.pth" }, "sl": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sl.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sl.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sl.pth" }, "sq": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sq.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sq.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sq.pth" }, "sr": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sr.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sr.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sr.pth" }, "sv": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sv.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sv.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sv.pth" }, "sw": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.sw.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.sw.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.sw.pth" }, "ta": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ta.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ta.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ta.pth" }, "te": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.te.pth", "train": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.te.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.te.pth" }, "th": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.th.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.th.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.th.pth" }, "tl": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tl.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tl.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tl.pth" }, "tr": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tr.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tr.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tr.pth" }, "tt": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.tt.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.tt.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.tt.pth" }, "uk": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.uk.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.uk.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.uk.pth" }, "ur": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.ur.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.ur.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.ur.pth" }, "uz": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.uz.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.uz.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.uz.pth" }, "vi": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.vi.pth", "train": 
"/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.vi.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.vi.pth" }, "war": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.war.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.war.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.war.pth" }, "wuu": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.wuu.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.wuu.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.wuu.pth" }, "yi": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.yi.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.yi.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.yi.pth" }, "zh": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh.pth" }, "zh_classical": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_classical.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_classical.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_classical.pth" }, "zh_min_nan": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_min_nan.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_min_nan.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_min_nan.pth" }, "zh_yue": { "test": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/test.zh_yue.pth", "train": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/train.zh_yue.pth", "valid": "/private/home/aconneau/projects/XLM/data/wiki/100/175k/valid.zh_yue.pth" } }, "mono_list": 
[ "en", "es", "fr", "de", "zh", "ru", "pt", "it", "ar", "ja", "id", "tr", "nl", "pl", "simple", "fa", "vi", "sv", "ko", "he", "ro", "no", "hi", "uk", "cs", "fi", "hu", "th", "da", "ca", "el", "bg", "sr", "ms", "bn", "hr", "sl", "zh_yue", "az", "sk", "eo", "ta", "sh", "lt", "et", "ml", "la", "bs", "sq", "arz", "af", "ka", "mr", "eu", "tl", "ang", "gl", "nn", "ur", "kk", "be", "hy", "te", "lv", "mk", "zh_classical", "als", "is", "wuu", "my", "sco", "mn", "ceb", "ast", "cy", "kn", "br", "an", "gu", "bar", "uz", "lb", "ne", "si", "war", "jv", "ga", "zh_min_nan", "oc", "ku", "sw", "nds", "ckb", "ia", "yi", "fy", "scn", "gan", "tt", "am" ], "mt_steps": [], "multi_gpu": true, "multi_node": true, "n_gpu_per_node": 8, "n_heads": 16, "n_langs": 100, "n_layers": 16, "n_nodes": 4, "node_id": 0, "optimizer": "adam_inverse_sqrt,lr=0.00005,warmup_updates=30000,beta1=0.9,beta2=0.999,weight_decay=0.01,eps=0.000001", "pad_index": 2, "pad_token_id": 2, "para_dataset": {}, "para_list": [], "pc_steps": [], "ref_paths": {}, "reload_checkpoint": "", "reload_emb": "", "reload_model": "/checkpoint/aconneau/dumped/xlm_17_100_240_big_model_upper.2/14884511/best-valid_zh_mlm_ppl.pth", "sample_alpha": 0.5, "save_periodic": 0, "share_inout_emb": true, "sinusoidal_embeddings": false, "split_data": true, "start_n_top": 5, "stopping_criterion": "_valid_zh_mlm_ppl,25", "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "first", "summary_use_proj": true, "tokens_per_batch": -1, "torch_dtype": "float32", "transformers_version": "4.35.0.dev0", "unk_index": 3, "use_lang_emb": false, "use_memory": false, "validation_metrics": "_valid_en_mlm_ppl,_valid_mlm_ppl,_valid_zh_mlm_ppl", "vocab_size": 200000, "word_blank": 0.0, "word_dropout": 0.0, "word_keep": 0.1, "word_mask": 0.8, "word_mask_keep_rand": "0.8,0.1,0.1", "word_pred": 0.15, "word_rand": 0.1, "word_shuffle": 0.0, "world_size": 32 }