|
{ |
|
"added_tokens_decoder": { |
|
"0": { |
|
"content": "<s>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"1": { |
|
"content": "<pad>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"2": { |
|
"content": "</s>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"3": { |
|
"content": "<unk>", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269484": { |
|
"content": "<mask>", |
|
"lstrip": true, |
|
"normalized": true, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269485": { |
|
"content": "ace_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269486": { |
|
"content": "ace_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269487": { |
|
"content": "acm_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269488": { |
|
"content": "acq_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269489": { |
|
"content": "aeb_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269490": { |
|
"content": "afr_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269491": { |
|
"content": "ajp_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269492": { |
|
"content": "aka_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269493": { |
|
"content": "als_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269494": { |
|
"content": "amh_Ethi", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269495": { |
|
"content": "apc_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269496": { |
|
"content": "arb_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269497": { |
|
"content": "ars_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269498": { |
|
"content": "ary_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269499": { |
|
"content": "arz_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269500": { |
|
"content": "asm_Beng", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269501": { |
|
"content": "ast_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269502": { |
|
"content": "awa_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269503": { |
|
"content": "ayr_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269504": { |
|
"content": "azb_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269505": { |
|
"content": "azj_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269506": { |
|
"content": "bak_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269507": { |
|
"content": "bam_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269508": { |
|
"content": "ban_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269509": { |
|
"content": "bel_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269510": { |
|
"content": "bem_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269511": { |
|
"content": "ben_Beng", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269512": { |
|
"content": "bho_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269513": { |
|
"content": "bjn_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269514": { |
|
"content": "bjn_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269515": { |
|
"content": "bod_Tibt", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269516": { |
|
"content": "bos_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269517": { |
|
"content": "bug_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269518": { |
|
"content": "bul_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269519": { |
|
"content": "bxr_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269520": { |
|
"content": "cat_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269521": { |
|
"content": "ceb_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269522": { |
|
"content": "ces_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269523": { |
|
"content": "cjk_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269524": { |
|
"content": "ckb_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269525": { |
|
"content": "crh_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269526": { |
|
"content": "cym_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269527": { |
|
"content": "dan_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269528": { |
|
"content": "deu_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269529": { |
|
"content": "dik_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269530": { |
|
"content": "dyu_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269531": { |
|
"content": "dzo_Tibt", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269532": { |
|
"content": "ell_Grek", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269533": { |
|
"content": "eng_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269534": { |
|
"content": "epo_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269535": { |
|
"content": "est_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269536": { |
|
"content": "eus_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269537": { |
|
"content": "ewe_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269538": { |
|
"content": "fao_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269539": { |
|
"content": "fij_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269540": { |
|
"content": "fin_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269541": { |
|
"content": "fon_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269542": { |
|
"content": "fra_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269543": { |
|
"content": "fur_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269544": { |
|
"content": "fuv_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269545": { |
|
"content": "gaz_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269546": { |
|
"content": "gla_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269547": { |
|
"content": "gle_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269548": { |
|
"content": "glg_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269549": { |
|
"content": "grn_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269550": { |
|
"content": "guj_Gujr", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269551": { |
|
"content": "hat_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269552": { |
|
"content": "hau_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269553": { |
|
"content": "heb_Hebr", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269554": { |
|
"content": "hin_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269555": { |
|
"content": "hne_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269556": { |
|
"content": "hrv_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269557": { |
|
"content": "hun_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269558": { |
|
"content": "hye_Armn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269559": { |
|
"content": "ibo_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269560": { |
|
"content": "ilo_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269561": { |
|
"content": "ind_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269562": { |
|
"content": "isl_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269563": { |
|
"content": "ita_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269564": { |
|
"content": "jav_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269565": { |
|
"content": "jpn_Jpan", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269566": { |
|
"content": "kab_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269567": { |
|
"content": "kac_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269568": { |
|
"content": "kam_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269569": { |
|
"content": "kan_Knda", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269570": { |
|
"content": "kas_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269571": { |
|
"content": "kas_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269572": { |
|
"content": "kat_Geor", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269573": { |
|
"content": "kaz_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269574": { |
|
"content": "kbp_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269575": { |
|
"content": "kea_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269576": { |
|
"content": "khk_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269577": { |
|
"content": "khm_Khmr", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269578": { |
|
"content": "kik_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269579": { |
|
"content": "kin_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269580": { |
|
"content": "kir_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269581": { |
|
"content": "kmb_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269582": { |
|
"content": "kmr_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269583": { |
|
"content": "knc_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269584": { |
|
"content": "knc_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269585": { |
|
"content": "kon_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269586": { |
|
"content": "kor_Hang", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269587": { |
|
"content": "lao_Laoo", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269588": { |
|
"content": "lij_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269589": { |
|
"content": "lim_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269590": { |
|
"content": "lin_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269591": { |
|
"content": "lit_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269592": { |
|
"content": "lmo_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269593": { |
|
"content": "ltg_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269594": { |
|
"content": "ltz_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269595": { |
|
"content": "lua_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269596": { |
|
"content": "lug_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269597": { |
|
"content": "luo_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269598": { |
|
"content": "lus_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269599": { |
|
"content": "lvs_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269600": { |
|
"content": "mag_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269601": { |
|
"content": "mai_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269602": { |
|
"content": "mal_Mlym", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269603": { |
|
"content": "mar_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269604": { |
|
"content": "min_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269605": { |
|
"content": "mkd_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269606": { |
|
"content": "mlt_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269607": { |
|
"content": "mni_Beng", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269608": { |
|
"content": "mos_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269609": { |
|
"content": "mri_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269610": { |
|
"content": "mya_Mymr", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269611": { |
|
"content": "nld_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269612": { |
|
"content": "nno_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269613": { |
|
"content": "nob_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269614": { |
|
"content": "npi_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269615": { |
|
"content": "nso_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269616": { |
|
"content": "nus_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269617": { |
|
"content": "nya_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269618": { |
|
"content": "oci_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269619": { |
|
"content": "ory_Orya", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269620": { |
|
"content": "pag_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269621": { |
|
"content": "pan_Guru", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269622": { |
|
"content": "pap_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269623": { |
|
"content": "pbt_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269624": { |
|
"content": "pes_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269625": { |
|
"content": "plt_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269626": { |
|
"content": "pol_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269627": { |
|
"content": "por_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269628": { |
|
"content": "prs_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269629": { |
|
"content": "quy_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269630": { |
|
"content": "ron_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269631": { |
|
"content": "run_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269632": { |
|
"content": "rus_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269633": { |
|
"content": "sag_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269634": { |
|
"content": "san_Deva", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269635": { |
|
"content": "sat_Beng", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269636": { |
|
"content": "scn_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269637": { |
|
"content": "shn_Mymr", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269638": { |
|
"content": "sin_Sinh", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269639": { |
|
"content": "slk_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269640": { |
|
"content": "slv_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269641": { |
|
"content": "smo_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269642": { |
|
"content": "sna_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269643": { |
|
"content": "snd_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269644": { |
|
"content": "som_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269645": { |
|
"content": "sot_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269646": { |
|
"content": "spa_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269647": { |
|
"content": "srd_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269648": { |
|
"content": "srp_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269649": { |
|
"content": "ssw_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269650": { |
|
"content": "sun_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269651": { |
|
"content": "swe_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269652": { |
|
"content": "swh_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269653": { |
|
"content": "szl_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269654": { |
|
"content": "tam_Taml", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269655": { |
|
"content": "taq_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269656": { |
|
"content": "taq_Tfng", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269657": { |
|
"content": "tat_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269658": { |
|
"content": "tel_Telu", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269659": { |
|
"content": "tgk_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269660": { |
|
"content": "tgl_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269661": { |
|
"content": "tha_Thai", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269662": { |
|
"content": "tir_Ethi", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269663": { |
|
"content": "tpi_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269664": { |
|
"content": "tsn_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269665": { |
|
"content": "tso_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269666": { |
|
"content": "tuk_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269667": { |
|
"content": "tum_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269668": { |
|
"content": "tur_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269669": { |
|
"content": "twi_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269670": { |
|
"content": "tzm_Tfng", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269671": { |
|
"content": "uig_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269672": { |
|
"content": "ukr_Cyrl", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269673": { |
|
"content": "umb_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269674": { |
|
"content": "urd_Arab", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269675": { |
|
"content": "uzn_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269676": { |
|
"content": "vec_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269677": { |
|
"content": "vie_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269678": { |
|
"content": "war_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269679": { |
|
"content": "wol_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269680": { |
|
"content": "xho_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269681": { |
|
"content": "ydd_Hebr", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269682": { |
|
"content": "yor_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269683": { |
|
"content": "yue_Hant", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269684": { |
|
"content": "zho_Hans", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269685": { |
|
"content": "zho_Hant", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269686": { |
|
"content": "zsm_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
}, |
|
"269687": { |
|
"content": "zul_Latn", |
|
"lstrip": false, |
|
"normalized": false, |
|
"rstrip": false, |
|
"single_word": false, |
|
"special": true |
|
} |
|
}, |
|
"additional_special_tokens": [ |
|
"ace_Arab", |
|
"ace_Latn", |
|
"acm_Arab", |
|
"acq_Arab", |
|
"aeb_Arab", |
|
"afr_Latn", |
|
"ajp_Arab", |
|
"aka_Latn", |
|
"als_Latn", |
|
"amh_Ethi", |
|
"apc_Arab", |
|
"arb_Arab", |
|
"ars_Arab", |
|
"ary_Arab", |
|
"arz_Arab", |
|
"asm_Beng", |
|
"ast_Latn", |
|
"awa_Deva", |
|
"ayr_Latn", |
|
"azb_Arab", |
|
"azj_Latn", |
|
"bak_Cyrl", |
|
"bam_Latn", |
|
"ban_Latn", |
|
"bel_Cyrl", |
|
"bem_Latn", |
|
"ben_Beng", |
|
"bho_Deva", |
|
"bjn_Arab", |
|
"bjn_Latn", |
|
"bod_Tibt", |
|
"bos_Latn", |
|
"bug_Latn", |
|
"bul_Cyrl", |
|
"bxr_Cyrl", |
|
"cat_Latn", |
|
"ceb_Latn", |
|
"ces_Latn", |
|
"cjk_Latn", |
|
"ckb_Arab", |
|
"crh_Latn", |
|
"cym_Latn", |
|
"dan_Latn", |
|
"deu_Latn", |
|
"dik_Latn", |
|
"dyu_Latn", |
|
"dzo_Tibt", |
|
"ell_Grek", |
|
"eng_Latn", |
|
"epo_Latn", |
|
"est_Latn", |
|
"eus_Latn", |
|
"ewe_Latn", |
|
"fao_Latn", |
|
"fij_Latn", |
|
"fin_Latn", |
|
"fon_Latn", |
|
"fra_Latn", |
|
"fur_Latn", |
|
"fuv_Latn", |
|
"gaz_Latn", |
|
"gla_Latn", |
|
"gle_Latn", |
|
"glg_Latn", |
|
"grn_Latn", |
|
"guj_Gujr", |
|
"hat_Latn", |
|
"hau_Latn", |
|
"heb_Hebr", |
|
"hin_Deva", |
|
"hne_Deva", |
|
"hrv_Latn", |
|
"hun_Latn", |
|
"hye_Armn", |
|
"ibo_Latn", |
|
"ilo_Latn", |
|
"ind_Latn", |
|
"isl_Latn", |
|
"ita_Latn", |
|
"jav_Latn", |
|
"jpn_Jpan", |
|
"kab_Latn", |
|
"kac_Latn", |
|
"kam_Latn", |
|
"kan_Knda", |
|
"kas_Arab", |
|
"kas_Deva", |
|
"kat_Geor", |
|
"kaz_Cyrl", |
|
"kbp_Latn", |
|
"kea_Latn", |
|
"khk_Cyrl", |
|
"khm_Khmr", |
|
"kik_Latn", |
|
"kin_Latn", |
|
"kir_Cyrl", |
|
"kmb_Latn", |
|
"kmr_Latn", |
|
"knc_Arab", |
|
"knc_Latn", |
|
"kon_Latn", |
|
"kor_Hang", |
|
"lao_Laoo", |
|
"lij_Latn", |
|
"lim_Latn", |
|
"lin_Latn", |
|
"lit_Latn", |
|
"lmo_Latn", |
|
"ltg_Latn", |
|
"ltz_Latn", |
|
"lua_Latn", |
|
"lug_Latn", |
|
"luo_Latn", |
|
"lus_Latn", |
|
"lvs_Latn", |
|
"mag_Deva", |
|
"mai_Deva", |
|
"mal_Mlym", |
|
"mar_Deva", |
|
"min_Latn", |
|
"mkd_Cyrl", |
|
"mlt_Latn", |
|
"mni_Beng", |
|
"mos_Latn", |
|
"mri_Latn", |
|
"mya_Mymr", |
|
"nld_Latn", |
|
"nno_Latn", |
|
"nob_Latn", |
|
"npi_Deva", |
|
"nso_Latn", |
|
"nus_Latn", |
|
"nya_Latn", |
|
"oci_Latn", |
|
"ory_Orya", |
|
"pag_Latn", |
|
"pan_Guru", |
|
"pap_Latn", |
|
"pbt_Arab", |
|
"pes_Arab", |
|
"plt_Latn", |
|
"pol_Latn", |
|
"por_Latn", |
|
"prs_Arab", |
|
"quy_Latn", |
|
"ron_Latn", |
|
"run_Latn", |
|
"rus_Cyrl", |
|
"sag_Latn", |
|
"san_Deva", |
|
"sat_Beng", |
|
"scn_Latn", |
|
"shn_Mymr", |
|
"sin_Sinh", |
|
"slk_Latn", |
|
"slv_Latn", |
|
"smo_Latn", |
|
"sna_Latn", |
|
"snd_Arab", |
|
"som_Latn", |
|
"sot_Latn", |
|
"spa_Latn", |
|
"srd_Latn", |
|
"srp_Cyrl", |
|
"ssw_Latn", |
|
"sun_Latn", |
|
"swe_Latn", |
|
"swh_Latn", |
|
"szl_Latn", |
|
"tam_Taml", |
|
"taq_Latn", |
|
"taq_Tfng", |
|
"tat_Cyrl", |
|
"tel_Telu", |
|
"tgk_Cyrl", |
|
"tgl_Latn", |
|
"tha_Thai", |
|
"tir_Ethi", |
|
"tpi_Latn", |
|
"tsn_Latn", |
|
"tso_Latn", |
|
"tuk_Latn", |
|
"tum_Latn", |
|
"tur_Latn", |
|
"twi_Latn", |
|
"tzm_Tfng", |
|
"uig_Arab", |
|
"ukr_Cyrl", |
|
"umb_Latn", |
|
"urd_Arab", |
|
"uzn_Latn", |
|
"vec_Latn", |
|
"vie_Latn", |
|
"war_Latn", |
|
"wol_Latn", |
|
"xho_Latn", |
|
"ydd_Hebr", |
|
"yor_Latn", |
|
"yue_Hant", |
|
"zho_Hans", |
|
"zho_Hant", |
|
"zsm_Latn", |
|
"zul_Latn" |
|
], |
|
"bos_token": "<s>", |
|
"clean_up_tokenization_spaces": true, |
|
"cls_token": "<s>", |
|
"eos_token": "</s>", |
|
"legacy_behaviour": false, |
|
"mask_token": "<mask>", |
|
"model_max_length": 1024, |
|
"pad_token": "<pad>", |
|
"sep_token": "</s>", |
|
"sp_model_kwargs": {}, |
|
"src_lang": "rus_Cyrl", |
|
"tgt_lang": null, |
|
"tokenizer_class": "NllbTokenizer", |
|
"unk_token": "<unk>" |
|
} |
|
|