{ | |
"add_prefix_space": true, | |
"added_tokens_decoder": { | |
"0": { | |
"content": "<pad>", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"1": { | |
"content": "<unk>", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"2": { | |
"content": "<s>", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"3": { | |
"content": "</s>", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256001": { | |
"content": "__ace__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256002": { | |
"content": "__ace_Latn__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256003": { | |
"content": "__acm__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256004": { | |
"content": "__acq__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256005": { | |
"content": "__aeb__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256006": { | |
"content": "__afr__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256007": { | |
"content": "__ajp__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256008": { | |
"content": "__aka__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256009": { | |
"content": "__amh__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256010": { | |
"content": "__apc__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256011": { | |
"content": "__arb__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256012": { | |
"content": "__ars__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256013": { | |
"content": "__ary__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256014": { | |
"content": "__arz__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256015": { | |
"content": "__asm__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256016": { | |
"content": "__ast__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256017": { | |
"content": "__awa__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256018": { | |
"content": "__ayr__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256019": { | |
"content": "__azb__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256020": { | |
"content": "__azj__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256021": { | |
"content": "__bak__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256022": { | |
"content": "__bam__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256023": { | |
"content": "__ban__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256024": { | |
"content": "__bel__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256025": { | |
"content": "__bem__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256026": { | |
"content": "__ben__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256027": { | |
"content": "__bho__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256028": { | |
"content": "__bjn__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256029": { | |
"content": "__bjn_Latn__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256030": { | |
"content": "__bod__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256031": { | |
"content": "__bos__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256032": { | |
"content": "__bug__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256033": { | |
"content": "__bul__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256034": { | |
"content": "__cat__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256035": { | |
"content": "__ceb__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256036": { | |
"content": "__ces__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256037": { | |
"content": "__cjk__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256038": { | |
"content": "__ckb__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256039": { | |
"content": "__crh__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256040": { | |
"content": "__cym__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256041": { | |
"content": "__dan__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256042": { | |
"content": "__deu__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256043": { | |
"content": "__dik__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256044": { | |
"content": "__dyu__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256045": { | |
"content": "__dzo__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256046": { | |
"content": "__ell__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256047": { | |
"content": "__eng__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256048": { | |
"content": "__epo__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256049": { | |
"content": "__est__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256050": { | |
"content": "__eus__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256051": { | |
"content": "__ewe__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256052": { | |
"content": "__fao__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256053": { | |
"content": "__pes__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256054": { | |
"content": "__fij__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256055": { | |
"content": "__fin__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256056": { | |
"content": "__fon__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256057": { | |
"content": "__fra__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256058": { | |
"content": "__fur__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256059": { | |
"content": "__fuv__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256060": { | |
"content": "__gla__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256061": { | |
"content": "__gle__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256062": { | |
"content": "__glg__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256063": { | |
"content": "__grn__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256064": { | |
"content": "__guj__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256065": { | |
"content": "__hat__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256066": { | |
"content": "__hau__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256067": { | |
"content": "__heb__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256068": { | |
"content": "__hin__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256069": { | |
"content": "__hne__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256070": { | |
"content": "__hrv__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256071": { | |
"content": "__hun__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256072": { | |
"content": "__hye__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256073": { | |
"content": "__ibo__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256074": { | |
"content": "__ilo__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256075": { | |
"content": "__ind__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256076": { | |
"content": "__isl__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256077": { | |
"content": "__ita__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256078": { | |
"content": "__jav__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256079": { | |
"content": "__jpn__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256080": { | |
"content": "__kab__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256081": { | |
"content": "__kac__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256082": { | |
"content": "__kam__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256083": { | |
"content": "__kan__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256084": { | |
"content": "__kas__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256085": { | |
"content": "__kas_Deva__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256086": { | |
"content": "__kat__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256087": { | |
"content": "__knc__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256088": { | |
"content": "__knc_Latn__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256089": { | |
"content": "__kaz__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256090": { | |
"content": "__kbp__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256091": { | |
"content": "__kea__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256092": { | |
"content": "__khm__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256093": { | |
"content": "__kik__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256094": { | |
"content": "__kin__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256095": { | |
"content": "__kir__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256096": { | |
"content": "__kmb__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256097": { | |
"content": "__kon__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256098": { | |
"content": "__kor__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256099": { | |
"content": "__kmr__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256100": { | |
"content": "__lao__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256101": { | |
"content": "__lvs__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256102": { | |
"content": "__lij__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256103": { | |
"content": "__lim__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256104": { | |
"content": "__lin__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256105": { | |
"content": "__lit__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256106": { | |
"content": "__lmo__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256107": { | |
"content": "__ltg__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256108": { | |
"content": "__ltz__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256109": { | |
"content": "__lua__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256110": { | |
"content": "__lug__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256111": { | |
"content": "__luo__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256112": { | |
"content": "__lus__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256113": { | |
"content": "__mag__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256114": { | |
"content": "__mai__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256115": { | |
"content": "__mal__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256116": { | |
"content": "__mar__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256117": { | |
"content": "__min__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256118": { | |
"content": "__mkd__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256119": { | |
"content": "__plt__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256120": { | |
"content": "__mlt__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256121": { | |
"content": "__mni__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256122": { | |
"content": "__khk__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256123": { | |
"content": "__mos__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256124": { | |
"content": "__mri__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256125": { | |
"content": "__zsm__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256126": { | |
"content": "__mya__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256127": { | |
"content": "__nld__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256128": { | |
"content": "__nno__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256129": { | |
"content": "__nob__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256130": { | |
"content": "__npi__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256131": { | |
"content": "__nso__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256132": { | |
"content": "__nus__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256133": { | |
"content": "__nya__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256134": { | |
"content": "__oci__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256135": { | |
"content": "__gaz__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256136": { | |
"content": "__ory__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256137": { | |
"content": "__pag__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256138": { | |
"content": "__pan__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256139": { | |
"content": "__pap__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256140": { | |
"content": "__pol__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256141": { | |
"content": "__por__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256142": { | |
"content": "__prs__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256143": { | |
"content": "__pbt__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256144": { | |
"content": "__quy__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256145": { | |
"content": "__ron__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256146": { | |
"content": "__run__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256147": { | |
"content": "__rus__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256148": { | |
"content": "__sag__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256149": { | |
"content": "__san__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256150": { | |
"content": "__sat__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256151": { | |
"content": "__scn__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256152": { | |
"content": "__shn__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256153": { | |
"content": "__sin__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256154": { | |
"content": "__slk__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256155": { | |
"content": "__slv__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256156": { | |
"content": "__smo__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256157": { | |
"content": "__sna__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256158": { | |
"content": "__snd__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256159": { | |
"content": "__som__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256160": { | |
"content": "__sot__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256161": { | |
"content": "__spa__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256162": { | |
"content": "__als__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256163": { | |
"content": "__srd__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256164": { | |
"content": "__srp__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256165": { | |
"content": "__ssw__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256166": { | |
"content": "__sun__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256167": { | |
"content": "__swe__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256168": { | |
"content": "__swh__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256169": { | |
"content": "__szl__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256170": { | |
"content": "__tam__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256171": { | |
"content": "__tat__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256172": { | |
"content": "__tel__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256173": { | |
"content": "__tgk__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256174": { | |
"content": "__tgl__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256175": { | |
"content": "__tha__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256176": { | |
"content": "__tir__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256177": { | |
"content": "__taq__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256178": { | |
"content": "__taq_Tfng__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256179": { | |
"content": "__tpi__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256180": { | |
"content": "__tsn__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256181": { | |
"content": "__tso__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256182": { | |
"content": "__tuk__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256183": { | |
"content": "__tum__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256184": { | |
"content": "__tur__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256185": { | |
"content": "__twi__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256186": { | |
"content": "__tzm__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256187": { | |
"content": "__uig__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256188": { | |
"content": "__ukr__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256189": { | |
"content": "__umb__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256190": { | |
"content": "__urd__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256191": { | |
"content": "__uzn__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256192": { | |
"content": "__vec__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256193": { | |
"content": "__vie__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256194": { | |
"content": "__war__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256195": { | |
"content": "__wol__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256196": { | |
"content": "__xho__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256197": { | |
"content": "__ydd__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256198": { | |
"content": "__yor__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256199": { | |
"content": "__yue__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256200": { | |
"content": "__cmn__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256201": { | |
"content": "__cmn_Hant__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
}, | |
"256202": { | |
"content": "__zul__", | |
"lstrip": true, | |
"normalized": false, | |
"rstrip": true, | |
"single_word": false, | |
"special": true | |
} | |
}, | |
"additional_special_tokens": [ | |
"<pad>", | |
"<unk>", | |
"<s>", | |
"</s>", | |
"__ace__", | |
"__ace_Latn__", | |
"__acm__", | |
"__acq__", | |
"__aeb__", | |
"__afr__", | |
"__ajp__", | |
"__aka__", | |
"__amh__", | |
"__apc__", | |
"__arb__", | |
"__ars__", | |
"__ary__", | |
"__arz__", | |
"__asm__", | |
"__ast__", | |
"__awa__", | |
"__ayr__", | |
"__azb__", | |
"__azj__", | |
"__bak__", | |
"__bam__", | |
"__ban__", | |
"__bel__", | |
"__bem__", | |
"__ben__", | |
"__bho__", | |
"__bjn__", | |
"__bjn_Latn__", | |
"__bod__", | |
"__bos__", | |
"__bug__", | |
"__bul__", | |
"__cat__", | |
"__ceb__", | |
"__ces__", | |
"__cjk__", | |
"__ckb__", | |
"__crh__", | |
"__cym__", | |
"__dan__", | |
"__deu__", | |
"__dik__", | |
"__dyu__", | |
"__dzo__", | |
"__ell__", | |
"__eng__", | |
"__epo__", | |
"__est__", | |
"__eus__", | |
"__ewe__", | |
"__fao__", | |
"__pes__", | |
"__fij__", | |
"__fin__", | |
"__fon__", | |
"__fra__", | |
"__fur__", | |
"__fuv__", | |
"__gla__", | |
"__gle__", | |
"__glg__", | |
"__grn__", | |
"__guj__", | |
"__hat__", | |
"__hau__", | |
"__heb__", | |
"__hin__", | |
"__hne__", | |
"__hrv__", | |
"__hun__", | |
"__hye__", | |
"__ibo__", | |
"__ilo__", | |
"__ind__", | |
"__isl__", | |
"__ita__", | |
"__jav__", | |
"__jpn__", | |
"__kab__", | |
"__kac__", | |
"__kam__", | |
"__kan__", | |
"__kas__", | |
"__kas_Deva__", | |
"__kat__", | |
"__knc__", | |
"__knc_Latn__", | |
"__kaz__", | |
"__kbp__", | |
"__kea__", | |
"__khm__", | |
"__kik__", | |
"__kin__", | |
"__kir__", | |
"__kmb__", | |
"__kon__", | |
"__kor__", | |
"__kmr__", | |
"__lao__", | |
"__lvs__", | |
"__lij__", | |
"__lim__", | |
"__lin__", | |
"__lit__", | |
"__lmo__", | |
"__ltg__", | |
"__ltz__", | |
"__lua__", | |
"__lug__", | |
"__luo__", | |
"__lus__", | |
"__mag__", | |
"__mai__", | |
"__mal__", | |
"__mar__", | |
"__min__", | |
"__mkd__", | |
"__plt__", | |
"__mlt__", | |
"__mni__", | |
"__khk__", | |
"__mos__", | |
"__mri__", | |
"__zsm__", | |
"__mya__", | |
"__nld__", | |
"__nno__", | |
"__nob__", | |
"__npi__", | |
"__nso__", | |
"__nus__", | |
"__nya__", | |
"__oci__", | |
"__gaz__", | |
"__ory__", | |
"__pag__", | |
"__pan__", | |
"__pap__", | |
"__pol__", | |
"__por__", | |
"__prs__", | |
"__pbt__", | |
"__quy__", | |
"__ron__", | |
"__run__", | |
"__rus__", | |
"__sag__", | |
"__san__", | |
"__sat__", | |
"__scn__", | |
"__shn__", | |
"__sin__", | |
"__slk__", | |
"__slv__", | |
"__smo__", | |
"__sna__", | |
"__snd__", | |
"__som__", | |
"__sot__", | |
"__spa__", | |
"__als__", | |
"__srd__", | |
"__srp__", | |
"__ssw__", | |
"__sun__", | |
"__swe__", | |
"__swh__", | |
"__szl__", | |
"__tam__", | |
"__tat__", | |
"__tel__", | |
"__tgk__", | |
"__tgl__", | |
"__tha__", | |
"__tir__", | |
"__taq__", | |
"__taq_Tfng__", | |
"__tpi__", | |
"__tsn__", | |
"__tso__", | |
"__tuk__", | |
"__tum__", | |
"__tur__", | |
"__twi__", | |
"__tzm__", | |
"__uig__", | |
"__ukr__", | |
"__umb__", | |
"__urd__", | |
"__uzn__", | |
"__vec__", | |
"__vie__", | |
"__war__", | |
"__wol__", | |
"__xho__", | |
"__ydd__", | |
"__yor__", | |
"__yue__", | |
"__cmn__", | |
"__cmn_Hant__", | |
"__zul__" | |
], | |
"bos_token": "<s>", | |
"clean_up_tokenization_spaces": true, | |
"cls_token": "<s>", | |
"eos_token": "</s>", | |
"model_max_length": 1000000000000000019884624838656, | |
"pad_token": "<pad>", | |
"processor_class": "SeamlessM4TProcessor", | |
"sep_token": "</s>", | |
"sp_model_kwargs": {}, | |
"src_lang": "__dan__", | |
"tgt_lang": "__fra__", | |
"tokenizer_class": "SeamlessM4TTokenizer", | |
"unk_token": "<unk>" | |
} | |