mt5-hihi / tokenizer_config.json
balaramas's picture
Training in progress, step 500
f21c78c verified
{
"added_tokens_decoder": {
"0": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250000": {
"content": "▁<extra_id_99>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250001": {
"content": "▁<extra_id_98>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250002": {
"content": "▁<extra_id_97>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250003": {
"content": "▁<extra_id_96>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250004": {
"content": "▁<extra_id_95>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250005": {
"content": "▁<extra_id_94>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250006": {
"content": "▁<extra_id_93>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250007": {
"content": "▁<extra_id_92>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250008": {
"content": "▁<extra_id_91>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250009": {
"content": "▁<extra_id_90>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250010": {
"content": "▁<extra_id_89>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250011": {
"content": "▁<extra_id_88>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250012": {
"content": "▁<extra_id_87>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250013": {
"content": "▁<extra_id_86>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250014": {
"content": "▁<extra_id_85>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250015": {
"content": "▁<extra_id_84>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250016": {
"content": "▁<extra_id_83>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250017": {
"content": "▁<extra_id_82>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250018": {
"content": "▁<extra_id_81>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250019": {
"content": "▁<extra_id_80>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250020": {
"content": "▁<extra_id_79>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250021": {
"content": "▁<extra_id_78>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250022": {
"content": "▁<extra_id_77>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250023": {
"content": "▁<extra_id_76>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250024": {
"content": "▁<extra_id_75>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250025": {
"content": "▁<extra_id_74>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250026": {
"content": "▁<extra_id_73>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250027": {
"content": "▁<extra_id_72>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250028": {
"content": "▁<extra_id_71>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250029": {
"content": "▁<extra_id_70>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250030": {
"content": "▁<extra_id_69>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250031": {
"content": "▁<extra_id_68>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250032": {
"content": "▁<extra_id_67>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250033": {
"content": "▁<extra_id_66>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250034": {
"content": "▁<extra_id_65>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250035": {
"content": "▁<extra_id_64>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250036": {
"content": "▁<extra_id_63>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250037": {
"content": "▁<extra_id_62>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250038": {
"content": "▁<extra_id_61>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250039": {
"content": "▁<extra_id_60>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250040": {
"content": "▁<extra_id_59>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250041": {
"content": "▁<extra_id_58>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250042": {
"content": "▁<extra_id_57>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250043": {
"content": "▁<extra_id_56>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"250044": {
"content": "▁<extra_id_55>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"▁<extra_id_64>",
"▁<extra_id_95>",
"▁<extra_id_92>",
"▁<extra_id_57>",
"▁<extra_id_66>",
"▁<extra_id_59>",
"▁<extra_id_55>",
"▁<extra_id_69>",
"▁<extra_id_89>",
"▁<extra_id_72>",
"▁<extra_id_56>",
"▁<extra_id_78>",
"▁<extra_id_90>",
"▁<extra_id_98>",
"▁<extra_id_62>",
"▁<extra_id_99>",
"▁<extra_id_70>",
"▁<extra_id_94>",
"▁<extra_id_86>",
"▁<extra_id_79>",
"▁<extra_id_58>",
"▁<extra_id_65>",
"▁<extra_id_76>",
"▁<extra_id_85>",
"▁<extra_id_60>",
"▁<extra_id_82>",
"▁<extra_id_63>",
"▁<extra_id_75>",
"▁<extra_id_71>",
"▁<extra_id_88>",
"▁<extra_id_68>",
"▁<extra_id_80>",
"▁<extra_id_96>",
"▁<extra_id_81>",
"▁<extra_id_67>",
"▁<extra_id_77>",
"▁<extra_id_93>",
"▁<extra_id_83>",
"▁<extra_id_84>",
"▁<extra_id_97>",
"▁<extra_id_61>",
"▁<extra_id_91>",
"▁<extra_id_87>",
"▁<extra_id_73>",
"▁<extra_id_74>"
],
"clean_up_tokenization_spaces": true,
"eos_token": "</s>",
"extra_ids": 0,
"legacy": true,
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<pad>",
"sp_model_kwargs": {},
"tokenizer_class": "T5Tokenizer",
"unk_token": "<unk>"
}