ITT-AF-PLM-1.4B_v0.2 / tokenizer_config.json
ITT-AF's picture
Upload tokenizer
0c86a0e verified
raw
history blame
9.82 kB
{
"add_bos_token": false,
"add_prefix_space": false,
"added_tokens_decoder": {
"0": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<||bos||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "<||pad||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<||unk||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"4": {
"content": "<||unused1||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"5": {
"content": "<||unused2||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"6": {
"content": "<||unused3||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"7": {
"content": "<||unused4||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"8": {
"content": "<||unused5||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"9": {
"content": "<||unused6||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"10": {
"content": "<||unused7||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"11": {
"content": "<||unused8||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"12": {
"content": "<||unused9||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"13": {
"content": "<||unused10||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"14": {
"content": "<||unused11||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"15": {
"content": "<||unused12||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"16": {
"content": "<||unused13||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"17": {
"content": "<||unused14||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"18": {
"content": "<||unused15||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"19": {
"content": "<||unused16||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"20": {
"content": "<||unused17||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"21": {
"content": "<||unused18||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"22": {
"content": "<||unused19||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"23": {
"content": "<||unused20||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"24": {
"content": "<||unused21||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"25": {
"content": "<||unused22||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"26": {
"content": "<||unused23||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"27": {
"content": "<||unused24||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"28": {
"content": "<||unused25||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"29": {
"content": "<||unused26||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"30": {
"content": "<||unused27||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"31": {
"content": "<||unused28||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32": {
"content": "<||unused29||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"33": {
"content": "<||unused30||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"34": {
"content": "<||unused31||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"35": {
"content": "<||unused32||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"36": {
"content": "<||unused33||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"37": {
"content": "<||unused34||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"38": {
"content": "<||unused35||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"39": {
"content": "<||unused36||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"40": {
"content": "<||unused37||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"41": {
"content": "<||unused38||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"42": {
"content": "<||unused39||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"43": {
"content": "<||unused40||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"44": {
"content": "<||unused41||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"45": {
"content": "<||unused42||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"46": {
"content": "<||unused43||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"47": {
"content": "<||unused44||>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<||unused1||>",
"<||unused2||>",
"<||unused3||>",
"<||unused4||>",
"<||unused5||>",
"<||unused6||>",
"<||unused7||>",
"<||unused8||>",
"<||unused9||>",
"<||unused10||>",
"<||unused11||>",
"<||unused12||>",
"<||unused13||>",
"<||unused14||>",
"<||unused15||>",
"<||unused16||>",
"<||unused17||>",
"<||unused18||>",
"<||unused19||>",
"<||unused20||>",
"<||unused21||>",
"<||unused22||>",
"<||unused23||>",
"<||unused24||>",
"<||unused25||>",
"<||unused26||>",
"<||unused27||>",
"<||unused28||>",
"<||unused29||>",
"<||unused30||>",
"<||unused31||>",
"<||unused32||>",
"<||unused33||>",
"<||unused34||>",
"<||unused35||>",
"<||unused36||>",
"<||unused37||>",
"<||unused38||>",
"<||unused39||>",
"<||unused40||>",
"<||unused41||>",
"<||unused42||>",
"<||unused43||>",
"<||unused44||>"
],
"bos_token": "<||bos||>",
"clean_up_tokenization_spaces": true,
"eos_token": "<|endoftext|>",
"errors": "replace",
"model_max_length": 8192,
"pad_token": "<||pad||>",
"padding_side": "right",
"tokenizer_class": "GPT2Tokenizer",
"unk_token": "<||unk||>"
}