er-test / tokenizer_config.json
joheras's picture
Upload tokenizer
6fb0053 verified
{
"added_tokens_decoder": {
"0": {
"content": "[PAD]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"100": {
"content": "[UNK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"101": {
"content": "[CLS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"102": {
"content": "[SEP]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"103": {
"content": "[MASK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"119547": {
"content": "<S:PHY>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119548": {
"content": "</S:PHY>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119549": {
"content": "<O:PHY>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119550": {
"content": "</O:PHY>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119551": {
"content": "<S:DEV>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119552": {
"content": "</S:DEV>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119553": {
"content": "<O:DEV>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119554": {
"content": "</O:DEV>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119555": {
"content": "<S:CHE>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119556": {
"content": "</S:CHE>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119557": {
"content": "<O:CHE>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119558": {
"content": "</O:CHE>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119559": {
"content": "<S:Qua>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119560": {
"content": "</S:Qua>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119561": {
"content": "<O:Qua>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119562": {
"content": "</O:Qua>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119563": {
"content": "<S:CON>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119564": {
"content": "</S:CON>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119565": {
"content": "<O:CON>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119566": {
"content": "</O:CON>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119567": {
"content": "<S:Dos>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119568": {
"content": "</S:Dos>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119569": {
"content": "<O:Dos>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119570": {
"content": "</O:Dos>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119571": {
"content": "<S:Foo>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119572": {
"content": "</S:Foo>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119573": {
"content": "<O:Foo>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119574": {
"content": "</O:Foo>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119575": {
"content": "<S:Age>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119576": {
"content": "</S:Age>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119577": {
"content": "<O:Age>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119578": {
"content": "</O:Age>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119579": {
"content": "<S:Rou>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119580": {
"content": "</S:Rou>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119581": {
"content": "<O:Rou>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119582": {
"content": "</O:Rou>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119583": {
"content": "<S:LIV>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119584": {
"content": "</S:LIV>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119585": {
"content": "<O:LIV>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119586": {
"content": "</O:LIV>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119587": {
"content": "<S:Obs>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119588": {
"content": "</S:Obs>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119589": {
"content": "<O:Obs>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119590": {
"content": "</O:Obs>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119591": {
"content": "<S:GEN>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119592": {
"content": "</S:GEN>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119593": {
"content": "<O:GEN>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119594": {
"content": "</O:GEN>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119595": {
"content": "<S:Fre>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119596": {
"content": "</S:Fre>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119597": {
"content": "<O:Fre>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119598": {
"content": "</O:Fre>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119599": {
"content": "<S:PRO>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119600": {
"content": "</S:PRO>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119601": {
"content": "<O:PRO>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119602": {
"content": "</O:PRO>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119603": {
"content": "<S:For>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119604": {
"content": "</S:For>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119605": {
"content": "<O:For>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119606": {
"content": "</O:For>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119607": {
"content": "<S:Dur>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119608": {
"content": "</S:Dur>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119609": {
"content": "<O:Dur>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119610": {
"content": "</O:Dur>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119611": {
"content": "<S:DIS>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119612": {
"content": "</S:DIS>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119613": {
"content": "<O:DIS>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119614": {
"content": "</O:DIS>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119615": {
"content": "<S:ANA>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119616": {
"content": "</S:ANA>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119617": {
"content": "<O:ANA>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"119618": {
"content": "</O:ANA>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
}
},
"clean_up_tokenization_spaces": true,
"cls_token": "[CLS]",
"do_lower_case": false,
"mask_token": "[MASK]",
"model_max_length": 512,
"pad_token": "[PAD]",
"sep_token": "[SEP]",
"strip_accents": null,
"tokenize_chinese_chars": true,
"tokenizer_class": "BertTokenizer",
"unk_token": "[UNK]"
}