{
  "do_lower_case": true,
  "remove_space": true,
  "keep_accents": true,
  "bos_token": "[CLS]",
  "eos_token": "[SEP]",
  "unk_token": "<unk>",
  "sep_token": "[SEP]",
  "pad_token": "<pad>",
  "cls_token": "[CLS]",
  "mask_token": {
    "content": "[MASK]",
    "single_word": false,
    "lstrip": true,
    "rstrip": false,
    "normalized": false,
    "__type": "AddedToken"
  },
  "tokenize_chinese_chars": false,
  "tokenizer_class": "DistilBertJapaneseTokenizer",
  "word_tokenizer_type": "mecab",
  "subword_tokenizer_type": "sentencepiece",
  "mecab_kwargs": {
    "mecab_dic": "unidic_lite"
  },
  "auto_map": {
    "AutoTokenizer": [
      "distilbert_japanese_tokenizer.DistilBertJapaneseTokenizer",
      null
    ]
  }
}