conan1024hao
committed on
Commit
•
d0eec08
1
Parent(s):
69fc5aa
support sentencepiece tokenizer
Browse files
- tokenizer_config.json +1 -1
- vocab.txt +0 -0
tokenizer_config.json
CHANGED
@@ -20,6 +20,6 @@
|
|
20 |
"special_tokens_map_file": null,
|
21 |
"tokenizer_class": "BertJapaneseTokenizer",
|
22 |
"word_tokenizer_type": "jumanpp",
|
23 |
-
"subword_tokenizer_type": "
|
24 |
"jumanpp_kwargs": {}
|
25 |
}
|
|
|
20 |
"special_tokens_map_file": null,
|
21 |
"tokenizer_class": "BertJapaneseTokenizer",
|
22 |
"word_tokenizer_type": "jumanpp",
|
23 |
+
"subword_tokenizer_type": "sentencepiece",
|
24 |
"jumanpp_kwargs": {}
|
25 |
}
|
vocab.txt
DELETED
The diff for this file is too large to render.
See raw diff
|
|