finiteautomata committed on
Commit
924fc4c
1 Parent(s): 2be86ba

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1 +1,3 @@
1
- {"<mask>": 64000}
 
 
 
1
+ {
2
+ "<mask>": 64000
3
+ }
special_tokens_map.json CHANGED
@@ -1 +1,9 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": "<mask>"}
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": "<mask>",
6
+ "pad_token": "<pad>",
7
+ "sep_token": "</s>",
8
+ "unk_token": "<unk>"
9
+ }
tokenizer_config.json CHANGED
@@ -1 +1,14 @@
1
- {"normalization": false, "bos_token": "<s>", "eos_token": "</s>", "sep_token": "</s>", "cls_token": "<s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>", "model_max_length": 128, "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "vinai/bertweet-base"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": "<mask>",
6
+ "model_max_length": 128,
7
+ "name_or_path": "vinai/bertweet-base",
8
+ "normalization": false,
9
+ "pad_token": "<pad>",
10
+ "sep_token": "</s>",
11
+ "special_tokens_map_file": null,
12
+ "tokenizer_class": "BertweetTokenizer",
13
+ "unk_token": "<unk>"
14
+ }