add tokenizer
Browse files

- added_tokens.json +1 -3
- special_tokens_map.json +1 -9
- tokenizer_config.json +1 -16
added_tokens.json
CHANGED
@@ -1,3 +1 @@
-{
-  "[MASK]": 128000
-}
+{"[MASK]": 128000}
special_tokens_map.json
CHANGED
@@ -1,9 +1 @@
-{
-  "bos_token": "[CLS]",
-  "cls_token": "[CLS]",
-  "eos_token": "[SEP]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "unk_token": "[UNK]"
-}
+{"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer_config.json
CHANGED
@@ -1,16 +1 @@
-{
-  "bos_token": "[CLS]",
-  "cls_token": "[CLS]",
-  "do_lower_case": false,
-  "eos_token": "[SEP]",
-  "mask_token": "[MASK]",
-  "name_or_path": "deberta-v3-large-wnut2017",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "sp_model_kwargs": {},
-  "special_tokens_map_file": null,
-  "split_by_punct": false,
-  "tokenizer_class": "DebertaV2Tokenizer",
-  "unk_token": "[UNK]",
-  "vocab_type": "spm"
-}
+{"do_lower_case": false, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "split_by_punct": false, "sp_model_kwargs": {}, "name_or_path": "deberta-v3-large-wnut2017", "special_tokens_map_file": null, "vocab_type": "spm", "tokenizer_file": "deberta-v3-large-wnut2017/tokenizer.json", "tokenizer_class": "DebertaV2Tokenizer"}