Upload tokenizer
- tokenizer.json +4 -4
- tokenizer_config.json +1 -1
tokenizer.json
CHANGED
@@ -958,8 +958,8 @@
       {
         "type": "Metaspace",
         "replacement": "▁",
-        "
-        "
+        "add_prefix_space": true,
+        "prepend_scheme": "always"
       }
     ]
   },
@@ -1020,8 +1020,8 @@
   "decoder": {
     "type": "Metaspace",
     "replacement": "▁",
-    "
-    "
+    "add_prefix_space": true,
+    "prepend_scheme": "always"
   },
   "model": {
     "type": "Unigram",
tokenizer_config.json
CHANGED
@@ -930,7 +930,7 @@
   "clean_up_tokenization_spaces": true,
   "eos_token": "</s>",
   "extra_ids": 100,
-  "model_max_length":
+  "model_max_length": 512,
   "pad_token": "<pad>",
   "tokenizer_class": "T5Tokenizer",
   "unk_token": "<unk>"
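A hedged sketch of the effect of pinning `model_max_length`, assuming `transformers` is installed; the checkpoint path is a placeholder for wherever these updated files live (local directory or Hub repo):

```python
from transformers import AutoTokenizer

# Placeholder: substitute the directory or repo id holding the updated
# tokenizer_config.json and tokenizer.json.
tokenizer = AutoTokenizer.from_pretrained("path/to/checkpoint")

# tokenizer_config.json now sets model_max_length to 512, so the tokenizer
# reports the model's actual context window instead of a library fallback,
# and truncation=True clips inputs to that length.
print(tokenizer.model_max_length)  # 512
ids = tokenizer("word " * 2000, truncation=True)["input_ids"]
assert len(ids) <= tokenizer.model_max_length
```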