Upload tokenizer
Browse files- tokenizer.json +7 -16
tokenizer.json
CHANGED
@@ -6943,23 +6943,13 @@
|
|
6943 |
"special": true
|
6944 |
}
|
6945 |
],
|
6946 |
-
"normalizer":
|
6947 |
-
|
6948 |
-
"
|
6949 |
-
|
6950 |
-
|
6951 |
-
|
6952 |
-
},
|
6953 |
-
{
|
6954 |
-
"type": "Replace",
|
6955 |
-
"pattern": {
|
6956 |
-
"String": " "
|
6957 |
-
},
|
6958 |
-
"content": "▁"
|
6959 |
-
}
|
6960 |
-
]
|
6961 |
},
|
6962 |
-
"pre_tokenizer": null,
|
6963 |
"post_processor": {
|
6964 |
"type": "TemplateProcessing",
|
6965 |
"single": [
|
@@ -7046,6 +7036,7 @@
|
|
7046 |
"end_of_word_suffix": null,
|
7047 |
"fuse_unk": true,
|
7048 |
"byte_fallback": true,
|
|
|
7049 |
"vocab": {
|
7050 |
"<unk>": 0,
|
7051 |
"<s>": 1,
|
|
|
6943 |
"special": true
|
6944 |
}
|
6945 |
],
|
6946 |
+
"normalizer": null,
|
6947 |
+
"pre_tokenizer": {
|
6948 |
+
"type": "Metaspace",
|
6949 |
+
"replacement": "▁",
|
6950 |
+
"prepend_scheme": "first",
|
6951 |
+
"split": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6952 |
},
|
|
|
6953 |
"post_processor": {
|
6954 |
"type": "TemplateProcessing",
|
6955 |
"single": [
|
|
|
7036 |
"end_of_word_suffix": null,
|
7037 |
"fuse_unk": true,
|
7038 |
"byte_fallback": true,
|
7039 |
+
"ignore_merges": false,
|
7040 |
"vocab": {
|
7041 |
"<unk>": 0,
|
7042 |
"<s>": 1,
|