Add '<|im_end|>' to the tokenizer as a special token, fixing the problem where generation did not stop when <|im_end|> was encountered.
Browse files- tokenizer.json +9 -0
tokenizer.json
CHANGED
@@ -2306,6 +2306,15 @@
|
|
2306 |
"rstrip": false,
|
2307 |
"normalized": false,
|
2308 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2309 |
}
|
2310 |
],
|
2311 |
"normalizer": null,
|
|
|
2306 |
"rstrip": false,
|
2307 |
"normalized": false,
|
2308 |
"special": true
|
2309 |
+
},
|
2310 |
+
{
|
2311 |
+
"id": 128256,
|
2312 |
+
"content": "<|im_end|>",
|
2313 |
+
"single_word": false,
|
2314 |
+
"lstrip": false,
|
2315 |
+
"rstrip": false,
|
2316 |
+
"normalized": false,
|
2317 |
+
"special": true
|
2318 |
}
|
2319 |
],
|
2320 |
"normalizer": null,
|