Sync with official tokenizer_config.json
#3
by
so-anyway
- opened
- tokenizer_config.json +3 -3
tokenizer_config.json
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
{
|
|
|
2 |
"added_tokens_decoder": {
|
3 |
"120000": {
|
4 |
"content": "<|eos|>",
|
@@ -211,10 +212,9 @@
|
|
211 |
},
|
212 |
"chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}",
|
213 |
"clean_up_tokenization_spaces": true,
|
214 |
-
"eos_token": "<|turn_end|>",
|
215 |
-
"extra_special_tokens": {},
|
216 |
"model_max_length": 32768,
|
217 |
"pad_token": "<|pad|>",
|
218 |
"padding_side": "right",
|
219 |
"tokenizer_class": "PreTrainedTokenizerFast"
|
220 |
-
}
|
|
|
1 |
{
|
2 |
+
"add_bos_token": false,
|
3 |
"added_tokens_decoder": {
|
4 |
"120000": {
|
5 |
"content": "<|eos|>",
|
|
|
212 |
},
|
213 |
"chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}",
|
214 |
"clean_up_tokenization_spaces": true,
|
215 |
+
"eos_token": "<|turn_end|>",
|
|
|
216 |
"model_max_length": 32768,
|
217 |
"pad_token": "<|pad|>",
|
218 |
"padding_side": "right",
|
219 |
"tokenizer_class": "PreTrainedTokenizerFast"
|
220 |
+
}
|