so-anyway committed on
Commit
7d452dc
1 Parent(s): b28973c

Sync with official tokenizer_config.json

Browse files

Hi,
The official `tokenizer_config.json` has been updated, and I was wondering if you could sync this copy with it?
It includes some fixes, such as correcting `eos_token` from `<|turn_end>` to `<|turn_end|>`. Thank you.

Files changed (1) hide show
  1. tokenizer_config.json +3 -3
tokenizer_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "added_tokens_decoder": {
3
  "120000": {
4
  "content": "<|eos|>",
@@ -211,10 +212,9 @@
211
  },
212
  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}",
213
  "clean_up_tokenization_spaces": true,
214
- "eos_token": "<|turn_end>",
215
- "extra_special_tokens": {},
216
  "model_max_length": 32768,
217
  "pad_token": "<|pad|>",
218
  "padding_side": "right",
219
  "tokenizer_class": "PreTrainedTokenizerFast"
220
- }
 
1
  {
2
+ "add_bos_token": false,
3
  "added_tokens_decoder": {
4
  "120000": {
5
  "content": "<|eos|>",
 
212
  },
213
  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}",
214
  "clean_up_tokenization_spaces": true,
215
+ "eos_token": "<|turn_end|>",
 
216
  "model_max_length": 32768,
217
  "pad_token": "<|pad|>",
218
  "padding_side": "right",
219
  "tokenizer_class": "PreTrainedTokenizerFast"
220
+ }