Megrez-3B-Omni / tokenizer_config.json
lizhiyuan
update model
a951ae0
{
"add_bos_token": false,
"add_eos_token": false,
"add_prefix_space": null,
"added_tokens_decoder": {
"120000": {
"content": "<|eos|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120001": {
"content": "<|unk|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120002": {
"content": "<|pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120003": {
"content": "<|role_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120004": {
"content": "<|role_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120005": {
"content": "<|turn_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120006": {
"content": "<|code_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120007": {
"content": "<|code_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120008": {
"content": "<|commit_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120009": {
"content": "<|commit_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120010": {
"content": "<|diff_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120011": {
"content": "<|diff_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120012": {
"content": "<|code_execution_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120013": {
"content": "<|code_execution_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120014": {
"content": "<|image_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120015": {
"content": "<|image_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120016": {
"content": "<|image_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120017": {
"content": "<|video_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120018": {
"content": "<|video_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120019": {
"content": "<|video_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120020": {
"content": "<|audio_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120021": {
"content": "<|audio_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120022": {
"content": "<|audio_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120023": {
"content": "<|function_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120024": {
"content": "<|function_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120025": {
"content": "<|slice_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120026": {
"content": "<|slice_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120027": {
"content": "<|image_id_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"120028": {
"content": "<|image_id_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"auto_map": {
"AutoProcessor": "processing_megrezo.MegrezOProcessor",
"AutoTokenizer": [
"tokenizer_wrapper.LlamaTokenizerWrapper",
null
]
},
"bos_token": null,
"chat_template": "{% set audio_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>{% endif %}<|role_start|>{{ message['role'] }}<|role_end|>{% if message['content'] is string %}{{ message['content'] }}{% else %}{% if 'image' in message['content'] %}{% if message['content']['image'] is sequence and message['content']['image'] is not string %}{% for image in message['content']['image'] %}(<image>./</image>)\n{% endfor %}{% else %}(<image>./</image>)\n{% endif %}{% endif %}{% if 'audio' in message['content'] %}{% if message['content']['audio'] is sequence and message['content']['audio'] is not string %}{% for audio in message['content']['audio'] %}{% set audio_count.value = audio_count.value + 1 %}Audio {{ audio_count.value }}: (<audio>./</audio>)\n{% endfor %}{% else %}{% set audio_count.value = audio_count.value + 1 %}Audio {{ audio_count.value }}: (<audio>./</audio>)\n{% endif %}{% endif %}{% if 'text' in message['content'] and message['content']['text'] %}{{ message['content']['text'] }}{% endif %}{% endif %}<|turn_end|>{% endfor %}{% if add_generation_prompt %}<|role_start|>assistant<|role_end|>{% endif %}",
"clean_up_tokenization_spaces": true,
"eos_token": "<|turn_end|>",
"legacy": true,
"model_max_length": 4096,
"processor_class": "MegrezOProcessor",
"tokenizer_class": "LlamaTokenizerWrapper",
"unk_token": "<|unk|>",
"pad_token": "<|pad|>",
"use_default_system_prompt": false
}