BUAADreamer committed
Commit
cbb55de
1 Parent(s): 6ca20df

Upload tokenizer

special_tokens_map.json CHANGED
@@ -2,7 +2,14 @@
   "additional_special_tokens": [
     "<|im_start|>",
     "<|im_end|>",
-    "<|im_sep|>"
+    "<|im_sep|>",
+    {
+      "content": "###",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
   ],
   "bos_token": {
     "content": "<|startoftext|>",
tokenizer.json CHANGED
@@ -57,6 +57,15 @@
       "normalized": false,
       "special": true
     },
+    {
+      "id": 8308,
+      "content": "###",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
     {
       "id": 64000,
       "content": "<image>",
tokenizer_config.json CHANGED
@@ -51,6 +51,14 @@
       "single_word": false,
       "special": true
     },
+    "8308": {
+      "content": "###",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
     "64000": {
       "content": "<image>",
       "lstrip": false,
@@ -71,19 +79,21 @@
   "additional_special_tokens": [
     "<|im_start|>",
     "<|im_end|>",
-    "<|im_sep|>"
+    "<|im_sep|>",
+    "###"
   ],
   "bos_token": "<|startoftext|>",
-  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+  "chat_template": "{% set system_message = 'This is a chat between an inquisitive human and an AI assistant. Assume the role of the AI assistant. Read all the images carefully, and respond to the human\\'s questions with informative, helpful, detailed and polite answers.这是一个好奇的人类和一个人工智能助手之间的对话。假设你扮演这个AI助手的角色。仔细阅读所有的图像,并对人类的问题做出信息丰富、有帮助、详细的和礼貌的回答。\\n\\n' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '### Human: ' + content + '\\n### Assistant:' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content }}{% endif %}{% endfor %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|endoftext|>",
   "legacy": true,
   "model_max_length": 4096,
   "pad_token": "<pad>",
-  "padding_side": "right",
+  "padding_side": "left",
   "processor_class": "LlavaProcessor",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
+  "split_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",
   "use_default_system_prompt": true