rrjin committed on
Commit
e698fdb
1 Parent(s): 58a878d

Upload tokenizer_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +4 -24
tokenizer_config.json CHANGED
@@ -8,34 +8,14 @@
8
  "rstrip": false,
9
  "single_word": false,
10
  "special": true
11
- },
12
- "250680": {
13
- "content": "<|im_start|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "250681": {
21
- "content": "<|im_end|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
  }
28
  },
29
- "additional_special_tokens": [
30
- "<|im_start|>",
31
- "<|im_end|>"
32
- ],
33
- "bos_token": "<|im_start|>",
34
- "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
35
  "clean_up_tokenization_spaces": true,
36
- "eos_token": "<|im_end|>",
37
  "model_max_length": 1024,
38
- "pad_token": "<|im_end|>",
39
  "tokenizer_class": "GPT2Tokenizer",
40
  "unk_token": "<|endoftext|>"
41
  }
 
8
  "rstrip": false,
9
  "single_word": false,
10
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  }
12
  },
13
+ "bos_token": "<|endoftext|>",
14
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{{ system_message }}{% else %}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% elif (message['role'] == 'assistant') != (loop.index0 % 2 == 1) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{'\n\n' + 'Human: ' + message['content']}}{% elif message['role'] == 'assistant' %}{{'\n\n' + 'Assistant: ' + message['content']}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] == 'user' %}{{ '\n\n' + 'Assistant:' }}{% endif %}",
 
 
 
 
15
  "clean_up_tokenization_spaces": true,
16
+ "eos_token": "<|endoftext|>",
17
  "model_max_length": 1024,
18
+ "pad_token": "<|endoftext|>",
19
  "tokenizer_class": "GPT2Tokenizer",
20
  "unk_token": "<|endoftext|>"
21
  }