danielhanchen committed (verified)
Commit a2446d5 · Parent(s): 3e3e29f

Add files using upload-large-folder tool

added_tokens.json CHANGED
@@ -9,6 +9,5 @@
   "<|tool_response|>": 200027,
   "<|tool|>": 200023,
   "<|user|>": 200021,
-  "<|PAD▁TOKEN|>": 200030,
-  "�": 200029
+  "<|PAD▁TOKEN|>": 200029
 }
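The net effect is that <|PAD▁TOKEN|> moves from id 200030 to 200029, reusing the slot freed by the removed "�" entry. A minimal sketch of how to verify the new mapping, assuming a local checkout of this repository at "./model" (a hypothetical path, not part of the commit):

import json

# Load the updated added_tokens.json from a local checkout
# ("./model" is a hypothetical path).
with open("./model/added_tokens.json", encoding="utf-8") as f:
    added_tokens = json.load(f)

# After this commit the pad token occupies id 200029 and the
# stray "�" entry is gone from added_tokens.json.
assert added_tokens["<|PAD▁TOKEN|>"] == 200029
assert "\ufffd" not in added_tokens  # "�" is U+FFFD REPLACEMENT CHARACTER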
special_tokens_map.json CHANGED
@@ -14,5 +14,5 @@
     "single_word": false
   },
   "pad_token": "<|PAD▁TOKEN|>",
-  "unk_token": ""
+  "unk_token": "�"
 }
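When the tokenizer is loaded through transformers, these values surface directly on the tokenizer object. A quick check, again assuming the hypothetical local path "./model":

from transformers import AutoTokenizer

# "./model" is a hypothetical local checkout of this repository.
tok = AutoTokenizer.from_pretrained("./model")

# special_tokens_map.json now declares U+FFFD as the unk token
# alongside the dedicated pad token.
assert tok.pad_token == "<|PAD▁TOKEN|>"
assert tok.unk_token == "\ufffd"  # renders as "�"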
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:085a99d5283b36631e74138d06b87d31eb024dd2f96f89145690edbba94cadd7
-size 15524471
+oid sha256:37b10016a39382ff2d24acc20a291ed83243a26c4549ab01f6240e72c6291d56
+size 15524472
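tokenizer.json is stored via Git LFS, so the diff above only changes the pointer file: the oid is the SHA-256 of the real payload. One way to check that a downloaded tokenizer.json matches the new pointer, sketched with only the standard library:

import hashlib

# Hash the actual tokenizer.json payload (not the LFS pointer file)
# and compare against the oid/size recorded in the new pointer.
with open("tokenizer.json", "rb") as f:
    data = f.read()

assert hashlib.sha256(data).hexdigest() == (
    "37b10016a39382ff2d24acc20a291ed83243a26c4549ab01f6240e72c6291d56"
)
assert len(data) == 15524472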
tokenizer_config.json CHANGED
@@ -3,6 +3,14 @@
   "add_eos_token": false,
   "add_prefix_space": false,
   "added_tokens_decoder": {
+    "3251": {
+      "content": "�",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
     "199999": {
       "content": "<|endoftext|>",
       "lstrip": false,
@@ -100,14 +108,6 @@
       "special": true
     },
     "200029": {
-      "content": "�",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "200030": {
       "content": "<|PAD▁TOKEN|>",
       "lstrip": false,
       "normalized": false,
@@ -117,7 +117,7 @@
     }
   },
   "bos_token": "<|endoftext|>",
-  "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}",
+  "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|endoftext|>",
   "extra_special_tokens": {},
@@ -125,5 +125,5 @@
   "pad_token": "<|PAD▁TOKEN|>",
   "padding_side": "left",
   "tokenizer_class": "GPT2Tokenizer",
-  "unk_token": ""
+  "unk_token": "�"
 }
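The chat_template edit removes the trailing {% else %}{{ eos_token }} branch, so rendering with add_generation_prompt=False no longer appends <|endoftext|> after the conversation. A sketch of the behavioral difference, rendering the new template directly with Jinja2 rather than through any particular model repo:

from jinja2 import Template

# The new template from this commit (the old one ended with
# "{% else %}{{ eos_token }}{% endif %}" instead).
new_template = (
    "{% for message in messages %}"
    "{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}"
    "{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}"
    "{% else %}"
    "{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}"
    "{% endif %}{% endfor %}"
    "{% if add_generation_prompt %}{{ '<|assistant|>' }}{% endif %}"
)

messages = [{"role": "user", "content": "Hi"}]
out = Template(new_template).render(messages=messages, add_generation_prompt=False)

# With the old template this string would have ended in the eos
# token ("<|endoftext|>"); now nothing is appended.
assert out == "<|user|>Hi<|end|>"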