ZhankuiHe committed on
Commit
06bb141
·
verified ·
1 Parent(s): da2c971

Upload tokenizer

Browse files
dialog/special_tokens_map.json CHANGED
@@ -1,13 +1,4 @@
1
  {
2
- "additional_special_tokens": [
3
- {
4
- "content": "<item>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- }
10
- ],
11
  "bos_token": {
12
  "content": "<|endoftext|>",
13
  "lstrip": false,
 
1
  {
 
 
 
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<|endoftext|>",
4
  "lstrip": false,
dialog/tokenizer.json CHANGED
@@ -18,8 +18,8 @@
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
  }
24
  ],
25
  "normalizer": null,
 
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
+ "normalized": true,
22
+ "special": false
23
  }
24
  ],
25
  "normalizer": null,
dialog/tokenizer_config.json CHANGED
@@ -13,15 +13,12 @@
13
  "50257": {
14
  "content": "<item>",
15
  "lstrip": false,
16
- "normalized": false,
17
  "rstrip": false,
18
  "single_word": false,
19
- "special": true
20
  }
21
  },
22
- "additional_special_tokens": [
23
- "<item>"
24
- ],
25
  "bos_token": "<|endoftext|>",
26
  "chat_template": "{% for message in messages %}{{ message.role + '\n' + message.content | replace('<e>', '') | replace('</e>', '') | replace('_', ' ') }}{{ eos_token }}{% endfor %}{{ 'assistant\n' }}",
27
  "clean_up_tokenization_spaces": true,
 
13
  "50257": {
14
  "content": "<item>",
15
  "lstrip": false,
16
+ "normalized": true,
17
  "rstrip": false,
18
  "single_word": false,
19
+ "special": false
20
  }
21
  },
 
 
 
22
  "bos_token": "<|endoftext|>",
23
  "chat_template": "{% for message in messages %}{{ message.role + '\n' + message.content | replace('<e>', '') | replace('</e>', '') | replace('_', ' ') }}{{ eos_token }}{% endfor %}{{ 'assistant\n' }}",
24
  "clean_up_tokenization_spaces": true,
word/special_tokens_map.json CHANGED
@@ -1,13 +1,4 @@
1
  {
2
- "additional_special_tokens": [
3
- {
4
- "content": "<item>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- }
10
- ],
11
  "bos_token": "<s>",
12
  "cls_token": "<s>",
13
  "eos_token": "</s>",
 
1
  {
 
 
 
 
 
 
 
 
 
2
  "bos_token": "<s>",
3
  "cls_token": "<s>",
4
  "eos_token": "</s>",
word/tokenizer.json CHANGED
@@ -54,8 +54,8 @@
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
57
- "normalized": false,
58
- "special": true
59
  }
60
  ],
61
  "normalizer": null,
 
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
57
+ "normalized": true,
58
+ "special": false
59
  }
60
  ],
61
  "normalizer": null,
word/tokenizer_config.json CHANGED
@@ -44,15 +44,12 @@
44
  "50265": {
45
  "content": "<item>",
46
  "lstrip": false,
47
- "normalized": false,
48
  "rstrip": false,
49
  "single_word": false,
50
- "special": true
51
  }
52
  },
53
- "additional_special_tokens": [
54
- "<item>"
55
- ],
56
  "bos_token": "<s>",
57
  "chat_template": "{% for message in messages %}{{ message.role + '\n' + message.content | replace('<e>', '') | replace('</e>', '') | replace('_', ' ') }}{{ eos_token }}{% endfor %}",
58
  "clean_up_tokenization_spaces": true,
 
44
  "50265": {
45
  "content": "<item>",
46
  "lstrip": false,
47
+ "normalized": true,
48
  "rstrip": false,
49
  "single_word": false,
50
+ "special": false
51
  }
52
  },
 
 
 
53
  "bos_token": "<s>",
54
  "chat_template": "{% for message in messages %}{{ message.role + '\n' + message.content | replace('<e>', '') | replace('</e>', '') | replace('_', ' ') }}{{ eos_token }}{% endfor %}",
55
  "clean_up_tokenization_spaces": true,