jtatman commited on
Commit
43a5c76
1 Parent(s): da1a7c2

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +16 -0
  2. tokenizer.json +18 -0
  3. tokenizer_config.json +20 -0
special_tokens_map.json CHANGED
@@ -1,4 +1,20 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<s>",
4
  "lstrip": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|im_end|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|im_start|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
  "bos_token": {
19
  "content": "<s>",
20
  "lstrip": false,
tokenizer.json CHANGED
@@ -29,6 +29,24 @@
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "normalizer": {
 
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
32
+ },
33
+ {
34
+ "id": 32000,
35
+ "content": "<|im_end|>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 32001,
44
+ "content": "<|im_start|>",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
  }
51
  ],
52
  "normalizer": {
tokenizer_config.json CHANGED
@@ -25,8 +25,28 @@
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  }
29
  },
 
 
 
 
30
  "bos_token": "<s>",
31
  "clean_up_tokenization_spaces": false,
32
  "eos_token": "</s>",
 
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": true
28
+ },
29
+ "32000": {
30
+ "content": "<|im_end|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "32001": {
38
+ "content": "<|im_start|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
  }
45
  },
46
+ "additional_special_tokens": [
47
+ "<|im_end|>",
48
+ "<|im_start|>"
49
+ ],
50
  "bos_token": "<s>",
51
  "clean_up_tokenization_spaces": false,
52
  "eos_token": "</s>",