caiom committed on
Commit
e572a5e
1 Parent(s): 2fcbc9b

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,14 +1,15 @@
1
  {
2
- "<|assistant|>": 32001,
3
- "<|continue|>": 32009,
4
  "<|endoftext|>": 32000,
5
- "<|end|>": 32007,
6
- "<|function_call|>": 32005,
7
- "<|function_list|>": 32011,
8
- "<|function_output|>": 32003,
9
- "<|raw|>": 32008,
10
- "<|step|>": 32002,
11
- "<|system|>": 32006,
12
- "<|tag|>": 32004,
13
- "<|user|>": 32010
 
14
  }
 
1
  {
2
+ "<|assistant|>": 32002,
3
+ "<|continue|>": 32010,
4
  "<|endoftext|>": 32000,
5
+ "<|end|>": 32008,
6
+ "<|function_call|>": 32006,
7
+ "<|function_list|>": 32012,
8
+ "<|function_output|>": 32004,
9
+ "<|padding|>": 32001,
10
+ "<|raw|>": 32009,
11
+ "<|step|>": 32003,
12
+ "<|system|>": 32007,
13
+ "<|tag|>": 32005,
14
+ "<|user|>": 32011
15
  }
special_tokens_map.json CHANGED
@@ -86,6 +86,7 @@
86
  "single_word": false
87
  },
88
  "eos_token": "<|endoftext|>",
 
89
  "unk_token": {
90
  "content": "<unk>",
91
  "lstrip": false,
 
86
  "single_word": false
87
  },
88
  "eos_token": "<|endoftext|>",
89
+ "pad_token": "<|padding|>",
90
  "unk_token": {
91
  "content": "<unk>",
92
  "lstrip": false,
tokenizer.json CHANGED
@@ -41,6 +41,15 @@
41
  },
42
  {
43
  "id": 32001,
 
 
 
 
 
 
 
 
 
44
  "content": "<|assistant|>",
45
  "single_word": false,
46
  "lstrip": false,
@@ -49,7 +58,7 @@
49
  "special": true
50
  },
51
  {
52
- "id": 32002,
53
  "content": "<|step|>",
54
  "single_word": false,
55
  "lstrip": false,
@@ -58,7 +67,7 @@
58
  "special": true
59
  },
60
  {
61
- "id": 32003,
62
  "content": "<|function_output|>",
63
  "single_word": false,
64
  "lstrip": false,
@@ -67,7 +76,7 @@
67
  "special": true
68
  },
69
  {
70
- "id": 32004,
71
  "content": "<|tag|>",
72
  "single_word": false,
73
  "lstrip": false,
@@ -76,7 +85,7 @@
76
  "special": true
77
  },
78
  {
79
- "id": 32005,
80
  "content": "<|function_call|>",
81
  "single_word": false,
82
  "lstrip": false,
@@ -85,7 +94,7 @@
85
  "special": true
86
  },
87
  {
88
- "id": 32006,
89
  "content": "<|system|>",
90
  "single_word": false,
91
  "lstrip": false,
@@ -94,7 +103,7 @@
94
  "special": true
95
  },
96
  {
97
- "id": 32007,
98
  "content": "<|end|>",
99
  "single_word": false,
100
  "lstrip": false,
@@ -103,7 +112,7 @@
103
  "special": true
104
  },
105
  {
106
- "id": 32008,
107
  "content": "<|raw|>",
108
  "single_word": false,
109
  "lstrip": false,
@@ -112,7 +121,7 @@
112
  "special": true
113
  },
114
  {
115
- "id": 32009,
116
  "content": "<|continue|>",
117
  "single_word": false,
118
  "lstrip": false,
@@ -121,7 +130,7 @@
121
  "special": true
122
  },
123
  {
124
- "id": 32010,
125
  "content": "<|user|>",
126
  "single_word": false,
127
  "lstrip": false,
@@ -130,7 +139,7 @@
130
  "special": true
131
  },
132
  {
133
- "id": 32011,
134
  "content": "<|function_list|>",
135
  "single_word": false,
136
  "lstrip": false,
 
41
  },
42
  {
43
  "id": 32001,
44
+ "content": "<|padding|>",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ },
51
+ {
52
+ "id": 32002,
53
  "content": "<|assistant|>",
54
  "single_word": false,
55
  "lstrip": false,
 
58
  "special": true
59
  },
60
  {
61
+ "id": 32003,
62
  "content": "<|step|>",
63
  "single_word": false,
64
  "lstrip": false,
 
67
  "special": true
68
  },
69
  {
70
+ "id": 32004,
71
  "content": "<|function_output|>",
72
  "single_word": false,
73
  "lstrip": false,
 
76
  "special": true
77
  },
78
  {
79
+ "id": 32005,
80
  "content": "<|tag|>",
81
  "single_word": false,
82
  "lstrip": false,
 
85
  "special": true
86
  },
87
  {
88
+ "id": 32006,
89
  "content": "<|function_call|>",
90
  "single_word": false,
91
  "lstrip": false,
 
94
  "special": true
95
  },
96
  {
97
+ "id": 32007,
98
  "content": "<|system|>",
99
  "single_word": false,
100
  "lstrip": false,
 
103
  "special": true
104
  },
105
  {
106
+ "id": 32008,
107
  "content": "<|end|>",
108
  "single_word": false,
109
  "lstrip": false,
 
112
  "special": true
113
  },
114
  {
115
+ "id": 32009,
116
  "content": "<|raw|>",
117
  "single_word": false,
118
  "lstrip": false,
 
121
  "special": true
122
  },
123
  {
124
+ "id": 32010,
125
  "content": "<|continue|>",
126
  "single_word": false,
127
  "lstrip": false,
 
130
  "special": true
131
  },
132
  {
133
+ "id": 32011,
134
  "content": "<|user|>",
135
  "single_word": false,
136
  "lstrip": false,
 
139
  "special": true
140
  },
141
  {
142
+ "id": 32012,
143
  "content": "<|function_list|>",
144
  "single_word": false,
145
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -33,6 +33,14 @@
33
  "special": true
34
  },
35
  "32001": {
 
 
 
 
 
 
 
 
36
  "content": "<|assistant|>",
37
  "lstrip": false,
38
  "normalized": false,
@@ -40,7 +48,7 @@
40
  "single_word": false,
41
  "special": true
42
  },
43
- "32002": {
44
  "content": "<|step|>",
45
  "lstrip": false,
46
  "normalized": false,
@@ -48,7 +56,7 @@
48
  "single_word": false,
49
  "special": true
50
  },
51
- "32003": {
52
  "content": "<|function_output|>",
53
  "lstrip": false,
54
  "normalized": false,
@@ -56,7 +64,7 @@
56
  "single_word": false,
57
  "special": true
58
  },
59
- "32004": {
60
  "content": "<|tag|>",
61
  "lstrip": false,
62
  "normalized": false,
@@ -64,7 +72,7 @@
64
  "single_word": false,
65
  "special": true
66
  },
67
- "32005": {
68
  "content": "<|function_call|>",
69
  "lstrip": false,
70
  "normalized": false,
@@ -72,7 +80,7 @@
72
  "single_word": false,
73
  "special": true
74
  },
75
- "32006": {
76
  "content": "<|system|>",
77
  "lstrip": false,
78
  "normalized": false,
@@ -80,7 +88,7 @@
80
  "single_word": false,
81
  "special": true
82
  },
83
- "32007": {
84
  "content": "<|end|>",
85
  "lstrip": false,
86
  "normalized": false,
@@ -88,7 +96,7 @@
88
  "single_word": false,
89
  "special": true
90
  },
91
- "32008": {
92
  "content": "<|raw|>",
93
  "lstrip": false,
94
  "normalized": false,
@@ -96,7 +104,7 @@
96
  "single_word": false,
97
  "special": true
98
  },
99
- "32009": {
100
  "content": "<|continue|>",
101
  "lstrip": false,
102
  "normalized": false,
@@ -104,7 +112,7 @@
104
  "single_word": false,
105
  "special": true
106
  },
107
- "32010": {
108
  "content": "<|user|>",
109
  "lstrip": false,
110
  "normalized": false,
@@ -112,7 +120,7 @@
112
  "single_word": false,
113
  "special": true
114
  },
115
- "32011": {
116
  "content": "<|function_list|>",
117
  "lstrip": false,
118
  "normalized": false,
@@ -139,7 +147,7 @@
139
  "eos_token": "<|endoftext|>",
140
  "legacy": false,
141
  "model_max_length": 1000000000000000019884624838656,
142
- "pad_token": null,
143
  "padding_side": "right",
144
  "sp_model_kwargs": {},
145
  "tokenizer_class": "LlamaTokenizer",
 
33
  "special": true
34
  },
35
  "32001": {
36
+ "content": "<|padding|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "32002": {
44
  "content": "<|assistant|>",
45
  "lstrip": false,
46
  "normalized": false,
 
48
  "single_word": false,
49
  "special": true
50
  },
51
+ "32003": {
52
  "content": "<|step|>",
53
  "lstrip": false,
54
  "normalized": false,
 
56
  "single_word": false,
57
  "special": true
58
  },
59
+ "32004": {
60
  "content": "<|function_output|>",
61
  "lstrip": false,
62
  "normalized": false,
 
64
  "single_word": false,
65
  "special": true
66
  },
67
+ "32005": {
68
  "content": "<|tag|>",
69
  "lstrip": false,
70
  "normalized": false,
 
72
  "single_word": false,
73
  "special": true
74
  },
75
+ "32006": {
76
  "content": "<|function_call|>",
77
  "lstrip": false,
78
  "normalized": false,
 
80
  "single_word": false,
81
  "special": true
82
  },
83
+ "32007": {
84
  "content": "<|system|>",
85
  "lstrip": false,
86
  "normalized": false,
 
88
  "single_word": false,
89
  "special": true
90
  },
91
+ "32008": {
92
  "content": "<|end|>",
93
  "lstrip": false,
94
  "normalized": false,
 
96
  "single_word": false,
97
  "special": true
98
  },
99
+ "32009": {
100
  "content": "<|raw|>",
101
  "lstrip": false,
102
  "normalized": false,
 
104
  "single_word": false,
105
  "special": true
106
  },
107
+ "32010": {
108
  "content": "<|continue|>",
109
  "lstrip": false,
110
  "normalized": false,
 
112
  "single_word": false,
113
  "special": true
114
  },
115
+ "32011": {
116
  "content": "<|user|>",
117
  "lstrip": false,
118
  "normalized": false,
 
120
  "single_word": false,
121
  "special": true
122
  },
123
+ "32012": {
124
  "content": "<|function_list|>",
125
  "lstrip": false,
126
  "normalized": false,
 
147
  "eos_token": "<|endoftext|>",
148
  "legacy": false,
149
  "model_max_length": 1000000000000000019884624838656,
150
+ "pad_token": "<|padding|>",
151
  "padding_side": "right",
152
  "sp_model_kwargs": {},
153
  "tokenizer_class": "LlamaTokenizer",