Chakita committed on
Commit
9a10dcc
1 Parent(s): ebb8547

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +12 -0
  2. tokenizer.json +2 -2
  3. tokenizer_config.json +13 -0
special_tokens_map.json CHANGED
@@ -1,4 +1,16 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
  "pad_token": "<pad>",
 
1
  {
2
+ "additional_special_tokens": [
3
+ "number0",
4
+ "number1",
5
+ "number2",
6
+ "number3",
7
+ "number4",
8
+ "number5",
9
+ "number6",
10
+ "number7",
11
+ "number8",
12
+ "number9"
13
+ ],
14
  "bos_token": "<s>",
15
  "eos_token": "</s>",
16
  "pad_token": "<pad>",
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c1abd73425d69d27b6933af4fa2a004568434169689b37d1314c6ca3a1d2a7f
3
- size 14500541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98b43f068807538425855716f1fb28c325234defb3f651ab3513dd37ba4ffb03
3
+ size 14502381
tokenizer_config.json CHANGED
@@ -1,5 +1,18 @@
1
  {
2
  "add_prefix_space": false,
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "bos_token": "<s>",
4
  "eos_token": "</s>",
5
  "name_or_path": "bigscience/bloom-560m",
 
1
  {
2
  "add_prefix_space": false,
3
+ "add_special_tokens": true,
4
+ "additional_special_tokens": [
5
+ "number0",
6
+ "number1",
7
+ "number2",
8
+ "number3",
9
+ "number4",
10
+ "number5",
11
+ "number6",
12
+ "number7",
13
+ "number8",
14
+ "number9"
15
+ ],
16
  "bos_token": "<s>",
17
  "eos_token": "</s>",
18
  "name_or_path": "bigscience/bloom-560m",