WhiteAngels committed
Commit f7f2bcd · verified · Parent: ce691b9

Update tokenizer_config.json

Files changed (1)
  1. tokenizer_config.json (+19, -1)
tokenizer_config.json CHANGED
@@ -23,16 +23,34 @@
       "rstrip": false,
       "single_word": false,
       "special": true
-    }
+    },
+    "3": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
   },
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "do_basic_tokenize": true,
   "do_lower_case": true,
+  "mask_token": "[MASK]",
   "max_len": 512,
   "model_max_length": 512,
   "never_split": null,
   "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",