vietdata committed on
Commit
e42d8f7
1 Parent(s): c6a834c

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +2 -8
tokenizer_config.json CHANGED
@@ -2,7 +2,6 @@
2
  "bos_token": "<s>",
3
  "clean_up_tokenization_spaces": true,
4
  "cls_token": "<s>",
5
- "do_lower_case": true,
6
  "eos_token": "</s>",
7
  "mask_token": {
8
  "__type": "AddedToken",
@@ -12,17 +11,12 @@
12
  "rstrip": false,
13
  "single_word": false
14
  },
15
- "max_length": 128,
16
  "model_max_length": 512,
17
- "pad_to_multiple_of": null,
18
  "pad_token": "<pad>",
19
- "pad_token_type_id": 0,
20
- "padding_side": "right",
21
  "sep_token": "</s>",
22
  "stride": 0,
23
- "strip_accents": null,
24
- "tokenize_chinese_chars": true,
25
- "tokenizer_class": "BertTokenizer",
26
  "truncation_side": "right",
27
  "truncation_strategy": "longest_first",
28
  "unk_token": "<unk>"
 
2
  "bos_token": "<s>",
3
  "clean_up_tokenization_spaces": true,
4
  "cls_token": "<s>",
 
5
  "eos_token": "</s>",
6
  "mask_token": {
7
  "__type": "AddedToken",
 
11
  "rstrip": false,
12
  "single_word": false
13
  },
14
+ "max_length": 256,
15
  "model_max_length": 512,
 
16
  "pad_token": "<pad>",
 
 
17
  "sep_token": "</s>",
18
  "stride": 0,
19
+ "tokenizer_class": "XLMRobertaTokenizer",
 
 
20
  "truncation_side": "right",
21
  "truncation_strategy": "longest_first",
22
  "unk_token": "<unk>"