Commit ad2a944 by debajyotidatta
Parent(s): cc438e1

Upload tokenizer
Files changed (4):
  1. special_tokens_map.json (+0, -2)
  2. tokenizer.json (+0, -0)
  3. tokenizer_config.json (+7, -10)
  4. vocab.txt (+0, -0)
special_tokens_map.json CHANGED
@@ -1,7 +1,5 @@
 {
-  "bos_token": "[CLS]",
   "cls_token": "[CLS]",
-  "eos_token": "[SEP]",
   "mask_token": "[MASK]",
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
tokenizer.json CHANGED
The diff for this file is too large to render. See the raw diff.
tokenizer_config.json CHANGED
@@ -1,17 +1,14 @@
 {
-  "bos_token": "[CLS]",
   "cls_token": "[CLS]",
-  "do_lower_case": false,
-  "eos_token": "[SEP]",
+  "do_lower_case": true,
   "mask_token": "[MASK]",
-  "model_max_length": 1000000000000000019884624838656,
-  "name_or_path": "microsoft/deberta-v3-base",
+  "model_max_length": 512,
+  "name_or_path": "bert-base-uncased",
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
-  "sp_model_kwargs": {},
   "special_tokens_map_file": null,
-  "split_by_punct": false,
-  "tokenizer_class": "DebertaV2Tokenizer",
-  "unk_token": "[UNK]",
-  "vocab_type": "spm"
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
 }
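For context on the old value: transformers uses int(1e30) as its "no limit" sentinel for model_max_length, which serializes as the 1000000000000000019884624838656 seen above. The new config replaces it with BERT's real 512-token limit and swaps the tokenizer class from DebertaV2Tokenizer to BertTokenizer. A quick sketch of the resulting behavior, again with a placeholder repo id:

```python
from transformers import AutoTokenizer

# Placeholder repo id for illustration.
tok = AutoTokenizer.from_pretrained("debajyotidatta/example-model")

print(type(tok).__name__)    # likely BertTokenizerFast, the fast wrapper for BertTokenizer
print(tok.model_max_length)  # 512, replacing the old unbounded sentinel

# With a finite limit in place, truncation clips long inputs at 512 tokens.
ids = tok("word " * 1000, truncation=True)["input_ids"]
print(len(ids))              # 512
```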
vocab.txt ADDED
The diff for this file is too large to render. See the raw diff.
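The added vocab.txt is the plain-text WordPiece vocabulary that BertTokenizer expects, replacing the SentencePiece model implied by the removed "vocab_type": "spm". The format is one token per line, with the 0-based line number serving as the token id. A small sketch of that mapping, assuming a bert-base-uncased-style file:

```python
# Build the token -> id mapping the way BertTokenizer reads vocab.txt:
# each line is one token, and its 0-based line number is its id.
with open("vocab.txt", encoding="utf-8") as f:
    vocab = {line.rstrip("\n"): idx for idx, line in enumerate(f)}

print(vocab["[PAD]"])  # 0 in bert-base-uncased-style vocabularies
print(len(vocab))      # 30522 for the stock bert-base-uncased vocabulary
```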