yartyjung committed on
Commit
31312d6
1 Parent(s): e7a25a1

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -4,9 +4,27 @@
4
  "</s>NOTUSED",
5
  "<_>"
6
  ],
7
- "bos_token": "<s>",
8
- "cls_token": "<s>",
9
- "eos_token": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  "mask_token": {
11
  "content": "<mask>",
12
  "lstrip": true,
@@ -14,7 +32,25 @@
14
  "rstrip": false,
15
  "single_word": false
16
  },
17
- "pad_token": "<pad>",
18
- "sep_token": "</s>",
19
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  }
 
4
  "</s>NOTUSED",
5
  "<_>"
6
  ],
7
+ "bos_token": {
8
+ "content": "<s>",
9
+ "lstrip": false,
10
+ "normalized": false,
11
+ "rstrip": false,
12
+ "single_word": false
13
+ },
14
+ "cls_token": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "eos_token": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ },
28
  "mask_token": {
29
  "content": "<mask>",
30
  "lstrip": true,
 
32
  "rstrip": false,
33
  "single_word": false
34
  },
35
+ "pad_token": {
36
+ "content": "<pad>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false
41
+ },
42
+ "sep_token": {
43
+ "content": "</s>",
44
+ "lstrip": false,
45
+ "normalized": false,
46
+ "rstrip": false,
47
+ "single_word": false
48
+ },
49
+ "unk_token": {
50
+ "content": "<unk>",
51
+ "lstrip": false,
52
+ "normalized": false,
53
+ "rstrip": false,
54
+ "single_word": false
55
+ }
56
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -75,8 +75,12 @@
75
  "cls_token": "<s>",
76
  "eos_token": "</s>",
77
  "mask_token": "<mask>",
 
78
  "model_max_length": 1000000000000000019884624838656,
 
79
  "pad_token": "<pad>",
 
 
80
  "sep_token": "</s>",
81
  "sp_model_kwargs": {},
82
  "tokenizer_class": "CamembertTokenizer",
 
75
  "cls_token": "<s>",
76
  "eos_token": "</s>",
77
  "mask_token": "<mask>",
78
+ "max_length": null,
79
  "model_max_length": 1000000000000000019884624838656,
80
+ "pad_to_multiple_of": null,
81
  "pad_token": "<pad>",
82
+ "pad_token_type_id": 0,
83
+ "padding_side": "right",
84
  "sep_token": "</s>",
85
  "sp_model_kwargs": {},
86
  "tokenizer_class": "CamembertTokenizer",