Evelyn18 committed on
Commit
2829c34
1 Parent(s): 9dbd558

Upload tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +67 -0
tokenizer_config.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "cls_token": {
12
+ "__type": "AddedToken",
13
+ "content": "<s>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "do_lower_case": false,
20
+ "eos_token": {
21
+ "__type": "AddedToken",
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": true,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ },
28
+ "errors": "replace",
29
+ "mask_token": {
30
+ "__type": "AddedToken",
31
+ "content": "<mask>",
32
+ "lstrip": true,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "max_len": 512,
38
+ "max_length": 512,
39
+ "name_or_path": "IIC/roberta-base-spanish-squades",
40
+ "pad_token": {
41
+ "__type": "AddedToken",
42
+ "content": "<pad>",
43
+ "lstrip": false,
44
+ "normalized": true,
45
+ "rstrip": false,
46
+ "single_word": false
47
+ },
48
+ "sep_token": {
49
+ "__type": "AddedToken",
50
+ "content": "</s>",
51
+ "lstrip": false,
52
+ "normalized": true,
53
+ "rstrip": false,
54
+ "single_word": false
55
+ },
56
+ "special_tokens_map_file": null,
57
+ "tokenizer_class": "RobertaTokenizer",
58
+ "trim_offsets": true,
59
+ "unk_token": {
60
+ "__type": "AddedToken",
61
+ "content": "<unk>",
62
+ "lstrip": false,
63
+ "normalized": true,
64
+ "rstrip": false,
65
+ "single_word": false
66
+ }
67
+ }