Upload tokenizer
Browse files
- tokenizer.json +2 -2
- tokenizer_config.json +5 -1
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
1   version https://git-lfs.github.com/spec/v1
2 - oid sha256:
3 - size
1   version https://git-lfs.github.com/spec/v1
2 + oid sha256:9a4661b2cb6b8a1007906509fe18cbfbc03062a086102bf7b80cfedb80f16c37
3 + size 17082854
tokenizer_config.json
CHANGED
@@ -46,9 +46,13 @@
46   "cls_token": "<s>",
47   "eos_token": "</s>",
48   "mask_token": "<mask>",
49 - "
50   "pad_token": "<pad>",
51   "sep_token": "</s>",
52   "tokenizer_class": "XLMRobertaTokenizer",
53   "unk_token": "<unk>"
54   }
46   "cls_token": "<s>",
47   "eos_token": "</s>",
48   "mask_token": "<mask>",
49 + "max_length": 64,
50 + "model_max_length": 512,
51   "pad_token": "<pad>",
52   "sep_token": "</s>",
53 + "stride": 0,
54   "tokenizer_class": "XLMRobertaTokenizer",
55 + "truncation_side": "right",
56 + "truncation_strategy": "longest_first",
57   "unk_token": "<unk>"
58   }