AlinaKozyreva
committed on
Commit
•
5f25afa
1
Parent(s):
79b90c7
Upload tokenizer
Browse files
- tokenizer_config.json +0 -7
tokenizer_config.json
CHANGED
@@ -53,11 +53,9 @@
|
|
53 |
"do_basic_tokenize": true,
|
54 |
"do_lower_case": true,
|
55 |
"mask_token": "[MASK]",
|
56 |
-
"max_length": 512,
|
57 |
"model_max_length": 512,
|
58 |
"never_split": null,
|
59 |
"only_label_first_subword": true,
|
60 |
-
"pad_to_multiple_of": null,
|
61 |
"pad_token": "[PAD]",
|
62 |
"pad_token_box": [
|
63 |
0,
|
@@ -66,8 +64,6 @@
|
|
66 |
0
|
67 |
],
|
68 |
"pad_token_label": -100,
|
69 |
-
"pad_token_type_id": 0,
|
70 |
-
"padding_side": "right",
|
71 |
"processor_class": "LayoutLMv2Processor",
|
72 |
"sep_token": "[SEP]",
|
73 |
"sep_token_box": [
|
@@ -76,11 +72,8 @@
|
|
76 |
1000,
|
77 |
1000
|
78 |
],
|
79 |
-
"stride": 0,
|
80 |
"strip_accents": null,
|
81 |
"tokenize_chinese_chars": true,
|
82 |
"tokenizer_class": "LayoutLMv2Tokenizer",
|
83 |
-
"truncation_side": "right",
|
84 |
-
"truncation_strategy": "longest_first",
|
85 |
"unk_token": "[UNK]"
|
86 |
}
|
|
|
53 |
"do_basic_tokenize": true,
|
54 |
"do_lower_case": true,
|
55 |
"mask_token": "[MASK]",
|
|
|
56 |
"model_max_length": 512,
|
57 |
"never_split": null,
|
58 |
"only_label_first_subword": true,
|
|
|
59 |
"pad_token": "[PAD]",
|
60 |
"pad_token_box": [
|
61 |
0,
|
|
|
64 |
0
|
65 |
],
|
66 |
"pad_token_label": -100,
|
|
|
|
|
67 |
"processor_class": "LayoutLMv2Processor",
|
68 |
"sep_token": "[SEP]",
|
69 |
"sep_token_box": [
|
|
|
72 |
1000,
|
73 |
1000
|
74 |
],
|
|
|
75 |
"strip_accents": null,
|
76 |
"tokenize_chinese_chars": true,
|
77 |
"tokenizer_class": "LayoutLMv2Tokenizer",
|
|
|
|
|
78 |
"unk_token": "[UNK]"
|
79 |
}
|