josepruiz14 committed on
Commit
b5f91fd
·
verified ·
1 Parent(s): 56583af

Training done

Browse files
Files changed (3) hide show
  1. added_tokens.json +7 -14
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +6 -69
added_tokens.json CHANGED
@@ -1,30 +1,23 @@
1
  {
2
- "</s_IDESP>": 57542,
3
- "</s_birthDate>": 57538,
4
- "</s_birth_date>": 57545,
5
- "</s_expirationDate>": 57540,
6
- "</s_expiration_date>": 57547,
7
  "</s_idNumber>": 57536,
8
  "</s_name>": 57526,
9
  "</s_nationality>": 57534,
10
  "</s_second_surname>": 57530,
11
  "</s_sex>": 57532,
12
- "</s_support_number>": 57549,
13
  "</s_surname>": 57528,
14
- "<s_IDESP>": 57541,
15
- "<s_birthDate>": 57537,
16
- "<s_birth_date>": 57544,
17
- "<s_dnis>": 57543,
18
- "<s_dnis_2024>": 57550,
19
- "<s_expirationDate>": 57539,
20
- "<s_expiration_date>": 57546,
21
  "<s_idNumber>": 57535,
22
  "<s_iitcdip>": 57523,
23
  "<s_name>": 57525,
24
  "<s_nationality>": 57533,
25
  "<s_second_surname>": 57529,
26
  "<s_sex>": 57531,
27
- "<s_support_number>": 57548,
28
  "<s_surname>": 57527,
29
  "<s_synthdog>": 57524,
30
  "<sep/>": 57522
 
1
  {
2
+ "</s_birth_date>": 57538,
3
+ "</s_expiration_date>": 57540,
 
 
 
4
  "</s_idNumber>": 57536,
5
  "</s_name>": 57526,
6
  "</s_nationality>": 57534,
7
  "</s_second_surname>": 57530,
8
  "</s_sex>": 57532,
9
+ "</s_support_number>": 57542,
10
  "</s_surname>": 57528,
11
+ "<s_birth_date>": 57537,
12
+ "<s_dnis_2024>": 57543,
13
+ "<s_expiration_date>": 57539,
 
 
 
 
14
  "<s_idNumber>": 57535,
15
  "<s_iitcdip>": 57523,
16
  "<s_name>": 57525,
17
  "<s_nationality>": 57533,
18
  "<s_second_surname>": 57529,
19
  "<s_sex>": 57531,
20
+ "<s_support_number>": 57541,
21
  "<s_surname>": 57527,
22
  "<s_synthdog>": 57524,
23
  "<sep/>": 57522
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -161,62 +161,6 @@
161
  "special": false
162
  },
163
  "57537": {
164
- "content": "<s_birthDate>",
165
- "lstrip": false,
166
- "normalized": true,
167
- "rstrip": false,
168
- "single_word": false,
169
- "special": false
170
- },
171
- "57538": {
172
- "content": "</s_birthDate>",
173
- "lstrip": false,
174
- "normalized": true,
175
- "rstrip": false,
176
- "single_word": false,
177
- "special": false
178
- },
179
- "57539": {
180
- "content": "<s_expirationDate>",
181
- "lstrip": false,
182
- "normalized": true,
183
- "rstrip": false,
184
- "single_word": false,
185
- "special": false
186
- },
187
- "57540": {
188
- "content": "</s_expirationDate>",
189
- "lstrip": false,
190
- "normalized": true,
191
- "rstrip": false,
192
- "single_word": false,
193
- "special": false
194
- },
195
- "57541": {
196
- "content": "<s_IDESP>",
197
- "lstrip": false,
198
- "normalized": true,
199
- "rstrip": false,
200
- "single_word": false,
201
- "special": false
202
- },
203
- "57542": {
204
- "content": "</s_IDESP>",
205
- "lstrip": false,
206
- "normalized": true,
207
- "rstrip": false,
208
- "single_word": false,
209
- "special": false
210
- },
211
- "57543": {
212
- "content": "<s_dnis>",
213
- "lstrip": false,
214
- "normalized": true,
215
- "rstrip": false,
216
- "single_word": false,
217
- "special": false
218
- },
219
- "57544": {
220
  "content": "<s_birth_date>",
221
  "lstrip": false,
222
  "normalized": true,
@@ -224,7 +168,7 @@
224
  "single_word": false,
225
  "special": false
226
  },
227
- "57545": {
228
  "content": "</s_birth_date>",
229
  "lstrip": false,
230
  "normalized": true,
@@ -232,7 +176,7 @@
232
  "single_word": false,
233
  "special": false
234
  },
235
- "57546": {
236
  "content": "<s_expiration_date>",
237
  "lstrip": false,
238
  "normalized": true,
@@ -240,7 +184,7 @@
240
  "single_word": false,
241
  "special": false
242
  },
243
- "57547": {
244
  "content": "</s_expiration_date>",
245
  "lstrip": false,
246
  "normalized": true,
@@ -248,7 +192,7 @@
248
  "single_word": false,
249
  "special": false
250
  },
251
- "57548": {
252
  "content": "<s_support_number>",
253
  "lstrip": false,
254
  "normalized": true,
@@ -256,7 +200,7 @@
256
  "single_word": false,
257
  "special": false
258
  },
259
- "57549": {
260
  "content": "</s_support_number>",
261
  "lstrip": false,
262
  "normalized": true,
@@ -264,7 +208,7 @@
264
  "single_word": false,
265
  "special": false
266
  },
267
- "57550": {
268
  "content": "<s_dnis_2024>",
269
  "lstrip": false,
270
  "normalized": true,
@@ -282,18 +226,11 @@
282
  "cls_token": "<s>",
283
  "eos_token": "</s>",
284
  "mask_token": "<mask>",
285
- "max_length": 768,
286
  "model_max_length": 1000000000000000019884624838656,
287
- "pad_to_multiple_of": null,
288
  "pad_token": "<pad>",
289
- "pad_token_type_id": 0,
290
- "padding_side": "right",
291
  "processor_class": "DonutProcessor",
292
  "sep_token": "</s>",
293
  "sp_model_kwargs": {},
294
- "stride": 0,
295
  "tokenizer_class": "XLMRobertaTokenizer",
296
- "truncation_side": "right",
297
- "truncation_strategy": "longest_first",
298
  "unk_token": "<unk>"
299
  }
 
161
  "special": false
162
  },
163
  "57537": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  "content": "<s_birth_date>",
165
  "lstrip": false,
166
  "normalized": true,
 
168
  "single_word": false,
169
  "special": false
170
  },
171
+ "57538": {
172
  "content": "</s_birth_date>",
173
  "lstrip": false,
174
  "normalized": true,
 
176
  "single_word": false,
177
  "special": false
178
  },
179
+ "57539": {
180
  "content": "<s_expiration_date>",
181
  "lstrip": false,
182
  "normalized": true,
 
184
  "single_word": false,
185
  "special": false
186
  },
187
+ "57540": {
188
  "content": "</s_expiration_date>",
189
  "lstrip": false,
190
  "normalized": true,
 
192
  "single_word": false,
193
  "special": false
194
  },
195
+ "57541": {
196
  "content": "<s_support_number>",
197
  "lstrip": false,
198
  "normalized": true,
 
200
  "single_word": false,
201
  "special": false
202
  },
203
+ "57542": {
204
  "content": "</s_support_number>",
205
  "lstrip": false,
206
  "normalized": true,
 
208
  "single_word": false,
209
  "special": false
210
  },
211
+ "57543": {
212
  "content": "<s_dnis_2024>",
213
  "lstrip": false,
214
  "normalized": true,
 
226
  "cls_token": "<s>",
227
  "eos_token": "</s>",
228
  "mask_token": "<mask>",
 
229
  "model_max_length": 1000000000000000019884624838656,
 
230
  "pad_token": "<pad>",
 
 
231
  "processor_class": "DonutProcessor",
232
  "sep_token": "</s>",
233
  "sp_model_kwargs": {},
 
234
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
235
  "unk_token": "<unk>"
236
  }