serenarolloh
committed on
Commit
•
b3f7e91
1
Parent(s):
541018e
Training done
Browse files
- added_tokens.json +10 -0
- tokenizer.json +0 -0
- tokenizer_config.json +87 -0
added_tokens.json
CHANGED
@@ -1,10 +1,14 @@
|
|
1 |
{
|
2 |
"</s_customer>": 57548,
|
|
|
3 |
"</s_customer_name>": 57550,
|
4 |
"</s_customer_pin>": 57552,
|
|
|
5 |
"</s_date>": 57530,
|
6 |
"</s_dnote_no>": 57532,
|
7 |
"</s_document_type>": 57528,
|
|
|
|
|
8 |
"</s_header>": 57526,
|
9 |
"</s_invoice_no>": 57562,
|
10 |
"</s_item_code>": 57560,
|
@@ -14,6 +18,7 @@
|
|
14 |
"</s_item_unit_price>": 57564,
|
15 |
"</s_items>": 57554,
|
16 |
"</s_order_no>": 57534,
|
|
|
17 |
"</s_shipper>": 57536,
|
18 |
"</s_shipper_address>": 57540,
|
19 |
"</s_shipper_email>": 57544,
|
@@ -22,11 +27,15 @@
|
|
22 |
"</s_shipper_pin>": 57546,
|
23 |
"<s_cord-v2>": 57567,
|
24 |
"<s_customer>": 57547,
|
|
|
25 |
"<s_customer_name>": 57549,
|
26 |
"<s_customer_pin>": 57551,
|
|
|
27 |
"<s_date>": 57529,
|
28 |
"<s_dnote_no>": 57531,
|
29 |
"<s_document_type>": 57527,
|
|
|
|
|
30 |
"<s_header>": 57525,
|
31 |
"<s_iitcdip>": 57523,
|
32 |
"<s_invoice_no>": 57561,
|
@@ -37,6 +46,7 @@
|
|
37 |
"<s_item_unit_price>": 57563,
|
38 |
"<s_items>": 57553,
|
39 |
"<s_order_no>": 57533,
|
|
|
40 |
"<s_shipper>": 57535,
|
41 |
"<s_shipper_address>": 57539,
|
42 |
"<s_shipper_email>": 57543,
|
|
|
1 |
{
|
2 |
"</s_customer>": 57548,
|
3 |
+
"</s_customer_address>": 57575,
|
4 |
"</s_customer_name>": 57550,
|
5 |
"</s_customer_pin>": 57552,
|
6 |
+
"</s_customer_reference>": 57577,
|
7 |
"</s_date>": 57530,
|
8 |
"</s_dnote_no>": 57532,
|
9 |
"</s_document_type>": 57528,
|
10 |
+
"</s_dropoff>": 57571,
|
11 |
+
"</s_dropoff_point>": 57573,
|
12 |
"</s_header>": 57526,
|
13 |
"</s_invoice_no>": 57562,
|
14 |
"</s_item_code>": 57560,
|
|
|
18 |
"</s_item_unit_price>": 57564,
|
19 |
"</s_items>": 57554,
|
20 |
"</s_order_no>": 57534,
|
21 |
+
"</s_reference_no>": 57569,
|
22 |
"</s_shipper>": 57536,
|
23 |
"</s_shipper_address>": 57540,
|
24 |
"</s_shipper_email>": 57544,
|
|
|
27 |
"</s_shipper_pin>": 57546,
|
28 |
"<s_cord-v2>": 57567,
|
29 |
"<s_customer>": 57547,
|
30 |
+
"<s_customer_address>": 57574,
|
31 |
"<s_customer_name>": 57549,
|
32 |
"<s_customer_pin>": 57551,
|
33 |
+
"<s_customer_reference>": 57576,
|
34 |
"<s_date>": 57529,
|
35 |
"<s_dnote_no>": 57531,
|
36 |
"<s_document_type>": 57527,
|
37 |
+
"<s_dropoff>": 57570,
|
38 |
+
"<s_dropoff_point>": 57572,
|
39 |
"<s_header>": 57525,
|
40 |
"<s_iitcdip>": 57523,
|
41 |
"<s_invoice_no>": 57561,
|
|
|
46 |
"<s_item_unit_price>": 57563,
|
47 |
"<s_items>": 57553,
|
48 |
"<s_order_no>": 57533,
|
49 |
+
"<s_reference_no>": 57568,
|
50 |
"<s_shipper>": 57535,
|
51 |
"<s_shipper_address>": 57539,
|
52 |
"<s_shipper_email>": 57543,
|
tokenizer.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
CHANGED
@@ -407,6 +407,86 @@
|
|
407 |
"rstrip": false,
|
408 |
"single_word": false,
|
409 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
410 |
}
|
411 |
},
|
412 |
"additional_special_tokens": [
|
@@ -418,11 +498,18 @@
|
|
418 |
"cls_token": "<s>",
|
419 |
"eos_token": "</s>",
|
420 |
"mask_token": "<mask>",
|
|
|
421 |
"model_max_length": 1000000000000000019884624838656,
|
|
|
422 |
"pad_token": "<pad>",
|
|
|
|
|
423 |
"processor_class": "DonutProcessor",
|
424 |
"sep_token": "</s>",
|
425 |
"sp_model_kwargs": {},
|
|
|
426 |
"tokenizer_class": "XLMRobertaTokenizer",
|
|
|
|
|
427 |
"unk_token": "<unk>"
|
428 |
}
|
|
|
407 |
"rstrip": false,
|
408 |
"single_word": false,
|
409 |
"special": false
|
410 |
+
},
|
411 |
+
"57568": {
|
412 |
+
"content": "<s_reference_no>",
|
413 |
+
"lstrip": false,
|
414 |
+
"normalized": true,
|
415 |
+
"rstrip": false,
|
416 |
+
"single_word": false,
|
417 |
+
"special": false
|
418 |
+
},
|
419 |
+
"57569": {
|
420 |
+
"content": "</s_reference_no>",
|
421 |
+
"lstrip": false,
|
422 |
+
"normalized": true,
|
423 |
+
"rstrip": false,
|
424 |
+
"single_word": false,
|
425 |
+
"special": false
|
426 |
+
},
|
427 |
+
"57570": {
|
428 |
+
"content": "<s_dropoff>",
|
429 |
+
"lstrip": false,
|
430 |
+
"normalized": true,
|
431 |
+
"rstrip": false,
|
432 |
+
"single_word": false,
|
433 |
+
"special": false
|
434 |
+
},
|
435 |
+
"57571": {
|
436 |
+
"content": "</s_dropoff>",
|
437 |
+
"lstrip": false,
|
438 |
+
"normalized": true,
|
439 |
+
"rstrip": false,
|
440 |
+
"single_word": false,
|
441 |
+
"special": false
|
442 |
+
},
|
443 |
+
"57572": {
|
444 |
+
"content": "<s_dropoff_point>",
|
445 |
+
"lstrip": false,
|
446 |
+
"normalized": true,
|
447 |
+
"rstrip": false,
|
448 |
+
"single_word": false,
|
449 |
+
"special": false
|
450 |
+
},
|
451 |
+
"57573": {
|
452 |
+
"content": "</s_dropoff_point>",
|
453 |
+
"lstrip": false,
|
454 |
+
"normalized": true,
|
455 |
+
"rstrip": false,
|
456 |
+
"single_word": false,
|
457 |
+
"special": false
|
458 |
+
},
|
459 |
+
"57574": {
|
460 |
+
"content": "<s_customer_address>",
|
461 |
+
"lstrip": false,
|
462 |
+
"normalized": true,
|
463 |
+
"rstrip": false,
|
464 |
+
"single_word": false,
|
465 |
+
"special": false
|
466 |
+
},
|
467 |
+
"57575": {
|
468 |
+
"content": "</s_customer_address>",
|
469 |
+
"lstrip": false,
|
470 |
+
"normalized": true,
|
471 |
+
"rstrip": false,
|
472 |
+
"single_word": false,
|
473 |
+
"special": false
|
474 |
+
},
|
475 |
+
"57576": {
|
476 |
+
"content": "<s_customer_reference>",
|
477 |
+
"lstrip": false,
|
478 |
+
"normalized": true,
|
479 |
+
"rstrip": false,
|
480 |
+
"single_word": false,
|
481 |
+
"special": false
|
482 |
+
},
|
483 |
+
"57577": {
|
484 |
+
"content": "</s_customer_reference>",
|
485 |
+
"lstrip": false,
|
486 |
+
"normalized": true,
|
487 |
+
"rstrip": false,
|
488 |
+
"single_word": false,
|
489 |
+
"special": false
|
490 |
}
|
491 |
},
|
492 |
"additional_special_tokens": [
|
|
|
498 |
"cls_token": "<s>",
|
499 |
"eos_token": "</s>",
|
500 |
"mask_token": "<mask>",
|
501 |
+
"max_length": 768,
|
502 |
"model_max_length": 1000000000000000019884624838656,
|
503 |
+
"pad_to_multiple_of": null,
|
504 |
"pad_token": "<pad>",
|
505 |
+
"pad_token_type_id": 0,
|
506 |
+
"padding_side": "right",
|
507 |
"processor_class": "DonutProcessor",
|
508 |
"sep_token": "</s>",
|
509 |
"sp_model_kwargs": {},
|
510 |
+
"stride": 0,
|
511 |
"tokenizer_class": "XLMRobertaTokenizer",
|
512 |
+
"truncation_side": "right",
|
513 |
+
"truncation_strategy": "longest_first",
|
514 |
"unk_token": "<unk>"
|
515 |
}
|