serenarolloh commited on
Commit
b3f7e91
1 Parent(s): 541018e

Training done

Browse files
Files changed (3) hide show
  1. added_tokens.json +10 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +87 -0
added_tokens.json CHANGED
@@ -1,10 +1,14 @@
1
  {
2
  "</s_customer>": 57548,
 
3
  "</s_customer_name>": 57550,
4
  "</s_customer_pin>": 57552,
 
5
  "</s_date>": 57530,
6
  "</s_dnote_no>": 57532,
7
  "</s_document_type>": 57528,
 
 
8
  "</s_header>": 57526,
9
  "</s_invoice_no>": 57562,
10
  "</s_item_code>": 57560,
@@ -14,6 +18,7 @@
14
  "</s_item_unit_price>": 57564,
15
  "</s_items>": 57554,
16
  "</s_order_no>": 57534,
 
17
  "</s_shipper>": 57536,
18
  "</s_shipper_address>": 57540,
19
  "</s_shipper_email>": 57544,
@@ -22,11 +27,15 @@
22
  "</s_shipper_pin>": 57546,
23
  "<s_cord-v2>": 57567,
24
  "<s_customer>": 57547,
 
25
  "<s_customer_name>": 57549,
26
  "<s_customer_pin>": 57551,
 
27
  "<s_date>": 57529,
28
  "<s_dnote_no>": 57531,
29
  "<s_document_type>": 57527,
 
 
30
  "<s_header>": 57525,
31
  "<s_iitcdip>": 57523,
32
  "<s_invoice_no>": 57561,
@@ -37,6 +46,7 @@
37
  "<s_item_unit_price>": 57563,
38
  "<s_items>": 57553,
39
  "<s_order_no>": 57533,
 
40
  "<s_shipper>": 57535,
41
  "<s_shipper_address>": 57539,
42
  "<s_shipper_email>": 57543,
 
1
  {
2
  "</s_customer>": 57548,
3
+ "</s_customer_address>": 57575,
4
  "</s_customer_name>": 57550,
5
  "</s_customer_pin>": 57552,
6
+ "</s_customer_reference>": 57577,
7
  "</s_date>": 57530,
8
  "</s_dnote_no>": 57532,
9
  "</s_document_type>": 57528,
10
+ "</s_dropoff>": 57571,
11
+ "</s_dropoff_point>": 57573,
12
  "</s_header>": 57526,
13
  "</s_invoice_no>": 57562,
14
  "</s_item_code>": 57560,
 
18
  "</s_item_unit_price>": 57564,
19
  "</s_items>": 57554,
20
  "</s_order_no>": 57534,
21
+ "</s_reference_no>": 57569,
22
  "</s_shipper>": 57536,
23
  "</s_shipper_address>": 57540,
24
  "</s_shipper_email>": 57544,
 
27
  "</s_shipper_pin>": 57546,
28
  "<s_cord-v2>": 57567,
29
  "<s_customer>": 57547,
30
+ "<s_customer_address>": 57574,
31
  "<s_customer_name>": 57549,
32
  "<s_customer_pin>": 57551,
33
+ "<s_customer_reference>": 57576,
34
  "<s_date>": 57529,
35
  "<s_dnote_no>": 57531,
36
  "<s_document_type>": 57527,
37
+ "<s_dropoff>": 57570,
38
+ "<s_dropoff_point>": 57572,
39
  "<s_header>": 57525,
40
  "<s_iitcdip>": 57523,
41
  "<s_invoice_no>": 57561,
 
46
  "<s_item_unit_price>": 57563,
47
  "<s_items>": 57553,
48
  "<s_order_no>": 57533,
49
+ "<s_reference_no>": 57568,
50
  "<s_shipper>": 57535,
51
  "<s_shipper_address>": 57539,
52
  "<s_shipper_email>": 57543,
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -407,6 +407,86 @@
407
  "rstrip": false,
408
  "single_word": false,
409
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
  }
411
  },
412
  "additional_special_tokens": [
@@ -418,11 +498,18 @@
418
  "cls_token": "<s>",
419
  "eos_token": "</s>",
420
  "mask_token": "<mask>",
 
421
  "model_max_length": 1000000000000000019884624838656,
 
422
  "pad_token": "<pad>",
 
 
423
  "processor_class": "DonutProcessor",
424
  "sep_token": "</s>",
425
  "sp_model_kwargs": {},
 
426
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
427
  "unk_token": "<unk>"
428
  }
 
407
  "rstrip": false,
408
  "single_word": false,
409
  "special": false
410
+ },
411
+ "57568": {
412
+ "content": "<s_reference_no>",
413
+ "lstrip": false,
414
+ "normalized": true,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": false
418
+ },
419
+ "57569": {
420
+ "content": "</s_reference_no>",
421
+ "lstrip": false,
422
+ "normalized": true,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": false
426
+ },
427
+ "57570": {
428
+ "content": "<s_dropoff>",
429
+ "lstrip": false,
430
+ "normalized": true,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": false
434
+ },
435
+ "57571": {
436
+ "content": "</s_dropoff>",
437
+ "lstrip": false,
438
+ "normalized": true,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": false
442
+ },
443
+ "57572": {
444
+ "content": "<s_dropoff_point>",
445
+ "lstrip": false,
446
+ "normalized": true,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": false
450
+ },
451
+ "57573": {
452
+ "content": "</s_dropoff_point>",
453
+ "lstrip": false,
454
+ "normalized": true,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": false
458
+ },
459
+ "57574": {
460
+ "content": "<s_customer_address>",
461
+ "lstrip": false,
462
+ "normalized": true,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": false
466
+ },
467
+ "57575": {
468
+ "content": "</s_customer_address>",
469
+ "lstrip": false,
470
+ "normalized": true,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": false
474
+ },
475
+ "57576": {
476
+ "content": "<s_customer_reference>",
477
+ "lstrip": false,
478
+ "normalized": true,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": false
482
+ },
483
+ "57577": {
484
+ "content": "</s_customer_reference>",
485
+ "lstrip": false,
486
+ "normalized": true,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": false
490
  }
491
  },
492
  "additional_special_tokens": [
 
498
  "cls_token": "<s>",
499
  "eos_token": "</s>",
500
  "mask_token": "<mask>",
501
+ "max_length": 768,
502
  "model_max_length": 1000000000000000019884624838656,
503
+ "pad_to_multiple_of": null,
504
  "pad_token": "<pad>",
505
+ "pad_token_type_id": 0,
506
+ "padding_side": "right",
507
  "processor_class": "DonutProcessor",
508
  "sep_token": "</s>",
509
  "sp_model_kwargs": {},
510
+ "stride": 0,
511
  "tokenizer_class": "XLMRobertaTokenizer",
512
+ "truncation_side": "right",
513
+ "truncation_strategy": "longest_first",
514
  "unk_token": "<unk>"
515
  }