katanaml committed on
Commit
1079f75
1 Parent(s): 6b1afcc

Training done

Browse files
Files changed (3) hide show
  1. added_tokens.json +3 -1
  2. tokenizer.json +18 -0
  3. tokenizer_config.json +1 -1
added_tokens.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "</s_client>": 57534,
3
  "</s_client_tax_id>": 57538,
4
  "</s_header>": 57526,
@@ -18,9 +19,10 @@
18
  "</s_total_gross_worth>": 57562,
19
  "</s_total_net_worth>": 57558,
20
  "</s_total_vat>": 57560,
 
21
  "<s_client>": 57533,
22
  "<s_client_tax_id>": 57537,
23
- "<s_cord-v2>": 57563,
24
  "<s_header>": 57525,
25
  "<s_iban>": 57539,
26
  "<s_iitcdip>": 57523,
 
1
  {
2
+ "</s_None>": 57564,
3
  "</s_client>": 57534,
4
  "</s_client_tax_id>": 57538,
5
  "</s_header>": 57526,
 
19
  "</s_total_gross_worth>": 57562,
20
  "</s_total_net_worth>": 57558,
21
  "</s_total_vat>": 57560,
22
+ "<s_None>": 57563,
23
  "<s_client>": 57533,
24
  "<s_client_tax_id>": 57537,
25
+ "<s_cord-v2>": 57565,
26
  "<s_header>": 57525,
27
  "<s_iban>": 57539,
28
  "<s_iitcdip>": 57523,
tokenizer.json CHANGED
@@ -433,6 +433,24 @@
433
  },
434
  {
435
  "id": 57563,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436
  "content": "<s_cord-v2>",
437
  "single_word": false,
438
  "lstrip": false,
 
433
  },
434
  {
435
  "id": 57563,
436
+ "content": "<s_None>",
437
+ "single_word": false,
438
+ "lstrip": false,
439
+ "rstrip": false,
440
+ "normalized": true,
441
+ "special": false
442
+ },
443
+ {
444
+ "id": 57564,
445
+ "content": "</s_None>",
446
+ "single_word": false,
447
+ "lstrip": false,
448
+ "rstrip": false,
449
+ "normalized": true,
450
+ "special": false
451
+ },
452
+ {
453
+ "id": 57565,
454
  "content": "<s_cord-v2>",
455
  "single_word": false,
456
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -1,5 +1,6 @@
1
  {
2
  "bos_token": "<s>",
 
3
  "cls_token": "<s>",
4
  "eos_token": "</s>",
5
  "mask_token": {
@@ -15,7 +16,6 @@
15
  "processor_class": "DonutProcessor",
16
  "sep_token": "</s>",
17
  "sp_model_kwargs": {},
18
- "special_tokens_map_file": null,
19
  "tokenizer_class": "XLMRobertaTokenizer",
20
  "unk_token": "<unk>"
21
  }
 
1
  {
2
  "bos_token": "<s>",
3
+ "clean_up_tokenization_spaces": true,
4
  "cls_token": "<s>",
5
  "eos_token": "</s>",
6
  "mask_token": {
 
16
  "processor_class": "DonutProcessor",
17
  "sep_token": "</s>",
18
  "sp_model_kwargs": {},
 
19
  "tokenizer_class": "XLMRobertaTokenizer",
20
  "unk_token": "<unk>"
21
  }