silipo committed on
Commit
0343eef
·
1 Parent(s): 284723b

first version

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +57 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Receipt Ocr Ui
3
  emoji: 📈
4
  colorFrom: purple
5
  colorTo: indigo
 
1
  ---
2
+ title: Receipt Ocr Ui v1
3
  emoji: 📈
4
  colorFrom: purple
5
  colorTo: indigo
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import gradio as gr
3
+
4
+ import torch
5
+ from transformers import DonutProcessor, VisionEncoderDecoderModel
6
+
7
+ processor = DonutProcessor.from_pretrained("debu-das/donut_receipt_v1.20")
8
+ model = VisionEncoderDecoderModel.from_pretrained("debu-das/donut_receipt_v1.20"
9
+ )
10
+
11
+ device = "cuda" if torch.cuda.is_available() else "cpu"
12
+ model.to(device)
13
+
14
+ def process_document(image):
15
+ # prepare encoder inputs
16
+ pixel_values = processor(image, return_tensors="pt").pixel_values
17
+
18
+ # prepare decoder inputs
19
+ task_prompt = "<s_cord-v2>"
20
+ decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids
21
+
22
+ # generate answer
23
+ outputs = model.generate(
24
+ pixel_values.to(device),
25
+ decoder_input_ids=decoder_input_ids.to(device),
26
+ max_length=model.decoder.config.max_position_embeddings,
27
+ early_stopping=True,
28
+ pad_token_id=processor.tokenizer.pad_token_id,
29
+ eos_token_id=processor.tokenizer.eos_token_id,
30
+ use_cache=True,
31
+ num_beams=1,
32
+ bad_words_ids=[[processor.tokenizer.unk_token_id]],
33
+ return_dict_in_generate=True,
34
+ )
35
+
36
+ # postprocess
37
+ sequence = processor.batch_decode(outputs.sequences)[0]
38
+ sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
39
+ sequence = re.sub(r"<.*?>", "", sequence, count=1).strip() # remove first task start token
40
+
41
+ return processor.token2json(sequence)
42
+
43
+ description = "Gradio Demo for Donut, an instance of `VisionEncoderDecoderModel` fine-tuned on CORD (document parsing). To use it, simply upload your image and click 'submit', or click one of the examples to load them. Read more at the links below."
44
+ article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
45
+
46
+ demo = gr.Interface(
47
+ fn=process_document,
48
+ inputs="image",
49
+ outputs="json",
50
+ title="Demo: App for Receipt Parsing using Donut",
51
+ description=description,
52
+ article=article,
53
+ enable_queue=True,
54
+ examples=[],
55
+ cache_examples=False)
56
+
57
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch
2
+ git+https://github.com/huggingface/transformers.git
3
+ sentencepiece