ivelin committed on
Commit
ba6d9e2
·
1 Parent(s): e0dd23e

fix: example formatting

Browse files

Signed-off-by: ivelin <ivelin.eth@gmail.com>

Files changed (1) hide show
  1. app.py +15 -18
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import re
2
  import gradio as gr
 
3
 
4
  import torch
5
  from transformers import DonutProcessor, VisionEncoderDecoderModel
@@ -13,7 +14,10 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
13
  model.to(device)
14
 
15
 
16
- def process_document(image, prompt):
 
 
 
17
  # prepare encoder inputs
18
  pixel_values = processor(image, return_tensors="pt").pixel_values
19
 
@@ -68,20 +72,13 @@ def process_document(image, prompt):
68
 
69
  description = "Gradio Demo for Donut RefExp task, an instance of `VisionEncoderDecoderModel` fine-tuned on UIBert RefExp Dataset (UI Referring Expression). To use it, simply upload your image and type a question and click 'submit', or click one of the examples to load them. Read more at the links below."
70
  article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
71
-
72
- demo = gr.Interface(
73
- fn=process_document,
74
- inputs=["image", "text"],
75
- outputs="json",
76
- title="Demo: Donut 🍩 for DocVQA",
77
- description=description,
78
- article=article,
79
- enable_queue=True,
80
- examples=[
81
- ["example_1.jpg", "select the setting icon from top right corner"],
82
- ["example_2.jpg", "enter the text field next to the name"]
83
- ],
84
- cache_examples=False)
85
-
86
- demo.launch()
87
- #
 
1
  import re
2
  import gradio as gr
3
+ from PIL import Image
4
 
5
  import torch
6
  from transformers import DonutProcessor, VisionEncoderDecoderModel
 
14
  model.to(device)
15
 
16
 
17
+ def process_document(image: Image, prompt: str):
18
+ # trim prompt to 80 characters and normalize to lowercase
19
+ prompt = prompt[:80].lower()
20
+
21
  # prepare encoder inputs
22
  pixel_values = processor(image, return_tensors="pt").pixel_values
23
 
 
72
 
73
  description = "Gradio Demo for Donut RefExp task, an instance of `VisionEncoderDecoderModel` fine-tuned on UIBert RefExp Dataset (UI Referring Expression). To use it, simply upload your image and type a question and click 'submit', or click one of the examples to load them. Read more at the links below."
74
  article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
75
+ examples = [
76
+ ["example_1.jpg", "select the setting icon from top right corner"],
77
+ ["example_2.jpg", "enter the text field next to the name"]
78
+ ],
79
+
80
+ demo = gr.Interface(fn=visual_grounding, inputs=[gr.inputs.Image(type='pil'), "textbox"],
81
+ outputs=[gr.inputs.Image(type='pil'), "textbox"],
82
+ title=title, description=description, article=article, examples=examples,
83
+ allow_flagging=False, allow_screenshot=False)
84
+ demo.launch(cache_examples=True)