File size: 2,129 Bytes
3fda4ee
 
 
 
 
 
 
fc77b97
 
4452340
3fda4ee
fc77b97
4452340
 
3fda4ee
9a891af
4452340
3fda4ee
 
4452340
3fda4ee
4452340
3fda4ee
 
 
9a891af
fc77b97
3fda4ee
 
 
fc77b97
 
3fda4ee
 
8683ec6
3fda4ee
8683ec6
3fda4ee
8683ec6
3fda4ee
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from cord_inference import prediction as cord_prediction
from sroie_inference import prediction as sroie_prediction
import gradio as gr
import json

def _merge_parsings(first, second):
    """Merge two parsing results into a single JSON-serialisable object.

    When ``first`` is empty, ``second`` is returned unchanged. Otherwise the
    keys of both results are combined into a new dict; on duplicate keys the
    value from ``second`` wins.

    NOTE(review): assumes both results are dicts (JSON objects), which is what
    the previous string-splicing merge implied -- confirm against the
    inference modules.
    """
    if not first:
        return second
    # Merge the objects themselves rather than splicing their serialised text:
    # splitting on '{' / '}' truncated the output whenever a value contained a
    # brace or a nested object, and produced invalid JSON when `second` was
    # empty.
    return {**first, **second}


def prediction(image):
    """Run the SROIE model, then the CORD model, and merge both parsings.

    Args:
        image: The input image; it is passed unchanged to ``sroie_prediction``.

    Returns:
        A 5-tuple ``(j1, image_blurred, j2, image_final, j3)``:
        the SROIE parsing, the image returned by the SROIE step, the CORD
        parsing, the image returned by the CORD step, and the merged parsing.
    """
    # first use the model fine-tuned on sroie (for now it is Theivaprakasham/layoutlmv3-finetuned-sroie)
    # on the image, which returns a JSON with some info and an image with the corresponding boxes blurred
    j1, image_blurred = sroie_prediction(image)

    # then use the model fine-tuned on cord on a copy of the blurred image, so
    # the blurred image returned below is a separate object from the one
    # handed to the second model
    j2, image_final = cord_prediction(image_blurred.copy())

    # link the two parsings into one object
    j3 = _merge_parsings(j1, j2)

    return j1, image_blurred, j2, image_final, j3


# Static texts and assets handed to the Gradio interface.
title = "Interactive demo: LayoutLMv3 for receipts"
description = (
    "Demo for Microsoft's LayoutLMv3, a Transformer for state-of-the-art document image understanding tasks. "
    "This particular space uses two instances of the model, one fine-tuned on CORD and the other SROIE.\n"
    " It firsts uses the fine-tune on SROIE to extract date, company and address, then the fine-tune on CORD for the other info. "
    "To use it, simply upload an image or use the example image below. "
    "Results will show up in a few seconds."
)
# One single-element row per example image.
examples = [["image.jpeg"], ["image.png"]]

# Custom CSS for the input and output image elements.
css = ".output_image, .input_image {height: 600px !important}"

# Gradio interface: takes one image as input and returns the JSON describing
# its contents. For now the intermediate steps are displayed as well, so the
# output components below follow the order of the values returned by
# `prediction`.
input_component = gr.Image(type="pil")
output_components = [
    gr.JSON(label="sroie parsing"),
    gr.Image(type="pil", label="blurred image"),
    gr.JSON(label="cord parsing"),
    gr.Image(type="pil", label="annotated image"),
    gr.JSON(label="final output"),
]

iface = gr.Interface(
    fn=prediction,
    inputs=input_component,
    outputs=output_components,
    title=title,
    description=description,
    examples=examples,
    css=css,
)

iface.launch()