"""Gradio demo that chains two LayoutLMv3 receipt parsers (SROIE, then CORD)."""

import json

import gradio as gr

from cord_inference import prediction as cord_prediction
from sroie_inference import prediction as sroie_prediction


def _merge_results(first, second):
    """Combine the two parsing results into a single JSON-serialisable object.

    The results are merged as mappings instead of splicing their serialised
    text, so nested objects and values containing ``{`` or ``}`` survive
    intact and an empty result on either side cannot produce invalid JSON.

    Args:
        first: Result of the first parsing pass.
        second: Result of the second parsing pass.

    Returns:
        ``second`` when ``first`` is empty, ``first`` when ``second`` is
        empty, otherwise a new dict holding the keys of both; on a key
        clash the value from ``second`` wins.
    """
    # NOTE(review): both results are assumed to be dict-like (that is what the
    # inference helpers appear to return) -- confirm against cord_inference
    # and sroie_inference.
    if not first:
        return second
    if not second:
        return first
    return {**first, **second}


def prediction(image):
    """Run both receipt parsers on ``image`` and return every intermediate step.

    Args:
        image: Input image; the interface below supplies it as a PIL image.

    Returns:
        A 5-tuple ``(j1, image_blurred, j2, image_final, j3)``: the result and
        output image of the SROIE pass, the result and output image of the
        CORD pass, and the merge of both results.
    """
    # First pass: the SROIE helper returns a JSON-like result plus an image.
    # Presumably this is the image with the detected boxes blurred and the
    # checkpoint is Theivaprakasham/layoutlmv3-finetuned-sroie -- confirm in
    # sroie_inference.
    j1, image_blurred = sroie_prediction(image)

    # Second pass: the CORD helper runs on a copy, so the blurred image that
    # is returned to the UI stays untouched even if the helper draws in place.
    # Presumably the checkpoint is mp-02/layoutlmv3-finetuned-cord -- confirm
    # in cord_inference.
    img = image_blurred.copy()
    j2, image_final = cord_prediction(img)

    # Link the two results into one object for the last JSON output.
    j3 = _merge_results(j1, j2)
    return j1, image_blurred, j2, image_final, j3


title = "Interactive demo: LayoutLMv3 for receipts"
description = (
    "Demo for Microsoft's LayoutLMv3, a Transformer for state-of-the-art document image understanding tasks. "
    "This particular model is fine-tuned on CORD and SROIE, which are datasets of receipts.\n "
    "It firsts uses the fine-tune on SROIE to extract date, company and address, then the fine-tune on CORD for the other info.\n "
    "To use it, simply upload an image or use the example image below. "
    "Results will show up in a few seconds."
)
examples = [['image.jpeg']]

css = """.output_image, .input_image {height: 600px !important}"""

# The interface takes an image and shows every step of the pipeline:
#   1. the result of the model fine-tuned on SROIE,
#   2. the image returned by that pass (relevant boxes blurred),
#   3. the result of the model fine-tuned on CORD, run on that image,
#   4. the image returned by the CORD pass,
#   5. the two results merged into one.
iface = gr.Interface(
    fn=prediction,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.JSON(label="json parsing"),
        gr.Image(type="pil", label="blurred image"),
        gr.JSON(label="json parsing"),
        gr.Image(type="pil", label="annotated image"),
        gr.JSON(label="json parsing"),
    ],
    title=title,
    description=description,
    examples=examples,
    css=css,
)

iface.launch()