import gradio as gr
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import requests
from PIL import Image

# Hugging Face Hub checkpoint identifiers: the processor (used for image
# preprocessing) and the encoder-decoder weights come from different repos.
PROCESSOR_CHECKPOINT = "microsoft/trocr-small-printed"
MODEL_CHECKPOINT = "yhshin/latex-ocr"

processor = TrOCRProcessor.from_pretrained(PROCESSOR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(MODEL_CHECKPOINT)

# NOTE(review): this relies on the loaded model exposing a `tokenizer`
# attribute whose object provides `decode_batch` (called in process_image).
# Confirm the checkpoint really provides it; if not, the tokenizer has to be
# loaded separately.
tokenizer = model.tokenizer

# NOTE: no example images are loaded here; the example image paths are listed in `examples` below.

def process_image(image: "Image.Image | None") -> str:
    """Run the OCR model on an equation image and return the decoded text.

    Args:
        image: The PIL image handed over by the Gradio input component, or
            ``None`` when the form was submitted without an image.

    Returns:
        The decoded model output with every space character removed, or an
        empty string when no image was provided.
    """
    # Submitting the form without an image yields None; bail out early
    # instead of failing inside the processor with an opaque error.
    if image is None:
        return ""

    # Preprocess the image into PyTorch tensors (return_tensors="pt").
    pixel_values = processor(image, return_tensors="pt").pixel_values

    # Generate with the model's default settings (no beam search).
    generated_ids = model.generate(pixel_values)

    # decode_batch is given plain Python lists of ids; a single image was
    # submitted, so only the first decoded sequence is kept.
    decoded = tokenizer.decode_batch(generated_ids.tolist(), skip_special_tokens=True)
    latex = decoded[0]

    # Strip spaces
    return latex.replace(" ", "")

# Text shown on the demo page: heading, intro paragraph, and HTML footer.
title = "Interactive demo: latex-ocr"
description = "Demo for latex-ocr, a machine learning model to parse an image of equation and attempt to find the LaTeX source code that generated it. To use it, simply upload an image or use the example image below and click 'submit'. Results will show up in a few seconds."
article = "<p style='text-align: center'>Made by Young Ho Shin<a href='https://www.github.com/yhshin11'>Github</a> | <a href='https://github.com/microsoft/unilm/tree/master/trocr'>Github Repo</a></p>"

# One row per example, each row holding the single image-path input.
# All example images are referenced from the same `examples/` directory;
# previously only the first entry carried the directory prefix.
# NOTE(review): assumes image_1.png and image_2.png live next to
# image_0.png under examples/ — confirm against the repository layout.
examples = [
    ["examples/image_0.png"],
    ["examples/image_1.png"],
    ["examples/image_2.png"],
]

# Input/output widgets: a PIL image goes in, plain text comes out.
# NOTE(review): `gr.inputs` / `gr.outputs` are legacy Gradio namespaces —
# confirm the pinned Gradio version still provides them before upgrading.
image_input = gr.inputs.Image(type="pil")
text_output = gr.outputs.Textbox()

# Wire process_image into a web UI together with the page text and examples.
iface = gr.Interface(
    fn=process_image,
    inputs=image_input,
    outputs=text_output,
    title=title,
    description=description,
    article=article,
    examples=examples,
)

# Start the web server for the demo.
iface.launch()