import gradio as gr
import requests
from PIL import Image
from tokenizers import Tokenizer
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Download the word-level tokenizer used to decode the model's output ids
url = 'https://huggingface.co/yhshin/latex-ocr/raw/main/tokenizer-wordlevel.json'
r = requests.get(url)
with open('tokenizer-wordlevel.json', 'wb') as f:
    f.write(r.content)

# Image processor from the base TrOCR checkpoint; fine-tuned weights from yhshin/latex-ocr
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-small-printed")
model = VisionEncoderDecoderModel.from_pretrained("yhshin/latex-ocr")
tokenizer = Tokenizer.from_file("tokenizer-wordlevel.json")
def process_image(image):
    # Prepare the image for the encoder
    pixel_values = processor(image, return_tensors="pt").pixel_values
    # Generate token ids (greedy decoding, no beam search)
    generated_ids = model.generate(pixel_values)
    # Decode ids to LaTeX tokens with the word-level tokenizer
    generated_text = tokenizer.decode_batch(generated_ids.tolist(), skip_special_tokens=True)[0]
    # The word-level tokenizer joins tokens with spaces; strip them to get compact LaTeX
    generated_text = generated_text.replace(" ", "")
    return generated_text
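# A minimal local sanity check (a sketch; assumes one of the example images exists on disk):
#   img = Image.open("examples/image_0.png").convert("RGB")
#   print(process_image(img))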
title = "Interactive demo: latex-ocr"
description = "Demo for latex-ocr, a machine learning model that parses an image of an equation and attempts to recover the LaTeX source code that generated it. To use it, upload an image or pick one of the examples below and click 'Submit'. Results show up after a few seconds."
article = "<p style='text-align: center'>Made by Young Ho Shin | <a href='https://www.github.com/yhshin11'>GitHub</a> | <a href='https://github.com/microsoft/unilm/tree/master/trocr'>TrOCR repo</a></p>"
examples = [["examples/image_0.png"], ["examples/image_1.png"], ["examples/image_2.png"]]
iface = gr.Interface(
    fn=process_image,
    inputs=gr.inputs.Image(type="pil"),
    outputs=gr.outputs.Textbox(),
    title=title,
    description=description,
    article=article,
    examples=examples,
)
iface.launch()
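# Running locally (a sketch; assumes gradio, transformers, tokenizers, requests and Pillow are installed):
#   $ python app.py
#   Gradio prints a local URL to the console (typically http://127.0.0.1:7860) where the demo is served.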