|
import gradio as gr |
|
from transformers import TrOCRProcessor, VisionEncoderDecoderModel |
|
import requests |
|
from PIL import Image |
|
|
|
# Download the tokenizer definition and store it under a relative path, so it
# can be loaded from disk afterwards.
url = 'https://huggingface.co/yhshin/latex-ocr/raw/main/tokenizer-wordlevel.json'

# A timeout keeps start-up from hanging forever on a stalled connection.
r = requests.get(url, timeout=60)

# Fail fast on HTTP errors so that an error page is never saved in place of
# the tokenizer JSON.
r.raise_for_status()

# The context manager guarantees the file is flushed and closed before it is
# read back.
with open('tokenizer-wordlevel.json', 'wb') as tokenizer_file:
    tokenizer_file.write(r.content)
|
|
|
|
|
# Identifiers of the pretrained artefacts used by the demo.
_PROCESSOR_CHECKPOINT = "microsoft/trocr-small-printed"
_MODEL_CHECKPOINT = "yhshin/latex-ocr"
_TOKENIZER_FILE = "tokenizer-wordlevel.json"

# Image pre-processor and encoder-decoder model, loaded once at import time.
processor = TrOCRProcessor.from_pretrained(_PROCESSOR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_MODEL_CHECKPOINT)

from tokenizers import Tokenizer

# The tokenizer is read from the local JSON file rather than from the
# processor; it is used to decode the generated token ids.
tokenizer = Tokenizer.from_file(_TOKENIZER_FILE)
|
|
|
|
|
|
|
def process_image(image):
    """Transcribe an image of a rendered equation into LaTeX source text.

    Args:
        image: The image to transcribe. The Gradio interface in this file is
            configured with ``type="pil"``, so a PIL image is expected.
            ``None`` is tolerated (presumably what arrives when the form is
            submitted without an image -- confirm against the Gradio version
            in use).

    Returns:
        The decoded text for the image with every space character removed,
        or an empty string when ``image`` is ``None``.
    """
    # Nothing to transcribe: return early instead of letting the processor
    # raise on a missing image.
    if image is None:
        return ""

    # Grayscale, palette and RGBA images are converted so the processor always
    # receives three channels. Inputs without a ``mode`` attribute (i.e. not
    # PIL images) are passed through untouched.
    if getattr(image, "mode", "RGB") != "RGB":
        image = image.convert("RGB")

    pixel_values = processor(image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)

    # decode_batch yields one string per generated sequence; only the first
    # one is used.
    generated_text = tokenizer.decode_batch(
        generated_ids.tolist(), skip_special_tokens=True
    )[0]

    # Drop the spaces from the decoded string before returning it.
    return generated_text.replace(" ", "")
|
|
|
# Static text shown around the demo widget.
title = "Interactive demo: latex-ocr"

description = (
    "Demo for latex-ocr, a machine learning model to parse an image of "
    "equation and attempt to find the LaTeX source code that generated it. "
    "To use it, simply upload an image or use the example image below and "
    "click 'submit'. Results will show up in a few seconds."
)

# HTML footer rendered below the interface.
article = (
    "<p style='text-align: center'>Made by Young Ho Shin"
    "<a href='https://www.github.com/yhshin11'>Github</a> | "
    "<a href='https://github.com/microsoft/unilm/tree/master/trocr'>Github Repo</a></p>"
)

# One single-element row per example image offered in the UI.
# NOTE(review): only the first path has the "examples/" prefix -- confirm that
# all three files exist at the locations given here.
examples = [
    ["examples/image_0.png"],
    ["image_1.png"],
    ["image_2.png"],
]
|
|
|
# Prefer the top-level component classes (gr.Image / gr.Textbox). The legacy
# gr.inputs / gr.outputs namespaces are deprecated and absent from newer Gradio
# releases, so they are only touched as a fallback when the top-level classes
# are not available.
_image_component = getattr(gr, "Image", None) or gr.inputs.Image
_textbox_component = getattr(gr, "Textbox", None) or gr.outputs.Textbox

# Wire the OCR function into a simple image-in / text-out web UI.
iface = gr.Interface(
    fn=process_image,
    inputs=_image_component(type="pil"),  # hand the callback a PIL image
    outputs=_textbox_component(),
    title=title,
    description=description,
    article=article,
    examples=examples,
)

# Start the web server for the demo.
iface.launch()
|
|
|
|