Spaces:

yhshin
/

latex-ocr

Runtime error

App Files Files Community

latex-ocr / app.py

yhshin

Update app.py

cf700ce over 2 years ago

raw

history blame

1.87 kB

	import gradio as gr
	from transformers import TrOCRProcessor, VisionEncoderDecoderModel
	import requests
	from PIL import Image

	# Download the word-level tokenizer definition next to the app so that it can
	# be loaded from a local path later on.
	url = 'https://huggingface.co/yhshin/latex-ocr/raw/main/tokenizer-wordlevel.json'
	# A timeout keeps startup from hanging forever on a stalled connection.
	r = requests.get(url, timeout=30)
	# Fail fast on an HTTP error instead of saving an error page as the tokenizer.
	r.raise_for_status()
	# The context manager guarantees the file is flushed and closed before it is read.
	with open('tokenizer-wordlevel.json', 'wb') as tokenizer_file:
	    tokenizer_file.write(r.content)


	# Word-level tokenizer read from the JSON file in the working directory; it is
	# the object used to decode generated ids back into text.
	from tokenizers import Tokenizer

	tokenizer = Tokenizer.from_file("tokenizer-wordlevel.json")

	# The image processor is loaded from the stock TrOCR checkpoint, while the
	# encoder-decoder weights are loaded from the latex-ocr checkpoint.
	processor = TrOCRProcessor.from_pretrained("microsoft/trocr-small-printed")
	model = VisionEncoderDecoderModel.from_pretrained("yhshin/latex-ocr")

	# load image examples

	def process_image(image):
	    """Transcribe an image of an equation into LaTeX source text.

	    Args:
	        image: The input image, normally a PIL image. ``None`` is accepted
	            and means that nothing was provided.

	    Returns:
	        The decoded text with every space removed, or an empty string when
	        ``image`` is ``None``.
	    """
	    # Nothing to transcribe; avoid crashing inside the processor.
	    if image is None:
	        return ""

	    # Uploaded PNGs may be RGBA, palette or grayscale; normalise PIL inputs to
	    # three channels before preprocessing. Other input types pass through as is.
	    if isinstance(image, Image.Image):
	        image = image.convert("RGB")

	    # prepare image
	    pixel_values = processor(image, return_tensors="pt").pixel_values

	    # generate (no beam search)
	    generated_ids = model.generate(pixel_values)

	    # decode with the separately loaded word-level tokenizer
	    generated_text = tokenizer.decode_batch(
	        generated_ids.tolist(), skip_special_tokens=True
	    )[0]

	    # Strip spaces
	    return generated_text.replace(" ", "")

	# Static text shown by the demo page.
	title = "Interactive demo: latex-ocr"
	description = "Demo for latex-ocr, a machine learning model to parse an image of equation and attempt to find the LaTeX source code that generated it. To use it, simply upload an image or use the example image below and click 'submit'. Results will show up in a few seconds."
	# A plain "|" separates the two links; "\|" is an invalid Python escape sequence
	# and would put a literal backslash into the rendered HTML.
	article = "<p style='text-align: center'>Made by Young Ho Shin<a href='https://www.github.com/yhshin11'>Github</a> | <a href='https://github.com/microsoft/unilm/tree/master/trocr'>Github Repo</a></p>"
	# NOTE(review): only the first example path is under "examples/"; confirm that
	# image_1.png and image_2.png really live in the repository root.
	examples = [["examples/image_0.png"], ["image_1.png"], ["image_2.png"]]

	# The gr.inputs / gr.outputs namespaces are deprecated and were removed in
	# Gradio 4, so prefer the top-level components and only fall back to the old
	# namespaces on installs that do not provide them.
	image_input = gr.Image(type="pil") if hasattr(gr, "Image") else gr.inputs.Image(type="pil")
	text_output = gr.Textbox() if hasattr(gr, "Textbox") else gr.outputs.Textbox()

	# Wire the OCR function to an image input and a text output, then start the app.
	iface = gr.Interface(
	    fn=process_image,
	    inputs=image_input,
	    outputs=text_output,
	    title=title,
	    description=description,
	    article=article,
	    examples=examples,
	)
	iface.launch()