# Akhil Ahuja
# Adding app.py and requirements.txt
# fea808c
# raw
# history blame
# 965 Bytes
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
import pytesseract as tsr
from PIL import Image
import sys, os
import gradio as gr
# Developer-machine location of the Tesseract binary (Homebrew on macOS).
# Only override pytesseract's command when that binary is really there;
# otherwise pytesseract's own default command is left in place, so the
# script also works on hosts where Tesseract is installed elsewhere.
_HOMEBREW_TESSERACT = r'/opt/homebrew/Cellar/tesseract/5.2.0/bin/tesseract'
if os.path.isfile(_HOMEBREW_TESSERACT):
    tsr.pytesseract.tesseract_cmd = _HOMEBREW_TESSERACT

# "/facebook/m2m100_418M" is an absolute filesystem path, not a hub model id.
# Keep honouring it when such a directory exists (backward compatible), and
# fall back to the hub identifier "facebook/m2m100_418M" everywhere else.
_LOCAL_MODEL_DIR = "/facebook/m2m100_418M"
_MODEL_SOURCE = _LOCAL_MODEL_DIR if os.path.isdir(_LOCAL_MODEL_DIR) else "facebook/m2m100_418M"

# Translation model and its tokenizer, loaded once at import time and shared
# by every request.
model = M2M100ForConditionalGeneration.from_pretrained(_MODEL_SOURCE)
tokenizer = M2M100Tokenizer.from_pretrained(_MODEL_SOURCE)
def extractAndTranslate(image):
    """Read the text in an image with OCR and translate it from English to Hindi.

    Args:
        image: The picture to read; it is passed unchanged to
            ``pytesseract.image_to_string``. ``None`` is accepted and is
            treated as "nothing to translate" (presumably what the UI sends
            when no image was provided -- confirm against the Gradio version
            in use).

    Returns:
        The translated text as a string, or an empty string when no image
        was supplied or OCR produced no text.
    """
    if image is None:
        return ""

    # Extract Text
    extractedText = tsr.image_to_string(image, lang='eng+hin')
    # split() without arguments collapses every whitespace run (newlines,
    # repeated blanks, form feeds) and drops leading/trailing whitespace,
    # so the model receives one clean line of text.
    extractedTextFormatted = ' '.join(extractedText.split())
    if not extractedTextFormatted:
        # Nothing was recognised; skip the model call instead of
        # "translating" an empty string.
        return ""

    # Translate
    tokenizer.src_lang = "en"
    encodedText = tokenizer(extractedTextFormatted, return_tensors="pt")
    # Forcing the first generated token to the Hindi language id selects
    # the target language.
    generatedTokens = model.generate(
        **encodedText,
        forced_bos_token_id=tokenizer.get_lang_id("hi"),
    )
    return tokenizer.batch_decode(generatedTokens, skip_special_tokens=True)[0]
# Build the web UI: a single image input is handed to extractAndTranslate
# and the string it returns is shown in a text output.
demoApp = gr.Interface(
    fn=extractAndTranslate,
    inputs="image",
    outputs="text",
)
# Launch the interface.
demoApp.launch()