Spaces:
Runtime error
Runtime error
File size: 965 Bytes
fea808c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
import pytesseract as tsr
from PIL import Image
import sys, os
import gradio as gr
# Use the Homebrew Tesseract build only when it is actually installed at this
# location; otherwise keep pytesseract's default command so the executable is
# looked up on PATH (e.g. on a Linux host).
_TESSERACT_BINARY = r'/opt/homebrew/Cellar/tesseract/5.2.0/bin/tesseract'
if os.path.isfile(_TESSERACT_BINARY):
    tsr.pytesseract.tesseract_cmd = _TESSERACT_BINARY

# A Hub repository id has the form "namespace/name". With a leading slash the
# string is not a valid repo id and is only usable as an absolute local path.
_MODEL_NAME = "facebook/m2m100_418M"
model = M2M100ForConditionalGeneration.from_pretrained(_MODEL_NAME)
tokenizer = M2M100Tokenizer.from_pretrained(_MODEL_NAME)
def extractAndTranslate(image):
    """Run OCR on an image and translate the recognised text to Hindi.

    The text is extracted with Tesseract (English + Hindi language data) and
    translated with the module-level M2M100 model, using "en" as the source
    language and "hi" as the target language.

    Args:
        image: The image to read, in any form accepted by
            ``pytesseract.image_to_string``. ``None`` (no image supplied)
            is accepted.

    Returns:
        The translated text, or an empty string when no image was given or
        no text was recognised.
    """
    # Nothing to OCR when the caller submits without an image.
    if image is None:
        return ""

    # Extract Text
    extractedText = tsr.image_to_string(image, lang='eng+hin')
    # split() with no separator collapses every whitespace run (newlines,
    # repeated spaces, and any trailing page-separator character the OCR
    # engine may append) so the model receives one clean line.
    extractedTextFormatted = ' '.join(extractedText.split())
    if not extractedTextFormatted:
        # Skip the model call entirely when there is nothing to translate.
        return ""

    # Translate
    tokenizer.src_lang = "en"
    encodedText = tokenizer(extractedTextFormatted, return_tensors="pt")
    # Forcing the first generated token to the "hi" language id selects the
    # target language for generation.
    generatedTokens = model.generate(
        **encodedText,
        forced_bos_token_id=tokenizer.get_lang_id("hi"),
    )
    return tokenizer.batch_decode(generatedTokens, skip_special_tokens=True)[0]
# Build the Gradio UI: a single image input passed to extractAndTranslate,
# with the returned string shown as text output, then start serving it.
demoApp = gr.Interface(extractAndTranslate, "image", "text")
demoApp.launch()