Akhil Ahuja committed on
Commit
fea808c
1 Parent(s): 283e67a

Adding app.py and requirements.txt

Browse files
Files changed (2) hide show
  1. app.py +28 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
2
+
3
+ import pytesseract as tsr
4
+ from PIL import Image
5
+
6
+ import sys, os
7
+ import gradio as gr
8
+
9
# Locate the Tesseract executable.
# Priority: explicit TESSERACT_CMD environment variable, then the local
# Homebrew install if that exact file exists, otherwise leave pytesseract's
# own default in place (it looks up `tesseract` on PATH).
_HOMEBREW_TESSERACT = r'/opt/homebrew/Cellar/tesseract/5.2.0/bin/tesseract'
_tesseract_cmd = os.environ.get("TESSERACT_CMD")
if not _tesseract_cmd and os.path.isfile(_HOMEBREW_TESSERACT):
    _tesseract_cmd = _HOMEBREW_TESSERACT
if _tesseract_cmd:
    tsr.pytesseract.tesseract_cmd = _tesseract_cmd
10
+
11
# Hugging Face Hub repository id of the M2M100 checkpoint.  It must not start
# with "/": a leading slash makes the string look like an absolute filesystem
# path instead of a "<namespace>/<name>" repo id.
_MODEL_ID = "facebook/m2m100_418M"

# Loaded once at import time and shared by every request.
model = M2M100ForConditionalGeneration.from_pretrained(_MODEL_ID)
tokenizer = M2M100Tokenizer.from_pretrained(_MODEL_ID)
13
+
14
def extractAndTranslate(image):
    """Run OCR on an image and translate the recognised text to Hindi.

    Args:
        image: Image to read, handed unchanged to
            ``pytesseract.image_to_string``.  ``None`` is treated as
            "nothing submitted".

    Returns:
        The first decoded sequence produced by the translation model, or an
        empty string when no image was given or OCR produced no text.
    """
    if image is None:
        return ""

    # Extract text; Tesseract is asked to recognise English and Hindi.
    extractedText = tsr.image_to_string(image, lang='eng+hin')
    # split() with no argument splits on any run of whitespace, so newlines,
    # repeated spaces and stray control whitespace (e.g. a trailing form
    # feed) all collapse to single spaces instead of reaching the model.
    extractedTextFormatted = ' '.join(extractedText.split())
    if not extractedTextFormatted:
        # Nothing was recognised: skip generation rather than "translating"
        # an empty prompt.
        return ""

    # Translate: source language English, target language forced to Hindi
    # through the first generated token.
    tokenizer.src_lang = "en"
    encodedText = tokenizer(extractedTextFormatted, return_tensors="pt")
    generatedTokens = model.generate(
        **encodedText,
        forced_bos_token_id=tokenizer.get_lang_id("hi"),
    )

    return tokenizer.batch_decode(generatedTokens, skip_special_tokens=True)[0]
25
+
26
# Gradio UI: a single image input fed to extractAndTranslate, with the
# returned string shown in a text output.
demoApp = gr.Interface(
    fn=extractAndTranslate,
    inputs="image",
    outputs="text",
)

# Start serving the demo.
demoApp.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio==3.0.26
2
+ transformers==4.20.1
3
+ torch==1.12.0
4
+ pytesseract==0.3.9
5
+ pillow==9.2.0