Spaces:

Aumkeshchy2003
/

Gradio-OCR

Running

App Files Files Community

Aumkeshchy2003 committed on Sep 30

Commit

6916c84

•

1 Parent(s): 76a8b7b

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -16

app.py CHANGED Viewed

@@ -4,10 +4,12 @@ from PIL import Image
 import re
 import gradio as gr
-def tesseract_ocr(filepath: str, languages: List[str]) -> str:
-    """Extract text from the image using Tesseract OCR."""
     image = Image.open(filepath)
-    extracted_text = pytesseract.image_to_string(image=image, lang=', '.join(languages))
     return extracted_text
 def search_and_highlight(text: str, keyword: str) -> str:
@@ -17,13 +19,15 @@ def search_and_highlight(text: str, keyword: str) -> str:
         return highlighted_text
     return text
-def ocr_and_search(filepath: str, keyword: str, languages: List[str]) -> str:
     """Perform OCR on the image and highlight the specified keyword."""
     if filepath is None:
         return "Please upload an image."
-    extracted_text = tesseract_ocr(filepath, languages)
     if keyword:
         highlighted_text = search_and_highlight(extracted_text, keyword)
         return highlighted_text
@@ -31,26 +35,20 @@ def ocr_and_search(filepath: str, keyword: str, languages: List[str]) -> str:
         return extracted_text
 # Gradio Interface
-title = "Tesseract OCR"
-description = "Gradio demo for Tesseract."
-article = "<p>Upload an image and optionally highlight keywords.</p>"
-# Get available languages for Tesseract
-language_choices = pytesseract.get_languages()
 demo = gr.Interface(
     fn=ocr_and_search,
     inputs=[
         gr.Image(type="filepath", label="Upload Image for OCR"),
-        gr.Textbox(label="Keyword to Highlight", placeholder="Enter a keyword..."),
-        gr.CheckboxGroup(choices=language_choices, label="Select OCR Language(s)", value=['eng'])  # Added language selection
     ],
     outputs='html',  # Changed to 'html' to display highlighted text
     title=title,
-    description=description,
-    article=article
 )
 if __name__ == '__main__':
     demo.launch()
-    print("Finished running")

 import re
 import gradio as gr
+def tesseract_ocr(filepath: str) -> str:
+    """Extract text from the image using Tesseract OCR with both English and Hindi."""
     image = Image.open(filepath)
+    # Set languages to English and Hindi by default
+    combined_languages = 'eng+hin'
+    extracted_text = pytesseract.image_to_string(image=image, lang=combined_languages)
     return extracted_text
 def search_and_highlight(text: str, keyword: str) -> str:
         return highlighted_text
     return text
+def ocr_and_search(filepath: str, keyword: str) -> str:
     """Perform OCR on the image and highlight the specified keyword."""
     if filepath is None:
         return "Please upload an image."
+    # Perform OCR (with default English and Hindi languages)
+    extracted_text = tesseract_ocr(filepath)
+    # Highlight the keyword if provided
     if keyword:
         highlighted_text = search_and_highlight(extracted_text, keyword)
         return highlighted_text
         return extracted_text
 # Gradio Interface
+title = "Tesseract OCR (English + Hindi)"
+description = "Gradio demo for Tesseract with multi-language support (English and Hindi)."
 demo = gr.Interface(
     fn=ocr_and_search,
     inputs=[
         gr.Image(type="filepath", label="Upload Image for OCR"),
+        gr.Textbox(label="Keyword to Highlight", placeholder="Enter a keyword...")
     ],
     outputs='html',  # Changed to 'html' to display highlighted text
     title=title,
+    description=description
 )
 if __name__ == '__main__':
     demo.launch()
+    print("Finished running")