Spaces:

Aumkeshchy2003
/

Gradio-OCR

Sleeping

App Files Files Community

Aumkeshchy2003 committed on Sep 30, 2024

Commit

76a8b7b

•

1 Parent(s): a73bb26

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -18

app.py CHANGED Viewed

@@ -1,48 +1,56 @@
 from typing import List
 import pytesseract
 from PIL import Image
 import re
 import gradio as gr
-def tesseract_ocr(filepath: str, languages: List[str]):
     image = Image.open(filepath)
-    extracted_text =  pytesseract.image_to_string(image=image, lang=', '.join(languages))
     return extracted_text
-def search_and_highlight(text, keyword):
-    highlighted_text = re.sub(f"({keyword})", r"<mark>\1</mark>", text, flags=re.IGNORECASE)
-    return highlighted_text
-def ocr_and_search(str, keyword):
-    if image is None:
         return "Please upload an image."
-    extracted_text = tesseract_ocr(str)
     if keyword:
         highlighted_text = search_and_highlight(extracted_text, keyword)
         return highlighted_text
     else:
         return extracted_text
 title = "Tesseract OCR"
 description = "Gradio demo for Tesseract."
 language_choices = pytesseract.get_languages()
 demo = gr.Interface(
-    fn=tesseract_ocr,
     inputs=[
-        gr.Image(type="filepath", label="Input"),
-        gr.Textbox(label="Keyword to Highlight", placeholder="Enter a keyword...")  # Keyword input
-        ],
-    outputs='text',
     title=title,
     description=description,
 )
 if __name__ == '__main__':
     demo.launch()
-    print("Finished running")

 from typing import List
 import pytesseract
 from PIL import Image
 import re
 import gradio as gr
def tesseract_ocr(filepath: str, languages: List[str]) -> str:
    """Extract text from the image at *filepath* using Tesseract OCR.

    Args:
        filepath: Path of the image file to read.
        languages: Tesseract language codes, e.g. ``["eng", "fra"]``.
            An empty (or ``None``) selection passes ``lang=None`` so that
            pytesseract falls back to its own default.

    Returns:
        The text recognised in the image.
    """
    # Tesseract's ``-l`` option takes multiple languages joined with "+"
    # ("eng+fra"); a ", "-separated value is not a valid language spec.
    lang = "+".join(languages) if languages else None
    # The context manager closes the underlying image file once OCR is done.
    with Image.open(filepath) as image:
        return pytesseract.image_to_string(image=image, lang=lang)
def search_and_highlight(text: str, keyword: str) -> str:
    """Wrap every occurrence of *keyword* in *text* in ``<mark>`` tags.

    Matching is case-insensitive and literal: the keyword is treated as
    plain text, never as a regular expression.

    Args:
        text: The text to search.
        keyword: The term to highlight. When empty, *text* is returned
            unchanged.

    Returns:
        *text* with each match surrounded by ``<mark>...</mark>``; the
        matched text keeps its original casing.
    """
    if not keyword:
        return text
    # The keyword is user input: escape it so characters such as "(", "+"
    # or "." match literally instead of acting as regex syntax (an
    # unbalanced "(" would otherwise raise re.error).
    pattern = re.compile(re.escape(keyword), flags=re.IGNORECASE)
    # A callable replacement re-inserts exactly what was matched.
    return pattern.sub(lambda match: f"<mark>{match.group(0)}</mark>", text)
def ocr_and_search(filepath: str, keyword: str, languages: List[str]) -> str:
    """Run OCR on an uploaded image and mark up keyword matches.

    Returns a prompt asking for an upload when *filepath* is ``None``.
    Otherwise returns the OCR text, passed through
    ``search_and_highlight`` when *keyword* is non-empty.
    """
    # Guard clause: nothing was uploaded, so there is nothing to read.
    if filepath is None:
        return "Please upload an image."

    ocr_text = tesseract_ocr(filepath, languages)
    return search_and_highlight(ocr_text, keyword) if keyword else ocr_text
# --- Gradio interface configuration ---
title = "Tesseract OCR"
description = "Gradio demo for Tesseract."
article = "<p>Upload an image and optionally highlight keywords.</p>"

# Available Tesseract languages, as reported by pytesseract; these
# populate the language checkbox group below.
language_choices = pytesseract.get_languages()

demo = gr.Interface(
    fn=ocr_and_search,
    inputs=[
        gr.Image(type="filepath", label="Upload Image for OCR"),
        gr.Textbox(label="Keyword to Highlight", placeholder="Enter a keyword..."),
        # Language selection, with 'eng' ticked by default.
        gr.CheckboxGroup(choices=language_choices, label="Select OCR Language(s)", value=["eng"]),
    ],
    # HTML output so the <mark> highlighting is rendered.
    outputs="html",
    title=title,
    description=description,
    article=article,
)

if __name__ == "__main__":
    demo.launch()
    print("Finished running")