Aumkeshchy2003 committed
Commit • 03f0455
Parent(s): 6916c84
Update app.py
app.py
CHANGED
@@ -5,36 +5,28 @@ import re
 import gradio as gr
 
 def tesseract_ocr(filepath: str) -> str:
-    """Extract text from the image using Tesseract OCR with both English and Hindi."""
     image = Image.open(filepath)
-    # Set languages to English and Hindi by default
     combined_languages = 'eng+hin'
     extracted_text = pytesseract.image_to_string(image=image, lang=combined_languages)
     return extracted_text
 
 def search_and_highlight(text: str, keyword: str) -> str:
-    """Highlight occurrences of the keyword in the extracted text."""
     if keyword:
         highlighted_text = re.sub(f"({keyword})", r"<mark>\1</mark>", text, flags=re.IGNORECASE)
         return highlighted_text
     return text
 
 def ocr_and_search(filepath: str, keyword: str) -> str:
-    """Perform OCR on the image and highlight the specified keyword."""
     if filepath is None:
         return "Please upload an image."
-
-    # Perform OCR (with default English and Hindi languages)
     extracted_text = tesseract_ocr(filepath)
 
-    # Highlight the keyword if provided
     if keyword:
         highlighted_text = search_and_highlight(extracted_text, keyword)
         return highlighted_text
     else:
         return extracted_text
 
-# Gradio Interface
 title = "Tesseract OCR (English + Hindi)"
 description = "Gradio demo for Tesseract with multi-language support (English and Hindi)."
 
@@ -44,7 +36,7 @@ demo = gr.Interface(
         gr.Image(type="filepath", label="Upload Image for OCR"),
         gr.Textbox(label="Keyword to Highlight", placeholder="Enter a keyword...")
     ],
-    outputs='html',
+    outputs='html',
     title=title,
     description=description
 )
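A note on how the highlighted output is displayed: because the interface uses outputs='html', Gradio renders the returned string as HTML, so the <mark> tags produced by search_and_highlight appear as actual highlighting in the browser. A minimal, self-contained sketch of that highlighting behavior is below; the sample sentence and keyword are made up for illustration, and no Tesseract install is needed for this part.

import re

def search_and_highlight(text: str, keyword: str) -> str:
    # Wrap every case-insensitive match of the keyword in <mark> tags,
    # preserving the original casing via the \1 backreference.
    if keyword:
        return re.sub(f"({keyword})", r"<mark>\1</mark>", text, flags=re.IGNORECASE)
    return text

print(search_and_highlight("Invoice No. 42, total due Friday", "invoice"))
# prints: <mark>Invoice</mark> No. 42, total due Friday

Running the OCR path itself additionally assumes pytesseract plus a Tesseract installation with both the eng and hin traineddata files, since tesseract_ocr passes lang='eng+hin'.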