Gradio-OCR / app.py
Aumkeshchy2003's picture
Update app.py
f68a073 verified
raw
history blame
1.36 kB
from typing import List
import pytesseract
from PIL import Image
import re
import gradio as gr
def tesseract_ocr(filepath: str) -> str:
    """Run Tesseract OCR on the image stored at *filepath*.

    Recognition uses the combined ``eng+hin`` language setting, i.e. the
    English and Hindi Tesseract models together.

    Args:
        filepath: Path of the image file to open.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the image.
    """
    combined_languages = 'eng+hin'
    # Open through a context manager so the underlying file handle is closed
    # as soon as OCR is done, rather than lingering until garbage collection.
    with Image.open(filepath) as image:
        return pytesseract.image_to_string(image=image, lang=combined_languages)
def search_and_highlight(text: str, keyword: str) -> str:
    """Wrap every case-insensitive occurrence of *keyword* in ``<mark>`` tags.

    The keyword is matched literally: regex metacharacters such as ``+``,
    ``(`` or ``.`` are escaped first, so a keyword like ``c++`` or ``(usd``
    is searched for as-is instead of raising ``re.error`` or matching
    unintended text.

    Args:
        text: The text to search.
        keyword: The literal string to highlight. When empty (or otherwise
            falsy) *text* is returned unchanged.

    Returns:
        *text* with each match surrounded by ``<mark>...</mark>``; the
        matched characters keep their original casing.
    """
    if not keyword:
        return text

    # NOTE(review): *text* itself is not HTML-escaped here; any markup it
    # contains is passed through unchanged, exactly as before.
    literal_pattern = f"({re.escape(keyword)})"
    return re.sub(literal_pattern, r"<mark>\1</mark>", text, flags=re.IGNORECASE)
def ocr_and_search(filepath: str, keyword: str) -> str:
    """OCR the uploaded image and optionally highlight a keyword in the result.

    Args:
        filepath: Path of the uploaded image, or ``None`` when nothing was
            uploaded.
        keyword: Text to highlight in the OCR output; a falsy value skips
            highlighting.

    Returns:
        A prompt asking for an upload when *filepath* is ``None``; otherwise
        the OCR text, passed through ``search_and_highlight`` when a keyword
        was given.
    """
    # Guard: nothing to process without an image.
    if filepath is None:
        return "Please upload an image."

    ocr_text = tesseract_ocr(filepath)
    return search_and_highlight(ocr_text, keyword) if keyword else ocr_text
# ---------------------------------------------------------------------------
# Gradio UI wiring
# ---------------------------------------------------------------------------

# Heading and blurb passed to the Gradio interface.
title = "Tesseract OCR (English + Hindi)"
description = "Gradio demo for Tesseract with multi-language support (English and Hindi)."

# Input widgets, in the same order as ocr_and_search's parameters
# (filepath, keyword).
_image_input = gr.Image(type="filepath", label="Upload Image for OCR")
_keyword_input = gr.Textbox(
    label="Keyword to Highlight",
    placeholder="Enter a keyword...",
)

# The output component is "html" because highlighted results contain
# <mark> tags.
demo = gr.Interface(
    fn=ocr_and_search,
    inputs=[_image_input, _keyword_input],
    outputs="html",
    title=title,
    description=description,
)

if __name__ == "__main__":
    # Start the app only when executed as a script, not on import.
    demo.launch()
    print("Finished running")