"""Gradio demo: Tesseract OCR (English + Hindi) with keyword highlighting."""

import html
import re
from typing import List, Optional

import gradio as gr
import pytesseract
from PIL import Image

# Language string handed to Tesseract; "+" combines several languages.
DEFAULT_LANGUAGES = "eng+hin"


def tesseract_ocr(filepath: str, lang: str = DEFAULT_LANGUAGES) -> str:
    """Run Tesseract OCR on an image file and return the recognised text.

    Args:
        filepath: Path of the image to read.
        lang: Tesseract language string; defaults to English + Hindi.

    Returns:
        The text extracted by ``pytesseract.image_to_string``.
    """
    # The context manager closes the underlying file handle once OCR is done.
    with Image.open(filepath) as image:
        return pytesseract.image_to_string(image=image, lang=lang)


def search_and_highlight(text: str, keyword: str) -> str:
    """Return *text* as HTML with every occurrence of *keyword* highlighted.

    Matching is case-insensitive and literal: regex metacharacters in
    *keyword* have no special meaning. All text is HTML-escaped, so the
    result can be placed directly into an HTML output; matches are wrapped
    in ``<mark>`` tags.

    Args:
        text: Plain text to search (for example OCR output).
        keyword: Literal string to highlight. Empty, ``None`` or
            whitespace-only keywords highlight nothing.

    Returns:
        An HTML-safe string.
    """
    if not text:
        return ""
    if not keyword or not keyword.strip():
        return html.escape(text)

    # re.escape makes user input such as "c++" or "(" a literal match
    # instead of an invalid or overly broad regular expression.
    pattern = re.compile(re.escape(keyword), re.IGNORECASE)

    # Escape each segment separately (rather than escaping first and
    # substituting afterwards) so a keyword like "amp" can never match
    # inside an entity such as "&amp;" produced by the escaping itself.
    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()]))
        pieces.append(f"<mark>{html.escape(match.group(0))}</mark>")
        cursor = match.end()
    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces)


def ocr_and_search(filepath: Optional[str], keyword: str) -> str:
    """Gradio callback: OCR the uploaded image and highlight *keyword*.

    Args:
        filepath: Path of the uploaded image, or ``None`` when nothing
            has been uploaded.
        keyword: Optional keyword to highlight in the extracted text.

    Returns:
        An HTML string: either a prompt to upload an image, or the
        extracted text with keyword matches highlighted.
    """
    if filepath is None:
        return "Please upload an image."

    extracted_text = tesseract_ocr(filepath)
    body = search_and_highlight(extracted_text, keyword)
    # pre-wrap keeps the line breaks from the OCR text, which plain HTML
    # rendering would otherwise collapse into a single paragraph.
    return f'<div style="white-space: pre-wrap;">{body}</div>'


title = "Tesseract OCR (English + Hindi)"
description = "Gradio demo for Tesseract with multi-language support (English and Hindi)."

demo = gr.Interface(
    fn=ocr_and_search,
    inputs=[
        gr.Image(type="filepath", label="Upload Image for OCR"),
        gr.Textbox(label="Keyword to Highlight", placeholder="Enter a keyword..."),
    ],
    outputs='html',
    title=title,
    description=description,
)

if __name__ == '__main__':
    demo.launch()
    print("Finished running")