"""Gradio demo: run Tesseract OCR on an uploaded image and highlight a keyword."""

import html
import re
from typing import List

import gradio as gr
import pytesseract
from PIL import Image


def _highlight_keyword(text: str, keyword: str) -> str:
    """Return *text* as HTML-safe markup with every *keyword* hit in ``<mark>``.

    Matching is literal (the keyword is regex-escaped) and case-insensitive.
    Escaping is applied per segment, after matching against the raw text, so
    a keyword can never match inside an HTML entity such as ``&amp;``.
    """
    if not keyword:
        return html.escape(text)

    pattern = re.compile(re.escape(keyword), flags=re.IGNORECASE)
    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()]))
        pieces.append(f"<mark>{html.escape(match.group(0))}</mark>")
        cursor = match.end()
    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces)


def tesseract_ocr_with_search(filepath: str, languages: List[str], keyword: str) -> str:
    """Run OCR on an image and return the text as HTML with *keyword* highlighted.

    Args:
        filepath: Path to the image file to read.
        languages: Tesseract language codes to use (e.g. ``["eng", "deu"]``).
            When empty, no language is passed and Tesseract uses its default.
        keyword: Text to highlight (case-insensitive, matched literally).
            When empty, the text is returned without highlighting.

    Returns:
        An HTML fragment containing the escaped OCR text, with line breaks
        preserved and keyword occurrences wrapped in ``<mark>`` tags.

    Raises:
        gr.Error: If no image was provided.
    """
    if not filepath:
        raise gr.Error("Please upload an image first.")

    # Tesseract expects multiple languages joined with '+', e.g. "eng+deu".
    lang = "+".join(languages) if languages else None

    # The context manager closes the underlying file handle after OCR.
    with Image.open(filepath) as image:
        extracted_text = pytesseract.image_to_string(image=image, lang=lang)

    highlighted_text = _highlight_keyword(extracted_text, keyword)

    # The output component renders HTML, which would otherwise collapse the
    # newlines and runs of spaces produced by OCR.
    return f'<div style="white-space: pre-wrap;">{highlighted_text}</div>'


# Fetch available languages for Tesseract
language_choices = pytesseract.get_languages()

# NOTE(review): the original script referenced `title`, `description`,
# `article` and `examples` without defining them, which raised NameError at
# import time. The values below are placeholders -- replace with the
# intended copy and example inputs.
title = "Tesseract OCR with Keyword Search"
description = (
    "Upload an image, choose the OCR language(s), and optionally enter a "
    "keyword to highlight in the extracted text."
)
article = None
examples = None

# Define Gradio Interface
demo = gr.Interface(
    fn=tesseract_ocr_with_search,
    inputs=[
        # Input for image upload
        gr.Image(type="filepath", label="Upload Image"),
        # Language selection
        gr.CheckboxGroup(language_choices, type="value", value=['eng'], label='Language'),
        # Keyword input
        gr.Textbox(placeholder="Enter keyword to search", label="Keyword Search"),
    ],
    # Use HTML output to allow text highlighting
    outputs=gr.HTML(),
    title=title,
    description=description,
    article=article,
    examples=examples,
)

if __name__ == '__main__':
    demo.launch()
    print("Finished running")