from typing import List import pytesseract from PIL import Image import re import gradio as gr def tesseract_ocr(filepath: str, languages: List[str]): image = Image.open(filepath) extracted_text = pytesseract.image_to_string(image=image, lang=', '.join(languages)) return extracted_text def search_and_highlight(text, keyword): highlighted_text = re.sub(f"({keyword})", r"\1", text, flags=re.IGNORECASE) return highlighted_text def ocr_and_search(str, keyword): if image is None: return "Please upload an image." extracted_text = tesseract_ocr(str) if keyword: highlighted_text = search_and_highlight(extracted_text, keyword) return highlighted_text else: return extracted_text title = "Tesseract OCR" description = "Gradio demo for Tesseract." language_choices = pytesseract.get_languages() demo = gr.Interface( fn=tesseract_ocr, inputs=[ gr.Image(type="filepath", label="Input"), gr.Textbox(label="Keyword to Highlight", placeholder="Enter a keyword...") # Keyword input ], outputs='text', title=title, description=description, ) if __name__ == '__main__': demo.launch() print("Finished running")