"""Gradio demo: run Tesseract OCR on an uploaded image and highlight a keyword."""

import html
import re
from typing import List, Optional, Union

import pytesseract
from PIL import Image
import gradio as gr


def tesseract_ocr(
    filepath: Union[str, Image.Image],
    languages: Optional[List[str]] = None,
) -> str:
    """Extract the text of an image with Tesseract.

    Args:
        filepath: Path of the image file, or an already loaded PIL image
            (the Gradio image input below is configured with ``type="pil"``).
        languages: Tesseract language codes to recognise. When empty or
            ``None`` no ``lang`` is passed and pytesseract uses its default.

    Returns:
        The text recognised in the image.
    """
    # Tesseract combines several languages with "+", e.g. "eng+fra".
    lang = "+".join(languages) if languages else None

    if isinstance(filepath, Image.Image):
        return pytesseract.image_to_string(image=filepath, lang=lang)

    # Open the file in a context manager so the handle is released after OCR.
    with Image.open(filepath) as image:
        return pytesseract.image_to_string(image=image, lang=lang)


# NOTE: title, description and article are not passed to gr.Interface below,
# which sets its own title and description.
title = "Tesseract OCR"
description = "Gradio demo for Tesseract. Tesseract is an open source text recognition (OCR) Engine."
article = """

Tesseract documentation | Github Repo

"""

# Languages available to the local Tesseract installation.
language_choices = pytesseract.get_languages()


def search_and_highlight(text, keyword):
    """Return *text* as HTML with every occurrence of *keyword* highlighted.

    Matching is case-insensitive and literal: the keyword is escaped, so regex
    metacharacters in it are matched as plain characters. All text is
    HTML-escaped and each match is wrapped in ``<mark>`` tags.

    Args:
        text: Plain text to search.
        keyword: Literal string to highlight. An empty keyword highlights
            nothing.

    Returns:
        An HTML fragment.
    """
    if not keyword:
        return html.escape(text)

    # Splitting on a capturing group keeps the matches: they land at the odd
    # indexes of the result, the text between them at the even indexes.
    pieces = re.split(f"({re.escape(keyword)})", text, flags=re.IGNORECASE)
    return "".join(
        f"<mark>{html.escape(piece)}</mark>" if index % 2 else html.escape(piece)
        for index, piece in enumerate(pieces)
    )


def ocr_and_search(image, keyword, language_choices=None):
    """Run OCR on *image* and optionally highlight *keyword* in the result.

    Args:
        image: Image to read (PIL image or file path), or ``None`` when
            nothing was uploaded.
        keyword: Optional keyword to highlight in the extracted text.
        language_choices: Optional list of Tesseract language codes. The
            interface below does not supply it, so it defaults to ``None``.

    Returns:
        An HTML fragment with the extracted text, or a prompt to upload an
        image when *image* is ``None``.
    """
    if image is None:
        return "Please upload an image."

    extracted_text = tesseract_ocr(image, language_choices)
    if keyword:
        return search_and_highlight(extracted_text, keyword)
    # The output component renders HTML, so plain OCR text must be escaped.
    return html.escape(extracted_text)


iface = gr.Interface(
    fn=ocr_and_search,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Enter keyword to search (optional)"),
    ],
    outputs=gr.HTML(label="Extracted and Highlighted Text"),
    title="OCR and Keyword Search",
    description="Upload an image to extract text using OCR and optionally search for keywords in the extracted text.",
)

if __name__ == "__main__":
    iface.launch()