"""Gradio demo: run Tesseract OCR on an uploaded image and highlight a keyword."""

import html
from typing import List

import pytesseract
from PIL import Image
import gradio as gr


def _highlight_keyword(text: str, keyword: str) -> str:
    """Return *text* as HTML-safe markup with each *keyword* match in ``<mark>``.

    Matching is an exact, case-sensitive substring match, scanned left to
    right without overlaps (the same semantics as ``str.replace``).

    Args:
        text: Raw (unescaped) text to render.
        keyword: Substring to highlight; empty or ``None`` disables
            highlighting.

    Returns:
        An HTML fragment in which all of the original text is escaped.
    """
    if not keyword:
        return html.escape(text)

    # Split on the raw keyword *before* escaping so that a keyword such as
    # "amp" can never match inside an entity like "&amp;" produced by escaping.
    marked = f"<mark>{html.escape(keyword)}</mark>"
    return marked.join(html.escape(piece) for piece in text.split(keyword))


def tesseract_ocr(filepath: str, keyword: str) -> str:
    """Run OCR on the image at *filepath* and highlight *keyword* in the result.

    Args:
        filepath: Path of the uploaded image; falsy when nothing was uploaded.
        keyword: Text to highlight in the recognised output; may be empty.

    Returns:
        An HTML fragment for the ``html`` output component: the escaped OCR
        text with keyword matches wrapped in ``<mark>``, or a short notice
        when no image was supplied.
    """
    if not filepath:
        # Without this guard Image.open() fails on a missing upload.
        return "<em>No image provided.</em>"

    # The context manager closes the underlying file handle once OCR is done.
    with Image.open(filepath) as image:
        extracted_text = pytesseract.image_to_string(image=image)

    return _highlight_keyword(extracted_text, keyword)


title = "Tesseract OCR"
description = (
    "Gradio demo for Tesseract. "
    "Tesseract is an open-source text recognition (OCR) Engine."
)
# NOTE(review): this text reads like two link labels whose HTML markup was
# lost; the wording is kept as-is -- confirm the intended anchors/URLs.
article = "\n\nTesseract documentation | Github Repo\n\n"

demo = gr.Interface(
    fn=tesseract_ocr,
    inputs=[
        gr.Image(type="filepath", label="Upload Image for OCR"),
        gr.Textbox(
            label="Keyword to Highlight",
            placeholder="Enter a keyword...",
        ),  # Keyword input
    ],
    outputs='html',
    title=title,
    description=description,
    article=article,
)

if __name__ == '__main__':
    demo.launch()
    print("Finished running")