Spaces:
Sleeping
Sleeping
File size: 1,818 Bytes
d4640a8 794e69a a73bb26 794e69a 6916c84 fb7988f 6916c84 a73bb26 76a8b7b a73bb26 6916c84 76a8b7b a73bb26 76a8b7b 6916c84 76a8b7b 6916c84 a73bb26 e2eafa6 76a8b7b 6916c84 1c9cf55 814690d 76a8b7b d66c9c9 76a8b7b 6916c84 76a8b7b 814690d 6916c84 04c7dbc 814690d 6916c84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
from typing import List
import pytesseract
from PIL import Image
import re
import gradio as gr
def tesseract_ocr(filepath: str) -> str:
    """Extract text from an image file using Tesseract OCR (English + Hindi).

    Args:
        filepath: Path of the image file to read.

    Returns:
        The text returned by ``pytesseract.image_to_string`` for the image.
    """
    # Languages are fixed to English and Hindi; Tesseract takes them joined by '+'.
    combined_languages = 'eng+hin'
    # Open the image in a context manager so the underlying file handle is
    # released as soon as OCR finishes, instead of lingering until GC.
    with Image.open(filepath) as image:
        return pytesseract.image_to_string(image=image, lang=combined_languages)
def search_and_highlight(text: str, keyword: str) -> str:
    """Wrap every occurrence of ``keyword`` in ``text`` with ``<mark>`` tags.

    Matching is case-insensitive and treats the keyword as literal text, so
    characters such as ``(``, ``+`` or ``.`` are matched verbatim rather than
    being interpreted as regular-expression syntax.

    Args:
        text: The text to search.
        keyword: The literal term to highlight; a falsy value disables
            highlighting.

    Returns:
        ``text`` with each match wrapped as ``<mark>match</mark>`` (the
        original casing of the match is kept), or ``text`` unchanged when no
        keyword is given.
    """
    if not keyword:
        return text
    # Escape the keyword: it is user input, not a pattern. Unescaped, a keyword
    # like "(" raises re.error and "a+b" would match "aab" instead of "a+b".
    pattern = f"({re.escape(keyword)})"
    return re.sub(pattern, r"<mark>\1</mark>", text, flags=re.IGNORECASE)
def ocr_and_search(filepath: str, keyword: str) -> str:
    """Run OCR on an image and mark up occurrences of a keyword.

    Args:
        filepath: Path of the image to read, or ``None`` when no image was
            supplied.
        keyword: Term to highlight; a falsy value skips highlighting.

    Returns:
        The string ``"Please upload an image."`` when ``filepath`` is ``None``;
        otherwise the OCR text, passed through ``search_and_highlight`` when a
        keyword was given.
    """
    # Guard: without an image there is nothing to OCR.
    if filepath is None:
        return "Please upload an image."

    # OCR always runs with the default English + Hindi languages.
    ocr_text = tesseract_ocr(filepath)

    # Highlight only when a keyword was actually provided.
    return search_and_highlight(ocr_text, keyword) if keyword else ocr_text
# Gradio Interface
title = "Tesseract OCR (English + Hindi)"
description = "Gradio demo for Tesseract with multi-language support (English and Hindi)."

# Two inputs (image path + keyword) feed ocr_and_search; the result is shown
# as HTML so the <mark> tags around matches are displayed as highlights.
demo = gr.Interface(
    fn=ocr_and_search,
    inputs=[
        gr.Image(type="filepath", label="Upload Image for OCR"),
        gr.Textbox(label="Keyword to Highlight", placeholder="Enter a keyword..."),
    ],
    outputs='html',  # Changed to 'html' to display highlighted text
    title=title,
    description=description,
)

if __name__ == '__main__':
    demo.launch()
    # Reached only after launch() returns.
    print("Finished running")