File size: 1,818 Bytes
d4640a8
794e69a
 
a73bb26
794e69a
 
6916c84
 
fb7988f
6916c84
 
 
a73bb26
 
76a8b7b
 
 
 
 
 
a73bb26
6916c84
76a8b7b
 
a73bb26
76a8b7b
6916c84
 
76a8b7b
6916c84
a73bb26
 
 
 
 
e2eafa6
76a8b7b
6916c84
 
1c9cf55
814690d
76a8b7b
d66c9c9
76a8b7b
6916c84
76a8b7b
 
814690d
6916c84
04c7dbc
 
814690d
 
6916c84
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from typing import List
import pytesseract
from PIL import Image
import re
import gradio as gr

def tesseract_ocr(filepath: str) -> str:
    """Extract text from an image using Tesseract OCR with English and Hindi.

    Args:
        filepath: Path of the image file to run OCR on.

    Returns:
        The text produced by ``pytesseract.image_to_string`` for the image.
    """
    # Both languages are requested in a single "+"-joined language spec.
    combined_languages = 'eng+hin'
    # Open the image in a context manager so the file handle is released as
    # soon as OCR finishes, rather than staying open until garbage collection.
    with Image.open(filepath) as image:
        return pytesseract.image_to_string(image=image, lang=combined_languages)

def search_and_highlight(text: str, keyword: str) -> str:
    """Wrap every case-insensitive occurrence of ``keyword`` in ``<mark>`` tags.

    The keyword is matched literally: regex metacharacters in it (for example
    ``+``, ``(`` or ``.``) have no special meaning.

    Args:
        text: The text to search.
        keyword: The literal string to highlight. If empty, ``text`` is
            returned unchanged.

    Returns:
        ``text`` with each match wrapped as ``<mark>match</mark>``, keeping the
        original casing of the matched text.
    """
    if not keyword:
        return text
    # Escape the keyword so user input such as "c++" or "(" is matched
    # literally instead of being parsed as a (possibly invalid) regex.
    pattern = re.escape(keyword)
    # \g<0> re-inserts the whole match, preserving its original casing.
    return re.sub(pattern, r"<mark>\g<0></mark>", text, flags=re.IGNORECASE)

def ocr_and_search(filepath: str, keyword: str) -> str:
    """Run OCR on an image and mark the given keyword in the recognised text.

    Args:
        filepath: Path of the image to process; when it is ``None`` a prompt
            message is returned instead of OCR output.
        keyword: Text to highlight; when empty the OCR text is returned as is.

    Returns:
        The prompt message, the plain OCR text, or the OCR text with the
        keyword highlighted.
    """
    # Guard clause: no image path was supplied.
    if filepath is None:
        return "Please upload an image."

    # OCR always runs with the default English + Hindi languages.
    ocr_output = tesseract_ocr(filepath)

    # Only go through the highlighter when there is a keyword to look for.
    return search_and_highlight(ocr_output, keyword) if keyword else ocr_output

# Gradio UI wiring
title = "Tesseract OCR (English + Hindi)"
description = "Gradio demo for Tesseract with multi-language support (English and Hindi)."

# Input components: the image is passed to the callback as a file path,
# next to a free-text box for the keyword.
_image_input = gr.Image(type="filepath", label="Upload Image for OCR")
_keyword_input = gr.Textbox(
    label="Keyword to Highlight",
    placeholder="Enter a keyword...",
)

# The output is 'html' so the <mark> tags in the result display as highlights.
demo = gr.Interface(
    fn=ocr_and_search,
    inputs=[_image_input, _keyword_input],
    outputs='html',
    title=title,
    description=description,
)

if __name__ == '__main__':
    demo.launch()
    print("Finished running")