Spaces:
Sleeping
Sleeping
File size: 1,364 Bytes
d4640a8 794e69a a73bb26 794e69a 6916c84 fb7988f 6916c84 a73bb26 76a8b7b a73bb26 6916c84 76a8b7b a73bb26 6916c84 76a8b7b a73bb26 e2eafa6 6916c84 1c9cf55 814690d 76a8b7b d66c9c9 76a8b7b 6916c84 76a8b7b 03f0455 814690d 6916c84 04c7dbc 3780618 814690d 6916c84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import html
import re
from typing import List

import gradio as gr
import pytesseract
from PIL import Image
def tesseract_ocr(filepath: str, lang: str = "eng+hin") -> str:
    """Run Tesseract OCR on the image at *filepath* and return the text.

    Args:
        filepath: Path of the image file to open.
        lang: Language specification passed to Tesseract; several languages
            are combined with ``+``. Defaults to English plus Hindi.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    # Open through a context manager so the image's file handle is released
    # as soon as OCR is done instead of lingering until garbage collection.
    with Image.open(filepath) as image:
        return pytesseract.image_to_string(image=image, lang=lang)
def search_and_highlight(text: str, keyword: str) -> str:
    """Return *text* as HTML with each occurrence of *keyword* in ``<mark>``.

    The keyword is matched literally and case-insensitively. The text itself
    is HTML-escaped so that characters such as ``<`` or ``&`` coming from the
    input are displayed verbatim rather than interpreted as markup.

    Args:
        text: Plain text to search.
        keyword: Literal string to highlight; a falsy value disables
            highlighting.

    Returns:
        HTML-escaped text, with matches wrapped in ``<mark>...</mark>``.
    """
    if not keyword:
        return html.escape(text)

    # re.escape keeps regex metacharacters in the keyword ("+", "(", ".")
    # from being interpreted as a pattern or raising re.error.
    pattern = f"({re.escape(keyword)})"

    # Splitting on a capturing group keeps the matches in the result: even
    # indices are the text between matches, odd indices are the matches.
    # Escaping each piece separately (instead of escaping first and searching
    # afterwards) prevents the keyword from matching inside an HTML entity.
    pieces = re.split(pattern, text, flags=re.IGNORECASE)
    rendered = []
    for index, piece in enumerate(pieces):
        safe = html.escape(piece)
        rendered.append(f"<mark>{safe}</mark>" if index % 2 else safe)
    return "".join(rendered)
def ocr_and_search(filepath: str, keyword: str) -> str:
    """OCR the uploaded image and highlight *keyword* in the result.

    Args:
        filepath: Path of the uploaded image, or ``None`` when nothing has
            been uploaded.
        keyword: Text to highlight; may be empty.

    Returns:
        A prompt asking for an upload when *filepath* is ``None``; otherwise
        the recognised text, with keyword matches highlighted when a keyword
        was given.
    """
    if filepath is None:
        return "Please upload an image."

    ocr_text = tesseract_ocr(filepath)
    # search_and_highlight already handles an empty keyword itself (nothing
    # is highlighted), so it is called unconditionally.
    return search_and_highlight(ocr_text, keyword)
# Heading and blurb passed to the Gradio interface.
title = "Tesseract OCR (English + Hindi)"
description = "Gradio demo for Tesseract with multi-language support (English and Hindi)."

# Input components, in the order ocr_and_search receives them:
# the uploaded image (handed over as a file path) and the keyword box.
_image_input = gr.Image(type="filepath", label="Upload Image for OCR")
_keyword_input = gr.Textbox(
    label="Keyword to Highlight",
    placeholder="Enter a keyword...",
)

# The function's return value goes to an "html" output component.
demo = gr.Interface(
    fn=ocr_and_search,
    inputs=[_image_input, _keyword_input],
    outputs="html",
    title=title,
    description=description,
)
def main() -> None:
    """Launch the Gradio demo, then print a message once ``launch`` returns."""
    demo.launch()
    print("Finished running")


# Only start the app when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()