Aumkeshchy2003 committed on
Commit
04c7dbc
·
verified ·
1 Parent(s): d7381ee

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PIL import Image
3
+ import pytesseract
4
+ import re
5
+
6
def perform_ocr(image):
    """Return the text Tesseract recognises in *image*.

    The image is passed straight to ``pytesseract.image_to_string`` with the
    language set to ``'hin+eng'`` so that both the Hindi and English Tesseract
    models are used for recognition.

    Args:
        image: The image to read; the Gradio input in this file is configured
            with ``type="pil"``, so this is expected to be a PIL image.

    Returns:
        The recognised text as returned by pytesseract.
    """
    return pytesseract.image_to_string(image, lang='hin+eng')
9
+
10
def search_and_highlight(text, keyword):
    """Wrap every case-insensitive occurrence of *keyword* in ``<mark>`` tags.

    The keyword is treated as literal text, not as a regular expression, so
    user input such as ``"c++"`` or ``"("`` is matched verbatim instead of
    raising ``re.error`` or matching something unintended.

    Args:
        text: The text to search.
        keyword: The literal string to highlight. When empty (or ``None``)
            the text is returned unchanged.

    Returns:
        *text* with each match surrounded by ``<mark>...</mark>``; the
        original casing of each match is preserved.

    Note:
        *text* itself is not HTML-escaped here; any markup already present in
        it is passed through as-is.
    """
    # An empty pattern matches at every position and would litter the output
    # with empty <mark></mark> pairs, so there is nothing useful to highlight.
    if not keyword:
        return text

    # re.escape neutralises regex metacharacters in the user-supplied keyword.
    pattern = re.compile(re.escape(keyword), flags=re.IGNORECASE)
    # \g<0> re-inserts the matched text exactly as it appeared in the input.
    return pattern.sub(r"<mark>\g<0></mark>", text)
13
+
14
def ocr_and_search(image, keyword):
    """Run OCR on *image* and optionally highlight *keyword* in the result.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.
        keyword: Text to highlight in the OCR output; a falsy value (empty
            string or ``None``) skips highlighting.

    Returns:
        A prompt asking for an upload when *image* is ``None``; otherwise the
        OCR text, passed through ``search_and_highlight`` when a keyword was
        given.
    """
    # Guard clause: without an image there is nothing to recognise.
    if image is None:
        return "Please upload an image."

    ocr_text = perform_ocr(image)
    # Only run the highlighter when the user actually supplied a keyword.
    return search_and_highlight(ocr_text, keyword) if keyword else ocr_text
25
+
26
# Input components: the image to OCR (delivered to the callback as a PIL
# image) and an optional keyword to highlight in the recognised text.
_image_input = gr.Image(type="pil", label="Upload Image")
_keyword_input = gr.Textbox(label="Enter keyword to search (optional)")

# The result is rendered as HTML so that <mark> tags show up as highlights.
_html_output = gr.HTML(label="Extracted and Highlighted Text")

# Wire the OCR + search callback to the components above.
iface = gr.Interface(
    fn=ocr_and_search,
    inputs=[_image_input, _keyword_input],
    outputs=_html_output,
    title="OCR and Keyword Search",
    description="Upload an image for OCR processing and search for keywords in the extracted text.",
)

# Start the Gradio app.
iface.launch()