Aumkeshchy2003 committed on
Commit
a73bb26
1 Parent(s): 1d7f623

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -2
app.py CHANGED
@@ -2,12 +2,30 @@ from typing import List
2
 
3
  import pytesseract
4
  from PIL import Image
 
5
 
6
  import gradio as gr
7
 
8
  # Pre-change side of this diff hunk: the "-" line below is the statement this
  # commit removes. As written, the function opens the image at `filepath` and
  # returns the text pytesseract extracts from it, passing the selected language
  # codes joined with ", " as the `lang` argument.
  # NOTE(review): Tesseract appears to expect "+"-separated language codes
  # (e.g. "eng+fra") - confirm against the pytesseract documentation.
  def tesseract_ocr(filepath: str, languages: List[str]):
9
  image = Image.open(filepath)
10
- return pytesseract.image_to_string(image=image, lang=', '.join(languages))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  title = "Tesseract OCR"
13
  description = "Gradio demo for Tesseract."
@@ -18,7 +36,7 @@ demo = gr.Interface(
18
  fn=tesseract_ocr,
19
  inputs=[
20
  gr.Image(type="filepath", label="Input"),
21
- gr.CheckboxGroup(language_choices, type="value", value=['eng'], label='language')
22
  ],
23
  outputs='text',
24
  title=title,
 
2
 
3
  import pytesseract
4
  from PIL import Image
5
+ import re
6
 
7
  import gradio as gr
8
 
9
  def tesseract_ocr(filepath: str, languages: List[str]):
10
  image = Image.open(filepath)
11
+ extracted_text = pytesseract.image_to_string(image=image, lang=', '.join(languages))
12
+ return extracted_text
13
+
14
def search_and_highlight(text, keyword):
    """Wrap every case-insensitive occurrence of *keyword* in ``<mark>`` tags.

    Args:
        text: The text to search.
        keyword: The literal string to highlight. It is matched verbatim,
            not interpreted as a regular expression.

    Returns:
        *text* with each match wrapped as ``<mark>match</mark>``. The
        original casing of each match is preserved. If *keyword* is empty,
        *text* is returned unchanged.
    """
    # An empty pattern matches at every position and would scatter empty
    # <mark></mark> pairs through the whole text.
    if not keyword:
        return text
    # Escape the keyword so user input such as "(" or "1.5" is treated as
    # literal text instead of raising re.error or over-matching.
    pattern = f"({re.escape(keyword)})"
    return re.sub(pattern, r"<mark>\1</mark>", text, flags=re.IGNORECASE)
17
+
18
def ocr_and_search(str, keyword, languages=None):
    """Run OCR on an uploaded image and optionally highlight a keyword.

    Args:
        str: Path of the uploaded image, or ``None``/empty when nothing was
            uploaded. The name shadows the builtin but is kept so existing
            callers are unaffected.
        keyword: Text to highlight in the OCR result. When empty, the raw
            extracted text is returned.
        languages: Optional list of Tesseract language codes. Defaults to
            ``["eng"]`` when omitted or empty.

    Returns:
        A prompt asking for an image when none was supplied; otherwise the
        extracted text, with keyword matches wrapped in ``<mark>`` tags when
        a keyword was given.
    """
    # Guard on the actual parameter; the previous check referenced an
    # undefined name and raised NameError on every call.
    if not str:
        return "Please upload an image."

    # tesseract_ocr requires a language list, so always supply one.
    extracted_text = tesseract_ocr(str, languages if languages else ["eng"])

    if keyword:
        return search_and_highlight(extracted_text, keyword)
    return extracted_text
29
 
30
  title = "Tesseract OCR"
31
  description = "Gradio demo for Tesseract."
 
36
  fn=tesseract_ocr,
37
  inputs=[
38
  gr.Image(type="filepath", label="Input"),
39
+ gr.Textbox(label="Keyword to Highlight", placeholder="Enter a keyword...") # Keyword input
40
  ],
41
  outputs='text',
42
  title=title,