Aumkeshchy2003 committed on
Commit
76a8b7b
1 Parent(s): a73bb26

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -18
app.py CHANGED
@@ -1,48 +1,56 @@
1
  from typing import List
2
-
3
  import pytesseract
4
  from PIL import Image
5
  import re
6
-
7
  import gradio as gr
8
 
9
- def tesseract_ocr(filepath: str, languages: List[str]):
 
10
  image = Image.open(filepath)
11
- extracted_text = pytesseract.image_to_string(image=image, lang=', '.join(languages))
12
  return extracted_text
13
 
14
- def search_and_highlight(text, keyword):
15
- highlighted_text = re.sub(f"({keyword})", r"<mark>\1</mark>", text, flags=re.IGNORECASE)
16
- return highlighted_text
 
 
 
17
 
18
- def ocr_and_search(str, keyword):
19
- if image is None:
 
20
  return "Please upload an image."
21
-
22
- extracted_text = tesseract_ocr(str)
23
-
24
  if keyword:
25
  highlighted_text = search_and_highlight(extracted_text, keyword)
26
  return highlighted_text
27
  else:
28
  return extracted_text
29
 
 
30
  title = "Tesseract OCR"
31
  description = "Gradio demo for Tesseract."
 
32
 
 
33
  language_choices = pytesseract.get_languages()
34
 
35
  demo = gr.Interface(
36
- fn=tesseract_ocr,
37
  inputs=[
38
- gr.Image(type="filepath", label="Input"),
39
- gr.Textbox(label="Keyword to Highlight", placeholder="Enter a keyword...") # Keyword input
40
- ],
41
- outputs='text',
 
42
  title=title,
43
  description=description,
 
44
  )
45
 
46
  if __name__ == '__main__':
47
  demo.launch()
48
- print("Finished running")
 
1
  from typing import List
 
2
  import pytesseract
3
  from PIL import Image
4
  import re
 
5
  import gradio as gr
6
 
7
def tesseract_ocr(filepath: str, languages: List[str]) -> str:
    """Extract text from the image at *filepath* using Tesseract OCR.

    Args:
        filepath: Path of the image file to open.
        languages: Tesseract language codes, e.g. ``["eng", "deu"]``. An
            empty or missing selection passes ``lang=None`` so the choice
            is left to pytesseract/Tesseract.

    Returns:
        The string returned by ``pytesseract.image_to_string``.
    """
    # Tesseract's ``-l`` option takes codes joined with "+" ("eng+deu");
    # a ", " separator is read as a single, unknown language name.
    lang = "+".join(languages) if languages else None
    # The context manager closes the image's file handle once OCR is done.
    with Image.open(filepath) as image:
        return pytesseract.image_to_string(image=image, lang=lang)
12
 
13
def search_and_highlight(text: str, keyword: str) -> str:
    """Wrap every case-insensitive occurrence of *keyword* in ``<mark>`` tags.

    Args:
        text: The text to search.
        keyword: The literal string to highlight. It is matched verbatim,
            not interpreted as a regular expression.

    Returns:
        *text* with each match wrapped as ``<mark>match</mark>`` (original
        casing preserved), or *text* unchanged when *keyword* is empty.
    """
    if not keyword:
        return text
    # Escape the keyword so characters such as "(" or "." are matched
    # literally instead of raising re.error or matching unintended text.
    pattern = re.compile(f"({re.escape(keyword)})", flags=re.IGNORECASE)
    return pattern.sub(r"<mark>\1</mark>", text)
19
 
20
def ocr_and_search(filepath: str, keyword: str, languages: List[str]) -> str:
    """Run OCR on an image and highlight *keyword* in the extracted text.

    Args:
        filepath: Path of the image file, or ``None``/empty when no image
            was supplied.
        keyword: Text to highlight; may be empty.
        languages: Language codes forwarded to ``tesseract_ocr``.

    Returns:
        ``"Please upload an image."`` when no image path is given;
        otherwise the result of ``search_and_highlight`` applied to the
        OCR output.
    """
    # Treat an empty path like a missing one so it is never handed to the
    # OCR step.
    if not filepath:
        return "Please upload an image."

    extracted_text = tesseract_ocr(filepath, languages)
    # search_and_highlight returns the text unchanged for an empty keyword,
    # so no separate branch is needed here.
    return search_and_highlight(extracted_text, keyword)
32
 
33
# Static text passed to the Gradio interface.
title = "Tesseract OCR"
description = "Gradio demo for Tesseract."
article = "<p>Upload an image and optionally highlight keywords.</p>"

# Language codes returned by pytesseract.get_languages().
language_choices = pytesseract.get_languages()

demo = gr.Interface(
    title=title,
    description=description,
    article=article,
    fn=ocr_and_search,
    inputs=[
        gr.Image(type="filepath", label="Upload Image for OCR"),
        gr.Textbox(
            label="Keyword to Highlight",
            placeholder="Enter a keyword...",
        ),
        # 'eng' is pre-selected as the default OCR language.
        gr.CheckboxGroup(
            choices=language_choices,
            label="Select OCR Language(s)",
            value=['eng'],
        ),
    ],
    # HTML output so the <mark> tags are rendered as highlights.
    outputs='html',
)

if __name__ == '__main__':
    demo.launch()
    # NOTE(review): the source's indentation is lost; this print is assumed
    # to sit inside the __main__ guard — confirm against the real app.py.
    print("Finished running")