Aumkeshchy2003 committed on
Commit
03f0455
1 Parent(s): 6916c84

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -9
app.py CHANGED
@@ -5,36 +5,28 @@ import re
5
  import gradio as gr
6
 
7
  def tesseract_ocr(filepath: str) -> str:
8
- """Extract text from the image using Tesseract OCR with both English and Hindi."""
9
  image = Image.open(filepath)
10
- # Set languages to English and Hindi by default
11
  combined_languages = 'eng+hin'
12
  extracted_text = pytesseract.image_to_string(image=image, lang=combined_languages)
13
  return extracted_text
14
 
15
  def search_and_highlight(text: str, keyword: str) -> str:
16
- """Highlight occurrences of the keyword in the extracted text."""
17
  if keyword:
18
  highlighted_text = re.sub(f"({keyword})", r"<mark>\1</mark>", text, flags=re.IGNORECASE)
19
  return highlighted_text
20
  return text
21
 
22
  def ocr_and_search(filepath: str, keyword: str) -> str:
23
- """Perform OCR on the image and highlight the specified keyword."""
24
  if filepath is None:
25
  return "Please upload an image."
26
-
27
- # Perform OCR (with default English and Hindi languages)
28
  extracted_text = tesseract_ocr(filepath)
29
 
30
- # Highlight the keyword if provided
31
  if keyword:
32
  highlighted_text = search_and_highlight(extracted_text, keyword)
33
  return highlighted_text
34
  else:
35
  return extracted_text
36
 
37
- # Gradio Interface
38
  title = "Tesseract OCR (English + Hindi)"
39
  description = "Gradio demo for Tesseract with multi-language support (English and Hindi)."
40
 
@@ -44,7 +36,7 @@ demo = gr.Interface(
44
  gr.Image(type="filepath", label="Upload Image for OCR"),
45
  gr.Textbox(label="Keyword to Highlight", placeholder="Enter a keyword...")
46
  ],
47
- outputs='html', # Changed to 'html' to display highlighted text
48
  title=title,
49
  description=description
50
  )
 
5
  import gradio as gr
6
 
7
  def tesseract_ocr(filepath: str) -> str:
 
8
  image = Image.open(filepath)
 
9
  combined_languages = 'eng+hin'
10
  extracted_text = pytesseract.image_to_string(image=image, lang=combined_languages)
11
  return extracted_text
12
 
13
  def search_and_highlight(text: str, keyword: str) -> str:
 
14
  if keyword:
15
  highlighted_text = re.sub(f"({keyword})", r"<mark>\1</mark>", text, flags=re.IGNORECASE)
16
  return highlighted_text
17
  return text
18
 
19
  def ocr_and_search(filepath: str, keyword: str) -> str:
 
20
  if filepath is None:
21
  return "Please upload an image."
 
 
22
  extracted_text = tesseract_ocr(filepath)
23
 
 
24
  if keyword:
25
  highlighted_text = search_and_highlight(extracted_text, keyword)
26
  return highlighted_text
27
  else:
28
  return extracted_text
29
 
 
30
  title = "Tesseract OCR (English + Hindi)"
31
  description = "Gradio demo for Tesseract with multi-language support (English and Hindi)."
32
 
 
36
  gr.Image(type="filepath", label="Upload Image for OCR"),
37
  gr.Textbox(label="Keyword to Highlight", placeholder="Enter a keyword...")
38
  ],
39
+ outputs='html',
40
  title=title,
41
  description=description
42
  )