Spaces:

yashbyname
/

OCR_using_GOT_and_Tesseract

Running

App Files Files Community

OCR_using_GOT_and_Tesseract / app.py

yashbyname

Update app.py

4cdc503 verified about 2 months ago

raw

history blame

4.46 kB

	# -*- coding: utf-8 -*-
	"""Final WebApp using Gradio.ipynb

	Automatically generated by Colab.

	Original file is located at
	https://colab.research.google.com/drive/1a5-p_KZd9Hk0tsKZ_JoqoYeRD3XOQtRK

	# Task 2 - Web App Development with Gradio

	## Gradio Interface for OCR Application

	In this notebook, I created an interactive web application using Gradio to facilitate the OCR process and allow users to perform keyword searches on the extracted text.
	"""

	#!pip install gradio
	#!pip install -q tiktoken verovio
	#!pip install pytesseract

	"""Library Imports:

	- In addition to libraries from the first notebook, I imported `gradio` to build the user interface for the application.
	"""

	import cv2
	from pytesseract import pytesseract
	from transformers import AutoModel, AutoTokenizer
	import gradio as gr

	"""Model and Tesseract Configuration:
	- Similar to the first notebook, I loaded the GOT-OCR2.0 model (`ucaslcl/GOT-OCR2_0`) for English text and configured Tesseract for Hindi text.
	"""

	# Hugging Face Hub id shared by the tokenizer and the model used for English OCR.
	GOT_MODEL_ID = 'ucaslcl/GOT-OCR2_0'

	# trust_remote_code=True lets transformers run the custom code shipped in the model repo.
	tokenizer_eng = AutoTokenizer.from_pretrained(GOT_MODEL_ID, trust_remote_code=True)
	model_eng = AutoModel.from_pretrained(GOT_MODEL_ID, trust_remote_code=True)
	model_eng = model_eng.eval()  # inference only: switch the model to evaluation mode

	# Tell pytesseract where the Tesseract binary lives.
	pytesseract.tesseract_cmd = '/usr/bin/tesseract'

	# Command-line flags passed to Tesseract for Hindi OCR.
	tesseract_config = ' '.join([
	    '--oem 3',  # OCR engine mode 3
	    '--psm 6',  # page segmentation mode 6
	    '-l hin',   # Hindi language data
	])

	"""Perform OCR Function:
	- The `perform_ocr` function was adapted to handle image input from the Gradio interface. This function processes the uploaded image based on the selected language and returns the extracted English and Hindi texts.
	"""

	def perform_ocr(img, language):
	    """Run OCR on an uploaded image for the selected language(s).

	    The image is written to a per-call temporary directory so that
	    simultaneous requests cannot overwrite each other's file, and the
	    directory is removed again once OCR has finished.

	    Args:
	        img: Uploaded image object exposing ``save(path)``, or ``None``
	            when no image was provided.
	        language: ``"English"``, ``"Hindi"`` or ``"Both"``. Any other
	            value (including ``None``) selects no OCR engine.

	    Returns:
	        A ``(english_text, hindi_text)`` tuple. An element is ``""`` when
	        the corresponding engine was not requested or nothing could be
	        processed.
	    """
	    import os
	    import tempfile

	    want_english = language in ("English", "Both")
	    want_hindi = language in ("Hindi", "Both")

	    # Nothing to do without an image or a supported language; returning
	    # early also avoids calling ``.save`` on ``None``.
	    if img is None or not (want_english or want_hindi):
	        return "", ""

	    res_eng = ""
	    res_hin = ""

	    # A unique directory per call replaces the former fixed /tmp path,
	    # which concurrent requests would clobber.
	    with tempfile.TemporaryDirectory() as tmp_dir:
	        img_path = os.path.join(tmp_dir, "uploaded_image.png")
	        img.save(img_path)

	        if want_english:
	            res_eng = model_eng.chat(tokenizer_eng, img_path, ocr_type='ocr')

	        if want_hindi:
	            img_cv = cv2.imread(img_path)
	            res_hin = pytesseract.image_to_string(img_cv, config=tesseract_config)

	    return res_eng, res_hin

	"""Keyword Search Functionality:
	- A new function, `ocr_and_search`, was implemented to allow users to search for keywords within the extracted text. It checks for keyword matches in both English and Hindi texts, providing appropriate feedback.
	"""

	def ocr_and_search(image, language, keyword):
	    """Run OCR on ``image`` and report whether ``keyword`` occurs in the result.

	    Args:
	        image: Uploaded image, forwarded unchanged to ``perform_ocr``.
	        language: ``"English"``, ``"Hindi"`` or ``"Both"``; selects which
	            extracted texts are searched.
	        keyword: Text to look for. Leading/trailing whitespace is ignored
	            and the comparison is case-insensitive. ``None``, empty or
	            whitespace-only keywords perform no search.

	    Returns:
	        A ``(extracted_english, extracted_hindi, search_output)`` tuple of
	        display strings for the three output text boxes.
	    """
	    english_text, hindi_text = perform_ocr(image, language)

	    extracted_english = (
	        f"Extracted English Text:\n{english_text}" if english_text else "No English text extracted."
	    )
	    extracted_hindi = (
	        f"Extracted Hindi Text:\n{hindi_text}" if hindi_text else "No Hindi text extracted."
	    )

	    # Normalise the keyword once: a padded keyword should still match, and
	    # a whitespace-only keyword must not count as a hit on any space in
	    # the extracted text.
	    needle = (keyword or "").strip()

	    search_results = []
	    if needle:
	        needle_lower = needle.lower()

	        if language in ("English", "Both") and needle_lower in english_text.lower():
	            search_results.append(f"Keyword '{needle}' found in English text.")

	        if language in ("Hindi", "Both") and needle_lower in hindi_text.lower():
	            search_results.append(f"Keyword '{needle}' found in Hindi text.")

	    search_output = "\n".join(search_results) if search_results else "No matches found."

	    return extracted_english, extracted_hindi, search_output

	"""Gradio Interface Setup:
	- The user interface is constructed using Gradio's Blocks API, allowing users to upload images, select the desired language for OCR, and enter a keyword for search.
	- The outputs are displayed in separate text boxes for extracted English text, extracted Hindi text, and search results.
	"""

	# Gradio
	with gr.Blocks() as app:
	    # Components are rendered in the order they are created below.
	    gr.Markdown("### OCR Application")

	    # --- Inputs ---
	    image_input = gr.Image(type="pil", label="Upload Image")
	    language_selection = gr.Radio(
	        choices=["English", "Hindi", "Both"],
	        label="Select Language",
	    )
	    keyword_input = gr.Textbox(
	        placeholder="Enter keyword to search",
	        label="Keyword Search",
	    )

	    # --- Read-only outputs ---
	    output_english = gr.Textbox(label="Extracted English Text", interactive=False)
	    output_hindi = gr.Textbox(label="Extracted Hindi Text", interactive=False)
	    output_search = gr.Textbox(label="Search Results", interactive=False)

	    # --- Event wiring: Submit runs OCR plus keyword search ---
	    ocr_inputs = [image_input, language_selection, keyword_input]
	    ocr_outputs = [output_english, output_hindi, output_search]
	    submit_button = gr.Button("Submit")
	    submit_button.click(fn=ocr_and_search, inputs=ocr_inputs, outputs=ocr_outputs)

	"""Application Launch:
	- Finally, the Gradio app is launched, making the OCR application accessible for user interaction. This enables real-time testing and usability of the OCR functionalities implemented in the previous notebook.
	"""

	# Start the web server only when this file is executed as a script, so that
	# importing the module (e.g. from tests or tooling) does not launch the app.
	if __name__ == "__main__":
	    app.launch()