Spaces:

shashankkandimalla
/

ocr-text-processing

Running

App Files Files Community

ocr-text-processing / app.py

shashankkandimalla

Upload folder using huggingface_hub

288007d verified 6 months ago

raw

history blame

3.85 kB

	import gradio as gr
	import http.client
	import json
	import requests
	import base64
	import os
	from dotenv import load_dotenv
	from openai import OpenAI

	# Load environment variables
	load_dotenv()

	# Function to upload image to imgbb
	def upload_image_to_imgbb(image_path, timeout=30):
	    """Upload a local image file to imgbb and return its hosted URL.

	    The file is read in binary mode, base64-encoded and posted as form
	    data together with the API key taken from the ``IMGBB_API_KEY``
	    environment variable.

	    Args:
	        image_path: Path of the image file to upload.
	        timeout: Seconds to wait for imgbb before giving up, so a stalled
	            upload cannot block the caller indefinitely.

	    Returns:
	        The value of ``data.url`` from imgbb's JSON response on success.
	        On any failure a string starting with ``"Error uploading image: "``
	        is returned instead of raising; callers detect failure by that
	        prefix.
	    """
	    try:
	        api_key = os.getenv("IMGBB_API_KEY")
	        if not api_key:
	            # Fail fast with an actionable message instead of sending a
	            # request that is guaranteed to be rejected.
	            raise RuntimeError("IMGBB_API_KEY is not set")

	        with open(image_path, "rb") as file:
	            encoded_image = base64.b64encode(file.read()).decode()

	        res = requests.post(
	            "https://api.imgbb.com/1/upload",
	            data={"key": api_key, "image": encoded_image},
	            timeout=timeout,
	        )
	        res.raise_for_status()  # Raises an HTTPError for bad responses
	        return res.json()['data']['url']
	    except Exception as e:
	        # Errors are reported in-band because the caller checks the prefix.
	        return f"Error uploading image: {str(e)}"

	# OCR API request
	def get_ocr_results(image_url, timeout=30):
	    """Submit an image URL to the ocr43 RapidAPI endpoint and return the response body.

	    A hand-built multipart/form-data body carrying a single ``url`` field
	    is POSTed to ``/v1/results`` on ``ocr43.p.rapidapi.com``. The API key
	    is read from the ``RAPIDAPI_KEY`` environment variable.

	    Args:
	        image_url: Publicly reachable URL of the image to run OCR on.
	        timeout: Seconds to wait on the connection before giving up.

	    Returns:
	        The response body decoded as UTF-8 when the server answers with a
	        2xx status. On any failure (missing key, network error, non-2xx
	        status) a string starting with ``"Error in OCR processing: "`` is
	        returned instead of raising; callers detect failure by that prefix.
	    """
	    conn = None
	    try:
	        api_key = os.getenv("RAPIDAPI_KEY")
	        if not api_key:
	            # Without this check a missing key surfaces as an opaque
	            # header-encoding TypeError from http.client.
	            raise RuntimeError("RAPIDAPI_KEY is not set")

	        conn = http.client.HTTPSConnection("ocr43.p.rapidapi.com", timeout=timeout)
	        # Body delimiters are "--" + boundary, hence two more dashes than
	        # the boundary declared in the Content-Type header.
	        payload = f"-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"url\"\r\n\r\n{image_url}\r\n-----011000010111000001101001--\r\n\r\n"
	        headers = {
	            'x-rapidapi-key': api_key,
	            'x-rapidapi-host': "ocr43.p.rapidapi.com",
	            'Content-Type': "multipart/form-data; boundary=---011000010111000001101001",
	        }
	        conn.request("POST", "/v1/results", payload, headers)
	        res = conn.getresponse()
	        body = res.read().decode("utf-8")
	        if not 200 <= res.status < 300:
	            # Do not hand an API error payload downstream as if it were OCR text.
	            raise RuntimeError(f"HTTP {res.status} {res.reason}: {body}")
	        return body
	    except Exception as e:
	        return f"Error in OCR processing: {str(e)}"
	    finally:
	        # Always release the socket, including on early failure.
	        if conn is not None:
	            conn.close()

	# Module-wide OpenAI client; the key comes from the OPENAI_API_KEY environment variable
	openai_client = OpenAI(
	    api_key=os.environ.get("OPENAI_API_KEY"),
	)

	# Function to process OCR results with OpenAI
	def process_ocr_with_gpt(ocr_results):
	    """Ask the OpenAI chat model to clean up and format raw OCR output.

	    Args:
	        ocr_results: OCR text, interpolated verbatim into the user prompt.

	    Returns:
	        The message content of the first completion choice. If anything
	        raises, a string starting with ``"Error in GPT processing: "`` is
	        returned instead.
	    """
	    try:
	        user_prompt = f"""
	You are an AI assistant tasked with processing OCR results and providing a clean, formatted output.
	Here are the OCR results:

	{ocr_results}

	Please process this information and provide a clean, well-formatted output. If there are any obvious errors in the OCR, please correct them. Format the output in a clear and readable manner.
	"""

	        chat_messages = [
	            {"role": "system", "content": "You are a helpful assistant that processes OCR results."},
	            {"role": "user", "content": user_prompt},
	        ]
	        completion = openai_client.chat.completions.create(
	            model="gpt-3.5-turbo",
	            messages=chat_messages,
	        )

	        first_choice = completion.choices[0]
	        return first_choice.message.content
	    except Exception as err:
	        return f"Error in GPT processing: {str(err)}"

	# Gradio interface function
	def process_image(image):
	    """Run the full pipeline for one image: upload, OCR, then GPT clean-up.

	    The image is written to a uniquely named temporary PNG file so that
	    concurrent requests cannot overwrite each other's file, and that file
	    is removed on every exit path.

	    Args:
	        image: Object exposing ``save(path)``; the Gradio input is
	            configured with ``type="pil"``. ``None`` (nothing uploaded)
	            is reported as an error.

	    Returns:
	        A 2-tuple ``(ocr_results, processed_results)`` for the two output
	        textboxes. On failure the first element is an ``"Error..."``
	        message and the second a short explanation of which stage failed.
	    """
	    # Function-scope import keeps this block self-contained.
	    import tempfile

	    if image is None:
	        return "Error in image processing: no image provided", "Failed to process due to an error"

	    image_path = None
	    try:
	        # Unique path per call; the descriptor is closed right away because
	        # the image is written by path, not through the descriptor.
	        fd, image_path = tempfile.mkstemp(suffix=".png")
	        os.close(fd)
	        image.save(image_path)

	        # Upload image to imgbb and get URL
	        image_url = upload_image_to_imgbb(image_path)
	        if image_url.startswith("Error"):
	            return image_url, "Failed to process due to image upload error"

	        # Get OCR results
	        ocr_results = get_ocr_results(image_url)
	        if ocr_results.startswith("Error"):
	            return ocr_results, "Failed to process due to OCR error"

	        # Process with GPT
	        processed_results = process_ocr_with_gpt(ocr_results)

	        return ocr_results, processed_results
	    except Exception as e:
	        return f"Error in image processing: {str(e)}", "Failed to process due to an error"
	    finally:
	        # Clean up the temporary file on success, early return and error alike.
	        if image_path is not None:
	            try:
	                os.remove(image_path)
	            except OSError:
	                # Best-effort cleanup: a file that is already gone or locked
	                # must not mask the real result.
	                pass

	# Create Gradio interface
	iface = gr.Interface(
	    process_image,
	    gr.Image(type="pil"),
	    [
	        gr.Textbox(label="OCR Results"),
	        gr.Textbox(label="Processed Results"),
	    ],
	    title="OCR and Text Processing App",
	    description="Upload an image to extract text and process it.",
	)

	# Start serving the interface
	iface.launch()