# Hugging Face Spaces: shashankkandimalla / ocr-text-processing (Running)
# File size: 3,848 Bytes
# 288007d

import base64
import http.client
import json
import os
import tempfile

import gradio as gr
import requests
from dotenv import load_dotenv
from openai import OpenAI

# Load environment variables
# Runs at import time, before any of the os.getenv lookups further down
# (IMGBB_API_KEY, RAPIDAPI_KEY, OPENAI_API_KEY) are evaluated.
load_dotenv()

# Function to upload image to imgbb
def upload_image_to_imgbb(image_path, timeout=30):
    """Upload a local image file to imgbb and return its hosted URL.

    Args:
        image_path: Path of the image file to upload.
        timeout: Seconds to wait for imgbb before giving up. Without a
            timeout ``requests.post`` can block indefinitely.

    Returns:
        The image URL taken from ``data.url`` in the imgbb JSON response, or
        a string starting with ``"Error uploading image:"`` on any failure
        (missing API key, unreadable file, HTTP error, unexpected response).
        Errors are returned rather than raised.
    """
    try:
        api_key = os.getenv("IMGBB_API_KEY")
        if not api_key:
            # Fail fast with a clear message instead of sending a keyless request.
            raise ValueError("IMGBB_API_KEY is not set")

        # Read and encode first so the file handle is released before the
        # (potentially slow) network call starts.
        with open(image_path, "rb") as file:
            encoded_image = base64.b64encode(file.read()).decode()

        res = requests.post(
            "https://api.imgbb.com/1/upload",
            data={"key": api_key, "image": encoded_image},
            timeout=timeout,
        )
        res.raise_for_status()  # Raises an HTTPError for bad responses
        return res.json()['data']['url']
    except Exception as e:
        return f"Error uploading image: {str(e)}"

# OCR API request
def get_ocr_results(image_url, timeout=30):
    """Send an image URL to the ocr43 RapidAPI endpoint and return the raw reply.

    Args:
        image_url: URL of the image to run OCR on; it is sent as the ``url``
            field of a hand-built multipart/form-data body.
        timeout: Socket timeout in seconds for the HTTPS connection.

    Returns:
        The response body decoded as UTF-8, or a string starting with
        ``"Error in OCR processing:"`` on any failure (missing API key,
        connection problem, HTTP status >= 400). Errors are returned rather
        than raised.
    """
    conn = None
    try:
        api_key = os.getenv("RAPIDAPI_KEY")
        if not api_key:
            # A None header value would otherwise fail with an obscure TypeError.
            raise ValueError("RAPIDAPI_KEY is not set")

        # Body delimiters are "--" + boundary, so the header and body must
        # be derived from the same value.
        boundary = "---011000010111000001101001"
        payload = (
            f"--{boundary}\r\n"
            "Content-Disposition: form-data; name=\"url\"\r\n\r\n"
            f"{image_url}\r\n"
            f"--{boundary}--\r\n\r\n"
        )
        headers = {
            'x-rapidapi-key': api_key,
            'x-rapidapi-host': "ocr43.p.rapidapi.com",
            'Content-Type': f"multipart/form-data; boundary={boundary}"
        }

        conn = http.client.HTTPSConnection("ocr43.p.rapidapi.com", timeout=timeout)
        conn.request("POST", "/v1/results", payload, headers)
        res = conn.getresponse()
        body = res.read().decode("utf-8")
        if res.status >= 400:
            # Do not pass an API error payload downstream as if it were OCR text.
            raise RuntimeError(f"HTTP {res.status} {res.reason}: {body}")
        return body
    except Exception as e:
        return f"Error in OCR processing: {str(e)}"
    finally:
        # Always release the socket, on success and on failure alike.
        if conn is not None:
            conn.close()

# OpenAI API configuration
# Module-level client used by process_ocr_with_gpt; it is constructed once at
# import time with the key read from the OPENAI_API_KEY environment variable.
# NOTE(review): os.getenv returns None when the variable is unset — confirm how
# the OpenAI constructor behaves with api_key=None.
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Function to process OCR results with OpenAI
def process_ocr_with_gpt(ocr_results):
    """Ask the chat model to clean up and reformat raw OCR output.

    Args:
        ocr_results: Raw OCR text (or serialized OCR response) that is
            embedded verbatim in the user prompt.

    Returns:
        The content of the first completion choice, or a string starting
        with ``"Error in GPT processing:"`` if anything raises.
    """
    try:
        user_prompt = f"""
        You are an AI assistant tasked with processing OCR results and providing a clean, formatted output.
        Here are the OCR results:

        {ocr_results}

        Please process this information and provide a clean, well-formatted output. If there are any obvious errors in the OCR, please correct them. Format the output in a clear and readable manner.
        """

        # System message sets the role; the user message carries the OCR text.
        chat_messages = [
            {"role": "system", "content": "You are a helpful assistant that processes OCR results."},
            {"role": "user", "content": user_prompt},
        ]
        completion = openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=chat_messages,
        )

        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        return f"Error in GPT processing: {str(err)}"

# Gradio interface function
def process_image(image):
    """Run the full pipeline for one uploaded image: upload, OCR, GPT cleanup.

    Args:
        image: The uploaded image object; it must provide ``save(path)``
            (the interface is configured to pass a PIL image). ``None`` is
            reported as an error.

    Returns:
        A 2-tuple of strings ``(ocr_results, processed_results)``. On failure
        the first element is an error message starting with ``"Error"`` and
        the second is a short explanation of which stage failed.
    """
    if image is None:
        return "Error in image processing: no image provided", "Failed to process due to an error"

    image_path = None
    try:
        # Save the image to a unique temporary file: a fixed file name would
        # be overwritten when two requests are processed at the same time.
        fd, image_path = tempfile.mkstemp(suffix=".png")
        os.close(fd)  # only the path is needed; image.save reopens it
        image.save(image_path)

        # Upload image to imgbb and get URL
        image_url = upload_image_to_imgbb(image_path)
        if image_url.startswith("Error"):
            return image_url, "Failed to process due to image upload error"

        # Get OCR results
        ocr_results = get_ocr_results(image_url)
        if ocr_results.startswith("Error"):
            return ocr_results, "Failed to process due to OCR error"

        # Process with GPT
        processed_results = process_ocr_with_gpt(ocr_results)

        return ocr_results, processed_results
    except Exception as e:
        return f"Error in image processing: {str(e)}", "Failed to process due to an error"
    finally:
        # Clean up the temporary file on every exit path, including the early
        # returns and exceptions above.
        if image_path is not None:
            try:
                os.remove(image_path)
            except OSError:
                pass  # best-effort cleanup; the file may already be gone

# Create Gradio interface
# One PIL image in, two text boxes out: the raw OCR response and the
# GPT-cleaned text, in the order process_image returns them.
iface = gr.Interface(
    title="OCR and Text Processing App",
    description="Upload an image to extract text and process it.",
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Textbox(label=caption)
        for caption in ("OCR Results", "Processed Results")
    ],
)

# Launch the app
iface.launch()