# Source: Hugging Face Space "shashankkandimalla/ocr-text-processing"
# Status: Running
# File size: 4,838 Bytes

import base64
import http.client
import io
import json
import os
import tempfile

import gradio as gr
import requests
from dotenv import load_dotenv
from openai import OpenAI
from PIL import Image

# Load environment variables via python-dotenv. This must run before any
# os.getenv() lookup of IMGBB_API_KEY, RAPIDAPI_KEY or OPENAI_API_KEY below
# (the OpenAI client reads its key at import time).
load_dotenv()

# Function to upload image to imgbb
def upload_image_to_imgbb(image_bytes, timeout=30):
    """Upload raw image bytes to imgbb and return the hosted image URL.

    Args:
        image_bytes: Content of the image file as bytes.
        timeout: Seconds to wait for imgbb before giving up. Without a
            timeout a stalled connection would block the request forever.

    Returns:
        The URL reported by imgbb on success. On any failure, a string that
        starts with "Error uploading image: " is returned instead of raising,
        so callers can detect failures with ``startswith("Error")``.
    """
    if not image_bytes:
        return "Error uploading image: no image data provided"

    # Fail fast with a clear message instead of an opaque HTTP 4xx from imgbb.
    api_key = os.getenv("IMGBB_API_KEY")
    if not api_key:
        return "Error uploading image: IMGBB_API_KEY is not set"

    try:
        url = "https://api.imgbb.com/1/upload"
        payload = {
            "key": api_key,
            # imgbb accepts the image as a base64-encoded form field.
            "image": base64.b64encode(image_bytes).decode(),
        }
        res = requests.post(url, data=payload, timeout=timeout)
        res.raise_for_status()  # Raises an HTTPError for bad responses
        return res.json()['data']['url']
    except Exception as e:
        return f"Error uploading image: {str(e)}"

# OCR API request
def get_ocr_results(image_url, timeout=30):
    """Send an image URL to the ocr43 RapidAPI endpoint and return its reply.

    Args:
        image_url: Publicly reachable URL of the image to run OCR on.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        The response body decoded as UTF-8 text on success. On any failure
        (missing API key, network error, HTTP status >= 400) a string that
        starts with "Error in OCR processing: " is returned instead of
        raising, so callers can detect failures with ``startswith("Error")``.
    """
    api_key = os.getenv("RAPIDAPI_KEY")
    if not api_key:
        return "Error in OCR processing: RAPIDAPI_KEY is not set"

    # The multipart delimiter is "--" followed by the declared boundary.
    boundary = "---011000010111000001101001"
    conn = None
    try:
        conn = http.client.HTTPSConnection("ocr43.p.rapidapi.com", timeout=timeout)
        payload = (
            f"--{boundary}\r\n"
            'Content-Disposition: form-data; name="url"\r\n\r\n'
            f"{image_url}\r\n"
            f"--{boundary}--\r\n\r\n"
        )
        headers = {
            'x-rapidapi-key': api_key,
            'x-rapidapi-host': "ocr43.p.rapidapi.com",
            'Content-Type': f"multipart/form-data; boundary={boundary}"
        }
        # Encode explicitly: http.client would otherwise encode a str body as
        # ISO-8859-1 and fail on URLs containing other characters.
        conn.request("POST", "/v1/results", payload.encode("utf-8"), headers)
        res = conn.getresponse()
        body = res.read().decode("utf-8")
        if res.status >= 400:
            # Do not pass an HTTP error page downstream as if it were OCR text.
            return f"Error in OCR processing: HTTP {res.status} {res.reason}: {body}"
        return body
    except Exception as e:
        return f"Error in OCR processing: {str(e)}"
    finally:
        # Always release the socket, including on error paths.
        if conn is not None:
            conn.close()

# Shared OpenAI client, created once at import time; its API key is taken
# from the OPENAI_API_KEY environment variable.
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Function to process OCR results with OpenAI
def process_ocr_with_gpt(ocr_results):
    """Ask the OpenAI chat model to turn raw OCR output into clean text.

    Args:
        ocr_results: Raw OCR output to embed in the prompt.

    Returns:
        The content of the first completion choice, or a string starting
        with "Error in GPT processing: " if anything in the call fails.
    """
    try:
        user_prompt = f"""
        You are an AI assistant tasked with processing OCR results and providing a clean, formatted output.
        Here are the OCR results:

        {ocr_results}

        Please process this information and provide a clean, well-formatted output. Arrange all elements in order and omit any elements not present in the file.
        """

        system_message = {
            "role": "system",
            "content": "You are a helpful assistant that processes OCR results.",
        }
        user_message = {"role": "user", "content": user_prompt}

        completion = openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[system_message, user_message],
        )

        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as exc:
        return f"Error in GPT processing: {exc!s}"

# Gradio interface function
def process_images(files):
    """Run the upload -> OCR -> GPT clean-up pipeline over each uploaded image.

    Args:
        files: Iterable of uploaded files as raw bytes. ``None`` or an empty
            iterable is treated as "nothing to process".

    Returns:
        A 3-tuple ``(ocr_results_list, processed_results_list, download_links)``.
        The first two lists hold one entry per input file: the result text,
        or an error message if that file failed. ``download_links`` holds the
        paths of the text files written for successfully processed images.
    """
    ocr_results_list = []
    processed_results_list = []
    download_links = []

    # Guard against files being None so an empty submission does not crash.
    for idx, file in enumerate(files or []):
        try:
            # Check that the upload is a readable image before spending API
            # calls on it. Nothing is written to disk here, so no temporary
            # image file can be left behind on an error path.
            with Image.open(io.BytesIO(file)) as image:
                image.verify()

            # Upload image to imgbb and get URL
            image_url = upload_image_to_imgbb(file)
            if image_url.startswith("Error"):
                ocr_results_list.append(image_url)
                processed_results_list.append("Failed to process due to image upload error")
                continue

            # Get OCR results
            ocr_results = get_ocr_results(image_url)
            if ocr_results.startswith("Error"):
                ocr_results_list.append(ocr_results)
                processed_results_list.append("Failed to process due to OCR error")
                continue

            # Process with GPT
            processed_results = process_ocr_with_gpt(ocr_results)
            if processed_results.startswith("Error in GPT processing"):
                # Report the failure, but do not offer the error text as a
                # downloadable "result" file.
                ocr_results_list.append(ocr_results)
                processed_results_list.append(processed_results)
                continue

            # Save processed results to a uniquely named file for download.
            # A fixed name in the working directory would be overwritten by
            # concurrent requests using the same index.
            with tempfile.NamedTemporaryFile(
                mode="w",
                encoding="utf-8",
                prefix=f"processed_result_{idx}_",
                suffix=".txt",
                delete=False,
            ) as result_file:
                result_file.write(processed_results)

            download_links.append(result_file.name)
            ocr_results_list.append(ocr_results)
            processed_results_list.append(processed_results)
        except Exception as e:
            # Keep going: one bad file must not abort the whole batch.
            ocr_results_list.append(f"Error in image processing: {str(e)}")
            processed_results_list.append("Failed to process due to an error")

    return ocr_results_list, processed_results_list, download_links

# Build the Gradio UI. The three values returned by process_images map, in
# order, onto the three output components listed below.
_upload_input = gr.Files(label="Upload Images", file_count="multiple", type="binary")
_result_outputs = [
    gr.Textbox(label="OCR Results"),
    gr.Textbox(label="Processed Results"),
    gr.File(label="Download Processed Results"),
]

iface = gr.Interface(
    fn=process_images,
    inputs=_upload_input,
    outputs=_result_outputs,
    title="OCR and Text Processing App",
    description="Upload images to extract text and process it. Download the processed results.",
)

# Start serving the app
iface.launch()