File size: 3,848 Bytes
288007d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import base64
import http.client
import json
import os
import tempfile

import gradio as gr
import requests
from dotenv import load_dotenv
from openai import OpenAI

# Load environment variables
load_dotenv()

# Function to upload image to imgbb
def upload_image_to_imgbb(image_path, timeout=30):
    """Upload a local image file to imgbb and return its hosted URL.

    Args:
        image_path: Path of the image file to upload.
        timeout: Seconds to wait for the imgbb request before giving up.

    Returns:
        The value of ``data.url`` from the imgbb JSON response on success.
        On any failure, a string starting with ``"Error uploading image: "``;
        callers detect failure through that prefix rather than an exception.
    """
    try:
        # Read and encode up front so the file handle is released before the
        # (potentially slow) network round trip starts.
        with open(image_path, "rb") as file:
            encoded_image = base64.b64encode(file.read()).decode()

        payload = {
            "key": os.getenv("IMGBB_API_KEY"),
            "image": encoded_image,
        }
        # requests waits forever by default; an explicit timeout keeps a
        # stalled upload from blocking the request indefinitely.
        res = requests.post(
            "https://api.imgbb.com/1/upload",
            data=payload,
            timeout=timeout,
        )
        res.raise_for_status()  # Raises an HTTPError for bad responses
        return res.json()['data']['url']
    except Exception as e:
        # Boundary handler: every failure is reported as an error string.
        return f"Error uploading image: {str(e)}"

# OCR API request
def get_ocr_results(image_url, timeout=30):
    """Send an image URL to the ocr43 RapidAPI endpoint and return the reply.

    The URL is posted as a single ``url`` field of a hand-built
    multipart/form-data body to ``POST /v1/results``.

    Args:
        image_url: Publicly reachable URL of the image to run OCR on.
        timeout: Socket timeout in seconds for the HTTPS connection.

    Returns:
        The response body decoded as UTF-8 on a 2xx response. On a non-2xx
        status or any exception, a string starting with
        ``"Error in OCR processing: "``; callers detect failure through
        that prefix.
    """
    conn = None
    try:
        conn = http.client.HTTPSConnection("ocr43.p.rapidapi.com", timeout=timeout)
        # Body delimiters are "--" + boundary, hence five dashes here versus
        # three in the Content-Type header below.
        payload = f"-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"url\"\r\n\r\n{image_url}\r\n-----011000010111000001101001--\r\n\r\n"
        headers = {
            'x-rapidapi-key': os.getenv("RAPIDAPI_KEY"),
            'x-rapidapi-host': "ocr43.p.rapidapi.com",
            'Content-Type': "multipart/form-data; boundary=---011000010111000001101001"
        }
        # Encode explicitly: http.client would otherwise encode a str body
        # as ISO-8859-1 and fail on URLs outside that charset.
        conn.request("POST", "/v1/results", payload.encode("utf-8"), headers)
        res = conn.getresponse()
        body = res.read().decode("utf-8")
        if not 200 <= res.status < 300:
            # Do not pass an API error payload off as OCR output.
            return f"Error in OCR processing: HTTP {res.status} {res.reason}: {body}"
        return body
    except Exception as e:
        return f"Error in OCR processing: {str(e)}"
    finally:
        # Always release the socket, including on error paths.
        if conn is not None:
            conn.close()

# Shared OpenAI client, keyed from the OPENAI_API_KEY environment variable
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Function to process OCR results with OpenAI
def process_ocr_with_gpt(ocr_results):
    """Ask the OpenAI chat model to clean up and reformat raw OCR output.

    Args:
        ocr_results: Raw OCR text to embed in the user prompt.

    Returns:
        The content of the first completion choice, or a string starting
        with ``"Error in GPT processing: "`` if anything raises.
    """
    try:
        user_message = f"""
        You are an AI assistant tasked with processing OCR results and providing a clean, formatted output.
        Here are the OCR results:

        {ocr_results}

        Please process this information and provide a clean, well-formatted output. If there are any obvious errors in the OCR, please correct them. Format the output in a clear and readable manner.
        """
        conversation = [
            {"role": "system", "content": "You are a helpful assistant that processes OCR results."},
            {"role": "user", "content": user_message},
        ]

        completion = openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=conversation,
        )

        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        return f"Error in GPT processing: {err}"

# Gradio interface function
def process_image(image):
    """Run the full pipeline for one image: upload, OCR, then GPT clean-up.

    Args:
        image: Image object exposing ``save(path)`` (the interface is
            configured to pass a PIL image), or ``None`` when nothing was
            provided.

    Returns:
        A ``(ocr_text, processed_text)`` tuple of strings. On failure the
        first element is an error message starting with ``"Error"`` and the
        second is a short explanation of which stage failed.
    """
    if image is None:
        return "Error in image processing: no image provided", "Failed to process due to an error"

    image_path = None
    try:
        # Use a unique temp file per call: a fixed file name would be shared
        # (and clobbered) by requests that are handled at the same time.
        fd, image_path = tempfile.mkstemp(suffix=".png")
        os.close(fd)  # only the path is needed; image.save reopens it
        image.save(image_path)

        # Upload image to imgbb and get URL
        image_url = upload_image_to_imgbb(image_path)
        if image_url.startswith("Error"):
            return image_url, "Failed to process due to image upload error"

        # Get OCR results
        ocr_results = get_ocr_results(image_url)
        if ocr_results.startswith("Error"):
            return ocr_results, "Failed to process due to OCR error"

        # Process with GPT
        processed_results = process_ocr_with_gpt(ocr_results)

        return ocr_results, processed_results
    except Exception as e:
        return f"Error in image processing: {str(e)}", "Failed to process due to an error"
    finally:
        # Clean up on every exit path, including early returns and errors.
        if image_path is not None:
            try:
                os.remove(image_path)
            except OSError:
                # Best-effort cleanup: a missing or locked temp file must
                # not mask the real result of the pipeline.
                pass

# Build the Gradio UI: one PIL image input and two text outputs, in the same
# order as the tuple returned by process_image (OCR text, processed text).
iface = gr.Interface(
    title="OCR and Text Processing App",
    description="Upload an image to extract text and process it.",
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Textbox(label="OCR Results"), gr.Textbox(label="Processed Results")],
)

# Start serving the interface
iface.launch()