File size: 4,838 Bytes
288007d
 
 
 
 
 
 
 
8860032
 
288007d
 
 
 
 
8860032
288007d
8860032
 
 
 
 
 
 
 
288007d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8860032
288007d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8860032
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288007d
 
 
8860032
 
288007d
 
8860032
 
288007d
 
8860032
288007d
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import base64
import http.client
import io
import json
import os
import tempfile

import gradio as gr
import requests
from dotenv import load_dotenv
from openai import OpenAI
from PIL import Image

# Load environment variables (via python-dotenv) before anything below calls
# os.getenv(); this file reads IMGBB_API_KEY, RAPIDAPI_KEY and OPENAI_API_KEY.
load_dotenv()

# Function to upload image to imgbb
def upload_image_to_imgbb(image_bytes, timeout=30):
    """Upload raw image bytes to imgbb and return the hosted image URL.

    Args:
        image_bytes: The image file content as ``bytes``.
        timeout: Seconds to wait for imgbb before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The public URL of the uploaded image on success. On any failure a
        string starting with ``"Error uploading image: "`` is returned
        instead of raising, because callers detect failure by that prefix.
    """
    api_key = os.getenv("IMGBB_API_KEY")
    if not api_key:
        # Fail fast with a clear message instead of sending the image and
        # getting back an opaque HTTP error from the API.
        return "Error uploading image: IMGBB_API_KEY is not set"

    try:
        payload = {
            "key": api_key,
            "image": base64.b64encode(image_bytes).decode("ascii"),
        }
        res = requests.post(
            "https://api.imgbb.com/1/upload",
            data=payload,
            timeout=timeout,
        )
        res.raise_for_status()  # Raises an HTTPError for bad responses
        return res.json()["data"]["url"]
    except Exception as e:
        # Broad on purpose: the caller's contract is "error string, never raise".
        return f"Error uploading image: {str(e)}"

# OCR API request
def get_ocr_results(image_url, timeout=60):
    """Send an image URL to the ocr43 RapidAPI endpoint and return its reply.

    Args:
        image_url: Publicly reachable URL of the image to run OCR on.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        The response body decoded as UTF-8 text on success. On any failure
        (missing key, network error, HTTP status >= 400) a string starting
        with ``"Error in OCR processing: "`` is returned instead of raising,
        because callers detect failure by that prefix.
    """
    api_key = os.getenv("RAPIDAPI_KEY")
    if not api_key:
        return "Error in OCR processing: RAPIDAPI_KEY is not set"

    conn = None
    try:
        conn = http.client.HTTPSConnection("ocr43.p.rapidapi.com", timeout=timeout)
        # Hand-built multipart body: each delimiter is "--" + the boundary
        # declared in the Content-Type header below.
        payload = f"-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"url\"\r\n\r\n{image_url}\r\n-----011000010111000001101001--\r\n\r\n"
        headers = {
            'x-rapidapi-key': api_key,
            'x-rapidapi-host': "ocr43.p.rapidapi.com",
            'Content-Type': "multipart/form-data; boundary=---011000010111000001101001"
        }
        conn.request("POST", "/v1/results", payload, headers)
        res = conn.getresponse()
        body = res.read().decode("utf-8")
        if res.status >= 400:
            # Surface API failures as errors rather than passing the error
            # body downstream as if it were OCR output.
            return f"Error in OCR processing: HTTP {res.status} {res.reason}: {body}"
        return body
    except Exception as e:
        # Broad on purpose: the caller's contract is "error string, never raise".
        return f"Error in OCR processing: {str(e)}"
    finally:
        # Always release the socket, including on the error paths.
        if conn is not None:
            conn.close()

# OpenAI API configuration: one module-level client, created at import time
# and used by process_ocr_with_gpt(); the key comes from OPENAI_API_KEY.
# NOTE(review): presumably the constructor fails when the key is missing,
# which would abort startup — confirm against the openai package docs.
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Function to process OCR results with OpenAI
def process_ocr_with_gpt(ocr_results):
    """Ask the chat model to turn raw OCR output into clean, ordered text.

    Args:
        ocr_results: The OCR service response as text; it is embedded
            verbatim in the user prompt.

    Returns:
        The model's reply text. On any failure, including a reply with no
        text content, a string starting with ``"Error in GPT processing: "``
        is returned instead of raising, so the result is always a ``str``.
    """
    try:
        prompt = f"""
        You are an AI assistant tasked with processing OCR results and providing a clean, formatted output.
        Here are the OCR results:

        {ocr_results}

        Please process this information and provide a clean, well-formatted output. Arrange all elements in order and omit any elements not present in the file.
        """

        response = openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant that processes OCR results."},
                {"role": "user", "content": prompt}
            ]
        )

        content = response.choices[0].message.content
        if content is None:
            # The content field is optional in the API response; returning
            # None would break callers that write the result to a file.
            return "Error in GPT processing: model returned no content"
        return content
    except Exception as e:
        # Broad on purpose: the caller's contract is "error string, never raise".
        return f"Error in GPT processing: {str(e)}"

# Gradio interface function
def _process_single_image(idx, image_bytes):
    """Run one uploaded image through upload -> OCR -> GPT.

    Args:
        idx: Position of the image in the upload batch (used in the result
            file name only).
        image_bytes: Raw file content of the uploaded image.

    Returns:
        A ``(ocr_text, processed_text, result_path)`` tuple. ``result_path``
        is ``None`` whenever a stage failed, so no download is offered for it.

    Raises:
        Exception: Anything raised while decoding the image or writing the
            result file; the caller turns it into a per-file error entry.
    """
    # Decode fully in memory so corrupt or non-image uploads are rejected
    # before any network call. Nothing is written to disk, so there is no
    # temporary file to leak on the failure paths.
    with Image.open(io.BytesIO(image_bytes)) as image:
        image.load()

    # Upload image to imgbb and get URL
    image_url = upload_image_to_imgbb(image_bytes)
    if image_url.startswith("Error"):
        return image_url, "Failed to process due to image upload error", None

    # Get OCR results
    ocr_results = get_ocr_results(image_url)
    if ocr_results.startswith("Error"):
        return ocr_results, "Failed to process due to OCR error", None

    # Process with GPT. Match the helper's full error prefix: legitimate
    # output may itself begin with the word "Error".
    processed_results = process_ocr_with_gpt(ocr_results)
    if not processed_results or processed_results.startswith("Error in GPT processing"):
        failure = processed_results or "Failed to process due to empty GPT response"
        return ocr_results, failure, None

    # Save processed results to a uniquely named file for download, so that
    # concurrent requests cannot overwrite each other's results. UTF-8 is
    # explicit because OCR text is frequently non-ASCII.
    with tempfile.NamedTemporaryFile(
        mode="w",
        encoding="utf-8",
        prefix=f"processed_result_{idx}_",
        suffix=".txt",
        delete=False,
    ) as result_file:
        result_file.write(processed_results)

    return ocr_results, processed_results, result_file.name


# Gradio interface function
def process_images(files):
    """Process a batch of uploaded images and collect per-image results.

    Args:
        files: Iterable of raw image contents (``bytes``), one per upload.
            ``None`` or an empty batch yields three empty lists.

    Returns:
        A tuple ``(ocr_results_list, processed_results_list, download_links)``.
        The first two lists have one entry per input, holding either the
        result or an error message. ``download_links`` holds the result file
        paths of the images that were processed successfully.

    A failure on one image never aborts the batch.

    NOTE(review): the first two values are lists but are shown in Textbox
    outputs; consider joining them into one string at the interface layer.
    """
    ocr_results_list = []
    processed_results_list = []
    download_links = []

    for idx, file in enumerate(files or []):
        try:
            ocr_text, processed_text, result_path = _process_single_image(idx, file)
        except Exception as e:
            # Isolate failures per file so the rest of the batch still runs.
            ocr_text = f"Error in image processing: {str(e)}"
            processed_text = "Failed to process due to an error"
            result_path = None

        ocr_results_list.append(ocr_text)
        processed_results_list.append(processed_text)
        if result_path is not None:
            download_links.append(result_path)

    return ocr_results_list, processed_results_list, download_links

# Build the Gradio UI: a multi-file upload feeding process_images, with two
# text panes and a file download area for its three return values.
_image_upload = gr.Files(label="Upload Images", file_count="multiple", type="binary")
_result_components = [
    gr.Textbox(label="OCR Results"),
    gr.Textbox(label="Processed Results"),
    gr.File(label="Download Processed Results"),
]

iface = gr.Interface(
    fn=process_images,
    inputs=_image_upload,
    outputs=_result_components,
    title="OCR and Text Processing App",
    description="Upload images to extract text and process it. Download the processed results.",
)

# Start serving the app
iface.launch()