|
import base64
import http.client
import json
import os
import tempfile

import gradio as gr
import requests
from dotenv import load_dotenv
from openai import OpenAI
|
|
|
|
|
# Load environment variables from a local .env file, if present, before any
# of the os.getenv() lookups for API keys below are evaluated.
load_dotenv()
|
|
|
|
|
def upload_image_to_imgbb(image_path):
    """Upload a local image file to ImgBB and return its hosted URL.

    Args:
        image_path: Path of the image file to read and upload.

    Returns:
        The value of ``data.url`` from the ImgBB JSON response on success.
        On any failure a string starting with ``"Error uploading image:"``
        is returned instead; errors are reported in-band and never raised.
    """
    api_key = os.getenv("IMGBB_API_KEY")
    if not api_key:
        # Fail fast with a clear message instead of posting "key=None".
        return "Error uploading image: IMGBB_API_KEY is not set"

    try:
        # Read and encode first so the file handle is released before the
        # (potentially slow) network request starts.
        with open(image_path, "rb") as file:
            encoded_image = base64.b64encode(file.read()).decode()

        res = requests.post(
            "https://api.imgbb.com/1/upload",
            data={"key": api_key, "image": encoded_image},
            # Without a timeout, requests can block forever on a stalled server.
            timeout=30,
        )
        res.raise_for_status()
        return res.json()['data']['url']
    except Exception as e:
        return f"Error uploading image: {e}"
|
|
|
|
|
def get_ocr_results(image_url):
    """Submit an image URL to the ocr43 RapidAPI endpoint and return its reply.

    Args:
        image_url: URL of the image, sent as the ``url`` multipart form field.

    Returns:
        The response body decoded as UTF-8 text on success. On any failure
        (missing API key, network error, HTTP status >= 400) a string
        starting with ``"Error in OCR processing:"`` is returned instead;
        errors are reported in-band and never raised.
    """
    api_key = os.getenv("RAPIDAPI_KEY")
    if not api_key:
        return "Error in OCR processing: RAPIDAPI_KEY is not set"

    host = "ocr43.p.rapidapi.com"
    boundary = "---011000010111000001101001"
    # Multipart delimiters are "--" + boundary; the closing one adds a
    # trailing "--".
    payload = (
        f"--{boundary}\r\n"
        "Content-Disposition: form-data; name=\"url\"\r\n"
        "\r\n"
        f"{image_url}\r\n"
        f"--{boundary}--\r\n"
        "\r\n"
    )
    headers = {
        'x-rapidapi-key': api_key,
        'x-rapidapi-host': host,
        'Content-Type': f"multipart/form-data; boundary={boundary}",
    }

    conn = None
    try:
        # A timeout keeps a stalled server from blocking the request forever.
        conn = http.client.HTTPSConnection(host, timeout=30)
        conn.request("POST", "/v1/results", payload, headers)
        res = conn.getresponse()
        body = res.read().decode("utf-8")
        if res.status >= 400:
            # Do not pass an HTTP error body downstream as if it were OCR text.
            return f"Error in OCR processing: HTTP {res.status} {res.reason}: {body}"
        return body
    except Exception as e:
        return f"Error in OCR processing: {e}"
    finally:
        # Always release the socket, on success and on failure.
        if conn is not None:
            conn.close()
|
|
|
|
|
# Shared OpenAI client used by process_ocr_with_gpt; the API key is read from
# the OPENAI_API_KEY environment variable.
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
|
|
|
def process_ocr_with_gpt(ocr_results):
    """Ask the OpenAI chat model to clean up and reformat raw OCR output.

    Args:
        ocr_results: Raw OCR text to embed in the prompt.

    Returns:
        The text of the first completion choice (an empty string if the
        model returned no content). When the input is empty or the request
        fails, a string starting with ``"Error in GPT processing:"`` is
        returned instead; errors are reported in-band and never raised.
    """
    try:
        # Skip the API call entirely when there is nothing to process.
        if not str(ocr_results or "").strip():
            return "Error in GPT processing: no OCR text to process"

        prompt = (
            "\n"
            "You are an AI assistant tasked with processing OCR results and providing a clean, formatted output.\n"
            "Here are the OCR results:\n"
            "\n"
            f"{ocr_results}\n"
            "\n"
            "Please process this information and provide a clean, well-formatted output. If there are any obvious errors in the OCR, please correct them. Format the output in a clear and readable manner.\n"
        )

        response = openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant that processes OCR results."},
                {"role": "user", "content": prompt},
            ],
        )

        # message.content may be None; always hand a string back to the caller.
        return response.choices[0].message.content or ""
    except Exception as e:
        return f"Error in GPT processing: {e}"
|
|
|
|
|
def process_image(image):
    """Run the full pipeline: save image, upload, OCR, then GPT clean-up.

    Args:
        image: The image object supplied by the Gradio input; it must expose
            a ``save(path)`` method. ``None`` (no image submitted) is
            reported as an error.

    Returns:
        A 2-tuple ``(ocr_results, processed_results)``. If a stage fails,
        the first element carries the error message and the second a short
        explanation of which stage failed. The function never raises.
    """
    if image is None:
        return (
            "Error in image processing: no image provided",
            "Failed to process due to an error",
        )

    try:
        # A per-call temporary directory gives each request its own file,
        # so concurrent requests cannot overwrite each other, and removes
        # the file on every exit path, including early returns and errors.
        with tempfile.TemporaryDirectory() as workdir:
            image_path = os.path.join(workdir, "temp_image.png")
            image.save(image_path)
            image_url = upload_image_to_imgbb(image_path)

        # The helpers report failures in-band as strings starting with "Error".
        if image_url.startswith("Error"):
            return image_url, "Failed to process due to image upload error"

        ocr_results = get_ocr_results(image_url)
        if ocr_results.startswith("Error"):
            return ocr_results, "Failed to process due to OCR error"

        processed_results = process_ocr_with_gpt(ocr_results)
        return ocr_results, processed_results
    except Exception as e:
        return f"Error in image processing: {e}", "Failed to process due to an error"
|
|
|
|
|
# Gradio UI: one image input wired to process_image, with two text outputs
# matching the (ocr_results, processed_results) tuple it returns.
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Textbox(label="OCR Results"),
        gr.Textbox(label="Processed Results"),
    ],
    title="OCR and Text Processing App",
    description="Upload an image to extract text and process it.",
)

# Start the web server for the interface.
iface.launch()
|
|