File size: 4,838 Bytes
288007d 8860032 288007d 8860032 288007d 8860032 288007d 8860032 288007d 8860032 288007d 8860032 288007d 8860032 288007d 8860032 288007d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import gradio as gr
import http.client
import json
import requests
import base64
import os
from dotenv import load_dotenv
from openai import OpenAI
from PIL import Image
import io
# Load environment variables (via python-dotenv) before any of the
# os.getenv() lookups in this file run; the API keys read in this file are
# IMGBB_API_KEY, RAPIDAPI_KEY and OPENAI_API_KEY.
load_dotenv()
# Function to upload image to imgbb
def upload_image_to_imgbb(image_bytes):
    """Upload raw image bytes to imgbb and return the hosted image URL.

    Args:
        image_bytes: Encoded image file content (bytes). It is base64-encoded
            and sent as the ``image`` form field.

    Returns:
        The value of ``data.url`` from the JSON response on success. On any
        failure (missing API key, empty payload, transport error, HTTP error
        status, unexpected response shape) no exception is raised; instead a
        string starting with ``"Error uploading image: "`` is returned, which
        callers detect with ``startswith("Error")``.
    """
    try:
        api_key = os.getenv("IMGBB_API_KEY")
        if not api_key:
            # Fail fast with an actionable message rather than posting a
            # request that carries no key.
            raise ValueError("IMGBB_API_KEY is not set")
        if not image_bytes:
            raise ValueError("no image data provided")

        url = "https://api.imgbb.com/1/upload"
        payload = {
            "key": api_key,
            "image": base64.b64encode(image_bytes).decode(),
        }
        # requests applies no timeout by default; set one so a stalled
        # connection cannot block the request handler indefinitely.
        res = requests.post(url, data=payload, timeout=30)
        res.raise_for_status()  # Raises an HTTPError for bad responses
        return res.json()['data']['url']
    except Exception as e:
        # Deliberate boundary: errors are reported as a string, not raised.
        return f"Error uploading image: {str(e)}"
# OCR API request
def get_ocr_results(image_url):
    """Request OCR results for a hosted image from the RapidAPI OCR endpoint.

    Sends a multipart/form-data POST to ``/v1/results`` on
    ``ocr43.p.rapidapi.com`` with the image URL in the ``url`` field.

    Args:
        image_url: URL of the image to run OCR on.

    Returns:
        The response body decoded as UTF-8 text on success. On any failure
        (missing API key, empty URL, transport error, HTTP status >= 400) no
        exception is raised; instead a string starting with
        ``"Error in OCR processing: "`` is returned, which callers detect
        with ``startswith("Error")``.
    """
    conn = None
    try:
        api_key = os.getenv("RAPIDAPI_KEY")
        if not api_key:
            # A None header value would otherwise surface as an opaque
            # TypeError from http.client.
            raise ValueError("RAPIDAPI_KEY is not set")
        if not image_url:
            raise ValueError("no image URL provided")

        # Explicit timeout so a stalled server cannot hang the handler.
        conn = http.client.HTTPSConnection("ocr43.p.rapidapi.com", timeout=60)
        # Each body delimiter is "--" + boundary, hence two more dashes here
        # than in the Content-Type header's boundary parameter.
        payload = f"-----011000010111000001101001\r\nContent-Disposition: form-data; name=\"url\"\r\n\r\n{image_url}\r\n-----011000010111000001101001--\r\n\r\n"
        headers = {
            'x-rapidapi-key': api_key,
            'x-rapidapi-host': "ocr43.p.rapidapi.com",
            'Content-Type': "multipart/form-data; boundary=---011000010111000001101001"
        }
        conn.request("POST", "/v1/results", payload, headers)
        res = conn.getresponse()
        body = res.read().decode("utf-8")
        if res.status >= 400:
            # Do not pass an HTTP error body downstream as if it were OCR text.
            raise RuntimeError(f"HTTP {res.status} {res.reason}: {body}")
        return body
    except Exception as e:
        # Deliberate boundary: errors are reported as a string, not raised.
        return f"Error in OCR processing: {str(e)}"
    finally:
        # Always release the socket, on success and on failure alike.
        if conn is not None:
            conn.close()
# Shared OpenAI client for this module, built once at import time with the
# key taken from the OPENAI_API_KEY environment variable.
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
# Function to process OCR results with OpenAI
def process_ocr_with_gpt(ocr_results):
    """Ask the chat model to turn raw OCR output into clean, formatted text.

    Args:
        ocr_results: OCR output to embed verbatim in the user prompt.

    Returns:
        The message content of the first completion choice. If anything
        raises along the way, a string starting with
        ``"Error in GPT processing: "`` is returned instead.
    """
    try:
        user_prompt = f"""
You are an AI assistant tasked with processing OCR results and providing a clean, formatted output.
Here are the OCR results:
{ocr_results}
Please process this information and provide a clean, well-formatted output. Arrange all elements in order and omit any elements not present in the file.
"""
        system_turn = {"role": "system", "content": "You are a helpful assistant that processes OCR results."}
        user_turn = {"role": "user", "content": user_prompt}
        completion = openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[system_turn, user_turn],
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        # Errors are reported to the caller as text rather than raised.
        return f"Error in GPT processing: {str(err)}"
# Gradio interface function
def process_images(files):
    """Run upload -> OCR -> GPT clean-up for each uploaded image.

    Args:
        files: Iterable of image file contents as bytes, one per uploaded
            file. ``None`` or an empty list yields empty results.

    Returns:
        A tuple ``(ocr_results_list, processed_results_list, download_links)``.
        The first two lists get exactly one entry per input file: the OCR
        text and cleaned-up text on success, or an error message and a
        failure note otherwise. ``download_links`` holds the paths of the
        ``processed_result_<idx>.txt`` files written for the images that
        succeeded, so it can be shorter than the other two lists.
    """
    ocr_results_list = []
    processed_results_list = []
    download_links = []

    # Treat a None input (nothing uploaded) the same as an empty list
    # instead of raising TypeError.
    for idx, file in enumerate(files or []):
        try:
            # Validate that the bytes are a readable image. No temp file is
            # written: the upload below uses the raw bytes, and a PNG copy
            # on disk was both unused and leaked on every early exit.
            with Image.open(io.BytesIO(file)) as image:
                image.verify()

            # Upload image to imgbb and get URL
            image_url = upload_image_to_imgbb(file)
            if image_url.startswith("Error"):
                ocr_results_list.append(image_url)
                processed_results_list.append("Failed to process due to image upload error")
                continue

            # Get OCR results
            ocr_results = get_ocr_results(image_url)
            if ocr_results.startswith("Error"):
                ocr_results_list.append(ocr_results)
                processed_results_list.append("Failed to process due to OCR error")
                continue

            # Process with GPT
            processed_results = process_ocr_with_gpt(ocr_results)
            if processed_results.startswith("Error in GPT processing"):
                # Surface the GPT failure, but do not save the error text as
                # a downloadable "result" file.
                ocr_results_list.append(ocr_results)
                processed_results_list.append(processed_results)
                continue

            # Save processed results to a file for download. UTF-8 is set
            # explicitly so non-ASCII text does not depend on the platform's
            # default encoding.
            result_file_path = f"processed_result_{idx}.txt"
            with open(result_file_path, 'w', encoding="utf-8") as result_file:
                result_file.write(processed_results)
            download_links.append(result_file_path)

            ocr_results_list.append(ocr_results)
            processed_results_list.append(processed_results)
        except Exception as e:
            # Per-file boundary: one bad file must not abort the whole batch.
            ocr_results_list.append(f"Error in image processing: {str(e)}")
            processed_results_list.append("Failed to process due to an error")

    return ocr_results_list, processed_results_list, download_links
# Build the Gradio UI: a multi-file upload (delivered as raw bytes) wired to
# process_images, with two text outputs and one downloadable-file output.
_upload_input = gr.Files(label="Upload Images", file_count="multiple", type="binary")
_result_outputs = [
    gr.Textbox(label="OCR Results"),
    gr.Textbox(label="Processed Results"),
    gr.File(label="Download Processed Results"),
]
iface = gr.Interface(
    fn=process_images,
    inputs=_upload_input,
    outputs=_result_outputs,
    title="OCR and Text Processing App",
    description="Upload images to extract text and process it. Download the processed results.",
)
# Launch the app. NOTE(review): this runs whenever the module is executed or
# imported; there is no `if __name__ == "__main__":` guard around it.
iface.launch()
|