"""Gradio app: upload images, run OCR on them, and clean up the text with GPT.

Pipeline per image: validate the bytes as an image, upload them to imgbb to
obtain a public URL, send that URL to the RapidAPI OCR endpoint, then ask an
OpenAI chat model to turn the raw OCR response into clean text.
"""

import base64
import http.client
import io
import json
import os
import tempfile

import gradio as gr
import requests
from dotenv import load_dotenv
from openai import OpenAI
from PIL import Image

# Load environment variables
load_dotenv()

IMGBB_UPLOAD_URL = "https://api.imgbb.com/1/upload"
OCR_API_HOST = "ocr43.p.rapidapi.com"
OCR_API_PATH = "/v1/results"
# The body delimiter is "--" + boundary, so the header value carries three
# dashes and the body lines carry five.
OCR_MULTIPART_BOUNDARY = "---011000010111000001101001"
# Without a timeout a stalled remote service would hang the request forever.
REQUEST_TIMEOUT_SECONDS = 60

# The helper functions report failures as strings; these prefixes let callers
# recognise them precisely instead of matching any text starting with "Error".
UPLOAD_ERROR_PREFIX = "Error uploading image: "
OCR_ERROR_PREFIX = "Error in OCR processing: "
GPT_ERROR_PREFIX = "Error in GPT processing: "


# Function to upload image to imgbb
def upload_image_to_imgbb(image_bytes):
    """Upload raw image bytes to imgbb and return the hosted image URL.

    Args:
        image_bytes: The image file content as bytes.

    Returns:
        The value of ``data.url`` from the imgbb JSON response, or a string
        starting with ``"Error uploading image: "`` if anything fails.
    """
    try:
        payload = {
            "key": os.getenv("IMGBB_API_KEY"),
            "image": base64.b64encode(image_bytes).decode(),
        }
        res = requests.post(
            IMGBB_UPLOAD_URL, data=payload, timeout=REQUEST_TIMEOUT_SECONDS
        )
        res.raise_for_status()  # Raises an HTTPError for bad responses
        return res.json()["data"]["url"]
    except Exception as e:
        return f"{UPLOAD_ERROR_PREFIX}{e}"


# OCR API request
def get_ocr_results(image_url):
    """Request OCR for the image at *image_url* and return the response body.

    Args:
        image_url: Publicly reachable URL of the image to analyse.

    Returns:
        The response body decoded as UTF-8, or a string starting with
        ``"Error in OCR processing: "`` if the request fails or the service
        answers with a non-2xx status.
    """
    try:
        payload = (
            f"--{OCR_MULTIPART_BOUNDARY}\r\n"
            'Content-Disposition: form-data; name="url"\r\n\r\n'
            f"{image_url}\r\n"
            f"--{OCR_MULTIPART_BOUNDARY}--\r\n\r\n"
        )
        headers = {
            "x-rapidapi-key": os.getenv("RAPIDAPI_KEY"),
            "x-rapidapi-host": OCR_API_HOST,
            "Content-Type": (
                f"multipart/form-data; boundary={OCR_MULTIPART_BOUNDARY}"
            ),
        }
        conn = http.client.HTTPSConnection(
            OCR_API_HOST, timeout=REQUEST_TIMEOUT_SECONDS
        )
        try:
            # Encode explicitly: http.client would otherwise encode a str body
            # as ISO-8859-1 and fail on characters outside that range.
            conn.request("POST", OCR_API_PATH, payload.encode("utf-8"), headers)
            res = conn.getresponse()
            status, reason = res.status, res.reason
            body = res.read().decode("utf-8")
        finally:
            conn.close()

        # An HTTP error body is not OCR output; do not pass it on as such.
        if not 200 <= status < 300:
            return f"{OCR_ERROR_PREFIX}HTTP {status} {reason}: {body}"
        return body
    except Exception as e:
        return f"{OCR_ERROR_PREFIX}{e}"


# OpenAI API configuration
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


# Function to process OCR results with OpenAI
def process_ocr_with_gpt(ocr_results):
    """Ask the chat model to turn raw OCR output into clean, formatted text.

    Args:
        ocr_results: The OCR response text to clean up.

    Returns:
        The model's reply (an empty string if the reply has no content), or a
        string starting with ``"Error in GPT processing: "`` on failure.
    """
    try:
        prompt = (
            "\n"
            "You are an AI assistant tasked with processing OCR results and "
            "providing a clean, formatted output.\n"
            "Here are the OCR results:\n"
            "\n"
            f"{ocr_results}\n"
            "\n"
            "Please process this information and provide a clean, "
            "well-formatted output.\n"
            "Arrange all elements in order and omit any elements not present "
            "in the file.\n"
        )
        response = openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a helpful assistant that processes OCR results."
                    ),
                },
                {"role": "user", "content": prompt},
            ],
        )
        content = response.choices[0].message.content
        # The content field may be None; callers expect a string.
        return content if content is not None else ""
    except Exception as e:
        return f"{GPT_ERROR_PREFIX}{e}"


def _write_result_file(idx, text):
    """Write *text* to a uniquely named UTF-8 temp file and return its path."""
    # A unique temp file avoids concurrent requests overwriting each other's
    # "processed_result_<idx>.txt"; delete=False keeps it for the download.
    with tempfile.NamedTemporaryFile(
        mode="w",
        encoding="utf-8",
        prefix=f"processed_result_{idx}_",
        suffix=".txt",
        delete=False,
    ) as result_file:
        result_file.write(text)
        return result_file.name


def _process_single_image(idx, image_bytes):
    """Run the pipeline for one image.

    Returns:
        A ``(ocr_text, processed_text, download_path)`` tuple where
        ``download_path`` is ``None`` whenever a stage failed.
    """
    # Validate that the upload really is an image without writing a temporary
    # copy to disk (the upload below uses the original bytes anyway).
    with Image.open(io.BytesIO(image_bytes)) as image:
        image.verify()

    # Upload image to imgbb and get URL
    image_url = upload_image_to_imgbb(image_bytes)
    if image_url.startswith(UPLOAD_ERROR_PREFIX):
        return image_url, "Failed to process due to image upload error", None

    # Get OCR results
    ocr_results = get_ocr_results(image_url)
    if ocr_results.startswith(OCR_ERROR_PREFIX):
        return ocr_results, "Failed to process due to OCR error", None

    # Process with GPT; a failure is reported but not offered as a download.
    processed_results = process_ocr_with_gpt(ocr_results)
    if processed_results.startswith(GPT_ERROR_PREFIX):
        return ocr_results, processed_results, None

    return ocr_results, processed_results, _write_result_file(
        idx, processed_results
    )


# Gradio interface function
def process_images(files):
    """Run OCR and GPT clean-up on every uploaded image.

    Args:
        files: Iterable of image contents as bytes; ``None`` or an empty
            value yields three empty lists.

    Returns:
        A tuple ``(ocr_results_list, processed_results_list, download_links)``.
        The first two lists hold one entry per input (the result or an error
        message); ``download_links`` holds the paths of the result files for
        the images that were processed successfully.
    """
    ocr_results_list = []
    processed_results_list = []
    download_links = []

    for idx, file in enumerate(files or []):
        try:
            ocr_results, processed_results, result_path = _process_single_image(
                idx, file
            )
        except Exception as e:
            ocr_results_list.append(f"Error in image processing: {str(e)}")
            processed_results_list.append("Failed to process due to an error")
            continue

        ocr_results_list.append(ocr_results)
        processed_results_list.append(processed_results)
        if result_path is not None:
            download_links.append(result_path)

    return ocr_results_list, processed_results_list, download_links


def _join_for_textbox(entries):
    """Render per-image entries as one readable string for a Textbox."""
    return "\n\n".join(
        f"[Image {number}]\n{entry}" for number, entry in enumerate(entries, 1)
    )


def _process_images_for_ui(files):
    """Adapt ``process_images`` output to the Gradio output components."""
    ocr_results_list, processed_results_list, download_links = process_images(
        files
    )
    return (
        # Textboxes display strings; passing the lists directly would show
        # their Python repr.
        _join_for_textbox(ocr_results_list),
        _join_for_textbox(processed_results_list),
        download_links or None,
    )


# Create Gradio interface
iface = gr.Interface(
    fn=_process_images_for_ui,
    inputs=gr.Files(label="Upload Images", file_count="multiple", type="binary"),
    outputs=[
        gr.Textbox(label="OCR Results"),
        gr.Textbox(label="Processed Results"),
        gr.File(label="Download Processed Results"),
    ],
    title="OCR and Text Processing App",
    description=(
        "Upload images to extract text and process it. "
        "Download the processed results."
    ),
)

# Launch the app only when run as a script, so importing the module (for
# reuse or testing) does not start a server.
if __name__ == "__main__":
    iface.launch()