import gradio as gr import requests import json import base64 API_KEY = "your_api_key" # Replace with your actual API key def encode_image_to_base64(image): encoded_image = base64.b64encode(image).decode("utf-8") return encoded_image def extract_text_from_image(encoded_image): url = "https://api.openai.com/v1/images/generations" headers = { "Content-Type": "application/json", "Authorization": f"Bearer {API_KEY}", } data = { "prompt": "Extract the text from the document image.", "image": encoded_image, "n": 1, "size": "1024x1024", } response = requests.post(url, headers=headers, data=json.dumps(data)) extracted_text = response.json()["data"][0]["text"] return extracted_text def summarize_text(text): url = "https://api.openai.com/v1/completions" headers = { "Content-Type": "application/json", "Authorization": f"Bearer {API_KEY}", } data = { "prompt": f"Summarize the following text:\n\n{text}\n\nSummary:", "max_tokens": 100, "n": 1, "stop": None, "temperature": 0.7, } response = requests.post(url, headers=headers, data=json.dumps(data)) summary = response.json()["choices"][0]["text"] return summary def document_summarizer(image): encoded_image = encode_image_to_base64(image) extracted_text = extract_text_from_image(encoded_image) summary = summarize_text(extracted_text) return summary iface = gr.Interface( fn=document_summarizer, inputs=gr.inputs.Image(type="file", label="Upload Document Image"), outputs="text", title="Document Image Summarizer", description="Upload an image of a document and get a summary of its content.", ) iface.launch()