"""Gradio app that sends an uploaded document image to the OpenAI chat API and shows the summary."""

import base64
import io
import os
from typing import Optional

import gradio as gr
from openai import OpenAI
from PIL import Image

# Initialize the OpenAI client with the API key.
api_key = os.getenv("OPENAI_API_KEY")
if api_key is None:
    raise ValueError("OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")

# Only the `OpenAI` class is imported (not the `openai` module), so it must be
# referenced directly; `openai.OpenAI(...)` would raise NameError here.
client = OpenAI(api_key=api_key)

# Pillow modes the JPEG encoder can write as-is; anything else (RGBA, LA, P,
# ...) has to be converted before saving or `Image.save` raises OSError.
_JPEG_WRITABLE_MODES = frozenset({"1", "L", "RGB", "RGBX", "CMYK", "YCbCr"})


def image_to_base64(image: Image.Image) -> str:
    """Return *image* encoded as a base64 JPEG string.

    Args:
        image: The PIL image to encode.

    Returns:
        The base64 text of the JPEG-encoded image (no ``data:`` prefix).
    """
    if image.mode not in _JPEG_WRITABLE_MODES:
        # JPEG has no alpha/palette support; converting to RGB drops the
        # alpha channel instead of failing the whole request.
        image = image.convert("RGB")

    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode()


def extract_and_summarize(image: Optional[Image.Image]) -> str:
    """Summarize the text content of a document image using the chat API.

    Args:
        image: The uploaded document as a PIL image, or ``None`` when the
            form was submitted without an upload.

    Returns:
        The summary text from the first completion choice, or an empty
        string if the response carries no text content.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Gradio passes None when nothing was uploaded; surface a readable
        # message in the UI instead of an AttributeError traceback.
        raise gr.Error("Please upload a document image first.")

    # Convert image to base64 so it can be embedded as a data URL.
    image_base64 = image_to_base64(image)

    # Prepare the system instruction and the user message (text + image).
    prompt = [
        {
            "role": "system",
            "content": "You are a helpful assistant. Summarize the text content of the document image provided.",
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Here is an image of a document. Please summarize its content."},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}},
            ],
        },
    ]

    # Call the chat completions API for summarization.
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=prompt,
        temperature=0.0,
        max_tokens=300,
    )

    # `content` can be None; normalize so the Textbox always gets a string.
    summary = response.choices[0].message.content
    return summary or ""


# Define Gradio interface
iface = gr.Interface(
    fn=extract_and_summarize,
    inputs=gr.Image(type="pil", label="Upload Document Image"),
    outputs=gr.Textbox(label="Summarized Text"),
    title="Document Summarizer",
    description="Upload an image of a document and get a summarized text.",
)

# Launch the interface (kept at module level so running or importing this
# file behaves as before).
iface.launch()