File size: 1,838 Bytes
6f5b327
52e7b1a
4f1688d
5cb55c0
 
 
6f5b327
1f6ea87
2775582
 
 
1f6ea87
 
5cb55c0
 
 
 
 
 
6f5b327
4f1688d
5cb55c0
 
4f1688d
 
5cb55c0
 
 
 
 
 
 
 
 
 
 
 
 
4f1688d
 
109854c
5cb55c0
 
109854c
5cb55c0
4f1688d
 
 
5cb55c0
4f1688d
6f5b327
 
4f1688d
6f5b327
4f1688d
 
 
 
 
6f5b327
 
4f1688d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import gradio as gr
from openai import OpenAI 
import os
from PIL import Image
import base64
import io

# Initialize the OpenAI client with the API key from the environment.
# (The legacy module-level API raised errors, so the client object is used.)
api_key = os.getenv("OPENAI_API_KEY")
# An unset variable and an empty one are both unusable: fail at startup with
# a clear message rather than on the first API call.
if not api_key:
    # Message text restored from a mis-encoded (mojibake) literal; it reads
    # "The OPENAI_API_KEY environment variable is not set."
    raise ValueError("OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")
client = OpenAI(api_key=api_key)


def image_to_base64(image):
    """Encode an image as a base64 string of its JPEG bytes.

    Args:
        image: A PIL-style image object exposing ``mode``, ``convert`` and
            ``save``.

    Returns:
        The JPEG-encoded image as an ASCII base64 string (no data-URL prefix).
    """
    # JPEG cannot store alpha channels or palettes; saving e.g. an RGBA, LA
    # or P image raises OSError. Convert only those modes so images that are
    # already JPEG-writable are encoded exactly as before.
    jpeg_writable_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode not in jpeg_writable_modes:
        # convert() returns a new image; the caller's object is not modified.
        image = image.convert("RGB")

    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode()

def extract_and_summarize(image):
    """Summarize the text of a document image with the OpenAI chat API.

    Args:
        image: The PIL image to summarize, or ``None`` when nothing was
            uploaded.

    Returns:
        The message content of the first completion choice.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    # Without this guard a submit with no upload fails with an opaque
    # AttributeError inside the encoder; gr.Error surfaces a readable
    # message in the UI instead.
    if image is None:
        raise gr.Error("Please upload a document image before submitting.")

    # The image is sent inline as a base64 JPEG data URL.
    image_base64 = image_to_base64(image)

    # System instruction plus one user turn carrying both text and the image.
    prompt = [
        {
            "role": "system",
            "content": "You are a helpful assistant. Summarize the text content of the document image provided."
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Here is an image of a document. Please summarize its content."},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}
            ]
        }
    ]

    # Request the summary from gpt-4o; the reply is capped at 300 tokens.
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=prompt,
        temperature=0.0,
        max_tokens=300,
    )

    # Only the first choice is used.
    return response.choices[0].message.content

# Build the UI components first: one image input, one text output.
document_input = gr.Image(type="pil", label="Upload Document Image")
summary_output = gr.Textbox(label="Summarized Text")

# Wire the components to the summarization function.
iface = gr.Interface(
    fn=extract_and_summarize,
    inputs=document_input,
    outputs=summary_output,
    title="Document Summarizer",
    description="Upload an image of a document and get a summarized text.",
)

# Start the app.
iface.launch()