|
import cv2 |
|
import base64 |
|
import requests |
|
|
|
import numpy as np |
|
|
|
|
|
# Instruction text that compose_payload attaches to every request as the
# system prompt; it asks the model to wrap any marks it mentions in [].
META_PROMPT = '''
- For any marks mentioned in your answer, please highlight them with [].
'''

# Endpoint that prompt_image POSTs the composed payload to.
API_URL = "https://api.openai.com/v1/chat/completions"
|
|
|
|
|
def encode_image_to_base64(image: np.ndarray) -> str:
    """
    Compresses an image to JPEG and returns the bytes as base64 text.

    Parameters:
        image (np.ndarray): The image to convert, given as a numpy array of
            the kind OpenCV works with.

    Returns:
        str: The JPEG data, base64-encoded and decoded to a UTF-8 string.

    Raises:
        ValueError: If OpenCV reports that the JPEG encoding failed.
    """
    ok, jpeg_bytes = cv2.imencode('.jpg', image)
    if ok:
        # b64encode yields bytes; callers expect text they can embed in JSON.
        return base64.b64encode(jpeg_bytes).decode('utf-8')
    raise ValueError("Could not encode image to JPEG format.")
|
|
|
|
|
def compose_headers(api_key: str) -> dict:
    """
    Builds the HTTP headers for a JSON request authenticated by bearer token.

    Parameters:
        api_key (str): The key placed after "Bearer " in the Authorization
            header.

    Returns:
        dict: A mapping with the "Content-Type" and "Authorization" headers.
    """
    request_headers = {"Content-Type": "application/json"}
    request_headers["Authorization"] = f"Bearer {api_key}"
    return request_headers
|
|
|
|
|
def compose_payload(image: np.ndarray, prompt: str) -> dict:
    """
    Builds the JSON body of a GPT-4 Vision chat completion request.

    The body holds two messages: a system message carrying META_PROMPT, and a
    user message whose content is the text prompt followed by the image
    embedded as a base64 JPEG data URL.

    Parameters:
        image (np.ndarray): The image to send; it is JPEG-encoded through
            encode_image_to_base64.
        prompt (str): The user's text prompt about the image.

    Returns:
        dict: The request payload, ready to be serialized as JSON.

    Raises:
        ValueError: If the image cannot be encoded to JPEG.
    """
    base64_image = encode_image_to_base64(image)
    return {
        "model": "gpt-4-vision-preview",
        "messages": [
            # The system instruction has to be a message of its own. Entries
            # of a user message's "content" list are content parts that need
            # a "type" field, so a role/content dict is not valid there.
            {
                "role": "system",
                "content": META_PROMPT
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 800
    }
|
|
|
|
|
def prompt_image(
    api_key: str,
    image: np.ndarray,
    prompt: str,
    timeout: float = 120.0
) -> str:
    """
    Sends an image and a text prompt to the chat completions endpoint and
    returns the text of the model's reply.

    Parameters:
        api_key (str): The API key used for the bearer Authorization header.
        image (np.ndarray): The image to send along with the prompt.
        prompt (str): The text prompt about the image.
        timeout (float): Seconds to wait for the server to connect and to
            send data before the request is abandoned. Defaults to 120.

    Returns:
        str: The message content of the first choice in the response.

    Raises:
        ValueError: If the response body contains an 'error' entry; the
            error's message is used as the exception text.
        requests.exceptions.Timeout: If the server does not respond within
            the given timeout.
    """
    headers = compose_headers(api_key=api_key)
    payload = compose_payload(image=image, prompt=prompt)
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection and the caller never regains control.
    response = requests.post(
        url=API_URL,
        headers=headers,
        json=payload,
        timeout=timeout
    ).json()

    if 'error' in response:
        raise ValueError(response['error']['message'])
    return response['choices'][0]['message']['content']
|
|