import cv2 import base64 import numpy as np def encode_image_to_base64(image: np.ndarray) -> str: """ Encodes a given image represented as a NumPy array to a base64-encoded string. Parameters: image (np.ndarray): A NumPy array representing the image to be encoded. Returns: str: A base64-encoded string representing the input image in JPEG format. Raises: ValueError: If the image cannot be encoded to JPEG format. """ success, buffer = cv2.imencode('.jpg', image) if not success: raise ValueError("Could not encode image to JPEG format.") encoded_image = base64.b64encode(buffer).decode('utf-8') return encoded_image def compose_payload(image: np.ndarray, prompt: str) -> dict: """ Composes a payload dictionary with a base64 encoded image and a text prompt for the GPT-4 Vision model. Args: image (np.ndarray): The image in the form of a NumPy array to encode and send. prompt (str): The prompt text to accompany the image in the payload. Returns: dict: A dictionary structured as a payload for the GPT-4 Vision model, including the model name, an array of messages each containing a role and content with text and the base64 encoded image, and the maximum number of tokens to generate. """ base64_image = encode_image_to_base64(image) return { "model": "gpt-4-vision-preview", "messages": [ { "role": "user", "content": [ { "type": "text", "text": prompt }, { "type": "image_url", "image_url": { "url": f"data:image/jpeg;base64,{base64_image}" } } ] } ], "max_tokens": 300 }