Spaces:

Bilic
/

visionB

Sleeping

Simba

WIP

2874322 about 1 year ago

1.97 kB

	import cv2
	import base64
	import numpy as np


	def encode_image_to_base64(image: np.ndarray) -> str:
	"""
	Encodes a given image represented as a NumPy array to a base64-encoded string.

	Parameters:
	image (np.ndarray): A NumPy array representing the image to be encoded.

	Returns:
	str: A base64-encoded string representing the input image in JPEG format.

	Raises:
	ValueError: If the image cannot be encoded to JPEG format.
	"""

	success, buffer = cv2.imencode('.jpg', image)
	if not success:
	raise ValueError("Could not encode image to JPEG format.")

	encoded_image = base64.b64encode(buffer).decode('utf-8')
	return encoded_image


	def compose_payload(image: np.ndarray, prompt: str) -> dict:
	"""
	Composes a payload dictionary with a base64 encoded image and a text prompt for the GPT-4 Vision model.

	Args:
	image (np.ndarray): The image in the form of a NumPy array to encode and send.
	prompt (str): The prompt text to accompany the image in the payload.

	Returns:
	dict: A dictionary structured as a payload for the GPT-4 Vision model, including the model name,
	an array of messages each containing a role and content with text and the base64 encoded image,
	and the maximum number of tokens to generate.
	"""
	base64_image = encode_image_to_base64(image)
	return {
	"model": "gpt-4-vision-preview",
	"messages": [
	{
	"role": "user",
	"content": [
	{
	"type": "text",
	"text": prompt
	},
	{
	"type": "image_url",
	"image_url": {
	"url": f"data:image/jpeg;base64,{base64_image}"
	}
	}
	]
	}
	],
	"max_tokens": 300
	}