File size: 971 Bytes
5c80958 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
"""VLM Helper Functions."""
import base64
import numpy as np
from openai import OpenAI
class GPT4V:
    """Small wrapper for sending mixed text/image prompts to an OpenAI VLM.

    A prompt is a sequence of strings and images. Each string becomes a text
    part and each image becomes an inline ``data:image/jpeg;base64,...`` URL
    part of a single user message.
    """

    # Model used by `query`. Kept as a class attribute (overridable per
    # instance through `__init__`) so the historical default stays in effect
    # for existing callers while newer model names can be selected.
    model = 'gpt-4-vision-preview'

    def __init__(self, openai_api_key, model=None):
        """Creates the OpenAI client.

        Args:
            openai_api_key: API key handed to the `OpenAI` client.
            model: Optional chat-completions model name. When omitted, the
                class default ('gpt-4-vision-preview') is used.
        """
        self.client = OpenAI(api_key=openai_api_key)
        if model is not None:
            self.model = model

    @staticmethod
    def _build_content(prompt_seq):
        """Converts a prompt sequence into chat-completions content parts."""
        content = []
        for elem in prompt_seq:
            if isinstance(elem, str):
                content.append({'type': 'text', 'text': elem})
                continue
            if isinstance(elem, np.ndarray):
                # tobytes() yields the same bytes as the raw buffer for
                # contiguous arrays and also works for strided views, which
                # b64encode cannot read directly.
                raw = elem.tobytes()
            elif isinstance(elem, (bytes, bytearray)):
                raw = elem
            else:
                # Dropping an element silently would send an incomplete
                # prompt without any indication to the caller.
                raise TypeError(
                    'Unsupported prompt element type: '
                    f'{type(elem).__name__}; expected str, np.ndarray, '
                    'bytes or bytearray.'
                )
            base64_image_str = base64.b64encode(raw).decode('utf-8')
            image_url = f'data:image/jpeg;base64,{base64_image_str}'
            content.append(
                {'type': 'image_url', 'image_url': {'url': image_url}}
            )
        return content

    def query(self, prompt_seq, temperature=0, max_tokens=512):
        """Queries the configured model with a text/image prompt.

        Args:
            prompt_seq: Iterable of prompt elements, kept in order. `str`
                elements are sent as text. `np.ndarray`, `bytes` and
                `bytearray` elements are sent as images: their bytes are
                base64-encoded unchanged and labelled `image/jpeg`. No image
                compression happens here, so the data is presumably expected
                to be JPEG-encoded already (e.g. an encoder's output buffer)
                -- confirm against callers.
            temperature: Sampling temperature forwarded to the API.
            max_tokens: Completion token limit forwarded to the API.

        Returns:
            The message content of the first returned choice.

        Raises:
            TypeError: If `prompt_seq` contains an element of an unsupported
                type. Raised before any API request is made.
        """
        content = self._build_content(prompt_seq)
        messages = [{'role': 'user', 'content': content}]
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        return response.choices[0].message.content
|