# Spaces: Runtime error
import base64 | |
from openai import OpenAI | |
from typing import List, Dict, Any | |
from dotenv import load_dotenv | |
import os | |
load_dotenv() | |
# Source: https://platform.openai.com/docs/guides/vision?lang=python
def _encode_local_image(path: str) -> str:
    """Return a base64 ``data:`` URL for the image file at *path*."""
    # Imported here so this block is self-contained and does not rely on
    # changes to the file-level import section.
    import mimetypes

    mime_type, _ = mimetypes.guess_type(path)
    if mime_type is None or not mime_type.startswith("image/"):
        # Unrecognised extension: fall back to the label this code has
        # always used for local files.
        mime_type = "image/jpeg"
    with open(path, "rb") as image_file:
        encoded = base64.b64encode(image_file.read()).decode("utf-8")
    return f"data:{mime_type};base64,{encoded}"


def analyze_images(
    images: List[str],
    prompt: str,
    model: str = "gpt-4-vision-preview",
    max_tokens: int = 300,
) -> Any:
    """
    Analyze multiple images using OpenAI's vision model.

    The prompt is sent as the first user message, followed by one user
    message per image. Entries starting with ``http://`` or ``https://`` are
    passed through as URLs; every other entry is treated as a local file
    path, read in binary mode and embedded as a base64 data URL whose MIME
    type is guessed from the file name (JPEG when it cannot be guessed).

    The client is created with no arguments, so credentials are not passed
    in here. NOTE(review): presumably the key comes from the environment
    (e.g. ``OPENAI_API_KEY`` loaded via dotenv) - confirm.

    Args:
        images (List[str]): List of URLs and/or local paths to the image files.
        prompt (str): Prompt message for the AI model.
        model (str, optional): Name of the vision model to use. Defaults to
            "gpt-4-vision-preview". NOTE(review): preview model names tend to
            be retired; confirm this default is still served.
        max_tokens (int, optional): Maximum number of tokens for the response.
            Defaults to 300.

    Returns:
        The first entry of ``response.choices`` as returned by the client
        (a client-library object, not a plain dict).

    Raises:
        OSError: If a local image path cannot be opened or read.
    """
    client = OpenAI()

    messages = [{
        "role": "user",
        "content": [{"type": "text", "text": prompt}],
    }]
    for image in images:
        if image.startswith(("http://", "https://")):
            # Remote image: the API fetches it from the URL itself.
            url = image
        else:
            # Local image: inline the bytes as a data URL.
            url = _encode_local_image(image)
        messages.append({
            "role": "user",
            "content": [{"type": "image_url", "image_url": {"url": url}}],
        })

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
    )
    return response.choices[0]
def main():
    """Send three sample mesh renders to ``analyze_images`` and print the reply.

    The image paths and the prompt are hard-coded for a single demo case
    (a text-to-3D render of glass lenses).
    """
    images = [
        "/workspaces/Maker-Tech-Tree/mesh_1.png",
        "/workspaces/Maker-Tech-Tree/mesh_2.png",
        "/workspaces/Maker-Tech-Tree/mesh_3.png",
    ]
    # Adjacent string literals with explicit trailing spaces: the previous
    # backslash continuations inside one literal glued sentences together
    # (or embedded source indentation) in the text sent to the model.
    # NOTE(review): the original read "5 25 words"; assumed to mean a
    # 5-25 word range - confirm.
    prompt = (
        "I am creating an 3d model of a Glass lenses for refracting light, "
        "using a text-to-3d model "
        "Do these images look correct? "
        "If not please make a suggestion on how to improve the text input "
        "As this response will be used in a pipeline please only output a new "
        "potential prompt or output nothing, "
        "Please keep the prompt to 5-25 words to not confuse the model"
    )
    response = analyze_images(images, prompt)
    print(response)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()