Spaces:

isayahc
/

maker-space

Runtime error

App Files Files Community

maker-space / vision_model.py

isayahc

setting up the generation and processing of images and 3d assets

c042949 12 months ago

raw

history blame

2.65 kB

	import base64
	from openai import OpenAI
	from typing import List, Dict, Any
	from dotenv import load_dotenv
	import os

	# Load variables from a local .env file (if one exists) into the process
	# environment at import time. NOTE(review): presumably this is how
	# OPENAI_API_KEY reaches the OpenAI client -- confirm against deployment.
	load_dotenv()

	# source
	# https://platform.openai.com/docs/guides/vision?lang=python
	def analyze_images(
	    images: List[str],
	    prompt: str,
	    model: str = "gpt-4-vision-preview",
	    max_tokens: int = 300,
	) -> Any:
	    """
	    Analyze multiple images using OpenAI's vision model.

	    The prompt is sent as the first user message; every image follows as its
	    own user message. Remote images are passed by URL, local images are
	    read from disk and embedded as base64 ``data:`` URLs.

	    The client is created with ``OpenAI()`` and no explicit key, so
	    credentials are presumably taken from the environment
	    (e.g. ``OPENAI_API_KEY``) -- verify against the installed SDK.

	    NOTE(review): confirm that the default ``model`` is still offered by the
	    API; preview model names tend to be retired.

	    Args:
	        images (List[str]): List of URLs and/or local paths to the image files.
	        prompt (str): Prompt message for the AI model.
	        model (str, optional): Name of the vision model to use.
	            Defaults to "gpt-4-vision-preview".
	        max_tokens (int, optional): Maximum number of tokens for the response.
	            Defaults to 300.

	    Returns:
	        The first choice of the chat completion (``response.choices[0]``).
	        This is the SDK's choice object, not a plain dict.

	    Raises:
	        OSError: If a local image path cannot be opened or read.
	    """
	    client = OpenAI()

	    # The text prompt always comes first, followed by one message per image.
	    messages = [{
	        "role": "user",
	        "content": [{"type": "text", "text": prompt}],
	    }]
	    messages.extend(
	        {
	            "role": "user",
	            "content": [
	                {"type": "image_url", "image_url": {"url": _image_to_url(image)}}
	            ],
	        }
	        for image in images
	    )

	    response = client.chat.completions.create(
	        model=model,
	        messages=messages,
	        max_tokens=max_tokens,
	    )
	    return response.choices[0]


	def _image_to_url(image: str) -> str:
	    """Return a URL for *image*: http(s) URLs unchanged, local files as data URLs."""
	    import mimetypes  # local import keeps this block self-contained

	    if image.startswith(("http://", "https://")):
	        return image

	    # Label the payload with the file's real type (e.g. image/png) instead of
	    # always claiming JPEG; fall back to JPEG when the extension is unknown
	    # or does not map to an image type.
	    mime_type, _ = mimetypes.guess_type(image)
	    if mime_type is None or not mime_type.startswith("image/"):
	        mime_type = "image/jpeg"

	    with open(image, "rb") as image_file:
	        encoded = base64.b64encode(image_file.read()).decode("utf-8")
	    return f"data:{mime_type};base64,{encoded}"

	def main():
	    """
	    Demo entry point: ask the vision model to review three rendered mesh images.

	    Sends the hard-coded image paths together with a prompt asking whether
	    the renders match the intended object, then prints whatever
	    ``analyze_images`` returns.
	    """
	    images = [
	        "/workspaces/Maker-Tech-Tree/mesh_1.png",
	        "/workspaces/Maker-Tech-Tree/mesh_2.png",
	        "/workspaces/Maker-Tech-Tree/mesh_3.png",
	    ]

	    # Built from adjacent string literals so that sentences are separated
	    # explicitly and no source-code indentation leaks into the prompt.
	    # NOTE(review): the original read "5 25 words"; assumed to mean 5 to 25.
	    prompt = (
	        "I am creating a 3d model of Glass lenses for refracting light, "
	        "using a text-to-3d model. "
	        "Do these images look correct? "
	        "If not, please make a suggestion on how to improve the text input. "
	        "As this response will be used in a pipeline, please only output a new "
	        "potential prompt or output nothing. "
	        "Please keep the prompt to 5 to 25 words to not confuse the model."
	    )

	    response = analyze_images(images, prompt)

	    print(response)

	# Run the demo only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	main()