bowenchen118's picture
Change to user-provided API keys
6b8dbdd
raw
history blame
8.56 kB
import os
from opentools.tools.base import BaseTool
from opentools.engine.openai import ChatOpenAI
class Generalist_Solution_Generator_Tool(BaseTool):
require_llm_engine = True
require_api_key = True
def __init__(self, model_string="gpt-4o-mini", api_key=None):
super().__init__(
tool_name="Generalist_Solution_Generator_Tool",
tool_description="A generalized tool that takes query from the user as prompt, and answers the question step by step to the best of its ability. It can also accept an image.",
tool_version="1.0.0",
input_types={
"prompt": "str - The prompt that includes query from the user to guide the agent to generate response (Examples: 'Describe this image in detail').",
"image": "str - The path to the image file if applicable (default: None).",
},
output_type="str - The generated response to the original query prompt",
demo_commands=[
{
"command": 'execution = tool.execute(prompt="Summarize the following text in a few lines")',
"description": "Generate a short summary given the prompt from the user."
},
{
"command": 'execution = tool.execute(prompt="Explain the mood of this scene.", image="path/to/image1.png")',
"description": "Generate a caption focusing on the mood using a specific prompt and image."
},
{
"command": 'execution = tool.execute(prompt="Give your best coordinate estimate for the pacemaker in the image and return (x1, y1, x2, y2)", image="path/to/image2.png")',
"description": "Generate bounding box coordinates given the image and prompt from the user. The format should be (x1, y1, x2, y2)."
},
{
"command": 'execution = tool.execute(prompt="Is the number of tiny objects that are behind the small metal jet less than the number of tiny things left of the tiny sedan?", image="path/to/image2.png")',
"description": "Answer a question step by step given the image."
}
],
# # vesion 0 (bowen) (Generalist: %; 6 Tools: %; Generalist + 6 Tools: %)
# user_metadata = {
# "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
# "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge. For optimal results: 1) Provide clear, specific prompts. 2) Use it as a starting point for complex tasks, then refine with specialized tools. 3) Verify important information from its responses. 4) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
# }
# vesion 2 (Generalist: 68%; 6 Tools: 66%; Generalist + 6 Tools: 54%)
user_metadata = {
"limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
"best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
"1) Provide clear, specific prompts.\n"
"2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
"3) For complex queries, break them down into subtasks and use the tool multiple times.\n"
"4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
"5) Verify important information from its responses.\n"
"6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
}
# # vesion 6 (Generalist: 70%; 6 Tools: 66%; Generalist + 6 Tools: 60%)
# user_metadata = {
# "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
# "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
# "1) Provide clear, specific prompts.\n"
# "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
# "3) For complex queries, break them down into smaller, focused sub-tasks and use the tool multiple times.\n"
# "4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
# "5) Verify important information from its responses.\n"
# "6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
# }
# # vesion 8 (Generalist: 68%; 6 Tools: 66%; Generalist + 6 Tools: 60%)
# user_metadata = {
# "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
# "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
# "1) Provide clear, specific prompts.\n"
# "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
# "3) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
# "4) Verify important information from its responses.\n"
# "5) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
# }
)
self.model_string = model_string
self.api_key = api_key
def execute(self, prompt, image=None):
print(f"\nInitializing Generalist Tool with model: {self.model_string}")
multimodal = True if image else False
llm_engine = ChatOpenAI(model_string=self.model_string, is_multimodal=multimodal, api_key=self.api_key)
try:
input_data = [prompt]
if multimodal:
if not os.path.isfile(image):
return "Error: Invalid image file path."
try:
with open(image, 'rb') as file:
image_bytes = file.read()
input_data.append(image_bytes)
except Exception as e:
return f"Error reading image file: {str(e)}"
response = llm_engine(input_data)
else:
response = llm_engine(input_data[0])
return response
except Exception as e:
return f"Error generating response: {str(e)}"
def get_metadata(self):
metadata = super().get_metadata()
return metadata
if __name__ == "__main__":
# Test command:
"""
Run the following commands in the terminal to test the script:
cd opentools
python tools/default/tool.py
"""
# Get the directory of the current script
script_dir = os.path.dirname(os.path.abspath(__file__))
print(f"Script directory: {script_dir}")
# Example usage of the Generalist_Tool
tool = Generalist_Solution_Generator_Tool()
# tool = Generalist_Solution_Generator_Tool(model_string="gpt-4o-mini")
# tool = Generalist_Solution_Generator_Tool(model_string="gpt-4o")
# Get tool metadata
metadata = tool.get_metadata()
print(metadata)
# Construct the full path to the image using the script's directory
relative_image_path = "../../tasks/minitoolbench/data/mathvista_113.png"
relative_image_path = "examples/mathvista_113.png"
image_path = os.path.join(script_dir, relative_image_path)
prompt = "Describe the image in detail."
# Execute the tool with default prompt
try:
execution = tool.execute(prompt=prompt, image=image_path)
# execution = tool.execute(prompt=prompt)
print("Generated Response:")
print(execution)
except Exception as e:
print(f"Execution failed: {e}")
print("Done!")