Spaces:
Running
on
T4
Running
on
T4
import os | |
from opentools.tools.base import BaseTool | |
from opentools.engine.openai import ChatOpenAI | |
class Generalist_Solution_Generator_Tool(BaseTool):
    """General-purpose tool that sends a prompt, optionally with an image, to a ChatOpenAI engine."""

    require_llm_engine = True
    require_api_key = True

    def __init__(self, model_string="gpt-4o-mini", api_key=None):
        """Register the tool's metadata with BaseTool and store the engine settings.

        Args:
            model_string: Model name passed to ChatOpenAI on every execute() call.
            api_key: API key passed to ChatOpenAI on every execute() call.
        """
        accepted_inputs = {
            "prompt": "str - The prompt that includes query from the user to guide the agent to generate response (Examples: 'Describe this image in detail').",
            "image": "str - The path to the image file if applicable (default: None).",
        }

        # (command, description) pairs, expanded into the dict form below.
        example_pairs = (
            (
                'execution = tool.execute(prompt="Summarize the following text in a few lines")',
                "Generate a short summary given the prompt from the user.",
            ),
            (
                'execution = tool.execute(prompt="Explain the mood of this scene.", image="path/to/image1.png")',
                "Generate a caption focusing on the mood using a specific prompt and image.",
            ),
            (
                'execution = tool.execute(prompt="Give your best coordinate estimate for the pacemaker in the image and return (x1, y1, x2, y2)", image="path/to/image2.png")',
                "Generate bounding box coordinates given the image and prompt from the user. The format should be (x1, y1, x2, y2).",
            ),
            (
                'execution = tool.execute(prompt="Is the number of tiny objects that are behind the small metal jet less than the number of tiny things left of the tiny sedan?", image="path/to/image2.png")',
                "Answer a question step by step given the image.",
            ),
        )
        usage_examples = [
            {"command": command, "description": description}
            for command, description in example_pairs
        ]

        # Metadata wording history, with the scores recorded by the original
        # author (Generalist / 6 Tools / Generalist + 6 Tools):
        #   version 0 (bowen): no scores recorded; shorter four-point best_practice.
        #   version 2 (active below): 68% / 66% / 54%.
        #   version 6: 70% / 66% / 60%; step 3 read "break them down into
        #              smaller, focused sub-tasks".
        #   version 8: 68% / 66% / 60%; dropped the "For complex queries" step.
        best_practice_text = (
            "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
            "1) Provide clear, specific prompts.\n"
            "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
            "3) For complex queries, break them down into subtasks and use the tool multiple times.\n"
            "4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
            "5) Verify important information from its responses.\n"
            "6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
        )
        guidance = {
            "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
            "best_practice": best_practice_text,
        }

        super().__init__(
            tool_name="Generalist_Solution_Generator_Tool",
            tool_description="A generalized tool that takes query from the user as prompt, and answers the question step by step to the best of its ability. It can also accept an image.",
            tool_version="1.0.0",
            input_types=accepted_inputs,
            output_type="str - The generated response to the original query prompt",
            demo_commands=usage_examples,
            user_metadata=guidance,
        )
        self.model_string = model_string
        self.api_key = api_key

    def execute(self, prompt, image=None):
        """Send the prompt (plus the image bytes, if an image is given) to the engine.

        Args:
            prompt: Text passed to the engine.
            image: Optional path to an image file; any falsy value means text-only.

        Returns:
            The engine's response, or an error string when the image path is
            not a file, the image cannot be read, or the engine call raises.
        """
        print(f"\nInitializing Generalist Tool with model: {self.model_string}")

        has_image = bool(image)
        # The engine is created outside the try block, so a failure while
        # constructing it propagates to the caller instead of becoming a string.
        engine = ChatOpenAI(
            model_string=self.model_string,
            is_multimodal=has_image,
            api_key=self.api_key,
        )

        try:
            # Text-only request: the engine receives the bare prompt.
            if not has_image:
                return engine(prompt)

            if not os.path.isfile(image):
                return "Error: Invalid image file path."

            try:
                with open(image, "rb") as handle:
                    payload = handle.read()
            except Exception as read_error:
                return f"Error reading image file: {read_error!s}"

            # Multimodal request: the engine receives [prompt, raw image bytes].
            return engine([prompt, payload])
        except Exception as call_error:
            return f"Error generating response: {call_error!s}"

    def get_metadata(self):
        """Return the metadata produced by BaseTool.get_metadata(), unchanged."""
        return super().get_metadata()
if __name__ == "__main__":
    # Manual smoke test. To run it from a terminal:
    #     cd opentools
    #     python tools/default/tool.py

    # Directory containing this script; the image path below is resolved against it.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    print(f"Script directory: {script_dir}")

    # Example usage with the default model. Another model can be chosen
    # explicitly, e.g. Generalist_Solution_Generator_Tool(model_string="gpt-4o").
    tool = Generalist_Solution_Generator_Tool()

    # Show the tool metadata.
    metadata = tool.get_metadata()
    print(metadata)

    # Image path relative to this script. An earlier location,
    # "../../tasks/minitoolbench/data/mathvista_113.png", used to be assigned
    # here and immediately overwritten; that dead assignment has been removed.
    relative_image_path = "examples/mathvista_113.png"
    image_path = os.path.join(script_dir, relative_image_path)
    prompt = "Describe the image in detail."

    # Run the tool on the prompt and image. Drop the image argument
    # (tool.execute(prompt=prompt)) to exercise the text-only path instead.
    try:
        execution = tool.execute(prompt=prompt, image=image_path)
        print("Generated Response:")
        print(execution)
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")