Spaces:
Running
on
T4
Running
on
T4
File size: 8,560 Bytes
d2beadd 6b8dbdd d2beadd 6b8dbdd d2beadd 6b8dbdd d2beadd 6b8dbdd d2beadd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import os
from opentools.tools.base import BaseTool
from opentools.engine.openai import ChatOpenAI
class Generalist_Solution_Generator_Tool(BaseTool):
    """Tool that sends a prompt, optionally with an image, to a ChatOpenAI engine.

    ``execute`` builds a fresh ``ChatOpenAI`` engine on every call and returns
    whatever the engine returns. Failures while loading the image or calling
    the engine are reported as ``"Error ..."`` strings rather than raised.
    """

    # Class-level flags. They are not read anywhere in this class; presumably
    # the tool-loading framework inspects them -- TODO confirm against BaseTool.
    require_llm_engine = True
    require_api_key = True

    def __init__(self, model_string="gpt-4o-mini", api_key=None):
        """Register the tool's metadata with ``BaseTool`` and store engine settings.

        Args:
            model_string: Model identifier handed to ``ChatOpenAI`` in ``execute``.
            api_key: API key handed to ``ChatOpenAI`` in ``execute``; ``None``
                is passed through unchanged.
        """
        super().__init__(
            tool_name="Generalist_Solution_Generator_Tool",
            tool_description="A generalized tool that takes query from the user as prompt, and answers the question step by step to the best of its ability. It can also accept an image.",
            tool_version="1.0.0",
            input_types={
                "prompt": "str - The prompt that includes query from the user to guide the agent to generate response (Examples: 'Describe this image in detail').",
                "image": "str - The path to the image file if applicable (default: None).",
            },
            output_type="str - The generated response to the original query prompt",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(prompt="Summarize the following text in a few lines")',
                    "description": "Generate a short summary given the prompt from the user."
                },
                {
                    "command": 'execution = tool.execute(prompt="Explain the mood of this scene.", image="path/to/image1.png")',
                    "description": "Generate a caption focusing on the mood using a specific prompt and image."
                },
                {
                    "command": 'execution = tool.execute(prompt="Give your best coordinate estimate for the pacemaker in the image and return (x1, y1, x2, y2)", image="path/to/image2.png")',
                    "description": "Generate bounding box coordinates given the image and prompt from the user. The format should be (x1, y1, x2, y2)."
                },
                {
                    "command": 'execution = tool.execute(prompt="Is the number of tiny objects that are behind the small metal jet less than the number of tiny things left of the tiny sedan?", image="path/to/image2.png")',
                    "description": "Answer a question step by step given the image."
                }
            ],
            # The commented-out dictionaries below are earlier/alternative
            # wordings of user_metadata, kept together with the scores recorded
            # for each wording. Only "version 2" is active.

            # # version 0 (bowen) (Generalist: %; 6 Tools: %; Generalist + 6 Tools: %)
            # user_metadata = {
            #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
            #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge. For optimal results: 1) Provide clear, specific prompts. 2) Use it as a starting point for complex tasks, then refine with specialized tools. 3) Verify important information from its responses. 4) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            # }

            # version 2 (Generalist: 68%; 6 Tools: 66%; Generalist + 6 Tools: 54%)
            user_metadata={
                "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
                "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
                    "1) Provide clear, specific prompts.\n"
                    "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
                    "3) For complex queries, break them down into subtasks and use the tool multiple times.\n"
                    "4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
                    "5) Verify important information from its responses.\n"
                    "6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            }

            # # version 6 (Generalist: 70%; 6 Tools: 66%; Generalist + 6 Tools: 60%)
            # user_metadata = {
            #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
            #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
            #         "1) Provide clear, specific prompts.\n"
            #         "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
            #         "3) For complex queries, break them down into smaller, focused sub-tasks and use the tool multiple times.\n"
            #         "4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
            #         "5) Verify important information from its responses.\n"
            #         "6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            # }

            # # version 8 (Generalist: 68%; 6 Tools: 66%; Generalist + 6 Tools: 60%)
            # user_metadata = {
            #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
            #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
            #         "1) Provide clear, specific prompts.\n"
            #         "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
            #         "3) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
            #         "4) Verify important information from its responses.\n"
            #         "5) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            # }
        )
        self.model_string = model_string
        self.api_key = api_key

    @staticmethod
    def _load_image(image):
        """Read the image file at ``image`` and return ``(image_bytes, error)``.

        Exactly one element of the pair is ``None``: on success ``error`` is
        ``None``; on failure ``image_bytes`` is ``None`` and ``error`` is the
        message ``execute`` should return to its caller.
        """
        try:
            if not os.path.isfile(image):
                return None, "Error: Invalid image file path."
        except Exception as e:
            # e.g. a TypeError for an unsupported path type. Reported with the
            # same generic prefix callers have always received for this case.
            return None, f"Error generating response: {str(e)}"

        try:
            with open(image, 'rb') as file:
                return file.read(), None
        except Exception as e:
            return None, f"Error reading image file: {str(e)}"

    def execute(self, prompt, image=None):
        """Send ``prompt`` (and the bytes of ``image``, if given) to the LLM engine.

        Args:
            prompt: Text handed to the engine.
            image: Optional path to an image file. Any falsy value (``None``,
                ``""``) selects a text-only request.

        Returns:
            The engine's response on success. On failure, one of the strings
            ``"Error: Invalid image file path."``,
            ``"Error reading image file: ..."`` or
            ``"Error generating response: ..."``.

        Raises:
            Whatever ``ChatOpenAI(...)`` raises during construction; that call
            is deliberately left outside the error-to-string handling.
        """
        print(f"\nInitializing Generalist Tool with model: {self.model_string}")

        multimodal = bool(image)
        image_bytes = None
        if multimodal:
            # Validate and read the image *before* building the engine, so a
            # bad path fails fast with the image error instead of first paying
            # for (or failing inside) engine construction.
            image_bytes, error = self._load_image(image)
            if error is not None:
                return error

        llm_engine = ChatOpenAI(model_string=self.model_string, is_multimodal=multimodal, api_key=self.api_key)

        try:
            if multimodal:
                # Multimodal requests are passed as a [prompt, raw_bytes] list.
                return llm_engine([prompt, image_bytes])
            return llm_engine(prompt)
        except Exception as e:
            return f"Error generating response: {str(e)}"

    def get_metadata(self):
        """Return the metadata produced by ``BaseTool.get_metadata`` unchanged."""
        return super().get_metadata()
if __name__ == "__main__":
    # Manual smoke test for the tool. Run from the terminal:
    #     cd opentools
    #     python tools/default/tool.py

    # Resolve paths relative to this script so the demo works from any cwd.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    print(f"Script directory: {script_dir}")

    # Example usage of the Generalist_Tool with its default model.
    # A different model can be chosen with e.g. model_string="gpt-4o".
    tool = Generalist_Solution_Generator_Tool()

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    # Construct the full path to the image using the script's directory.
    # Alternative sample image: "../../tasks/minitoolbench/data/mathvista_113.png"
    relative_image_path = "examples/mathvista_113.png"
    image_path = os.path.join(script_dir, relative_image_path)
    prompt = "Describe the image in detail."

    # Execute the tool with the prompt and image; for a text-only run call
    # tool.execute(prompt=prompt) instead.
    try:
        execution = tool.execute(prompt=prompt, image=image_path)
        print("Generated Response:")
        print(execution)
    except Exception as e:
        # Top-level boundary of a demo script: report and carry on to "Done!".
        print(f"Execution failed: {e}")

    print("Done!")