File size: 8,560 Bytes
d2beadd
 
 
 
 
 
6b8dbdd
d2beadd
6b8dbdd
d2beadd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b8dbdd
d2beadd
 
 
 
 
6b8dbdd
d2beadd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import os
from opentools.tools.base import BaseTool
from opentools.engine.openai import ChatOpenAI

class Generalist_Solution_Generator_Tool(BaseTool):
    """General-purpose tool that sends a prompt, optionally with an image, to a chat model.

    The prompt (and the raw bytes of the image, when one is given) is passed
    to a ``ChatOpenAI`` engine and the engine's response is returned.
    Input and generation failures are reported as ``"Error..."`` strings
    rather than raised.
    """

    # Class-level flags; presumably consumed by the tool loader -- confirm against BaseTool.
    require_llm_engine = True
    require_api_key = True

    def __init__(self, model_string="gpt-4o-mini", api_key=None):
        """Register the tool metadata and remember the engine settings.

        Args:
            model_string: Model name handed to ``ChatOpenAI`` on each call.
            api_key: API key handed to ``ChatOpenAI``; ``None`` is passed
                through unchanged.
        """
        super().__init__(
            tool_name="Generalist_Solution_Generator_Tool",
            tool_description="A generalized tool that takes query from the user as prompt, and answers the question step by step to the best of its ability. It can also accept an image.",
            tool_version="1.0.0",
            input_types={
                "prompt": "str - The prompt that includes query from the user to guide the agent to generate response (Examples: 'Describe this image in detail').",
                "image": "str - The path to the image file if applicable (default: None).",
            },
            output_type="str - The generated response to the original query prompt",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(prompt="Summarize the following text in a few lines")',
                    "description": "Generate a short summary given the prompt from the user."
                },
                {
                    "command": 'execution = tool.execute(prompt="Explain the mood of this scene.", image="path/to/image1.png")',
                    "description": "Generate a caption focusing on the mood using a specific prompt and image."
                },
                {
                    "command": 'execution = tool.execute(prompt="Give your best coordinate estimate for the pacemaker in the image and return (x1, y1, x2, y2)", image="path/to/image2.png")',
                    "description": "Generate bounding box coordinates given the image and prompt from the user. The format should be (x1, y1, x2, y2)."
                },
                {
                    "command": 'execution = tool.execute(prompt="Is the number of tiny objects that are behind the small metal jet less than the number of tiny things left of the tiny sedan?", image="path/to/image2.png")',
                    "description": "Answer a question step by step given the image."
                }
            ],
            # The commented-out variants below are earlier/alternative wordings of
            # user_metadata, kept together with the scores recorded for each.
            # # version 0 (bowen) (Generalist: %; 6 Tools: %; Generalist + 6 Tools: %)
            # user_metadata = {
            #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
            #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge. For optimal results: 1) Provide clear, specific prompts. 2) Use it as a starting point for complex tasks, then refine with specialized tools. 3) Verify important information from its responses. 4) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            # }
            # version 2 (Generalist: 68%; 6 Tools: 66%; Generalist + 6 Tools: 54%)
            user_metadata={
                "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
                "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
                "1) Provide clear, specific prompts.\n"
                "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
                "3) For complex queries, break them down into subtasks and use the tool multiple times.\n"
                "4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
                "5) Verify important information from its responses.\n"
                "6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            },
            # # version 6 (Generalist: 70%; 6 Tools: 66%; Generalist + 6 Tools: 60%)
            # user_metadata = {
            #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
            #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
            #     "1) Provide clear, specific prompts.\n"
            #     "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
            #     "3) For complex queries, break them down into smaller, focused sub-tasks and use the tool multiple times.\n"
            #     "4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
            #     "5) Verify important information from its responses.\n"
            #     "6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            # }
            # # version 8 (Generalist: 68%; 6 Tools: 66%; Generalist + 6 Tools: 60%)
            # user_metadata = {
            #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
            #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
            #     "1) Provide clear, specific prompts.\n"
            #     "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
            #     "3) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
            #     "4) Verify important information from its responses.\n"
            #     "5) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            # }
        )
        self.model_string = model_string
        self.api_key = api_key

    @staticmethod
    def _read_image(image):
        """Load the image file at ``image``.

        Returns:
            ``(image_bytes, None)`` on success, or ``(None, error_message)``
            when the path is not an existing file or cannot be read.
        """
        try:
            if not os.path.isfile(image):
                return None, "Error: Invalid image file path."
        except Exception as e:
            # os.path.isfile raises (e.g. TypeError) for values that are not
            # path-like; report it with the same message the caller used to get.
            return None, f"Error generating response: {str(e)}"

        try:
            with open(image, 'rb') as file:
                return file.read(), None
        except Exception as e:
            return None, f"Error reading image file: {str(e)}"

    def execute(self, prompt, image=None):
        """Run the model on ``prompt`` and, if given, the image at ``image``.

        Args:
            prompt: Text passed to the engine.
            image: Path to an image file. Any falsy value (``None``, ``""``)
                means a text-only request.

        Returns:
            The engine's response, or a string starting with ``"Error"`` when
            the image cannot be loaded or the engine call raises.
        """
        print(f"\nInitializing Generalist Tool with model: {self.model_string}")
        multimodal = bool(image)

        # Validate and load the image BEFORE building the engine, so a bad path
        # is reported as such and no API client is created for a doomed request.
        llm_input = prompt
        if multimodal:
            image_bytes, error = self._read_image(image)
            if error is not None:
                return error
            # Multimodal requests are sent as [prompt, raw image bytes].
            llm_input = [prompt, image_bytes]

        # Constructed outside the try block on purpose: construction errors
        # propagate to the caller, exactly as before.
        llm_engine = ChatOpenAI(model_string=self.model_string, is_multimodal=multimodal, api_key=self.api_key)

        try:
            return llm_engine(llm_input)
        except Exception as e:
            return f"Error generating response: {str(e)}"

    def get_metadata(self):
        """Return the metadata produced by the base class, unchanged."""
        metadata = super().get_metadata()
        return metadata

if __name__ == "__main__":
    # Manual smoke test. Run the following commands in the terminal:
    #
    #     cd opentools
    #     python tools/default/tool.py

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))
    print(f"Script directory: {script_dir}")

    # Example usage of the Generalist_Tool
    tool = Generalist_Solution_Generator_Tool()
    # tool = Generalist_Solution_Generator_Tool(model_string="gpt-4o-mini")
    # tool = Generalist_Solution_Generator_Tool(model_string="gpt-4o")

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    # Construct the full path to the image using the script's directory.
    # (A previous location, "../../tasks/minitoolbench/data/mathvista_113.png",
    # used to be assigned here and was immediately overwritten; only the
    # effective path is kept.)
    relative_image_path = "examples/mathvista_113.png"
    image_path = os.path.join(script_dir, relative_image_path)
    prompt = "Describe the image in detail."

    # Execute the tool with the prompt and image; execute() returns error
    # strings for most failures, the except covers anything that still raises.
    try:
        execution = tool.execute(prompt=prompt, image=image_path)
        # execution = tool.execute(prompt=prompt)
        print("Generated Response:")
        print(execution)
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")