Spaces:

OctoTools
/

octotools

Running on T4

File size: 8,560 Bytes

d2beadd
 
 
 
 
 
6b8dbdd
d2beadd
6b8dbdd
d2beadd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b8dbdd
d2beadd
 
 
 
 
6b8dbdd
d2beadd

import os
from opentools.tools.base import BaseTool
from opentools.engine.openai import ChatOpenAI

class Generalist_Solution_Generator_Tool(BaseTool):
    require_llm_engine = True
    require_api_key = True

    def __init__(self, model_string="gpt-4o-mini", api_key=None):
        super().__init__(
            tool_name="Generalist_Solution_Generator_Tool",
            tool_description="A generalized tool that takes query from the user as prompt, and answers the question step by step to the best of its ability. It can also accept an image.",
            tool_version="1.0.0",
            input_types={
                "prompt": "str - The prompt that includes query from the user to guide the agent to generate response (Examples: 'Describe this image in detail').",
                "image": "str - The path to the image file if applicable (default: None).",
            },
            output_type="str - The generated response to the original query prompt",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(prompt="Summarize the following text in a few lines")',
                    "description": "Generate a short summary given the prompt from the user."
                },
                {
                    "command": 'execution = tool.execute(prompt="Explain the mood of this scene.", image="path/to/image1.png")',
                    "description": "Generate a caption focusing on the mood using a specific prompt and image."
                },
                {
                    "command": 'execution = tool.execute(prompt="Give your best coordinate estimate for the pacemaker in the image and return (x1, y1, x2, y2)", image="path/to/image2.png")',
                    "description": "Generate bounding box coordinates given the image and prompt from the user. The format should be (x1, y1, x2, y2)."
                },
                {
                    "command": 'execution = tool.execute(prompt="Is the number of tiny objects that are behind the small metal jet less than the number of tiny things left of the tiny sedan?", image="path/to/image2.png")',
                    "description": "Answer a question step by step given the image."
                }
            ],
            # # vesion 0 (bowen) (Generalist: %; 6 Tools: %; Generalist + 6 Tools: %)
            # user_metadata = {
            #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
            #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge. For optimal results: 1) Provide clear, specific prompts. 2) Use it as a starting point for complex tasks, then refine with specialized tools. 3) Verify important information from its responses. 4) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            # }
            # vesion 2 (Generalist: 68%; 6 Tools: 66%; Generalist + 6 Tools: 54%)
            user_metadata = {
                "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
                "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
                "1) Provide clear, specific prompts.\n"
                "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
                "3) For complex queries, break them down into subtasks and use the tool multiple times.\n"
                "4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
                "5) Verify important information from its responses.\n"
                "6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            }
            # # vesion 6 (Generalist: 70%; 6 Tools: 66%; Generalist + 6 Tools: 60%)
            # user_metadata = {
            #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
            #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
            #     "1) Provide clear, specific prompts.\n"
            #     "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
            #     "3) For complex queries, break them down into smaller, focused sub-tasks and use the tool multiple times.\n"
            #     "4) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
            #     "5) Verify important information from its responses.\n"
            #     "6) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            # }
            # # vesion 8 (Generalist: 68%; 6 Tools: 66%; Generalist + 6 Tools: 60%)
            # user_metadata = {
            #     "limitation": "The Generalist_Solution_Generator_Tool may provide hallucinated or incorrect responses.",
            #     "best_practice": "Use the Generalist_Solution_Generator_Tool for general queries or tasks that don't require specialized knowledge or specific tools in the toolbox. For optimal results:\n\n"
            #     "1) Provide clear, specific prompts.\n"
            #     "2) Use it to answer the original query through step by step reasoning for tasks without complex or multi-step reasoning.\n"
            #     "3) Use it as a starting point for complex tasks, then refine with specialized tools.\n"
            #     "4) Verify important information from its responses.\n"
            #     "5) For image-related tasks, ensure the image path is correct and the prompt is relevant to the image content."
            # }
        )
        self.model_string = model_string  
        self.api_key = api_key

    def execute(self, prompt, image=None):

        print(f"\nInitializing Generalist Tool with model: {self.model_string}")
        multimodal = True if image else False
        llm_engine = ChatOpenAI(model_string=self.model_string, is_multimodal=multimodal, api_key=self.api_key)

        try:
            input_data = [prompt]
            if multimodal:
                if not os.path.isfile(image):
                    return "Error: Invalid image file path."
                try:
                    with open(image, 'rb') as file:
                        image_bytes = file.read()
                    input_data.append(image_bytes)
                except Exception as e:
                    return f"Error reading image file: {str(e)}"

                response = llm_engine(input_data)
            else:
                response = llm_engine(input_data[0])
            return response
        except Exception as e:
            return f"Error generating response: {str(e)}"

    def get_metadata(self):
        metadata = super().get_metadata()
        return metadata

if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:
    
    cd opentools
    python tools/default/tool.py
    """

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))
    print(f"Script directory: {script_dir}")

    # Example usage of the Generalist_Tool
    tool = Generalist_Solution_Generator_Tool()
    # tool = Generalist_Solution_Generator_Tool(model_string="gpt-4o-mini")
    # tool = Generalist_Solution_Generator_Tool(model_string="gpt-4o")

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    # Construct the full path to the image using the script's directory
    relative_image_path = "../../tasks/minitoolbench/data/mathvista_113.png"
    relative_image_path = "examples/mathvista_113.png"
    image_path = os.path.join(script_dir, relative_image_path)
    prompt = "Describe the image in detail."

    # Execute the tool with default prompt
    try:
        execution = tool.execute(prompt=prompt, image=image_path)
        # execution = tool.execute(prompt=prompt)
        print("Generated Response:")
        print(execution)
    except Exception as e: 
        print(f"Execution failed: {e}")

    print("Done!")