from typing import Dict, List, Optional

from langchain_anthropic.chat_models import ChatAnthropic
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnableConfig
from langchain_core.tools import StructuredTool
from pydantic import BaseModel, Field

from research_assistant.components.plannerParser import PlannerParser
from research_assistant.constants import HEILMEIER_CATECHISM


# Structured result of the planner: the raw plan text plus the per-step tools,
# dependencies and arguments that PlannerParser extracts from it.
# (Documented with comments rather than a class docstring so the model's
# generated schema metadata stays exactly as before.)
class PlannerOutput(BaseModel):
    # Raw plan text exactly as produced by the LLM.
    plan_str: str = Field(
        ...,
        description=(
            "This plan includes a detailed breakdown of each step, specifying the task, the tool used, the arguments provided, and any dependencies (outputs from previous steps) required as inputs for that step. An example of a single step would be:\n"
            'Plan_step: "Using the different shots from #E2, analyze their impact on the game. #E3: LLM [What impact do the shots mentioned in #E2 have on the game?]"'
        ),
    )
    # One tool name per plan step.
    tools: list[str] = Field(
        ..., description="The Tool each step of the plan needs to use."
    )
    # Step number -> numbers of the earlier steps whose outputs it consumes.
    dependencies: Dict[int, list[int]] = Field(
        ...,
        description=(
            "A dictionary of dependencies, which elaborates on what outputs are needed for each step in the plan to execute. So that the output of those steps is retrieved and added inside the prompt for the present step. An example of a dependency would be: {2 : [1]}"
        ),
    )
    # One tool argument string per plan step.
    arguments: list[str] = Field(
        ...,
        description="The arguments that the tool needs to be given. These arguments will be used in the prompt to get the output.",
    )


def get_planner(llm: ChatAnthropic) -> StructuredTool:
    """Build the "planner" tool of the research assistant agent.

    The returned tool sends the article text to ``llm`` together with a fixed
    system prompt (Heilmeier catechism questions, the available tools and a
    worked example) and parses the model's reply into a ``PlannerOutput``.

    Args:
        llm: Chat model that is piped after the planner prompt template.

    Returns:
        A ``StructuredTool`` named ``"planner"`` wrapping ``get_plan``.
    """
    # NOTE(review): the reviewed source breaks lines inside the system prompt
    # after "themselves. " and after "example. ". The sentences are joined
    # here with the single visible space - confirm that no special separator
    # character was intended at those two positions.
    system_prompt = (
        "You are a research assistant whose primary job is to explain a research article in a clear and accessible way. Your goal is to read the entire article and provide an explanation that allows other researchers to understand its content without having to read it themselves. "
        f"Additionally, you should be able to answer any questions they might have. The most efficient way to accomplish this is by answering the following Heilmeier catechism questions in detail:\n{HEILMEIER_CATECHISM}\n\n"
        # Role of the planner and the tools it may reference in a plan.
        "You are a planner of the research assistant agent architecture. You need to generate a step-by-step plan process such that you can get all the answers to the given questions using the provided tools:\n"
        "(1) Arxiv [input]: A tool that searches for results from the Arxiv website. It is useful for finding information on specific topics. The input should be a concise text string, similar to a search query used in a Google search. This tool searches published articles and provides details about the article and a summary of its content. The information obtained is reliable, so if you need information not covered in the research article or require external information, use this tool.\n"
        "(2) LLM [input]: A pretrained language model that can answer any questions. You provide the query and additional context, and it generates a relevant, summarized answer. The additional context may include the output from previous steps or evidence gathered using the Arxiv tool.\n\n"
        # Worked example showing the expected "#E<n> = Tool[...]" step format.
        "For example,\n"
        "Task: Explain different kinds of cricket shots.\n"
        "plan_str:\n"
        '1. Start by finding different kinds of cricket shots. #E1 = Arxiv["Different kinds of cricket shots"].\n'
        '2. Given the result of the search query, find different types of cricket shots. #E2 = LLM ["Find the different types of cricket shots given the result of search query #E1."]\n'
        '3. Now, let us find out about different types of cricket shots and their impact on the game. #E3 = LLM ["Given the different types of cricket shots from the step #E1 till step #E2, how does their impact on the game look like?"]\n\n'
        # Closing instructions on the plan's format.
        "Describe the steps of your plan with rich details. Each step of the plan should contain #E as shown in the example. "
        "DO NOT write a step at the end to summarize the plan."
    )

    # A SystemMessage instance (not a template string) is used, so the prompt
    # text above is passed as a ready-made message.
    prompt_template = ChatPromptTemplate.from_messages(
        [
            SystemMessage(system_prompt),
            MessagesPlaceholder(variable_name="context", optional=True),
            MessagesPlaceholder(variable_name="article_text"),
        ]
    )
    planner = prompt_template | llm

    def parse_plan(plan_string: str) -> PlannerOutput:
        """Parse the raw plan text into tools, dependencies and arguments."""
        parser = PlannerParser(plan_string=plan_string)
        return PlannerOutput(
            plan_str=plan_string,
            tools=parser.get_tool_list(),
            dependencies=parser.get_dependency_list(),
            arguments=parser.get_argument_list(),
        )

    # Tool entry point: asks the planner chain for a plan covering
    # `article_text` and returns it as a PlannerOutput.
    # `_context` and `_config` are accepted but not used yet (see TODOs).
    # Raises TypeError when the model's response content is not a plain string.
    # (Kept as comments, not a docstring, so nothing derived from this
    # function's docstring changes.)
    def get_plan(
        article_text: str,
        _context: Optional[List[str]] = None,  # TODO: rename when context is used
        _config: Optional[RunnableConfig] = None,  # TODO: rename when config is used
    ):
        response = planner.invoke(
            {
                "article_text": [
                    HumanMessage(
                        f"You are given a research document with the following content:\n{article_text}.\n\n"
                        "Read the research document thoroughly. Using the tools provided to you, generate a step-by-step plan that would use these tools in the specified step-wise manner to get a detailed summary for all the questions."
                    )
                ]
            }
        )

        plan_text = response.content
        # Only a plain-string reply can be handed to the parser.
        if not isinstance(plan_text, str):
            raise TypeError(
                "Response.Content i.e the plan given out from the llm must be a string"
            )
        return parse_plan(plan_text)

    return StructuredTool.from_function(
        name="planner",
        func=get_plan,
        # The field list below mirrors PlannerOutput, which is what the tool
        # actually returns (plan_str, tools, dependencies, arguments).
        description=(
            'This tool is used to generate a plan for obtaining a summary of research articles. Rather than providing the entire summary, it focuses on creating a step-by-step plan that guides the agent in producing a detailed, accurate summary of a research article. This tool can be considered the "brain" that designs the agent\'s workflow.\n'
            "For Example:\n"
            "Input: The parsed pdf string of the article\n"
            "Answer: An object consisting of the following fields:\n"
            "plan_str: str\n"
            "tools : List[str]\n"
            "dependencies : dict\n"
            "arguments : List[str]\n"
        ),
    )