# Source: Hugging Face Hub upload by coderpotter ("Upload folder using huggingface_hub", commit 7b2e5db, verified).
from typing import List, Optional
from langchain_anthropic.chat_models import ChatAnthropic
from langchain_community.utilities.arxiv import ArxivAPIWrapper
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnableConfig
from langchain_core.tools import StructuredTool
from pydantic import BaseModel, Field
class ExecuteCode(BaseModel):
    """The input to the summarizer tool function."""

    # This model is handed to ``llm.with_structured_output(...)`` in
    # ``get_qa_tool``, i.e. it describes the shape of the model's reply.
    # NOTE(review): the docstring above and the ``description`` strings below
    # are presumably forwarded to the LLM as part of the output schema, so
    # they are kept verbatim here -- confirm before rewording them (the
    # "summarizer" / "code expression" wording looks copied from another tool).

    # Required: explanation of how the answer was reached.
    reasoning: str = Field(
        ...,
        description="The reasoning behind the code expression, including how context is included, if applicable.",
    )

    # Required: the answer text itself.
    answer: str = Field(
        ...,
        description="The answer to the question about the research article.",
    )
def get_qa_tool(llm: ChatAnthropic):
    """Build the ``qa_agent`` tool that answers questions about a research article.

    The tool pipes a fixed system prompt, an optional context message and the
    question into ``llm.with_structured_output(ExecuteCode)``.

    Args:
        llm: Chat model used to produce the structured answer.

    Returns:
        A ``StructuredTool`` named ``qa_agent`` whose callable is the nested
        ``get_answer`` function.
    """
    prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessage(
                (
                    "You are an advanced research assistant answering questions about a specific research article. The question may require external information beyond the research article itself. This external information, along with the parsed content from the research article, will be provided as 'Additional Context'.\n\n"
                    # --- obligations ---
                    "You must:\n"
                    "1. Thoroughly analyze the research article to understand its key objectives, methods, findings, and implications.\n"
                    "2. Use the research article and any additional context to construct a comprehensive, well-informed answer to the given question.\n"
                    "3. Explicitly reference and combine information from both the research article and the additional context when needed, ensuring that the response is relevant, accurate, and complete.\n\n"
                    # --- procedure ---
                    "Follow these steps when answering:\n"
                    "- If the question can be answered using information from the research article alone, do so.\n"
                    "- If additional context is needed to supplement or clarify the answer, carefully integrate it with the information from the article.\n"
                    "- Ensure the response is precise, concise, and clear, citing the research article and additional context appropriately."
                )
            ),
            MessagesPlaceholder(variable_name="context", optional=True),
            MessagesPlaceholder(variable_name="question"),
        ]
    )
    summarizer = prompt | llm.with_structured_output(ExecuteCode)

    # No docstring on purpose: the tool description is passed explicitly to
    # ``StructuredTool.from_function`` below, and keeping this function
    # docstring-free avoids any chance of extra text leaking into tool metadata.
    def get_answer(
        question: str,
        context: Optional[List[str]] = None,
        config: Optional[RunnableConfig] = None,
    ):
        # Join the context snippets into one newline-separated string; an
        # empty list or whitespace-only content counts as "no context".
        context_str = "\n".join(context).strip() if context else None

        if context_str:
            # Adjacent string literals are concatenated, but only the literal
            # carrying the ``f`` prefix gets brace processing. The first piece
            # is an f-string (``{{#}}`` renders as ``{#}``); the remaining
            # pieces are plain strings, so the placeholder is written as
            # ``{#}`` directly to keep the prompt consistent.
            context_message = HumanMessage(
                (
                    f"Additional context has been provided from other tools (such as parsed PDF content or information retrieved from internet searches). Use it to substitute into any {{#}} variables or other words in the question. Do not directly substitute the value. Rather, extract information in the best suitable format and then substitute. Use this context to enrich your answer by integrating it with the information from the research article. Context:\n{context_str}\n\n"
                    "Instructions:\n"
                    "- Identify where the additional context is necessary to supplement or clarify the research article's information.\n"
                    "- Replace any placeholders or variable information (e.g., {#}) with appropriate details from the context.\n"
                    "- Make sure the final answer blends the research article content with the additional context in a cohesive and accurate manner.\n\n"
                    "Once done, output the updated, comprehensive answer."
                )
            )
        else:
            context_message = HumanMessage("No Additional Context is Provided")

        chain_input = {
            "question": [HumanMessage(question)],
            "context": [context_message],
        }
        return summarizer.invoke(chain_input, config)

    return StructuredTool.from_function(
        name="qa_agent",
        func=get_answer,
        description="This tool is designed to answer specific questions about a research article, rather than simply providing a full summary. It offers a well-rounded and accurate response to your inquiry, allowing you to focus on the exact information you need without having to go through the entire article.",
    )
def get_arxiv_tool(
    k_results: int = 3,
    max_query_length: int = 300,
    max_docs: int = 3,
    doc_content_chars_max: int = 40000,
):
    """Create a configured ``ArxivAPIWrapper``.

    Args:
        k_results: Forwarded as ``top_k_results``.
        max_query_length: Forwarded as ``ARXIV_MAX_QUERY_LENGTH``.
        max_docs: Forwarded as ``load_max_docs``.
        doc_content_chars_max: Forwarded as ``doc_content_chars_max``.

    Returns:
        The ``ArxivAPIWrapper`` instance; ``load_all_available_meta`` is
        always set to ``False``.
    """
    wrapper_settings = {
        "top_k_results": k_results,
        "ARXIV_MAX_QUERY_LENGTH": max_query_length,
        "load_max_docs": max_docs,
        "load_all_available_meta": False,
        "doc_content_chars_max": doc_content_chars_max,
    }
    return ArxivAPIWrapper(**wrapper_settings)  # type: ignore