"Open

In [1]:
!pip install -Uq langchain-community
!pip install -Uq langchain
!pip install -Uq langgraph
!pip install -Uq chromadb
!pip install -Uq sentence-transformers
!pip install -Uq gpt4all
!pip install -qU google-search-results

In [2]:
import os
from google.colab import userdata
# Mirror each Colab secret into the environment variable of the same name.
for secret_name in ("HUGGINGFACEHUB_API_TOKEN", "GOOGLE_CSE_ID", "GOOGLE_API_KEY"):
    os.environ[secret_name] = userdata.get(secret_name)

### LLMs

In [None]:
# HF libraries
from langchain_community.llms import HuggingFaceEndpoint

# Generation settings applied identically to both hosted models.
_shared_generation_kwargs = {
    "temperature": 0.1,
    "max_new_tokens": 1024,
    "repetition_penalty": 1.2,
    "return_full_text": False,
}

# Mixtral 8x7B Instruct: used by the generate and transform_query nodes.
llm_mid = HuggingFaceEndpoint(
    repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
    **_shared_generation_kwargs,
)

# Mistral 7B Instruct: used by the grade_documents node.
llm_small = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",
    **_shared_generation_kwargs,
)

### Chroma DB

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings

# Source article that gets indexed for retrieval.
url = "https://lilianweng.github.io/posts/2023-06-23-agent/"

# Fetch the page as LangChain documents.
loader = WebBaseLoader(url)
docs = loader.load()

# Cut the page into chunks (chunk_size=500, chunk_overlap=100).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
all_splits = text_splitter.split_documents(docs)

# Embedding model used for every chunk.
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

# Store the embedded chunks in a Chroma collection and expose it as the
# `retriever` that the retrieve node queries.
vectorstore = Chroma.from_documents(
    all_splits,
    embedding=embedding,
    collection_name="rag-chroma",
)
retriever = vectorstore.as_retriever()

### State

In [5]:
from typing import Annotated, Any, Dict, TypedDict

from langchain_core.messages import BaseMessage

class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        keys: A dictionary where each key is a string and each value may be
            of any type. The nodes in this notebook store entries such as
            "question", "documents", "local", "run_web_search" and
            "generation" in it.
    """

    # `Any` from typing, not the builtin function `any`.
    keys: Dict[str, Any]

### Nodes

In [6]:
import json
import operator
from typing import Annotated, Sequence, TypedDict

from langchain_core.output_parsers import JsonOutputParser
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.tools import Tool
from langchain_community.utilities import GoogleSearchAPIWrapper
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

### Nodes ###

def retrieve(state):
    """
    Retrieve documents

    Args:
        state (dict): The current graph state; ``state["keys"]`` must contain
            "question" and "local".

    Returns:
        state (dict): New "keys" dict holding the retrieved "documents" together
            with the unchanged "question" and "local" values.
    """
    print("---RETRIEVE---")
    state_dict = state["keys"]
    question = state_dict["question"]
    local = state_dict["local"]
    # Retrievers are Runnables: `invoke` is the supported entry point, whereas
    # `get_relevant_documents` is deprecated in langchain-core.
    documents = retriever.invoke(question)

    return {"keys": {"documents": documents, "local": local, "question": question}}

def generate(state):
    """
    Generate answer

    Args:
        state (dict): The current graph state; ``state["keys"]`` must contain
            "question", "documents" and "local".

    Returns:
        state (dict): New "keys" dict with "documents", "question" and "local"
            carried over, plus "generation" holding the model's answer.
    """
    print("---GENERATE---")
    state_dict = state["keys"]
    question = state_dict["question"]
    documents = state_dict["documents"]
    local = state_dict["local"]

    # Prompt
    prompt = PromptTemplate(
        template="""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. \n
 If you don't know the answer, just say that you don't know. Keep the answer concise. \n
 Question: {question} \n
 Context: {context} \n
 """,
        input_variables=["question", "context"],
    )

    # LLM
    llm = llm_mid

    # Post-processing
    def format_docs(docs):
        """Join the text of every document, separated by a blank line."""
        return "\n\n".join(doc.page_content for doc in docs)

    # Chain
    rag_chain = prompt | llm | StrOutputParser()

    # Run. The context must be the documents' text; passing the Document
    # objects themselves would put their repr into the prompt.
    generation = rag_chain.invoke(
        {"context": format_docs(documents), "question": question}
    )

    return {
        "keys": {
            "documents": documents,
            "question": question,
            # Carried over so the state has the same shape as after other nodes.
            "local": local,
            "generation": generation,
        }
    }

def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question.

    Args:
        state (dict): The current graph state; ``state["keys"]`` must contain
            "question", "documents" and "local".

    Returns:
        state (dict): New "keys" dict where "documents" holds only the documents
            graded relevant, and "run_web_search" is "Yes" when at least one
            document was graded irrelevant or no relevant document remains,
            otherwise "No".
    """

    print("---CHECK RELEVANCE---")
    state_dict = state["keys"]
    question = state_dict["question"]
    documents = state_dict["documents"]
    local = state_dict["local"]

    # LLM
    llm = llm_small

    prompt = PromptTemplate(
        template="""You are a grader assessing relevance of a retrieved document to a user question. \n
 Here is the retrieved document: \n\n {context} \n\n
 Here is the user question: {question} \n
 If the document contains keywords related to the user question, grade it as relevant. \n
 It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
 Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \n
 Provide the binary score as a JSON with a single key 'score' and no premable or explaination.
 """,
        input_variables=["question", "context"],
    )

    chain = prompt | llm | JsonOutputParser()

    # Score
    filtered_docs = []
    search = "No"  # Default to do not opt for web search to supplement retrieval
    for d in documents:
        score = chain.invoke(
            {
                "question": question,
                "context": d.page_content,
            }
        )
        # The model may answer "Yes", pad the value, or omit the key entirely;
        # normalize first and treat anything other than "yes" as irrelevant.
        raw_grade = score.get("score", "") if isinstance(score, dict) else ""
        grade = str(raw_grade).strip().lower()
        if grade == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        else:
            print("---GRADE: DOCUMENT IRRELEVANT---")
            search = "Yes"  # Perform web search

    # With nothing relevant left (including an empty retrieval) the answer
    # would have no context, so fall back to web search.
    if not filtered_docs:
        search = "Yes"

    return {
        "keys": {
            "documents": filtered_docs,
            "question": question,
            "local": local,
            "run_web_search": search,
        }
    }

def transform_query(state):
    """
    Rewrite the user's question into a form better suited to retrieval.

    Args:
        state (dict): The current graph state; ``state["keys"]`` must contain
            "question", "documents" and "local".

    Returns:
        state (dict): New "keys" dict with "documents" and "local" unchanged and
            "question" replaced by the re-phrased question.
    """
    print("---TRANSFORM QUERY---")
    current = state["keys"]
    original_question = current["question"]
    documents = current["documents"]
    local = current["local"]

    # Instructions asking the model for a retrieval-friendly rewrite.
    rewrite_prompt = PromptTemplate(
        input_variables=["question"],
        template="""You are generating questions that are well optimized for retrieval. \n
 Look at the input and try to reasin about the underlying sematic intent / meaning . \n
 Here is the initial question:
 \n -------- \n
 {question}
 \n -------- \n
 Provide an improved question without any premable, only respond with the updated question: """,
    )

    # prompt -> llm_mid -> plain string
    rewriter = rewrite_prompt | llm_mid | StrOutputParser()

    return {
        "keys": {
            "documents": documents,
            "question": rewriter.invoke({"question": original_question}),
            "local": local,
        }
    }


def web_search(state):
    """
    Web search based on the re-phrased question using google

    Args:
        state (dict): The current graph state; ``state["keys"]`` must contain
            "question", "documents" and "local".

    Returns:
        state (dict): New "keys" dict whose "documents" is a new list: the
            incoming documents followed by one Document holding the search
            output. The incoming list is left untouched.
    """

    print("---WEB SEARCH---")
    state_dict = state["keys"]
    question = state_dict["question"]
    documents = state_dict["documents"]
    local = state_dict["local"]

    # k=3 presumably caps the number of search results -- confirm against the
    # GoogleSearchAPIWrapper documentation.
    websearch = GoogleSearchAPIWrapper(k=3)
    google_search = Tool(
        name="google_search",
        description="Search Google for recent results.",
        func=websearch.run,
    )
    # Named search_text so the local does not shadow this function's own name.
    search_text = google_search.run(question)
    web_results = Document(page_content=search_text)

    # Build a new list instead of appending in place, so the list object held
    # by the previous state is not mutated behind the graph's back.
    documents = [*documents, web_results]

    return {"keys": {"documents": documents, "local": local, "question": question}}

### Edges

In [7]:
def decide_to_generate(state):
    """
    Determines whether to generate an answer or re-generate a question for web search.

    Args:
        state (dict): The current state of the agent; ``state["keys"]`` must
            contain "run_web_search" (set by grade_documents).

    Returns:
        str: Next node to call: "transform_query" when "run_web_search" is
            "Yes", otherwise "generate".
    """

    print("---DECIDE TO GENERATE---")
    # Only the web-search flag drives the decision, so no other key is read.
    search = state["keys"]["run_web_search"]

    if search == "Yes":
        # grade_documents judged at least one document irrelevant:
        # re-phrase the question and supplement retrieval with a web search.
        print("---DECISION: TRANSFORM QUERY and RUN WEB SEARCH---")
        return "transform_query"

    # Every graded document was relevant, so generate the answer directly.
    print("---DECISION: GENERATE---")
    return "generate"

### Graph

In [8]:
import pprint
from langgraph.graph import END, StateGraph

# The graph passes a GraphState from node to node.
workflow = StateGraph(GraphState)

# Register each node function under the name the edges refer to.
for node_name, node_fn in [
    ("retrieve", retrieve),
    ("grade_documents", grade_documents),
    ("generate", generate),
    ("transform_query", transform_query),
    ("web_search", web_search),
]:
    workflow.add_node(node_name, node_fn)

# Wiring:
#   retrieve -> grade_documents -> generate -> END
#                               \-> transform_query -> web_search -> generate
workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {"transform_query": "transform_query", "generate": "generate"},
)
for source_node, target_node in [
    ("transform_query", "web_search"),
    ("web_search", "generate"),
    ("generate", END),
]:
    workflow.add_edge(source_node, target_node)

# Compile into the runnable application used by the run cell.
app = workflow.compile()

### RUN

In [9]:
# Run the compiled graph on one question and print the final answer.
inputs = {
    "keys": {
        "question": "Explain how the different types of agent memory work?",
        "local": "No",
    }
}

# Track the answer explicitly instead of reading the loop variable after the
# loop, which fails when nothing is streamed or the last event has no answer.
final_generation = None
for output in app.stream(inputs):
    for key, value in output.items():
        # Node
        pprint.pprint(f"Node '{key}':")
        # Optional: print full state at each node
        # pprint.pprint(value["keys"], indent=2, width=80, depth=None)
        node_keys = value.get("keys", {}) if isinstance(value, dict) else {}
        if "generation" in node_keys:
            final_generation = node_keys["generation"]
    pprint.pprint("\n---\n")

# Final generation
if final_generation is None:
    print("No generation was produced by the graph.")
else:
    pprint.pprint(final_generation)

---RETRIEVE---
"Node 'retrieve':"
'\n---\n'
---CHECK RELEVANCE---
---GRADE: DOCUMENT IRRELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT IRRELEVANT---
"Node 'grade_documents':"
'\n---\n'
---DECIDE TO GENERATE---
---DECISION: TRANSFORM QUERY and RUN WEB SEARCH---
---TRANSFORM QUERY---
"Node 'transform_query':"
'\n---\n'
---WEB SEARCH---
"Node 'web_search':"
'\n---\n'
---GENERATE---
"Node 'generate':"
'\n---\n'
"Node '__end__':"
'\n---\n'
('----\n'
 '\n'
 'The functionalities of sensory memory include learning embedding '
 'representations for raw inputs like text, images, or other modalities. '
 'Short-term memory serves as in-context learning with a limited capacity due '
 'to the finite context window length of Transformers. Long-term memory acts '
 'as an external vector store that the agent can access during query time '
 'through fast retrieval. Reflection mechanisms help synthesize memories into '
 "higher-level inferences over time and g