angry-meow committed on
Commit
d7ef377
·
1 Parent(s): 7ea1b4c

restructuring; testing url loading

Browse files
Files changed (5) hide show
  1. agents.py +22 -7
  2. app.py +11 -61
  3. models.py +4 -0
  4. prompts.py +21 -0
  5. tools.py +13 -0
agents.py CHANGED
@@ -1,9 +1,13 @@
1
  from helper_functions import create_team_agent
 
 
2
  import models
 
 
3
 
4
  search_agent = create_team_agent(
5
  models.gpt4o,
6
- [tavily_tool],
7
  "You are a research assistant who can search for up-to-date info using the tavily search engine.",
8
  "Search",
9
  ["Search", "PaperInformationRetriever"]
@@ -11,7 +15,7 @@ search_agent = create_team_agent(
11
 
12
  research_agent = create_team_agent(
13
  models.gpt4o,
14
- [retrieve_information],
15
  "You are a research assistant who can provide specific information on the provided paper: 'murthy-loneliness.pdf'. You must only respond with information about the paper related to the request.",
16
  "PaperInformationRetriever",
17
  ["Search", "PaperInformationRetriever"]
@@ -19,7 +23,7 @@ research_agent = create_team_agent(
19
 
20
  doc_writer_agent = create_team_agent(
21
  models.gpt4o,
22
- [write_document, edit_document, read_document],
23
  "You are an expert writing technical social media posts.",
24
  "DocWriter",
25
  ["DocWriter", "NoteTaker", "CopyEditor", "VoiceEditor"]
@@ -27,7 +31,7 @@ doc_writer_agent = create_team_agent(
27
 
28
  note_taking_agent = create_team_agent(
29
  models.gpt4o,
30
- [create_outline, read_document],
31
  "You are an expert senior researcher tasked with writing a social media post outline and taking notes to craft a social media post.",
32
  "NoteTaker",
33
  ["DocWriter", "NoteTaker", "CopyEditor", "VoiceEditor"]
@@ -35,7 +39,7 @@ note_taking_agent = create_team_agent(
35
 
36
  copy_editor_agent = create_team_agent(
37
  models.gpt4o,
38
- [write_document, edit_document, read_document],
39
  "You are an expert copy editor who focuses on fixing grammar, spelling, and tone issues.",
40
  "CopyEditor",
41
  ["DocWriter", "NoteTaker", "CopyEditor", "VoiceEditor"]
@@ -43,8 +47,19 @@ copy_editor_agent = create_team_agent(
43
 
44
  voice_editor_agent = create_team_agent(
45
  models.gpt4o,
46
- [write_document, edit_document, read_document],
47
  "You are an expert in crafting and refining the voice and tone of social media posts. You edit the document to ensure it has a consistent, professional, and engaging voice appropriate for social media platforms.",
48
  "VoiceEditor",
49
  ["DocWriter", "NoteTaker", "CopyEditor", "VoiceEditor"]
50
- )
 
 
 
 
 
 
 
 
 
 
 
 
1
  from helper_functions import create_team_agent
2
+ from operator import itemgetter
3
+ from langchain_core.runnables.passthrough import RunnablePassthrough
4
  import models
5
+ import prompts
6
+ import tools
7
 
8
  search_agent = create_team_agent(
9
  models.gpt4o,
10
+ [tools.tavily_tool],
11
  "You are a research assistant who can search for up-to-date info using the tavily search engine.",
12
  "Search",
13
  ["Search", "PaperInformationRetriever"]
 
15
 
16
  research_agent = create_team_agent(
17
  models.gpt4o,
18
+ [tools.retrieve_information],
19
  "You are a research assistant who can provide specific information on the provided paper: 'murthy-loneliness.pdf'. You must only respond with information about the paper related to the request.",
20
  "PaperInformationRetriever",
21
  ["Search", "PaperInformationRetriever"]
 
23
 
24
  doc_writer_agent = create_team_agent(
25
  models.gpt4o,
26
+ [tools.write_document, tools.edit_document, tools.read_document],
27
  "You are an expert writing technical social media posts.",
28
  "DocWriter",
29
  ["DocWriter", "NoteTaker", "CopyEditor", "VoiceEditor"]
 
31
 
32
  note_taking_agent = create_team_agent(
33
  models.gpt4o,
34
+ [tools.create_outline, tools.read_document],
35
  "You are an expert senior researcher tasked with writing a social media post outline and taking notes to craft a social media post.",
36
  "NoteTaker",
37
  ["DocWriter", "NoteTaker", "CopyEditor", "VoiceEditor"]
 
39
 
40
  copy_editor_agent = create_team_agent(
41
  models.gpt4o,
42
+ [tools.write_document, tools.edit_document, tools.read_document],
43
  "You are an expert copy editor who focuses on fixing grammar, spelling, and tone issues.",
44
  "CopyEditor",
45
  ["DocWriter", "NoteTaker", "CopyEditor", "VoiceEditor"]
 
47
 
48
  voice_editor_agent = create_team_agent(
49
  models.gpt4o,
50
+ [tools.write_document, tools.edit_document, tools.read_document],
51
  "You are an expert in crafting and refining the voice and tone of social media posts. You edit the document to ensure it has a consistent, professional, and engaging voice appropriate for social media platforms.",
52
  "VoiceEditor",
53
  ["DocWriter", "NoteTaker", "CopyEditor", "VoiceEditor"]
54
+ )
55
+
56
+ simple_rag_chain = (
57
+ {
58
+ "context": itemgetter("question") | models.semantic_tuned_retrieverretriever,
59
+ "question": itemgetter("question"),
60
+ "writing_style_guide": lambda _: prompts.style_guide_text
61
+ }
62
+ | RunnablePassthrough.assign(context=itemgetter("context"))
63
+ | prompts.chat_prompt
64
+ | models.gpt4o
65
+ )
app.py CHANGED
@@ -1,78 +1,26 @@
1
- from langchain_text_splitters import RecursiveCharacterTextSplitter
2
- from qdrant_client import QdrantClient
3
- from langchain_openai.embeddings import OpenAIEmbeddings
4
- from langchain_core.prompts import ChatPromptTemplate
5
- from langchain_core.globals import set_llm_cache
6
- from langchain_openai import ChatOpenAI
7
- from langchain_core.caches import InMemoryCache
8
- from operator import itemgetter
9
- from langchain_core.runnables.passthrough import RunnablePassthrough
10
- from langchain_qdrant import QdrantVectorStore, Qdrant
11
- from langchain_community.document_loaders import PyMuPDFLoader
12
- import uuid
13
  import chainlit as cl
14
- import os
15
  from helper_functions import process_file, load_documents_from_url, add_to_qdrant
16
-
17
- chat_model = ChatOpenAI(model="gpt-4o-mini")
18
- te3_small = OpenAIEmbeddings(model="text-embedding-3-small")
19
- set_llm_cache(InMemoryCache())
20
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
21
- rag_system_prompt_template = """\
22
- You are a helpful assistant that uses the provided context to answer questions.
23
- You must follow the writing style guide provided below. Never reference this prompt,
24
- the existence of context, or the writing style guide in your responses.
25
-
26
- Writing Style Guide:
27
- {writing_style_guide}
28
- """
29
- rag_message_list = [{"role" : "system", "content" : rag_system_prompt_template},]
30
- rag_user_prompt_template = """\
31
- Question:
32
- {question}
33
- Context:
34
- {context}
35
- """
36
- chat_prompt = ChatPromptTemplate.from_messages([("system", rag_system_prompt_template), ("human", rag_user_prompt_template)])
37
 
38
  @cl.on_chat_start
39
  async def on_chat_start():
40
- qdrant_client = QdrantClient(url=os.environ["QDRANT_ENDPOINT"], api_key=os.environ["QDRANT_API_KEY"])
41
  global qdrant_store
42
- qdrant_store = Qdrant(
43
- client=qdrant_client,
44
- collection_name="kai_test_docs",
45
- embeddings=te3_small
46
- )
47
 
48
  res = await ask_action()
49
  await handle_response(res)
50
 
51
- # Load the style guide from the local file system
52
- style_guide_path = "./public/CoExperiences Writing Style Guide V1 (2024).pdf"
53
- loader = PyMuPDFLoader(style_guide_path)
54
- style_guide_docs = loader.load()
55
- style_guide_text = "\n".join([doc.page_content for doc in style_guide_docs])
56
-
57
- retriever = qdrant_store.as_retriever()
58
- global retrieval_augmented_qa_chain
59
- retrieval_augmented_qa_chain = (
60
- {
61
- "context": itemgetter("question") | retriever,
62
- "question": itemgetter("question"),
63
- "writing_style_guide": lambda _: style_guide_text
64
- }
65
- | RunnablePassthrough.assign(context=itemgetter("context"))
66
- | chat_prompt
67
- | chat_model
68
- )
69
-
70
  @cl.author_rename
71
  def rename(orig_author: str):
72
  return "AI Assistant"
73
 
74
  @cl.on_message
75
  async def main(message: cl.Message):
 
76
  if message.content.startswith("http://") or message.content.startswith("https://"):
77
  message_type = "url"
78
  else:
@@ -81,7 +29,9 @@ async def main(message: cl.Message):
81
  if message_type == "url":
82
  # load the file
83
  docs = load_documents_from_url(message.content)
84
- splits = text_splitter.split_documents(docs)
 
 
85
  for i, doc in enumerate(splits):
86
  doc.metadata["user_upload_source"] = f"source_{i}"
87
  print(f"Processing {len(docs)} text chunks")
@@ -131,7 +81,7 @@ async def handle_response(res):
131
 
132
  # load the file
133
  docs = process_file(file)
134
- splits = text_splitter.split_documents(docs)
135
  for i, doc in enumerate(splits):
136
  doc.metadata["user_upload_source"] = f"source_{i}"
137
  print(f"Processing {len(docs)} text chunks")
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import chainlit as cl
 
2
  from helper_functions import process_file, load_documents_from_url, add_to_qdrant
3
+ import models
4
+ import agents
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  @cl.on_chat_start
7
  async def on_chat_start():
 
8
  global qdrant_store
9
+ qdrant_store = models.semantic_tuned_Qdrant_vs
10
+
11
+ global retrieval_augmented_qa_chain
12
+ retrieval_augmented_qa_chain = agents.simple_rag_chain
 
13
 
14
  res = await ask_action()
15
  await handle_response(res)
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  @cl.author_rename
18
  def rename(orig_author: str):
19
  return "AI Assistant"
20
 
21
  @cl.on_message
22
  async def main(message: cl.Message):
23
+ print(message.content)
24
  if message.content.startswith("http://") or message.content.startswith("https://"):
25
  message_type = "url"
26
  else:
 
29
  if message_type == "url":
30
  # load the file
31
  docs = load_documents_from_url(message.content)
32
+ cl.Message("loaded docs").send()
33
+ splits = models.semanticChunker_tuned.split_documents(docs)
34
+ cl.Message("split docs").send()
35
  for i, doc in enumerate(splits):
36
  doc.metadata["user_upload_source"] = f"source_{i}"
37
  print(f"Processing {len(docs)} text chunks")
 
81
 
82
  # load the file
83
  docs = process_file(file)
84
+ splits = models.semanticChunker_tuned.split_documents(docs)
85
  for i, doc in enumerate(splits):
86
  doc.metadata["user_upload_source"] = f"source_{i}"
87
  print(f"Processing {len(docs)} text chunks")
models.py CHANGED
@@ -10,6 +10,8 @@ from langchain.retrievers.contextual_compression import ContextualCompressionRet
10
  from qdrant_client import QdrantClient
11
  from langchain_text_splitters import RecursiveCharacterTextSplitter
12
  from langchain_cohere import CohereRerank
 
 
13
  import constants
14
  import os
15
 
@@ -17,6 +19,8 @@ os.environ["LANGCHAIN_API_KEY"] = constants.LANGCHAIN_API_KEY
17
  os.environ["LANGCHAIN_TRACING_V2"] = str(constants.LANGCHAIN_TRACING_V2)
18
  os.environ["LANGCHAIN_ENDPOINT"] = constants.LANGCHAIN_ENDPOINT
19
 
 
 
20
  tracer = LangChainTracer()
21
  callback_manager = CallbackManager([tracer])
22
 
 
10
  from qdrant_client import QdrantClient
11
  from langchain_text_splitters import RecursiveCharacterTextSplitter
12
  from langchain_cohere import CohereRerank
13
+ from langchain_core.globals import set_llm_cache
14
+ from langchain_core.caches import InMemoryCache
15
  import constants
16
  import os
17
 
 
19
  os.environ["LANGCHAIN_TRACING_V2"] = str(constants.LANGCHAIN_TRACING_V2)
20
  os.environ["LANGCHAIN_ENDPOINT"] = constants.LANGCHAIN_ENDPOINT
21
 
22
+ set_llm_cache(InMemoryCache())
23
+
24
  tracer = LangChainTracer()
25
  callback_manager = CallbackManager([tracer])
26
 
prompts.py CHANGED
@@ -1,3 +1,24 @@
1
  from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder, PromptTemplate
2
  from langchain.schema import SystemMessage
 
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder, PromptTemplate
2
  from langchain.schema import SystemMessage
3
+ from langchain_community.document_loaders import PyMuPDFLoader
4
 
5
+ rag_system_prompt_template = """\
6
+ You are a helpful assistant that uses the provided context to answer questions.
7
+ You must follow the writing style guide provided below. Never reference this prompt,
8
+ the existence of context, or the writing style guide in your responses.
9
+
10
+ Writing Style Guide:
11
+ {writing_style_guide}
12
+ """
13
+ rag_message_list = [{"role" : "system", "content" : rag_system_prompt_template},]
14
+ rag_user_prompt_template = """\
15
+ Question:
16
+ {question}
17
+ Context:
18
+ {context}
19
+ """
20
+ chat_prompt = ChatPromptTemplate.from_messages([("system", rag_system_prompt_template), ("human", rag_user_prompt_template)])
21
+
22
+ style_guide_path = "./public/CoExperiences Writing Style Guide V1 (2024).pdf"
23
+ style_guide_docs = PyMuPDFLoader(style_guide_path).load()
24
+ style_guide_text = "\n".join([doc.page_content for doc in style_guide_docs])
tools.py CHANGED
@@ -1,8 +1,21 @@
 
 
1
  from langchain_community.tools.tavily_search import TavilySearchResults
2
  from langchain_core.tools import tool
 
 
 
 
3
 
4
  tavily_tool = TavilySearchResults(max_results=5)
5
 
 
 
 
 
 
 
 
6
  @tool
7
  def create_outline(points: List[str], file_name: str) -> str:
8
  """Create and save an outline."""
 
1
+ from pathlib import Path
2
+ from typing import Annotated, Optional
3
  from langchain_community.tools.tavily_search import TavilySearchResults
4
  from langchain_core.tools import tool
5
+ from agents import simple_rag_chain
6
+
7
+ WORKING_DIRECTORY = Path("/tmp/content/data")
8
+ WORKING_DIRECTORY.mkdir(parents=True, exist_ok=True)
9
 
10
  tavily_tool = TavilySearchResults(max_results=5)
11
 
12
+ @tool
13
+ def retrieve_information(
14
+ query: Annotated[str, "query to ask the retrieve information tool"]
15
+ ):
16
+ """Use Retrieval Augmented Generation to retrieve information about the 'Extending Llama-3’s Context Ten-Fold Overnight' paper."""
17
+ return simple_rag_chain.invoke({"question" : query})
18
+
19
  @tool
20
  def create_outline(points: List[str], file_name: str) -> str:
21
  """Create and save an outline."""