peichao.dong
committed on
Commit
·
2a0c033
1
Parent(s):
066c6cf
update embedding
Browse files
- app.py +4 -3
- documents/abstract.faiss/index.faiss +0 -0
- documents/abstract.faiss/index.pkl +3 -0
- embedding.py +41 -10
app.py
CHANGED
@@ -50,12 +50,13 @@ def feedBack(context, story, chatbot=[], input=""):
|
|
50 |
|
51 |
customerEmbedding = CustomEmbedding()
|
52 |
|
53 |
-
faqChain = customerEmbedding.
|
54 |
|
55 |
code_agent_executor = code_agent_executor()
|
56 |
def faqFromLocal(input, chatbot=[]):
|
57 |
-
response = faqChain({"question": f"{input}"})
|
58 |
-
|
|
|
59 |
return chatbot, ""
|
60 |
|
61 |
|
|
|
50 |
|
51 |
customerEmbedding = CustomEmbedding()
|
52 |
|
53 |
+
faqChain = customerEmbedding.getFAQAgent()
|
54 |
|
55 |
code_agent_executor = code_agent_executor()
|
56 |
def faqFromLocal(input, chatbot=[]):
|
57 |
+
# response = faqChain({"question": f"{input}"})
|
58 |
+
response = faqChain.run(input)
|
59 |
+
chatbot.append((input, response))
|
60 |
return chatbot, ""
|
61 |
|
62 |
|
documents/abstract.faiss/index.faiss
ADDED
Binary file (58.4 kB). View file
|
|
documents/abstract.faiss/index.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65b241ca9d637fc607f43c0190c682677b635dbd36cddb0b754c0f74ea6988da
|
3 |
+
size 26724
|
embedding.py
CHANGED
@@ -9,34 +9,35 @@ from langchain.chains.question_answering import load_qa_chain
|
|
9 |
from langchain.document_loaders import NotionDirectoryLoader
|
10 |
from langchain.memory import ConversationBufferMemory
|
11 |
from langchain.chains import ConversationalRetrievalChain
|
|
|
12 |
|
13 |
from models import llm
|
14 |
|
15 |
|
16 |
class CustomEmbedding:
|
17 |
notionDirectoryLoader = NotionDirectoryLoader(
|
18 |
-
"
|
19 |
embeddings = HuggingFaceEmbeddings()
|
20 |
|
21 |
def calculateEmbedding(self):
|
22 |
documents = self.notionDirectoryLoader.load()
|
23 |
-
text_splitter = SpacyTextSplitter(
|
24 |
-
|
25 |
|
26 |
-
|
27 |
-
|
28 |
texts = text_splitter.split_documents(documents)
|
29 |
|
30 |
docsearch = FAISS.from_documents(texts, self.embeddings)
|
31 |
docsearch.save_local(
|
32 |
-
folder_path="./documents/
|
33 |
|
34 |
|
35 |
|
36 |
def getFAQChain(self, llm=llm(temperature=0.7)):
|
37 |
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
38 |
docsearch = FAISS.load_local(
|
39 |
-
"./documents/
|
40 |
# retriever = VectorStoreRetriever(vectorstore=docsearch)
|
41 |
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question in chinese.
|
42 |
|
@@ -48,15 +49,45 @@ class CustomEmbedding:
|
|
48 |
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
|
49 |
|
50 |
doc_chain = load_qa_chain(llm, chain_type="map_reduce")
|
51 |
-
qa = ConversationalRetrievalChain( retriever= docsearch.as_retriever(),
|
52 |
question_generator=question_generator,
|
53 |
combine_docs_chain=doc_chain,
|
54 |
memory=memory)
|
55 |
return qa
|
56 |
|
|
|
|
|
|
|
|
|
57 |
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
# # customerEmbedding.calculateNotionEmbedding()
|
61 |
|
62 |
# faq_chain = customerEmbedding.getFAQChain()
|
|
|
9 |
from langchain.document_loaders import NotionDirectoryLoader
|
10 |
from langchain.memory import ConversationBufferMemory
|
11 |
from langchain.chains import ConversationalRetrievalChain
|
12 |
+
from langchain.agents import initialize_agent, AgentType, Tool, ZeroShotAgent, AgentExecutor
|
13 |
|
14 |
from models import llm
|
15 |
|
16 |
|
17 |
class CustomEmbedding:
|
18 |
notionDirectoryLoader = NotionDirectoryLoader(
|
19 |
+
"/Users/peichao.dong/Documents/projects/dpc/ABstract/docs/pages")
|
20 |
embeddings = HuggingFaceEmbeddings()
|
21 |
|
22 |
def calculateEmbedding(self):
|
23 |
documents = self.notionDirectoryLoader.load()
|
24 |
+
# text_splitter = SpacyTextSplitter(
|
25 |
+
# chunk_size=2048, pipeline="zh_core_web_sm", chunk_overlap=0)
|
26 |
|
27 |
+
text_splitter = MarkdownTextSplitter(
|
28 |
+
chunk_size=2048, chunk_overlap=0)
|
29 |
texts = text_splitter.split_documents(documents)
|
30 |
|
31 |
docsearch = FAISS.from_documents(texts, self.embeddings)
|
32 |
docsearch.save_local(
|
33 |
+
folder_path="./documents/abstract.faiss")
|
34 |
|
35 |
|
36 |
|
37 |
def getFAQChain(self, llm=llm(temperature=0.7)):
|
38 |
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
39 |
docsearch = FAISS.load_local(
|
40 |
+
"./documents/abstract.faiss", self.embeddings)
|
41 |
# retriever = VectorStoreRetriever(vectorstore=docsearch)
|
42 |
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question in chinese.
|
43 |
|
|
|
49 |
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
|
50 |
|
51 |
doc_chain = load_qa_chain(llm, chain_type="map_reduce")
|
52 |
+
qa = ConversationalRetrievalChain( retriever= docsearch.as_retriever(search_kwargs={"k": 1}),
|
53 |
question_generator=question_generator,
|
54 |
combine_docs_chain=doc_chain,
|
55 |
memory=memory)
|
56 |
return qa
|
57 |
|
58 |
+
def faq(self, input):
|
59 |
+
qa = self.getFAQChain()
|
60 |
+
response = qa({"question": f"{input}"})
|
61 |
+
return response["answer"]
|
62 |
|
63 |
+
def getFAQAgent(self):
|
64 |
+
tools = [Tool(name="ABstract system FAQ", func= self.faq, description="Useful for anwer questions about ABstract system")]
|
65 |
+
memory = ConversationBufferMemory(memory_key="chat_history")
|
66 |
+
|
67 |
+
prefix = """Have a conversation with a human, answering the following questions as best you can. You have access to the following tools:"""
|
68 |
+
suffix = """Begin!"
|
69 |
+
|
70 |
+
{chat_history}
|
71 |
+
Question: {input}
|
72 |
+
{agent_scratchpad}"""
|
73 |
+
|
74 |
+
prompt = ZeroShotAgent.create_prompt(
|
75 |
+
tools,
|
76 |
+
prefix=prefix,
|
77 |
+
suffix=suffix,
|
78 |
+
input_variables=["input", "chat_history", "agent_scratchpad"]
|
79 |
+
)
|
80 |
+
|
81 |
+
llm_chain = LLMChain(llm=llm(), prompt=prompt)
|
82 |
+
agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)
|
83 |
+
faq_agent = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory)
|
84 |
+
return faq_agent
|
85 |
+
# faq_agent = initialize_agent(tools= tools, llm=llm(), agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION, verbose=True)
|
86 |
+
|
87 |
+
|
88 |
+
if __name__ == "__main__":
|
89 |
+
customerEmbedding = CustomEmbedding()
|
90 |
+
customerEmbedding.calculateEmbedding()
|
91 |
# # customerEmbedding.calculateNotionEmbedding()
|
92 |
|
93 |
# faq_chain = customerEmbedding.getFAQChain()
|