Upload 10 files
Browse files — first real commit
- .gitattributes +1 -0
- Dockerfile +34 -0
- app.py +52 -0
- requirements.txt +2 -0
- service/__init__.py +0 -0
- service/assets/bot.png +3 -0
- service/haystack_documentation_pipeline.py +76 -0
- service/utils/__init__.py +0 -0
- service/utils/memory_node.py +12 -0
- service/utils/prompts.py +31 -0
- service/utils/retriever.py +25 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
service/assets/bot.png filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# app/Dockerfile
#
# Streamlit front-end for the Haystack documentation chatbot.
# Installs Python dependencies as root, then drops to an unprivileged
# user (UID 1000, as required by e.g. Hugging Face Spaces) before
# copying in the application code.

FROM python:3.10-slim-bookworm

WORKDIR /app

# Copy requirements first so the dependency layer is cached
# independently of application-code changes.
COPY ./requirements.txt /app/requirements.txt

# build-essential / curl / git are needed to fetch and build some wheels.
# --no-install-recommends keeps the image small; the apt lists are removed
# in the same layer so they never persist in the final image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    software-properties-common \
    git \
    && rm -rf /var/lib/apt/lists/*

RUN pip3 install --no-cache-dir -r /app/requirements.txt

# User: run the app as a non-root user.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user
ENV PATH=$HOME/.local/bin:$PATH

WORKDIR $HOME
RUN mkdir app
WORKDIR $HOME/app
# --chown so the non-root user owns the app files (COPY defaults to root).
COPY --chown=user . $HOME/app

EXPOSE 7860
# Streamlit flags: headless (no local browser), CORS/XSRF disabled so the
# app works behind a reverse proxy, file watcher off (static container).
CMD streamlit run app.py \
    --server.headless true \
    --server.enableCORS false \
    --server.enableXsrfProtection false \
    --server.fileWatcherType none
|
app.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Streamlit chat front-end for the Haystack documentation agent."""
import os

import streamlit as st

from service.haystack_documentation_pipeline import return_haystack_documentation_agent

st.title('Haystack Documentation Chatbot')

# Cache the avatar bytes once per session; a context manager closes the
# file handle deterministically (the original leaked it).
if 'image_bytes' not in st.session_state:
    with open('service/assets/bot.png', 'rb') as image_file:
        st.session_state.image_bytes = image_file.read()

# Seed the conversation with a greeting from the assistant.
if 'messages' not in st.session_state:
    st.session_state.messages = [{'role': 'assistant', 'content': 'Hello there!'}]

with st.sidebar:
    st.image('service/assets/bot.png')
    st.markdown(
        """
# Haystack Documentation Chatbot

This chatbot can answer questions about the Haystack documentation.

## How to use

1. Type your question in the chat input box.
2. Press enter.
3. Wait for the chatbot to respond (since it works as an agent responses may take a while).
4. Enjoy!
"""
    )

# Replay the conversation so far; only assistant messages get the avatar.
for message in st.session_state.messages:
    with st.chat_message(
        message['role'],
        avatar=st.session_state.image_bytes if message['role'] == 'assistant' else None,
    ):
        st.markdown(message['content'])

# Build the agent lazily, once per session. Requires the OPENAI_KEY
# environment variable; a missing key fails fast with KeyError.
if 'agent' not in st.session_state:
    st.session_state.agent = return_haystack_documentation_agent(openai_key=os.environ['OPENAI_KEY'])

# Fixed the placeholder text: the original contained a stray '"'
# ('What is up?"').
if prompt := st.chat_input('What is up?'):
    st.chat_message('user').markdown(prompt)
    st.session_state.messages.append({'role': 'user', 'content': prompt})
    chat_message = st.chat_message(name='assistant', avatar=st.session_state.image_bytes)
    with chat_message:
        with st.spinner('Thinking...'):
            response = st.session_state.agent.run(query=prompt)
            answer = response['answers'][0].answer
            chat_message.markdown(answer)

    st.session_state.messages.append({'role': 'assistant', 'content': answer})
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
# Haystack 1.x with the inference and preprocessing extras
# (Agent, PromptNode, WebRetriever, SentenceTransformersRanker, PreProcessor).
farm-haystack[inference,preprocessing]==1.23.0
# Web UI framework serving the chatbot front-end (app.py).
streamlit==1.29.0
|
service/__init__.py
ADDED
File without changes
|
service/assets/bot.png
ADDED
Git LFS Details
|
service/haystack_documentation_pipeline.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict, Any, Callable
|
2 |
+
|
3 |
+
from haystack import Pipeline
|
4 |
+
from haystack.agents.base import ToolsManager
|
5 |
+
from haystack.nodes import PromptNode, SentenceTransformersRanker
|
6 |
+
from haystack.agents import Agent, Tool
|
7 |
+
|
8 |
+
from service.utils.memory_node import return_memory_node
|
9 |
+
from service.utils.prompts import agent_prompt
|
10 |
+
from service.utils.retriever import return_retriever
|
11 |
+
|
12 |
+
|
13 |
+
def resolver_function(
    query: str,
    agent: Agent,
    agent_step: Callable,
) -> Dict[str, Any]:
    """
    Build the parameter mapping used to fill the agent's prompt template.

    :param query: the user's question
    :param agent: the running agent (source of tool descriptions and memory)
    :param agent_step: the current agent step (source of the transcript)
    :return: mapping of template placeholder names to their values
    """
    parameters: Dict[str, Any] = {'query': query}
    # Placeholders consumed by agent_prompt in service/utils/prompts.py.
    parameters['tool_names_with_descriptions'] = agent.tm.get_tool_names_with_descriptions()
    parameters['transcript'] = agent_step.transcript
    parameters['memory'] = agent.memory.load()
    return parameters
|
31 |
+
|
32 |
+
|
33 |
+
def define_haystack_doc_searcher_tool() -> Tool:
    """
    Build the tool that searches the Haystack documentation.

    The tool wraps a two-stage pipeline: a web retriever restricted to the
    Haystack docs site, followed by a cross-encoder ranker that keeps the
    five most relevant documents.

    :return: the Haystack documentation searcher tool
    """
    search_pipeline = Pipeline()
    search_pipeline.add_node(
        component=return_retriever(),
        name='retriever',
        inputs=['Query'],
    )
    search_pipeline.add_node(
        component=SentenceTransformersRanker(
            model_name_or_path='cross-encoder/ms-marco-MiniLM-L-12-v2', top_k=5
        ),
        name='ranker',
        inputs=['retriever'],
    )

    return Tool(
        name='haystack_documentation_search_tool',
        pipeline_or_node=search_pipeline,
        description='Searches the Haystack documentation for information.',
        output_variable='documents',
    )
|
50 |
+
|
51 |
+
|
52 |
+
def return_haystack_documentation_agent(openai_key: str) -> Agent:
    """
    Create the agent that answers questions about the Haystack documentation.

    :param openai_key: the OpenAI API key used by the agent's LLM
    :return: the configured agent
    """
    # Near-zero temperature keeps the agent's tool-use format stable;
    # generation stops at 'Observation:' so the framework can inject the
    # actual tool result instead of a hallucinated one.
    prompt_node = PromptNode(
        'gpt-3.5-turbo-16k',
        api_key=openai_key,
        stop_words=['Observation:'],
        model_kwargs={'temperature': 0.05},
        max_length=10000,
    )

    return Agent(
        prompt_node,
        prompt_template=agent_prompt,
        prompt_parameters_resolver=resolver_function,
        memory=return_memory_node(openai_key),
        tools_manager=ToolsManager([define_haystack_doc_searcher_tool()]),
        # (?s) lets '.' span newlines so multi-line final answers are captured.
        final_answer_pattern=r"(?s)Final Answer\s*:\s*(.*)",
    )
|
service/utils/__init__.py
ADDED
File without changes
|
service/utils/memory_node.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from haystack.agents.memory import ConversationSummaryMemory
|
2 |
+
from haystack.nodes import PromptNode
|
3 |
+
|
4 |
+
|
5 |
+
def return_memory_node(openai_key: str) -> ConversationSummaryMemory:
    """
    Build the conversation-summary memory used by the agent.

    :param openai_key: the OpenAI API key for the summarisation model
    :return: the memory node
    """
    # The memory summarises past turns with its own LLM call, so it gets a
    # dedicated PromptNode with a short generation budget.
    summariser = PromptNode('gpt-3.5-turbo-16k', api_key=openai_key, max_length=1024)
    return ConversationSummaryMemory(summariser)
|
service/utils/prompts.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Prompt template for the documentation agent. The placeholders
# ({tool_names_with_descriptions}, {memory}, {query}, {transcript}) are
# filled by resolver_function at every agent step.
# Fixed grammar defects in the original text: "is specialises" -> "specialises",
# "it's tools" -> "its tools".
agent_prompt = """
In the following conversation, a human user interacts with an AI Agent. This agent specialises in answering questions about the Haystack documentation.
The human user poses questions, and the AI Agent goes through several steps to provide well-informed answers. The good outcome of this conversation is of paramount importance for the wellbeing of the human.
The AI Agent must use the available tools to find the up-to-date information. The final answer to the question should be truthfully based solely on the output of the tools.
The AI Agent should always be focused on answering the human user's question avoiding any irrelevant information. The agent must provide as many code examples as possible.
The AI Agent should be sure that the information and examples provided address the user's question, if it is not
the case, the agent should use its tools to find the correct information.
The AI Agent should ignore its knowledge when answering the questions.
The AI Agent has access to these tools:
{tool_names_with_descriptions}

The following is the previous conversation between a human and The AI Agent:
{memory}

AI Agent responses must start with one of the following:

Thought: [the AI Agent's reasoning process]
Tool: [tool names] (on a new line) Tool Input: [input as a question for the selected tool WITHOUT quotation marks and on a new line] (These must always be provided together and on separate lines.)
Observation: [tool's result]
Final Answer: [final answer to the human user's question]

When selecting a tool, the AI Agent must provide both the "Tool:" and "Tool Input:" pair in the same response, but on separate lines.

The AI Agent should not ask the human user for additional information, clarification, or context.
If the AI Agent cannot find a specific answer after exhausting available tools and approaches, it answers with Final Answer: inconclusive

Question: {query}
Thought:
{transcript}
"""
|
service/utils/retriever.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
from haystack.nodes import PreProcessor, WebRetriever
|
4 |
+
|
5 |
+
|
6 |
+
def return_retriever():
    """
    Build the web retriever restricted to the Haystack documentation site.

    Reads the SERPERDEV_API_KEY environment variable (KeyError if unset).

    :return: the retriever
    """
    # Large word-based splits with a small overlap keep whole documentation
    # pages together while respecting sentence boundaries.
    docs_preprocessor = PreProcessor(
        split_by='word',
        split_length=4096,
        split_respect_sentence_boundary=True,
        split_overlap=40,
    )

    retriever = WebRetriever(
        api_key=os.environ['SERPERDEV_API_KEY'],
        allowed_domains=['docs.haystack.deepset.ai'],
        mode='preprocessed_documents',
        preprocessor=docs_preprocessor,
        top_search_results=40,
        top_k=20,
    )
    return retriever
|