Upload 3 files
- Dockerfile +34 -0
- chatbot_app.py +293 -0
- requirements.txt +12 -0
Dockerfile
ADDED
@@ -0,0 +1,34 @@
# Define the base image
FROM python:3.9

# Set environment variables
ENV EMBED_DEVICE_CHOICE="cpu" \
    PYTHONUNBUFFERED=1

# Install system dependencies
RUN apt-get update && \
    apt-get install -y curl && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# Install Ollama
RUN curl -fsSL https://ollama.com/install.sh | sh

# Create app directory
WORKDIR /app

# Copy application files
COPY . /app

# Install Python dependencies
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt

# Expose port for the Gradio app
EXPOSE 7860
ENV GRADIO_SERVER_NAME="0.0.0.0"

# Pull the Llama model using Ollama (pull, rather than run, avoids starting an interactive session during the build)
RUN ollama serve & sleep 10 && ollama pull wangshenzhi/llama3-8b-chinese-chat-ollama-q4
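
# Note: pulling at build time bakes the model weights into the image, so a
# container does not have to download them on first start. The fixed `sleep 10`
# is a crude wait for the backgrounded server; a more robust variant (a sketch,
# not part of the original) would poll Ollama's default port until it answers:
#   RUN ollama serve & until curl -s http://localhost:11434 > /dev/null; do sleep 1; done && ollama pull <model>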

# Start the Ollama server and the app
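# Note: this shell-form CMD backgrounds `ollama serve` and starts the app at
# once; chatbot_app.py spends its startup loading the dataset and building the
# vector store, which in practice gives the server time to come up, but there
# is no explicit readiness check.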
CMD ollama serve & python chatbot_app.py
chatbot_app.py
ADDED
@@ -0,0 +1,293 @@
# Imports

from langchain_community.document_loaders import HuggingFaceDatasetLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.memory import ConversationBufferMemory
from langchain_ollama import OllamaLLM
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationalRetrievalChain
import textwrap
import gradio as gr
import langid
from iso639 import Lang
import os

# Set up the text splitter

def setup_text_splitter(split_separator, split_chunk_size, split_chunk_overlap_size, split_length_function):

    text_splitter = CharacterTextSplitter(
        separator=split_separator,
        chunk_size=split_chunk_size,
        chunk_overlap=split_chunk_overlap_size,
        length_function=split_length_function)

    return text_splitter
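
# Note: CharacterTextSplitter first splits on `separator` and then packs the
# pieces into chunks of at most `chunk_size` characters (as measured by
# `length_function`), keeping `chunk_overlap` characters of overlap between
# consecutive chunks so context is not lost at chunk boundaries.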

# Load the external database for RAG and set up the embeddings

def load_and_process_data(dataset_name, page_content_column, text_splitter):

    loader = HuggingFaceDatasetLoader(dataset_name, page_content_column)
    data = loader.load()
    split_data = text_splitter.split_documents(data)

    return split_data

def setup_embedding(embedding_model_choice, embed_device_choice, embed_normalization_option):

    hf_embeddings = HuggingFaceEmbeddings(model_name=embedding_model_choice,
                                          model_kwargs={'device': embed_device_choice},
                                          encode_kwargs={'normalize_embeddings': embed_normalization_option})

    return hf_embeddings
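
# Note: normalize_embeddings=True makes every embedding unit-length, so
# cosine similarity and dot-product rankings coincide when the vectors are
# compared in the vector store.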

def setup_vectordb_retriever(split_data, hf_embeddings, persist_directory_location, retrieve_k_choice, retrieve_search_type_choice):

    vectordb = Chroma.from_documents(
        documents=split_data,
        embedding=hf_embeddings,
        persist_directory=persist_directory_location
    )

    retriever = vectordb.as_retriever(search_kwargs={"k": retrieve_k_choice}, search_type=retrieve_search_type_choice)

    return retriever
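
# Note: search_type="mmr" selects Maximal Marginal Relevance retrieval: a
# candidate pool is fetched by similarity, then k chunks are picked greedily
# to balance relevance to the query against diversity among the picks, which
# helps avoid returning near-duplicate FAQ entries.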

def setup_memory(memory_key_name, memory_input_name, memory_output_name, memory_return_message_option):

    memory = ConversationBufferMemory(
        memory_key=memory_key_name,
        input_key=memory_input_name,
        output_key=memory_output_name,
        return_messages=memory_return_message_option
    )

    return memory
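
# Note: ConversationBufferMemory keeps the full conversation under the
# "chat_history" key. Because the chain returns several outputs (answer,
# source_documents, generated_question), output_key="answer" tells the memory
# which one to record; return_messages=True yields message objects rather than
# one flattened string.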

def setup_ollama_model(ollama_model_choice, ollama_temp):

    llm_chosen = OllamaLLM(model=ollama_model_choice, temperature=ollama_temp)

    return llm_chosen

def setup_prompt(base_prompt_template, prompt_input_list):

    base_prompt = PromptTemplate(
        template=base_prompt_template,
        input_variables=prompt_input_list)

    return base_prompt

def build_rag_chain(llm_chosen, retriever, memory, chain_return_source_option, chain_return_generate_quest_option, chain_verbose_option, base_prompt):

    llm_with_rag_chain_and_memory = ConversationalRetrievalChain.from_llm(
        llm=llm_chosen,
        retriever=retriever,
        memory=memory,
        return_source_documents=chain_return_source_option,
        return_generated_question=chain_return_generate_quest_option,
        verbose=chain_verbose_option,
        combine_docs_chain_kwargs={'prompt': base_prompt}
    )

    return llm_with_rag_chain_and_memory
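
# Note: ConversationalRetrievalChain runs in two steps: it condenses the new
# question plus the chat history into a standalone question (exposed as
# generated_question), retrieves documents for it, and then answers using the
# custom prompt passed via combine_docs_chain_kwargs.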

def wrap_text_preserve_newlines(text, width=110):
    # Split the input text into lines based on newline characters
    lines = text.split('\n')

    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]

    # Join the wrapped lines back together using newline characters
    wrapped_text = '\n'.join(wrapped_lines)

    return wrapped_text

def format_response_with_source_and_memory(llm_response):
    # Initialize an empty list to collect all parts of the output
    output = []

    # Add the answer
    output.append('\n\n==================== Chatbot Answer: ====================')
    output.append(wrap_text_preserve_newlines(llm_response['answer']))

    # Add sources
    output.append('\n\n==================== Other Relevant Information and Sources: ====================')
    for source in llm_response["source_documents"]:
        output.append(source.metadata['question'])
        output.append(source.page_content)

    # Add history
    output.append('\n\n==================== Chat History: ====================')
    for history in llm_response['chat_history']:
        output.append(history.content)

    # Combine all parts into a single string and return
    return '\n'.join(output)

def detect_language(input_string):

    # Detect the language
    input_lang_code = langid.classify(input_string)[0]

    # Convert the ISO 639 language code to the major language name
    input_language = Lang(input_lang_code).name

    return input_language
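
# Note: langid.classify() returns a (language_code, score) pair, where the
# code is ISO 639-1 (e.g. "zh"); iso639's Lang maps the code to an English
# name such as "Chinese", which is then used verbatim in the translation
# prompts below.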

def talk_to_chatbot(input_question):

    input_language = detect_language(input_question)

    if input_language != "English":
        # print(f"Translating from {input_language} to English...")
        input_question = llm_chosen.invoke(f"translate this {input_language} content to English: {input_question}")

    # print("Retrieving Information...")
    llm_response = llm_with_rag_chain_and_memory.invoke(input_question)
    chatbot_answer = format_response_with_source_and_memory(llm_response)

    if input_language != "English":
        # print(f"Translating from English to {input_language}...")
        chatbot_answer = llm_chosen.invoke(f"translate this English content to {input_language}: {chatbot_answer}")

    return chatbot_answer
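
# Note: the FAQ corpus and the prompt are English-only, so a non-English
# question is translated into English before retrieval and the formatted
# answer is translated back afterwards, with the same multilingual llama3
# model doing double duty as the translator. The memory therefore stores the
# English version of each exchange.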

def clear_chat_history(clear_memory=True):
    if clear_memory:
        return memory.clear()

# Set variables

dataset_name = "MakTek/Customer_support_faqs_dataset"
page_content_column = "answer"
split_separator = "\n"
split_chunk_size = 1000
split_chunk_overlap_size = 150
split_length_function = len

embedding_model_choice = "hkunlp/instructor-large"
embed_device_choice = os.getenv("EMBED_DEVICE_CHOICE", "cpu")
# embed_device_choice = "cpu"
# embed_device_choice = "cuda"
# embed_device_choice = "mps"
embed_normalization_option = True

persist_directory_location = 'docs/chroma/'
retrieve_k_choice = 3
retrieve_search_type_choice = "mmr"

memory_key_name = "chat_history"
memory_input_name = "question"
memory_output_name = "answer"
memory_return_message_option = True

# ollama_model_choice = "llama3.2"
ollama_model_choice = "wangshenzhi/llama3-8b-chinese-chat-ollama-q4"
ollama_temp = 0.1

base_prompt_template = """System: You are an ABC-Company customer service representative.
\n\nInstruction: Answer the customer's question based on the following context and chat history if you know the answer. Otherwise, end the answer with 'I am not sure about the answer, please contact our human service for assistance. Thank You!'.
\n\nContext: {context}
\n\nChat history: {chat_history}
\n\nQuestion: {question}
\n\nOutput Answer: """
prompt_input_list = ["context", "question", "chat_history"]
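
# Note: input_variables must match the placeholders in the template; at query
# time the chain fills {context} with the retrieved chunks, {chat_history}
# from memory, and {question} with the user's (possibly condensed) question.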

chain_return_source_option = True
chain_return_generate_quest_option = True
chain_verbose_option = False

text_splitter = setup_text_splitter(split_separator, split_chunk_size, split_chunk_overlap_size, split_length_function)
split_data = load_and_process_data(dataset_name, page_content_column, text_splitter)
hf_embeddings = setup_embedding(embedding_model_choice, embed_device_choice, embed_normalization_option)
retriever = setup_vectordb_retriever(split_data, hf_embeddings, persist_directory_location, retrieve_k_choice, retrieve_search_type_choice)
memory = setup_memory(memory_key_name, memory_input_name, memory_output_name, memory_return_message_option)
llm_chosen = setup_ollama_model(ollama_model_choice, ollama_temp)
base_prompt = setup_prompt(base_prompt_template, prompt_input_list)
llm_with_rag_chain_and_memory = build_rag_chain(llm_chosen, retriever, memory, chain_return_source_option, chain_return_generate_quest_option, chain_verbose_option, base_prompt)
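
# Note: everything above runs once at startup: the FAQ dataset is downloaded,
# split, embedded, and indexed into Chroma before the first request, so each
# query only pays for retrieval and generation.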

memory.clear()

# memory.chat_memory.messages

# Gradio Application Build

set_gradio_theme = gr.themes.Glass(primary_hue="orange", secondary_hue="gray").set(
    button_primary_background_fill="orange",
    button_primary_background_fill_hover="green",
)

with gr.Blocks(theme=set_gradio_theme) as demo:

    gr.Markdown(
        """
# Welcome to our Multilingual Customer Service Chatbot!
## I am a demo. Feel free to ask me any questions related to your order and our company in your own language.
### I can speak most major languages, such as English, Chinese, French, Spanish, Japanese, etc.

### I am built with the llama3 model fine-tuned by Wangshenzhi (served via Ollama) and LangChain for RAG (Retrieval-Augmented Generation).
### For technical details, please see the info at the bottom of the page.

Start talking to me by typing below.

Please note that:
- The output sources and chat history are mostly for debugging and monitoring during development, to make sure the chatbot is responding properly.
- The application is running on GPU, so responses are fast, but multilingual processing can take slightly longer than English.
""")

    question = gr.Textbox(label="Ask me a question (You can ask in your own language!)", placeholder="Can I request a refund?")
    send_btn = gr.Button("Send Question")
    answer = gr.Textbox(label="Chatbot response", lines=20)

    send_btn.click(fn=talk_to_chatbot, inputs=question, outputs=answer, api_name="customer_service_chatbot")
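
    # Note: api_name registers this handler as a named API endpoint
    # (customer_service_chatbot), so it can also be called programmatically
    # through the Gradio client.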

    gr.Markdown(
        """
If you clear the chat history, the next query will start from an empty, refreshed chat history.
""")
    clear_btn = gr.Button("Clear Chat History")
    clear_btn.click(fn=clear_chat_history)


    gr.Markdown(
        """
## Chatbot Technical Details:

#### Model: llama3-8b-chinese-chat-ollama-q4 (8B parameters)
#### Dataset: Hugging Face Hub "MakTek/Customer_support_faqs_dataset"
#### Embedding: Hugging Face Hub "hkunlp/instructor-large"
#### Vector Database: Chroma
#### Retrieval Search Type: Maximal Marginal Relevance (MMR)
#### Prompt:
The LLM is told that it is a customer service representative for ABC-Company and to use the chat history and RAG context to answer questions.
If it does not know the answer, it is told to say so and to refer the user to human service.
#### Memory:
Chat memory is fed into the input so that the chatbot is aware of the context of the conversation.
However, as the chat history gets long, the chatbot can become confused. This is a limitation of this simple demo.
#### Temperature: 0.1
The chatbot is not encouraged to be creative, but to use the factual answers provided in the retrieval results.

#### Good Testing Question Examples:
- Who are you?
  - The answer should show that the role assigned in the prompt is working.
- How do I go to Mars?
  - The answer should show that, when asked about things it doesn't know or that are irrelevant, the chatbot refers users to human service.
- Can I talk to someone? Followed by the next query: When can I do that?
  - This question pair should show that the chatbot has memory and can understand what "that" refers to.
- Other typical customer support questions:
  - Can I request a refund? (or in Chinese: 我可以申請退款嗎?)
  - How do I track my order? (or in Chinese: 怎樣查找我的訂單?)

""")

demo.launch()

# demo.close()
requirements.txt
ADDED
@@ -0,0 +1,12 @@
# dependencies:

gradio==4.44.1
langchain_community
langchain
langchain_huggingface
langchain_ollama
chromadb
datasets
iso639-lang
langid
sentence-transformers