eliujl committed
Commit eef7615
1 Parent(s): 7ee9dd1

Updated with local LLMs


Added options for local Llama 2 and Mistral models, using a local embedding model. Requires the LLM model files to be downloaded to a local folder beforehand; a download sketch follows. To be further improved.
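The pre-download step is roughly the following (a minimal sketch, not part of this commit; the repo IDs are assumptions, and only the GGUF filenames are taken from use_local_llm() in the diff below — any source that puts these files in the local models folder works):

    # Sketch of the pre-download step described above (not part of app.py).
    # Repo IDs are assumptions; filenames match those used in use_local_llm().
    from huggingface_hub import hf_hub_download

    for repo_id, filename in [
        ('TheBloke/Llama-2-13B-chat-GGUF', 'llama-2-13b-chat.Q8_0.gguf'),
        ('TheBloke/OpenHermes-2-Mistral-7B-GGUF', 'openhermes-2-mistral-7b.Q8_0.gguf'),
    ]:
        hf_hub_download(repo_id=repo_id, filename=filename, local_dir='models')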

Files changed (1)
  1. app.py +127 -31
app.py CHANGED
@@ -6,9 +6,12 @@ from langchain.document_loaders import (
     UnstructuredFileLoader,
 )
 from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 from langchain.chat_models import ChatOpenAI
 from langchain.vectorstores import Pinecone, Chroma
 from langchain.chains import ConversationalRetrievalChain
+from langchain.prompts import PromptTemplate
+from langchain.memory import ConversationBufferMemory
 import os
 import langchain
 import pinecone
@@ -19,6 +22,10 @@ import json
 OPENAI_API_KEY = ''
 PINECONE_API_KEY = ''
 PINECONE_API_ENV = ''
+gpt3p5 = 'gpt-3.5-turbo-1106'
+gpt4 = 'gpt-4-1106-preview'
+gpt_local_mistral = 'mistral_7b'
+gpt_local_llama = 'llama_13b'
 langchain.verbose = False


@@ -112,8 +119,15 @@ def setup_docsearch(use_pinecone, pinecone_index_name, embeddings, chroma_collec
             index_client = pinecone.Index(pinecone_index_name)
             # Get the index information
             index_info = index_client.describe_index_stats()
-            namespace_name = ''
-            n_texts = index_info['namespaces'][namespace_name]['vector_count']
+            # namespace_name = ''
+            # if index_info is not None:
+            #     print(index_info)
+            #     print(index_info['namespaces'][namespace_name]['vector_count'])
+            #     print(index_info['total_vector_count'])
+            # else:
+            #     print("Index information is not available.")
+            # n_texts = index_info['namespaces'][namespace_name]['vector_count']
+            n_texts = index_info['total_vector_count']
         else:
             raise ValueError('''Cannot find the specified Pinecone index.
                              Create one in pinecone.io or using, e.g.,
@@ -132,14 +146,69 @@ def get_response(query, chat_history, CRqa):
     result = CRqa({"question": query, "chat_history": chat_history})
     return result['answer'], result['source_documents']

+@st.cache_resource()
+def use_local_llm(r_llm):
+    from langchain.llms import LlamaCpp
+    from langchain.callbacks.manager import CallbackManager
+    from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+    if r_llm == gpt_local_mistral:
+        gpt_local = 'openhermes-2-mistral-7b.Q8_0.gguf'
+    else:
+        gpt_local = 'llama-2-13b-chat.Q8_0.gguf'
+    llm = LlamaCpp(
+        model_path='~//models//'+gpt_local,
+        temperature=0.0,
+        n_batch=300,
+        n_ctx=4000,
+        max_tokens=2000,
+        n_gpu_layers=10,
+        n_threads=12,
+        top_p=1,
+        repeat_penalty=1.15,
+        verbose=False,
+        callback_manager=callback_manager,
+        streaming=True,
+        # verbose=True, # Verbose is required to pass to the callback manager
+    )
+    return llm
+
+
+def setup_prompt():
+
+    template = """Answer the question in your own words as truthfully as possible from the context given to you.
+    Supply sufficient information, evidence, reasoning, source from the context, etc., to justify your answer with details and logic.
+    Think step by step and do not jump to conclusion during your reasoning at the beginning.
+    Sometimes user's question may appear to be directly related to the context but may still be indirectly related,
+    so try your best to understand the question based on the context and chat history.
+    If questions are asked where there is no relevant context available,
+    respond using out-of-context knowledge with
+    "This question does not seem to be relevant to the documents. I am trying to explore knowledge outside the context."
+
+    Context: {context}
+
+    {chat_history}
+    User: {question}
+    Bot:"""
+
+    prompt = PromptTemplate(
+        input_variables=["context", "chat_history", "question"], template=template
+    )
+    return prompt

 def setup_em_llm(OPENAI_API_KEY, temperature, r_llm):
-    # Set up OpenAI embeddings
-    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
-    # Use Open AI LLM with gpt-3.5-turbo or gpt-4.
-    # Set the temperature to be 0 if you do not want it to make up things
-    llm = ChatOpenAI(temperature=temperature, model_name=r_llm, streaming=True,
-                     openai_api_key=OPENAI_API_KEY)
+    if r_llm == gpt3p5 or r_llm == gpt4:
+        # Set up OpenAI embeddings
+        embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
+        # Use Open AI LLM with gpt-3.5-turbo or gpt-4.
+        # Set the temperature to be 0 if you do not want it to make up things
+        llm = ChatOpenAI(temperature=temperature, model_name=r_llm, streaming=True,
+                         openai_api_key=OPENAI_API_KEY)
+    else:
+        #em_model_name = 'hkunlp/instructor-xl'
+        em_model_name='sentence-transformers/all-mpnet-base-v2'
+        embeddings = HuggingFaceEmbeddings(model_name=em_model_name)
+        llm = use_local_llm(r_llm)
     return embeddings, llm


@@ -166,38 +235,53 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
     latest_chats = []
     reply = ''
     source = ''
+    LLMs = [gpt3p5, gpt4, gpt_local_llama, gpt_local_mistral]
     # Get user input of whether to use Pinecone or not
     col1, col2, col3 = st.columns([1, 1, 1])
     # create the radio buttons and text input fields
     with col1:
-        r_pinecone = st.radio('Use Pinecone?', ('Yes', 'No'))
+        r_llm = st.multiselect('LLM:', LLMs, gpt3p5)
+        if not r_llm:
+            r_llm = gpt3p5
+        else:
+            r_llm = r_llm[0]
+        if r_llm == gpt3p5 or r_llm == gpt4:
+            use_openai = True
+        else:
+            use_openai = False
+        r_pinecone = st.radio('Vector store:', ('Pinecone (online)', 'Chroma (local)'))
         r_ingest = st.radio(
             'Ingest file(s)?', ('Yes', 'No'))
-        r_llm = st.multiselect(
-            'LLM:', ['gpt-3.5-turbo', 'gpt-4'], 'gpt-3.5-turbo')
-        r_llm = r_llm[0]
+        if r_pinecone == 'Pinecone (online)':
+            use_pinecone = True
+        else:
+            use_pinecone = False
     with col2:
-        OPENAI_API_KEY = st.text_input(
-            "OpenAI API key:", type="password")
         temperature = st.slider('Temperature', 0.0, 1.0, 0.1)
        k_sources = st.slider('# source(s) to print out', 0, 20, 2)
-    with col3:
-        if OPENAI_API_KEY:
-            embeddings, llm = setup_em_llm(OPENAI_API_KEY, temperature, r_llm)
-            if r_pinecone.lower() == 'yes':
-                use_pinecone = True
-                PINECONE_API_KEY = st.text_input(
-                    "Pinecone API key:", type="password")
-                PINECONE_API_ENV = st.text_input(
-                    "Pinecone API env:", type="password")
-                pinecone_index_name = st.text_input('Pinecone index:')
-                pinecone.init(api_key=PINECONE_API_KEY,
-                              environment=PINECONE_API_ENV)
+        if use_openai == True:
+            OPENAI_API_KEY = st.text_input(
+                "OpenAI API key:", type="password")
+        else:
+            OPENAI_API_KEY = ''
+            if use_pinecone == True:
+                st.write('Local GPT model (and local embedding model) is selected. Online vector store is selected.')
            else:
-                use_pinecone = False
-                chroma_collection_name = st.text_input(
-                    '''Chroma collection name of 3-63 characters:''')
-                persist_directory = "./vectorstore"
+                st.write('Local GPT model (and local embedding model) and local vector store are selected. All info remains local.')
+    embeddings, llm = setup_em_llm(OPENAI_API_KEY, temperature, r_llm)
+    with col3:
+        if use_pinecone == True:
+            PINECONE_API_KEY = st.text_input(
+                "Pinecone API key:", type="password")
+            PINECONE_API_ENV = st.text_input(
+                "Pinecone API env:", type="password")
+            pinecone_index_name = st.text_input('Pinecone index:')
+            pinecone.init(api_key=PINECONE_API_KEY,
+                          environment=PINECONE_API_ENV)
+        else:
+            chroma_collection_name = st.text_input(
+                '''Chroma collection name of 3-63 characters:''')
+            persist_directory = "./vectorstore"

     if pinecone_index_name or chroma_collection_name:
         session_name = pinecone_index_name + chroma_collection_name
@@ -220,8 +304,19 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
         # number of sources (split-documents when ingesting files); default is 4
         k = min([20, n_texts])
         retriever = setup_retriever(docsearch, k)
+
+        #prompt = setup_prompt()
+
+        memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True, output_key='answer')
+
         CRqa = ConversationalRetrievalChain.from_llm(
-            llm, retriever=retriever, return_source_documents=True)
+            llm,
+            chain_type="stuff",
+            retriever=retriever,
+            memory=memory,
+            return_source_documents=True,
+            #combine_docs_chain_kwargs={'prompt': prompt},
+        )

     st.title(':blue[Chatbot]')
     # Get user input
@@ -239,6 +334,7 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
         chat_history = [(user, bot)
                         for user, bot in chat_history]
         reply, source = get_response(query, chat_history, CRqa)
+
         # Update the chat history with the user input and system response
         chat_history.append(('User', query))
        chat_history.append(('Bot', reply))
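To sanity-check a downloaded model outside Streamlit, the following is a minimal sketch (not part of this commit; it assumes llama-cpp-python is installed and that the GGUF file sits under ~/models, mirroring the model_path used in use_local_llm):

    # Standalone check of the local GGUF model (sketch; not part of app.py).
    # The path below is an assumption; adjust it to wherever the model was downloaded.
    import os
    from langchain.llms import LlamaCpp

    model_file = os.path.expanduser('~/models/llama-2-13b-chat.Q8_0.gguf')
    llm = LlamaCpp(model_path=model_file, n_ctx=4000, max_tokens=128, verbose=False)
    print(llm('Q: In one sentence, what does a retrieval-augmented chatbot do?\nA:'))

Note that LlamaCpp does not expand '~' on its own, which is why the sketch calls os.path.expanduser explicitly.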