SiraH committed
Commit 0efb16d
1 Parent(s): 4c5f2fb

change method to import from folder

Files changed (1):
  1. app.py +70 -76

app.py CHANGED
@@ -202,26 +202,27 @@ def load_embeddings():
     return embeddings
 
 def main():
-    # msgs = StreamlitChatMessageHistory(key="langchain_messages")
-    # print(msgs)
-    # if "messages" not in st.session_state:
-    #     st.session_state.messages = []
+    data = []
+    msgs = StreamlitChatMessageHistory(key="langchain_messages")
+    print(msgs)
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
 
     # DB_FAISS_UPLOAD_PATH = "vectorstores/db_faiss"
     st.header("DOCUMENT QUESTION ANSWERING IS2")
-    # directory = "data"
-    # data_dir = UploadDoc(directory).create_document()
-    # data.extend(data_dir)
+    directory = "data"
+    data_dir = UploadDoc(directory).create_document()
+    data.extend(data_dir)
 
-    # #create vector from upload
-    # if len(data) > 0 :
-    #     sp_docs = split_docs(documents = data)
-    #     st.write(f"This document have {len(sp_docs)} chunks")
-    #     embeddings = load_embeddings()
-    #     with st.spinner('Wait for create vector'):
-    #         db = FAISS.from_documents(sp_docs, embeddings)
-    #         # db.save_local(DB_FAISS_UPLOAD_PATH)
-    #         # st.write(f"Your model is already store in {DB_FAISS_UPLOAD_PATH}")
+    # create vector store from the documents in the folder
+    if len(data) > 0:
+        sp_docs = split_docs(documents=data)
+        st.write(f"This document has {len(sp_docs)} chunks")
+        embeddings = load_embeddings()
+        with st.spinner('Creating the vector store...'):
+            db = FAISS.from_documents(sp_docs, embeddings)
+            # db.save_local(DB_FAISS_UPLOAD_PATH)
+            # st.write(f"Your model is already stored in {DB_FAISS_UPLOAD_PATH}")
 
     llm = load_llama2_llamaCpp()
     qa_prompt = set_custom_prompt()
@@ -229,38 +230,31 @@ def main():
     #memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
     #doc_chain = load_qa_chain(llm, chain_type="stuff", prompt = qa_prompt)
     #question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
-    embeddings = load_embeddings()
+    #embeddings = load_embeddings()
 
 
-    uploaded_file = st.file_uploader('Choose your .pdf file', type="pdf")
-    print(uploaded_file)
-    if uploaded_file is not None:
-        pdf_reader = PdfReader(uploaded_file)
-        text = ""
-        for page in pdf_reader.pages:
-            text += page.extract_text()
-        print(text)
-        db = FAISS.from_texts(text, embeddings)
+    # uploaded_file = st.file_uploader('Choose your .pdf file', type="pdf")
+    # print(uploaded_file)
+    # if uploaded_file is not None:
+    #     pdf_reader = PdfReader(uploaded_file)
+    #     text = ""
+    #     for page in pdf_reader.pages:
+    #         text += page.extract_text()
+    #     print(text)
+    #     db = FAISS.from_texts(text, embeddings)
 
-    memory = ConversationBufferMemory(memory_key="chat_history",
-                                      return_messages=True,
-                                      input_key="query",
-                                      output_key="result")
-    qa_chain = RetrievalQA.from_chain_type(
-        llm = llm,
-        chain_type = "stuff",
-        retriever = db.as_retriever(search_kwargs = {'k':3}),
-        return_source_documents = True,
-        memory = memory,
-        chain_type_kwargs = {"prompt":qa_prompt})
+    memory = ConversationBufferMemory(memory_key="chat_history",
+                                      return_messages=True,
+                                      input_key="query",
+                                      output_key="result")
+    qa_chain = RetrievalQA.from_chain_type(
+        llm = llm,
+        chain_type = "stuff",
+        retriever = db.as_retriever(search_kwargs = {'k':3}),
+        return_source_documents = True,
+        memory = memory,
+        chain_type_kwargs = {"prompt":qa_prompt})
 
-    query = st.text_input("ASK ABOUT THE DOCS:")
-    if query:
-        start = time.time()
-        response = qa_chain({'query': query})
-        st.write(response["result"])
-        end = time.time()
-        st.write("Respone time:",int(end-start),"sec")
 
     # qa_chain = ConversationalRetrievalChain(
     #     retriever =db.as_retriever(search_kwargs={'k':2}),
@@ -272,44 +266,44 @@ def main():
     #     #get_chat_history=lambda h :h
     # )
 
-    # for message in st.session_state.messages:
-    #     with st.chat_message(message["role"]):
-    #         st.markdown(message["content"])
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
 
-    # # Accept user input
-    # if query := st.chat_input("What is up?"):
-    #     # Display user message in chat message container
-    #     with st.chat_message("user"):
-    #         st.markdown(query)
-    #     # Add user message to chat history
-    #     st.session_state.messages.append({"role": "user", "content": query})
+    # Accept user input
+    if query := st.chat_input("What is up?"):
+        # Display user message in chat message container
+        with st.chat_message("user"):
+            st.markdown(query)
+        # Add user message to chat history
+        st.session_state.messages.append({"role": "user", "content": query})
 
-    #     start = time.time()
+        start = time.time()
 
-    #     response = qa_chain({'query': query})
+        response = qa_chain({'query': query})
 
-    #     # url_list = set([i.metadata['source'] for i in response['source_documents']])
-    #     #print(f"condensed quesion : {question_generator.run({'chat_history': response['chat_history'], 'question' : query})}")
-
-    #     with st.chat_message("assistant"):
-    #         st.markdown(response['result'])
-
-    #     end = time.time()
-    #     st.write("Respone time:",int(end-start),"sec")
-    #     print(response)
-
-    #     # Add assistant response to chat history
-    #     st.session_state.messages.append({"role": "assistant", "content": response['result']})
+        # url_list = set([i.metadata['source'] for i in response['source_documents']])
+        # print(f"condensed question: {question_generator.run({'chat_history': response['chat_history'], 'question': query})}")
+
+        with st.chat_message("assistant"):
+            st.markdown(response['result'])
+
+        end = time.time()
+        st.write("Response time:", int(end - start), "sec")
+        print(response)
+
+        # Add assistant response to chat history
+        st.session_state.messages.append({"role": "assistant", "content": response['result']})
 
-    # # with st.expander("See the related documents"):
-    # #     for count, url in enumerate(url_list):
-    # #         #url_reg = regex_source(url)
-    # #         st.write(str(count+1)+":", url)
+    # with st.expander("See the related documents"):
+    #     for count, url in enumerate(url_list):
+    #         # url_reg = regex_source(url)
+    #         st.write(str(count+1)+":", url)
 
-    # clear_button = st.button("Start new convo")
-    # if clear_button :
-    #     st.session_state.messages = []
-    #     qa_chain.memory.chat_memory.clear()
+    clear_button = st.button("Start new convo")
+    if clear_button:
+        st.session_state.messages = []
+        qa_chain.memory.chat_memory.clear()
 
 
 if __name__ == '__main__':
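
The substance of this commit is the switch from an in-app st.file_uploader to UploadDoc(directory).create_document(), which reads every file in the "data" folder up front. UploadDoc is defined elsewhere in app.py and does not appear in this diff; the sketch below is only a plausible reconstruction of such a folder loader. The class body, the pypdf dependency, and the metadata layout are all assumptions, not the committed implementation:

import os
from pypdf import PdfReader
from langchain.docstore.document import Document

class UploadDoc:
    """Hypothetical stand-in for the UploadDoc helper used above."""
    def __init__(self, directory):
        self.directory = directory

    def create_document(self):
        docs = []
        for name in os.listdir(self.directory):
            if not name.lower().endswith(".pdf"):
                continue
            path = os.path.join(self.directory, name)
            # concatenate the text of every page into one Document
            text = "".join(page.extract_text() or "" for page in PdfReader(path).pages)
            docs.append(Document(page_content=text, metadata={"source": path}))
        return docs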
 
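The retired upload path also carried a subtle bug that the folder path avoids: FAISS.from_texts(text, embeddings) was called with a plain string, and since from_texts expects an iterable of strings, iterating a str hands the embedder one character at a time. For comparison, using the names already in scope in main():

# Removed code: `text` is a str, so FAISS.from_texts would iterate it
# character by character rather than embedding the document as one text.
# db = FAISS.from_texts(text, embeddings)
# A correct single-text call would have been:
# db = FAISS.from_texts([text], embeddings)
# The new code embeds one vector per chunk instead:
db = FAISS.from_documents(sp_docs, embeddings)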
 
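One rough edge remains after this commit: db is assigned only inside if len(data) > 0:, but qa_chain = RetrievalQA.from_chain_type(..., retriever=db.as_retriever(...)) runs unconditionally, so an empty "data" folder raises UnboundLocalError. A guard along these lines (a sketch, not part of the commit) would fail gracefully instead:

if len(data) == 0:
    st.warning("No documents found in the 'data' folder; add files and rerun.")
    st.stop()  # halts this Streamlit script run before qa_chain is built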
 
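Lastly, msgs = StreamlitChatMessageHistory(key="langchain_messages") is created but never wired into the chain, so chat history is tracked twice: st.session_state.messages redraws the transcript on each rerun, while ConversationBufferMemory feeds conversation context to the chain, which is why the clear button resets both. If the goal was to persist history through msgs, one option (a sketch of documented LangChain usage, not what this commit does) is to back the memory with it:

memory = ConversationBufferMemory(
    memory_key="chat_history",
    chat_memory=msgs,  # StreamlitChatMessageHistory survives Streamlit reruns
    return_messages=True,
    input_key="query",
    output_key="result",
)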