change to upload from streamlit
app.py CHANGED
@@ -201,19 +201,28 @@ def main():
     data = []
     # DB_FAISS_UPLOAD_PATH = "vectorstores/db_faiss"
     st.header("DOCUMENT QUESTION ANSWERING IS2")
-    directory = "data"
-    data_dir = UploadDoc(directory).create_document()
-    data.extend(data_dir)
+    # directory = "data"
+    # data_dir = UploadDoc(directory).create_document()
+    # data.extend(data_dir)

-    #create vector from upload
-    if len(data) > 0 :
-        sp_docs = split_docs(documents = data)
-        st.write(f"This document have {len(sp_docs)} chunks")
+    # #create vector from upload
+    # if len(data) > 0 :
+    # sp_docs = split_docs(documents = data)
+    # st.write(f"This document have {len(sp_docs)} chunks")
+    # embeddings = load_embeddings()
+    # with st.spinner('Wait for create vector'):
+    # db = FAISS.from_documents(sp_docs, embeddings)
+    # # db.save_local(DB_FAISS_UPLOAD_PATH)
+    # # st.write(f"Your model is already store in {DB_FAISS_UPLOAD_PATH}")
+    uploaded_file = st.file_uploader('Choose your .pdf file', type="pdf")
+    print(uploaded_file)
+    if uploaded_file is not None:
         embeddings = load_embeddings()
-        with st.spinner('Wait for create vector'):
-            db = FAISS.from_documents(sp_docs, embeddings)
-            # db.save_local(DB_FAISS_UPLOAD_PATH)
-            # st.write(f"Your model is already store in {DB_FAISS_UPLOAD_PATH}")
+        pdf_reader = PdfReader(uploaded_file)
+        text = ""
+        for page in pdf_reader.pages:
+            text += page.extract_text()
+        db = FAISS.from_texts(text, embeddings)

     llm = load_llama2_llamaCpp()
     qa_prompt = set_custom_prompt()
@@ -221,52 +230,49 @@ def main():
     #memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
     doc_chain = load_qa_chain(llm, chain_type="stuff", prompt = qa_prompt)
     question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
-
-
-
-
-
-
-
-
-
-
-    for message in st.session_state.messages:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
+    if db is not None :
+        qa_chain = ConversationalRetrievalChain(
+            retriever =db.as_retriever(search_type="similarity_score_threshold", search_kwargs={'k':3, "score_threshold": 0.7}),
+            question_generator=question_generator,
+            #condense_question_prompt=CONDENSE_QUESTION_PROMPT,
+            combine_docs_chain=doc_chain,
+            return_source_documents=True,
+            memory = memory,
+            #get_chat_history=lambda h :h
+            )

-
-
-
-        with st.chat_message("user"):
-            st.markdown(query)
-        # Add user message to chat history
-        st.session_state.messages.append({"role": "user", "content": query})
+        for message in st.session_state.messages:
+            with st.chat_message(message["role"]):
+                st.markdown(message["content"])

-
+        # Accept user input
+        if query := st.chat_input("What is up?"):
+            # Display user message in chat message container
+            with st.chat_message("user"):
+                st.markdown(query)
+            # Add user message to chat history
+            st.session_state.messages.append({"role": "user", "content": query})

-
+            start = time.time()

-
-        #print(f"condensed quesion : {question_generator.run({'chat_history': response['chat_history'], 'question' : query})}")
+            response = qa_chain({'question': query})

-
-
-
-
-
-
-        # Add assistant response to chat history
-        st.session_state.messages.append({"role": "assistant", "content": response['answer']})
+            url_list = set([i.metadata['source'] for i in response['source_documents']])
+            #print(f"condensed quesion : {question_generator.run({'chat_history': response['chat_history'], 'question' : query})}")
+
+            with st.chat_message("assistant"):
+                st.markdown(response['answer'])

-
-
-
-
+            end = time.time()
+            st.write("Respone time:",int(end-start),"sec")
+
+            # Add assistant response to chat history
+            st.session_state.messages.append({"role": "assistant", "content": response['answer']})

-
-
-
+            with st.expander("See the related documents"):
+                for count, url in enumerate(url_list):
+                    #url_reg = regex_source(url)
+                    st.write(str(count+1)+":", url)

     clear_button = st.button("Start new convo")
     if clear_button :
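For reference, a minimal sketch of the upload-and-index flow this commit switches to, assuming pypdf for PdfReader and the classic LangChain APIs the app already imports; the RecursiveCharacterTextSplitter, the chunk sizes, and the HuggingFaceEmbeddings model named here are illustrative stand-ins for the app's own load_embeddings() helper, not the committed code. One caveat the sketch works around: LangChain's FAISS.from_texts expects a list of strings, while the diff passes the single concatenated text string, so each character would be indexed as its own document.

# A sketch only: mirrors the new upload path in app.py under the assumptions above.
import streamlit as st
from pypdf import PdfReader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

uploaded_file = st.file_uploader("Choose your .pdf file", type="pdf")

if uploaded_file is not None:
    # Concatenate the text of every page in the uploaded PDF.
    pdf_reader = PdfReader(uploaded_file)
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # FAISS.from_texts wants a list of strings, so chunk the text first
    # instead of passing the raw string as the committed code does.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_text(text)

    # Stand-in for the app's load_embeddings(); any embedding model works here.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    db = FAISS.from_texts(chunks, embeddings)
    st.write(f"Indexed {len(chunks)} chunks")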
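The second hunk wires the FAISS index into a ConversationalRetrievalChain. Below is a hedged sketch of that wiring as a hypothetical build_qa_chain helper, again using the classic LangChain 0.0.x imports; db, llm, and qa_prompt stand in for the objects main() builds earlier. The explicit ConversationBufferMemory is an assumption: the diff passes memory = memory while the only line that creates a memory object stays commented out, and with return_source_documents=True the memory also needs output_key="answer" to know which of the chain's two outputs to store.

# A sketch of the chain wiring from this commit, under the assumptions above.
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
from langchain.memory import ConversationBufferMemory


def build_qa_chain(db, llm, qa_prompt):
    # Assumed memory object; the diff references `memory` without creating it here.
    # output_key="answer" tells the memory which chain output to store, which is
    # required once return_source_documents=True makes the chain return two keys.
    memory = ConversationBufferMemory(
        memory_key="chat_history", return_messages=True, output_key="answer"
    )
    doc_chain = load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
    question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)

    return ConversationalRetrievalChain(
        retriever=db.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={"k": 3, "score_threshold": 0.7},
        ),
        question_generator=question_generator,
        combine_docs_chain=doc_chain,
        return_source_documents=True,
        memory=memory,
    )


# Usage inside the Streamlit chat loop:
#   qa_chain = build_qa_chain(db, llm, qa_prompt)
#   response = qa_chain({"question": query})
#   st.markdown(response["answer"])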