annas4421 committed on
Commit
ca63198
1 Parent(s): 71f7666

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +108 -0
  2. htmlTemplates.py +45 -0
  3. requirements.txt +13 -0
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # importing dependencies
2
+ from dotenv import load_dotenv
3
+ import streamlit as st
4
+ from PyPDF2 import PdfReader
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from langchain.vectorstores import faiss
8
+ from langchain.prompts import PromptTemplate
9
+ from langchain.memory import ConversationBufferMemory
10
+ from langchain.chains import ConversationalRetrievalChain
11
+ from langchain.chat_models import ChatOpenAI
12
+ from htmlTemplates import css, bot_template, user_template
13
+ from langchain.embeddings import openai
14
+ from langchain.embeddings.openai import OpenAIEmbeddings
15
+
16
+
17
# Custom prompt for the chain's question-condensing step: given the chat
# history and a follow-up question, the LLM rewrites the follow-up as a
# standalone question in the question's original language.  Used below in
# get_conversationchain() as the condense_question_prompt.
custom_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""

# PromptTemplate object with {chat_history} and {question} input variables.
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)
25
+
26
# extracting text from pdf
def get_pdf_text(docs):
    """Concatenate the extracted text of every page of every uploaded PDF.

    Args:
        docs: iterable of file-like objects (e.g. Streamlit UploadedFile)
              that PyPDF2.PdfReader can read.

    Returns:
        str: all page text joined together; "" when docs is empty.
    """
    text = ""
    for pdf in docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Fix: extract_text() returns None for pages with no extractable
            # text (e.g. scanned images); the original `text += None` raised
            # a TypeError.  Coerce None to "".
            text += page.extract_text() or ""
    return text
34
+
35
# converting text to chunks
def get_chunks(raw_text):
    """Split the raw document text into 1000-character chunks with a
    200-character overlap, breaking on newlines."""
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(raw_text)
43
+
44
# embed the chunks with OpenAIEmbeddings and index them in a FAISS vectorstore
# (NOTE(review): the original comment claimed "all-MiniLm embeddings" but the
# code actually uses OpenAI's embedding API — requires OPENAI_API_KEY)
def get_vectorstore(chunks):
    """Return a FAISS vector store built from the given text chunks."""
    embeddings=OpenAIEmbeddings()
    vectorstore=faiss.FAISS.from_texts(texts=chunks,embedding=embeddings)
    return vectorstore
49
+
50
# generating conversation chain
def get_conversationchain(vectorstore):
    """Assemble the retrieval-augmented chat chain: GPT-4-turbo LLM, buffer
    memory for past turns, and the vector store as retriever."""
    # buffer memory holds the running chat history between turns;
    # output_key='answer' tells it which chain output to record
    memory = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
        output_key='answer',
    )
    llm = ChatOpenAI(temperature=0.2, model_name='gpt-4-turbo')
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        condense_question_prompt=CUSTOM_QUESTION_PROMPT,
        memory=memory,
    )
62
+
63
# generating response from user queries and displaying them accordingly
def handle_question(question):
    """Run the user's question through the conversation chain and render the
    full chat history using the HTML templates."""
    response = st.session_state.conversation({'question': question})
    st.session_state.chat_history = response["chat_history"]
    for idx, message in enumerate(st.session_state.chat_history):
        # even positions are user turns, odd positions are bot replies
        template = user_template if idx % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
72
+
73
+
74
def main():
    """Streamlit entry point: page setup, question input, and the
    PDF-upload/processing sidebar."""
    load_dotenv()  # load OPENAI_API_KEY etc. from a .env file
    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with multiple PDFs :books:")
    question = st.text_input("Ask question from your document:")
    if question:
        # Fix: previously this crashed with "'NoneType' object is not
        # callable" when a question was asked before any PDFs were processed.
        if st.session_state.conversation is None:
            st.warning("Please upload and process your documents first.")
        else:
            handle_question(question)

    with st.sidebar:
        st.subheader("Your documents")
        docs = st.file_uploader("Upload your PDF here and click on 'Process'", accept_multiple_files=True)
        if st.button("Process"):
            # guard: Process with nothing uploaded would build an empty index
            if not docs:
                st.warning("Please upload at least one PDF before processing.")
            else:
                with st.spinner("Processing"):
                    # get the pdf text
                    raw_text = get_pdf_text(docs)
                    # get the text chunks
                    text_chunks = get_chunks(raw_text)
                    # create vectorstore
                    vectorstore = get_vectorstore(text_chunks)
                    # create conversation chain
                    st.session_state.conversation = get_conversationchain(vectorstore)


if __name__ == '__main__':
    main()
htmlTemplates.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ css = '''
2
+ <style>
3
+ .chat-message {
4
+ padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
5
+ }
6
+ .chat-message.user {
7
+ align: right,
8
+ background-color: #2b313e
9
+ }
10
+ .chat-message.bot {
11
+ background-color: #475063
12
+ }
13
+ .chat-message .avatar {
14
+ width: 20%;
15
+ }
16
+ .chat-message .avatar img {
17
+ max-width: 78px;
18
+ max-height: 78px;
19
+ border-radius: 50%;
20
+ object-fit: cover;
21
+ }
22
+ .chat-message .message {
23
+ width: 80%;
24
+ padding: 0 1.5rem;
25
+ color: #fff;
26
+ }
27
+ '''
28
+ bot_template = '''
29
+ <div class="chat-message bot">
30
+ <div class="avatar">
31
+ <img src="https://cdn-icons-png.flaticon.com/512/6134/6134346.png" style="max-height: 78px; max-width: 78px; border-radius: 50%; object-fit: cover;">
32
+ </div>
33
+ <div class="message">{{MSG}}</div>
34
+ </div>
35
+ '''
36
+
37
+ user_template = '''
38
+ <div class="chat-message user">
39
+ <div class="message" style="text-align:right">{{MSG}}</div>
40
+ <div class="avatar">
41
+ <img src="https://png.pngtree.com/png-vector/20190321/ourmid/pngtree-vector-users-icon-png-image_856952.jpg">
42
+ </div>
43
+
44
+ </div>
45
+ '''
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
langchain
pypdf4
chromadb
streamlit
einops
langchain-together
faiss-gpu
sentence-transformers
PyPDF2==3.0.1
openai
tiktoken
# needed by app.py's load_dotenv()
python-dotenv
# removed: duplicate "einops" entry and "htmltemplate" — htmlTemplates is
# the local htmlTemplates.py module in this repo, not a PyPI package