LuckRafly committed on
Commit
c74cdc6
·
1 Parent(s): 490cd70

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +56 -0
  2. function.py +60 -0
  3. htmlTemplate.py +56 -0
  4. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from function import *
2
+ import streamlit as st
3
+ from htmlTemplate import css, bot_template, user_template
4
+
5
def handle_user_input(user_question):
    """Send a question to the conversation chain and render the chat history.

    The chain stored in ``st.session_state.conversation`` is called with the
    question and the current chat history; the ``'chat_history'`` entry of its
    response replaces the stored history, which is then rendered message by
    message.

    Args:
        user_question (str): The question typed by the user.

    Returns:
        None. Output is written to the Streamlit page.
    """
    # Local import so this function stays self-contained without touching
    # the file-level import block.
    import html

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    conversation = st.session_state.conversation
    if conversation is None:
        # No chain has been built yet (documents not processed); calling
        # None would raise a TypeError, so tell the user what to do instead.
        st.warning("Please upload PDFs and click 'Process' before asking questions.")
        return

    response = conversation({'question': user_question, 'chat_history': st.session_state.chat_history})
    st.session_state.chat_history = response['chat_history']

    for i, message in enumerate(st.session_state.chat_history):
        # Even positions are rendered with the user template, odd ones with
        # the bot template.
        template = user_template if i % 2 == 0 else bot_template
        # The text is placed inside raw HTML (unsafe_allow_html=True), so
        # escape it to keep '<', '>' and '&' from being interpreted as markup.
        safe_text = html.escape(message.content)
        st.markdown(template.replace("{{MSG}}", safe_text), unsafe_allow_html=True)
19
+
20
+
21
def main():
    """Build the Streamlit page: a sidebar to process PDFs and a Q&A area.

    The sidebar lets the user upload PDF files and build a conversation chain
    from them (stored in ``st.session_state.conversation``). The main area
    takes a question and forwards it to ``handle_user_input`` once documents
    have been uploaded and processed.
    """
    st.set_page_config(page_title="ChatBot with Multiple PDF", layout="wide", page_icon=":robot_face:")  # Set layout to wide
    st.write(css, unsafe_allow_html=True)

    # Ensure the session keys read below exist on the very first script run.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    # Sidebar: upload and (re)process documents.
    with st.sidebar:
        st.title("ChatBot Settings 🛠️")
        # Restrict uploads to PDFs, since the files are handed to a PDF reader.
        pdf_docs = st.file_uploader("Upload PDF documents", type=["pdf"], accept_multiple_files=True)
        if st.button("Process or Reset Conversation 🔄"):
            with st.spinner("Processing..."):
                if pdf_docs:
                    # get pdf text
                    documents = read_multiple_pdf(pdf_docs)
                    # get the text chunks
                    chunks = chunk_docs(documents, chunk_size=500, chunk_overlap=50)
                    # create vector store
                    vector_db = embedding_chunks(chunks)
                    # create conversation chain
                    st.session_state.conversation = chain_conversation(vector_db)
                    # A freshly built chain starts a new conversation, so the
                    # previously displayed history is dropped as well.
                    st.session_state.chat_history = []
                else:
                    st.warning("Please upload at least one PDF before processing.")

    # Main content: question input and answers.
    st.title("ChatBot with Multiple PDF 🤖")
    st.markdown("---")

    user_question = st.text_input("Ask a question about your documents:", key="user_input_key", value="", disabled=not pdf_docs)
    if st.button("Ask 🤔") or user_question:
        if not pdf_docs or st.session_state.conversation is None:
            # Uploading alone is not enough: the chain only exists after the
            # sidebar button has processed the documents.
            st.warning("Please upload PDFs and click 'Process' before asking questions.")
        elif not user_question.strip():
            # The button was pressed with nothing to ask.
            st.warning("Please enter a question.")
        else:
            handle_user_input(user_question)


if __name__ == "__main__":
    main()
function.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ from PyPDF2 import PdfReader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.embeddings import HuggingFaceEmbeddings
5
+ from langchain.vectorstores import FAISS
6
+ from langchain.llms import HuggingFaceHub
7
+ from langchain.memory import ConversationBufferMemory
8
+ from langchain.chains import ConversationalRetrievalChain
9
+
10
+ load_dotenv()
11
+
12
+ ## Read Multiple PDF files
13
def read_multiple_pdf(files):
    """Extract and concatenate the text of every page of the given PDFs.

    Args:
        files: An iterable of PDF sources to hand to ``PdfReader`` (one
            reader is created per item), or a single path given as a string.

    Returns:
        str: The text of all pages of all files, concatenated in order.
            An empty string when ``files`` is empty.
    """
    if isinstance(files, str):
        # A lone path is treated as a one-element list. (Previously this
        # branch ignored the argument and read a hard-coded debug path.)
        files = [files]

    page_texts = []
    for file in files:
        reader = PdfReader(file)
        # ``or ""`` guards against a page yielding no text (None/empty),
        # which would otherwise break the concatenation.
        page_texts.extend(page.extract_text() or "" for page in reader.pages)
    return "".join(page_texts)
22
+
23
+
24
+ ## Split PDF into chunks
25
def chunk_docs(document, chunk_size = 500, chunk_overlap = 50, separators="\n"):
    """
    Split a document into smaller chunks of text.

    Args:
        document (str): The document to be chunked.
        chunk_size (int, optional): The size of each chunk in characters. Defaults to 500.
        chunk_overlap (int, optional): The overlap between adjacent chunks in characters. Defaults to 50.
        separators (str or list of str, optional): The separator, or list of
            separators, used to split the document into chunks. Defaults to "\\n".

    Returns:
        list of str: The chunks produced by the splitter's ``split_text``.
    """
    # The splitter takes a list of separators. A bare string would be
    # iterated character by character, so wrap it to keep multi-character
    # separators (e.g. a blank line) intact.
    if isinstance(separators, str):
        separators = [separators]
    else:
        separators = list(separators)

    text_splitter = RecursiveCharacterTextSplitter(
        separators=separators,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(document)
45
+
46
+ ## Embeds the Data
47
def embedding_chunks(chunk, model_name = "sentence-transformers/all-MiniLM-L12-v2"):
    """Build a FAISS vector store from text chunks.

    Args:
        chunk: The texts to index, passed as-is to ``FAISS.from_texts``.
        model_name (str, optional): Name of the model given to
            ``HuggingFaceEmbeddings``.

    Returns:
        The vector store returned by ``FAISS.from_texts``.
    """
    embedder = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_texts(chunk, embedder)
51
+
52
+
53
+ ## setup conversational chain
54
def chain_conversation(vector_stores, config=None, model_repo="mistralai/Mixtral-8x7B-Instruct-v0.1"):
    """Create a conversational retrieval chain on top of a vector store.

    Args:
        vector_stores: Vector store whose ``as_retriever`` supplies the
            retriever (configured with ``k=10``).
        config (dict, optional): Keyword arguments passed to the model as
            ``model_kwargs``. When omitted,
            ``{'max_new_tokens': 256, 'temperature': 0.1}`` is used.
        model_repo (str, optional): Repository id given to ``HuggingFaceHub``.

    Returns:
        The chain built by ``ConversationalRetrievalChain.from_llm``, with a
        ``ConversationBufferMemory`` keyed on ``"chat_history"``.
    """
    # Avoid a shared mutable default: build a fresh dict per call, and copy
    # a caller-supplied one so it is never mutated through the LLM wrapper.
    if config is None:
        model_kwargs = {'max_new_tokens': 256, 'temperature': 0.1}
    else:
        model_kwargs = dict(config)

    llm = HuggingFaceHub(repo_id=model_repo, model_kwargs=model_kwargs)
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_stores.as_retriever(search_kwargs={"k": 10}),
        memory=memory,
    )
    return conversation_chain
htmlTemplate.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CSS for the chat message bubbles; app.py writes it to the page with unsafe_allow_html=True
2
+ css = '''
3
+ <style>
4
+ .chat-message {
5
+ padding: 1rem;
6
+ border-radius: 0.5rem;
7
+ margin-bottom: 1rem;
8
+ display: flex;
9
+ border: 1px solid #d3d3d3; /* Add a subtle border */
10
+ }
11
+
12
+ .chat-message.user {
13
+ background-color: #2b313e;
14
+ }
15
+
16
+ .chat-message.bot {
17
+ background-color: #475063;
18
+ }
19
+
20
+ .chat-message .avatar {
21
+ width: 15%; /* Adjust avatar size */
22
+ }
23
+
24
+ .chat-message .avatar img {
25
+ max-width: 60px;
26
+ max-height: 60px;
27
+ border-radius: 50%;
28
+ object-fit: cover;
29
+ }
30
+
31
+ .chat-message .message {
32
+ width: 85%; /* Adjust message width */
33
+ padding: 0.75rem;
34
+ color: #fff;
35
+ }
36
+ </style>
37
+ '''
38
+
39
+ # HTML templates for bot and user chat messages; app.py replaces {{MSG}} with the message text
40
+ bot_template = '''
41
+ <div class="chat-message bot">
42
+ <div class="avatar">
43
+ <img src="https://i.ibb.co/3pvQJ2B/bot-icon.jpg">
44
+ </div>
45
+ <div class="message">{{MSG}}</div>
46
+ </div>
47
+ '''
48
+
49
+ user_template = '''
50
+ <div class="chat-message user">
51
+ <div class="avatar">
52
+ <img src="https://i.ibb.co/HY8rRpL/human.jpg">
53
+ </div>
54
+ <div class="message">{{MSG}}</div>
55
+ </div>
56
+ '''
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ huggingface_hub
2
+ python-dotenv
3
+ langchain
4
+ streamlit
5
+ sentence-transformers
6
+ PyPDF2
7
+ faiss-cpu
8
+ dotenv