Spaces:

LuckRafly
/

ChatBot-PDF

Sleeping

App Files Files Community

LuckRafly committed on Dec 20, 2023

Commit

c74cdc6

1 Parent(s): 490cd70

Upload 4 files

Browse files

Files changed (4) hide show

app.py +56 -0
function.py +60 -0
htmlTemplate.py +56 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from function import *
+import streamlit as st
+from htmlTemplate import css, bot_template, user_template
def handle_user_input(user_question):
    """Send a question to the conversation chain and render the chat history.

    The chain stored in ``st.session_state.conversation`` is called with the
    question, its returned ``chat_history`` is saved back into the session
    state, and every message is rendered. Even-indexed messages use
    ``user_template`` and odd-indexed messages use ``bot_template``.

    Args:
        user_question (str): The question typed by the user.

    Returns:
        None. Output is written to the Streamlit page. When no conversation
        chain has been created yet, a warning is shown instead.
    """
    import html  # local import so the block does not depend on file-level imports

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    conversation = st.session_state.conversation
    if conversation is None:
        # Nothing has been processed yet; calling None would raise TypeError.
        st.warning("Please upload PDFs and click 'Process' before asking questions.")
        return

    response = conversation({'question': user_question, 'chat_history': st.session_state.chat_history})
    st.session_state.chat_history = response['chat_history']

    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        # The text is inserted into raw HTML (unsafe_allow_html=True), so it
        # is escaped to keep user/model output from being parsed as markup.
        st.markdown(template.replace("{{MSG}}", html.escape(message.content)), unsafe_allow_html=True)
def main():
    """Build the Streamlit page: sidebar for PDF processing, main area for chat.

    The sidebar lets the user upload PDFs and (re)build the conversation
    chain. The main area takes a question and forwards it to
    ``handle_user_input`` once a chain exists.
    """
    st.set_page_config(page_title="ChatBot with Multiple PDF", layout="wide", page_icon=":robot_face:")  # Set layout to wide
    st.write(css, unsafe_allow_html=True)

    # Sidebar: upload and processing controls.
    with st.sidebar:
        st.title("ChatBot Settings 🛠️")
        pdf_docs = st.file_uploader("Upload PDF documents", accept_multiple_files=True)
        if st.button("Process or Reset Conversation 🔄"):
            with st.spinner("Processing..."):
                if pdf_docs:
                    # Extract text, split it into chunks, embed, build the chain.
                    documents = read_multiple_pdf(pdf_docs)
                    chunks = chunk_docs(documents, chunk_size=500, chunk_overlap=50)
                    if chunks:
                        vector_db = embedding_chunks(chunks)
                        st.session_state.conversation = chain_conversation(vector_db)
                        # A new chain starts with empty memory; keep the
                        # stored history consistent with it.
                        st.session_state.chat_history = []
                    else:
                        st.warning("No extractable text was found in the uploaded PDFs.")
                else:
                    st.warning("Please upload at least one PDF before processing.")

    # Main content: question input and chat rendering.
    st.title("ChatBot with Multiple PDF 🤖")
    st.markdown("---")
    user_question = st.text_input("Ask a question about your documents:", key="user_input_key", value="", disabled=not pdf_docs)
    if st.button("Ask 🤔") or user_question:
        # Uploading alone is not enough: the chain only exists after "Process".
        conversation_ready = (
            "conversation" in st.session_state
            and st.session_state.conversation is not None
        )
        if not pdf_docs or not conversation_ready:
            st.warning("Please upload PDFs and click 'Process' before asking questions.")
        elif not user_question.strip():
            # The button was clicked with nothing typed.
            st.warning("Please enter a question.")
        else:
            handle_user_input(user_question)


if __name__ == "__main__":
    main()

function.py ADDED Viewed

	@@ -0,0 +1,60 @@

+from dotenv import load_dotenv
+from PyPDF2 import PdfReader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.llms import HuggingFaceHub
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationalRetrievalChain
+load_dotenv()
+## Read Multiple PDF files
def read_multiple_pdf(files):
    """Extract and concatenate the text of every page of every given PDF.

    Args:
        files: Either a single PDF path (``str``) or an iterable of items
            that ``PdfReader`` can open (the app passes the objects returned
            by the Streamlit file uploader).

    Returns:
        str: The text of all pages of all files, concatenated in order.
        An empty input yields an empty string.
    """
    # A lone path is wrapped in a list so it is read like any other input.
    if isinstance(files, str):
        files = [files]

    page_texts = []
    for file in files:
        reader = PdfReader(file)
        # `or ""` guards against a falsy result (e.g. None) for pages
        # without extractable text.
        page_texts.extend(page.extract_text() or "" for page in reader.pages)

    # Return only after every file has been read, not after the first one.
    return "".join(page_texts)
+## Split PDF into chunks
def chunk_docs(document, chunk_size = 500, chunk_overlap = 50, separators="\n"):
    """
    Split a document into smaller chunks of text.

    Args:
        document (str): The document to be chunked.
        chunk_size (int, optional): The size of each chunk in characters. Defaults to 500.
        chunk_overlap (int, optional): The overlap between adjacent chunks in characters. Defaults to 50.
        separators (str or list of str, optional): The separator(s) used to split the
            document into chunks. A single string is treated as one separator.
            Defaults to "\\n".

    Returns:
        list of str: The chunks produced by the splitter's ``split_text``.
    """
    # The splitter takes a list of separators. Wrapping a bare string keeps a
    # multi-character separator from being treated as a sequence of characters.
    if isinstance(separators, str):
        separators = [separators]

    text_splitter = RecursiveCharacterTextSplitter(
        separators=separators,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(document)
+## Embeds the Data
def embedding_chunks(chunk, model_name = "sentence-transformers/all-MiniLM-L12-v2"):
    """Embed text chunks and index them in a FAISS vector store.

    Args:
        chunk (list of str): The text chunks to embed.
        model_name (str, optional): Model name passed to
            ``HuggingFaceEmbeddings``.

    Returns:
        The vector store returned by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``chunk`` is empty.
    """
    # Fail early with a clear message; an empty input would otherwise
    # presumably fail inside the index builder with a less helpful error.
    if not chunk:
        raise ValueError("Cannot build a vector store from an empty list of chunks.")

    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_texts(chunk, embeddings)
+## setup conversational chain
def chain_conversation(vector_stores, config=None, model_repo="mistralai/Mixtral-8x7B-Instruct-v0.1"):
    """Create a conversational retrieval chain on top of a vector store.

    Args:
        vector_stores: Vector store whose ``as_retriever`` supplies the
            retriever (queried with ``k=10``).
        config (dict, optional): Keyword arguments passed to the model as
            ``model_kwargs``. Defaults to
            ``{'max_new_tokens': 256, 'temperature': 0.1}``.
        model_repo (str, optional): Repository id passed to ``HuggingFaceHub``.

    Returns:
        The chain built by ``ConversationalRetrievalChain.from_llm``, with a
        ``ConversationBufferMemory`` stored under the key ``"chat_history"``.
    """
    # Build the default per call instead of sharing one dict across calls.
    if config is None:
        config = {'max_new_tokens': 256, 'temperature': 0.1}

    # Pass a copy so the caller's dict is never shared with the model object.
    llm = HuggingFaceHub(repo_id=model_repo, model_kwargs=dict(config))
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_stores.as_retriever(search_kwargs={"k": 10}),
        memory=memory,
    )

htmlTemplate.py ADDED Viewed

	@@ -0,0 +1,56 @@

# Stylesheet for the chat bubbles; written to the page as raw HTML.
css = '''
<style>
.chat-message {
    padding: 1rem;
    border-radius: 0.5rem;
    margin-bottom: 1rem;
    display: flex;
    border: 1px solid #d3d3d3; /* Add a subtle border */
}
.chat-message.user {
    background-color: #2b313e;
}
.chat-message.bot {
    background-color: #475063;
}
.chat-message .avatar {
    width: 15%; /* Adjust avatar size */
}
.chat-message .avatar img {
    max-width: 60px;
    max-height: 60px;
    border-radius: 50%;
    object-fit: cover;
}
.chat-message .message {
    width: 85%; /* Adjust message width */
    padding: 0.75rem;
    color: #fff;
}
</style>
'''
# Message templates. The literal "{{MSG}}" placeholder is replaced with the
# message text before rendering. The avatar images carry alt text so the
# speaker is still identified when the image is unavailable or read aloud.
bot_template = '''
<div class="chat-message bot">
    <div class="avatar">
        <img src="https://i.ibb.co/3pvQJ2B/bot-icon.jpg" alt="Bot avatar">
    </div>
    <div class="message">{{MSG}}</div>
</div>
'''
user_template = '''
<div class="chat-message user">
    <div class="avatar">
        <img src="https://i.ibb.co/HY8rRpL/human.jpg" alt="User avatar">
    </div>
    <div class="message">{{MSG}}</div>
</div>
'''

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+huggingface_hub
+python-dotenv
+langchain
+streamlit
+sentence-transformers
+PyPDF2
+faiss-cpu
+dotenv