KrishnaKumar23 committed on
Commit
e797f63
•
1 Parent(s): 028692e

Changed LLM to Mixtral-8x7B-Instruct-v0.1

Files changed (4)
  1. app.py +165 -45
  2. llm_model.py +104 -83
  3. requirements.txt +2 -0
  4. static/temp.txt +0 -0
app.py CHANGED
@@ -3,11 +3,21 @@ from streamlit_lottie import st_lottie
 import fitz  # PyMuPDF
 import requests
 import os, shutil
-import sidebar
 import llm_model
 
+
+SYSTEM_PROMPT = [
+    """
+    You are not Mistral AI, but rather a Q&A bot trained by Krishna Kumar while building a cool side project based on RAG. Whenever asked, you need to answer as Q&A bot.
+    """,
+    """You are a RAG based Document Q&A bot. Based on the input prompt and retrieved context from the vector database you will answer questions that are closer to the context.
+    If no context was found then, say "I don't know" instead of making up answer on your own. Follow above rules strictly.
+    """
+]
+
+
 @st.cache_data(experimental_allow_widgets=True)
-def index_document(uploaded_file):
+def index_document(_llm_object, uploaded_file):
 
     if uploaded_file is not None:
         # Specify the folder path where you want to store the uploaded file in the 'assets' folder
@@ -24,8 +34,9 @@ def index_document(uploaded_file):
         st.success(f"File '{file_name}' uploaded !")
 
         with st.spinner("Indexing document... This is a free CPU version and may take a while ⏳"):
-            llm_model.create_vector_db(file_name, instructor_embeddings)
-
+            retriever = _llm_object.create_vector_db(file_name)
+            st.session_state.retriever = retriever
+
         return file_name
     else:
         return None
@@ -44,11 +55,135 @@ def is_query_valid(query: str) -> bool:
         return False
     return True
 
+def init_state():
+    if "filename" not in st.session_state:
+        st.session_state.filename = None
+
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+
+    if "temp" not in st.session_state:
+        st.session_state.temp = 0.7
+
+    if "history" not in st.session_state:
+        st.session_state.history = [SYSTEM_PROMPT]
+
+    if "repetion_penalty" not in st.session_state:
+        st.session_state.repetion_penalty = 1
+
+    if "chat_bot" not in st.session_state:
+        st.session_state.chat_bot = "Mixtral-8x7B-Instruct-v0.1"
+
+
+def faq():
+    st.markdown(
+        """
+        # FAQ
+        ## How does Document Q&A Bot work?
+        When you upload a document (in Pdf, word, csv or txt format), it will be divided into smaller chunks
+        and stored in a special type of database called a vector index
+        that allows for semantic search and retrieval.
+
+        When you ask a question, our Q&A bot will first look through the document chunks and find the
+        most relevant ones using the vector index. This acts as a context to our custom prompt which will be feed to the LLM model.
+        If the context was not found in the document then, LLM will reply 'I don't know'
+
+        ## Is my data safe?
+        Yes, your data is safe. Our bot does not store your documents or
+        questions. All uploaded data is deleted after you close the browser tab.
+
+        ## Why does it take so long to index my document?
+        Since, this is a sample QA bot project that uses open-source model
+        and doesn't have much resource capabilities like GPU, it may take time
+        to index your document based on the size of the document.
+
+        ## Are the answers 100% accurate?
+        No, the answers are not 100% accurate.
+        But for most use cases, our QA bot is very accurate and can answer
+        most questions. Always check with the sources to make sure that the answers
+        are correct.
+        """
+    )
+
+
+def sidebar():
+    with st.sidebar:
+        st.markdown("## Document Q&A Bot")
+        st.write("LLM: Mixtral-8x7B-Instruct-v0.1")
+        #st.success('API key already provided!', icon='✅')
+
+        st.markdown("### Set Model Parameters")
+        # select LLM model
+        st.session_state.model_name = 'Mixtral-8x7B-Instruct-v0.1'
+        # set model temperature
+        st.session_state.temperature = st.slider(label="Temperature", min_value=0.0, max_value=1.0, step=0.1, value=0.7)
+        st.session_state.top_p = st.slider(label="Top Probablity", min_value=0.0, max_value=1.0, step=0.1, value=0.95)
+        st.session_state.repetition_penalty = st.slider(label="Repetition Penalty", min_value=0.0, max_value=1.0, step=0.1, value=1.0)
+
+        # load model parameters
+        st.session_state.llm_object = load_model()
+        st.markdown("---")
+        # Upload file through Streamlit
+        st.session_state.uploaded_file = st.file_uploader("Upload a file", type=["pdf", "doc", "docx", "txt"])
+        index_document(st.session_state.llm_object, st.session_state.uploaded_file)
+
+        st.markdown("---")
+        st.markdown("# About")
+        st.markdown(
+            """QA bot 🤖 allows you to ask questions about your
+            documents and get accurate answers with citations."""
+        )
+
+        st.markdown("Created with ❤️ by Krishna Kumar Yadav")
+        st.markdown(
+            """
+            - [Email](mailto:krishna158@live.com)
+            - [LinkedIn](https://www.linkedin.com/in/krishna-kumar-yadav-726831105/)
+            - [Github](https://github.com/krish-yadav23)
+            - [LeetCode](https://leetcode.com/KrishnaKumar23/)
+            """
+        )
+
+        faq()
+
+
+def chat_box():
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+
+
+def generate_chat_stream(prompt):
+
+    with st.spinner("Fetching relevant answers from source document..."):
+        response, sources = st.session_state.llm_object.mixtral_chat_inference(prompt, st.session_state.history, st.session_state.temperature,
+                                                                               st.session_state.top_p, st.session_state.repetition_penalty, st.session_state.retriever)
+
+
+    return response, sources
+
+def stream_handler(chat_stream, placeholder):
+    full_response = ''
+
+    for chunk in chat_stream:
+        if chunk.token.text != '</s>':
+            full_response += chunk.token.text
+            placeholder.markdown(full_response + "▌")
+    placeholder.markdown(full_response)
+
+    return full_response
+
+def show_source(sources):
+    with st.expander("Show source"):
+        for source in sources:
+            st.info(f"{source}")
+
 
 # Function to load model parameters
 @st.cache_resource()
 def load_model():
-    return llm_model.load_model_params()
+    # create llm object
+    return llm_model.LlmModel()
 
 st.set_page_config(page_title="Document QA Bot")
 lottie_book = load_lottieurl("https://assets4.lottiefiles.com/temp/lf20_aKAfIn.json")
@@ -56,44 +191,29 @@ st_lottie(lottie_book, speed=1, height=200, key="initial")
 # Place the title below the Lottie animation
 st.title("Document Q&A Bot 🤖")
 
+# initialize session state for streamlit app
+init_state()
 # Left Sidebar
-sidebar.sidebar()
-# st.sidebar.header("Upload PDF")
-
-# load model parameters
-llm, instructor_embeddings = load_model()
-# Upload file through Streamlit
-uploaded_file = st.file_uploader("Upload a file", type=["pdf", "doc", "docx", "txt"])
-
-filename = index_document(uploaded_file)
-print(filename)
-
-if not filename:
-    st.stop()
-
-
-with st.form(key="qa_form"):
-    query = st.text_area("Ask a question about the document")
-    submit = st.form_submit_button("Submit")
-
-if submit:
-    if not is_query_valid(query):
-        st.stop()
-
-    # Output Columns
-    answer_col, sources_col = st.columns(2)
-
-    qa_chain = llm_model.document_parser(instructor_embeddings, llm)
-    result = qa_chain(query)
-
-    with answer_col:
-        st.markdown("#### Answer")
-        st.markdown(result["result"])
-
-    with sources_col:
-        st.markdown("#### Sources")
-        if not ("i don't know" in result["result"].lower()):
-            for source in result["source_documents"]:
-                st.markdown(source.page_content)
-                st.markdown(source.metadata["source"])
-                st.markdown("--------------------------")
+sidebar()
+chat_box()
+
+if prompt := st.chat_input("Ask a question about your document!"):
+    st.chat_message("user").markdown(prompt)
+    st.session_state.messages.append({"role": "user", "content": prompt})
+
+    try:
+        chat_stream, sources = generate_chat_stream(prompt)
+
+        with st.chat_message("assistant"):
+            placeholder = st.empty()
+            full_response = stream_handler(chat_stream, placeholder)
+        show_source(sources)
+
+        st.session_state.history.append([prompt, full_response])
+        st.session_state.messages.append({"role": "assistant", "content": full_response})
+    except Exception as e:
+        if not st.session_state.uploaded_file:
+            st.error("Kindly provide the document file by uploading it before posing any questions. Your cooperation is appreciated!")
+        else:
+            st.error(e)
+
 
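Note (not part of the commit): stream_handler above consumes the token stream that huggingface_hub's InferenceClient returns when text_generation is called with stream=True and details=True, reading chunk.token.text and skipping the "</s>" stop token. A minimal standalone sketch of that pattern, assuming an HF_TOKEN environment variable and an illustrative prompt; the repo id mirrors what the committed llm_model.py passes to InferenceClient:

import os
from huggingface_hub import InferenceClient

# Same repo id the committed llm_model.py instantiates the client with
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1", token=os.environ["HF_TOKEN"])

# stream=True + details=True yields chunks carrying token metadata,
# which is why the app can read chunk.token.text
chat_stream = client.text_generation(
    "[INST] What is this document about? [/INST]",  # illustrative prompt, not from the commit
    max_new_tokens=64,
    stream=True,
    details=True,
    return_full_text=False,
)

full_response = ""
for chunk in chat_stream:
    if chunk.token.text != "</s>":  # skip the end-of-sequence token, as stream_handler does
        full_response += chunk.token.text
print(full_response)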
llm_model.py CHANGED
@@ -1,92 +1,113 @@
 from langchain.vectorstores import FAISS
-from langchain.llms import GooglePalm
-from langchain.document_loaders import PyPDFLoader
-from langchain.document_loaders import TextLoader
-from langchain.document_loaders import Docx2txtLoader
-from langchain.embeddings import HuggingFaceInstructEmbeddings
+#from langchain.llms import GooglePalm, CTransformers
+from langchain.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
+from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings
 from langchain.prompts import PromptTemplate
 from langchain.chains import RetrievalQA
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from huggingface_hub import InferenceClient
 import os
 from dotenv import load_dotenv
 
 vector_index_path = "assets/vectordb/faiss_index"
 
-
-def load_env_variables():
-    load_dotenv()  # take environment variables from .env
-
-
-def create_vector_db(filename, instructor_embeddings):
-
-    if filename.endswith(".pdf"):
-        loader = PyPDFLoader(file_path=filename)
-    elif filename.endswith(".doc") or filename.endswith(".docx"):
-        loader = Docx2txtLoader(filename)
-    elif filename.endswith("txt") or filename.endswith("TXT"):
-        loader = TextLoader(filename)
-
-    # Split documents
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=10)
-    splits = text_splitter.split_documents(loader.load())
-
-    # data = loader.load()
-
-    # Create a FAISS instance for vector database from 'data'
-    vectordb = FAISS.from_documents(documents=splits,
-                                    embedding=instructor_embeddings)
-
-    # Save vector database locally
-    vectordb.save_local(vector_index_path)
-
-
-def get_qa_chain(instructor_embeddings, llm):
-
-    # Load the vector database from the local folder
-    vectordb = FAISS.load_local(vector_index_path, instructor_embeddings)
-
-    # Create a retriever for querying the vector database
-    retriever = vectordb.as_retriever(search_type="similarity")
-
-    prompt_template = """
-    You are a question answer agent and you must strictly follow below prompt template.
-    Given the following context and a question, generate an answer based on this context only.
-    In the answer try to provide as much text as possible from "response" section in the source document context without making much changes.
-    Keep answers brief and well-structured. Do not give one word answers.
-    If the answer is not found in the context, kindly state "I don't know." Don't try to make up an answer.
-
-    CONTEXT: {context}
-
-    QUESTION: {question}"""
-
-    PROMPT = PromptTemplate(
-        template=prompt_template, input_variables=["context", "question"]
-    )
-
-    chain = RetrievalQA.from_chain_type(llm=llm,
-                                        chain_type="stuff",  # or map-reduce
-                                        retriever=retriever,
-                                        input_key="query",
-                                        return_source_documents=True,  # return source document from the vector db
-                                        chain_type_kwargs={"prompt": PROMPT},
-                                        verbose=True)
-
-    return chain
-
-
-def load_model_params():
-
-    load_env_variables()
-    # Create Google Palm LLM model
-    llm = GooglePalm(google_api_key=os.environ["GOOGLE_API_KEY"], temperature=0.1)
-    # # Initialize instructor embeddings using the Hugging Face model
-    instructor_embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large")
-
-    return llm, instructor_embeddings
-
-
-def document_parser(instructor_embeddings, llm):
-
-    chain = get_qa_chain(instructor_embeddings=instructor_embeddings, llm=llm)
-
-    return chain
+class LlmModel:
+
+    def __init__(self):
+        # load dot env variables
+        self.load_env_variables()
+        # load llm model
+        self.hf_embeddings = self.load_huggingface_embeddings()
+
+    def load_env_variables(self):
+        load_dotenv()  # take environment variables from .env
+
+    def custom_prompt(self, question, history, context):
+        #RAG prompt template
+        prompt = "<s>"
+        for user_prompt, bot_response in history:  # provide chat history
+            prompt += f"[INST] {user_prompt} [/INST]"
+            prompt += f" {bot_response}</s>"
+
+        message_prompt = f"""
+        You are a question answer agent and you must strictly follow below prompt template.
+        Given the following context and a question, generate an answer based on this context only.
+        Keep answers brief and well-structured. Do not give one word answers.
+        If the answer is not found in the context, kindly state "I don't know." Don't try to make up an answer.
+
+        CONTEXT: {context}
+
+        QUESTION: {question}
+        """
+        prompt += f"[INST] {message_prompt} [/INST]"
+
+        return prompt
+
+    def format_sources(self, sources):
+        # format the document sources
+        source_results = []
+        for source in sources:
+            source_results.append(str(source.page_content) +
+                                   "\n Document: " + str(source.metadata['source']) +
+                                   " Page: " + str(source.metadata['page']))
+        return source_results
+
+    def mixtral_chat_inference(self, prompt, history, temperature, top_p, repetition_penalty, retriever):
+
+        context = retriever.get_relevant_documents(prompt)
+        sources = self.format_sources(context)
+        # use hugging face infrence api
+        client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1",
+                                 token=os.environ["HF_TOKEN"]
+                                 )
+        temperature = float(temperature)
+        if temperature < 1e-2:
+            temperature = 1e-2
+
+        generate_kwargs = dict(
+            temperature=temperature,
+            max_new_tokens=512,
+            top_p=top_p,
+            repetition_penalty=repetition_penalty,
+            do_sample=True
+        )
+
+        formatted_prompt = self.custom_prompt(prompt, history, context)
+
+        return client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False), sources
+
+
+
+    def load_huggingface_embeddings(self):
+        # Initialize instructor embeddings using the Hugging Face model
+        #return HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large")
+        return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
+                                     model_kwargs={'device': 'cpu'})
+
+
+
+    def create_vector_db(self, filename):
+
+        if filename.endswith(".pdf"):
+            loader = PyPDFLoader(file_path=filename)
+        elif filename.endswith(".doc") or filename.endswith(".docx"):
+            loader = Docx2txtLoader(filename)
+        elif filename.endswith("txt") or filename.endswith("TXT"):
+            loader = TextLoader(filename)
+
+        # Split documents
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+        splits = text_splitter.split_documents(loader.load())
+
+        # Create a FAISS instance for vector database from 'data'
+        vectordb = FAISS.from_documents(documents=splits,
+                                        embedding=self.hf_embeddings)
+
+        # Save vector database locally
+        #vectordb.save_local(vector_index_path)
+
+        # set vectordb content
+        # Load the vector database from the local folder
+        #vectordb = FAISS.load_local(vector_index_path, self.hf_embeddings)
+        # Create a retriever for querying the vector database
+        return vectordb.as_retriever(search_type="similarity")
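Note (not part of the commit): a hedged usage sketch of the new LlmModel class, driven the same way app.py drives it — build a retriever from an uploaded file, then stream an answer. The sample.pdf path and the HF_TOKEN entry in .env are assumptions for illustration.

from llm_model import LlmModel

llm = LlmModel()                                # loads .env and the MiniLM embeddings
retriever = llm.create_vector_db("sample.pdf")  # assumed local PDF; chunk, embed, index in FAISS

history = []                                    # list of [user_prompt, bot_response] pairs
chat_stream, sources = llm.mixtral_chat_inference(
    prompt="Summarise the document in two sentences.",
    history=history,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.0,
    retriever=retriever,
)

# join streamed tokens, dropping the end-of-sequence marker
answer = "".join(chunk.token.text for chunk in chat_stream if chunk.token.text != "</s>")
print(answer)
for source in sources:                          # page content plus "Document: ... Page: ..." metadata
    print(source)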
requirements.txt CHANGED
@@ -13,3 +13,5 @@ frontend
 tools
 docx2txt
 fitz
+huggingface_hub
+chainlit
static/temp.txt DELETED
File without changes