Spaces:

vinhnx90
/

inkchatgpt

Sleeping

App Files Community

vinhnx90 committed on Apr 14

Commit

e698d82

•

1 Parent(s): 3cd35af

Use Cohere's Rerank to improve search retrieval performance

Browse files

Files changed (3) hide show

app.py +67 -68
document_retriever.py +5 -6
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import streamlit as st
 from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 from langchain_community.chat_message_histories.streamlit import (
     StreamlitChatMessageHistory,
@@ -34,86 +35,84 @@ st.set_page_config(
 # Setup memory for contextual conversation
 msgs = StreamlitChatMessageHistory()
-with st.container():
-    col1, col2 = st.columns([0.2, 0.8])
-    with col1:
-        st.image(
-            "./assets/app_icon.png",
-            use_column_width="always",
-            output_format="PNG",
-        )
-    with col2:
-        st.header(":books: InkChatGPT")
-        st.caption(
-            """
-            Simple Retrieval Augmented Generation (RAG) application that allows users to upload PDF documents and engage in a conversational Q&A, with a language model (LLM) based on the content of those documents. Built with LangChain as Streamlit.
-            Supports PDF, TXT, DOCX • Limit 200MB per file.
-            * GitHub: https://github.com/vinhnx/InkChatGPT
-            * Twitter: https://x.com/vinhnx
-            """
         )
-chat_tab, documents_tab, settings_tab = st.tabs(["Chat", "Documents", "Settings"])
-with settings_tab:
-    openai_api_key = st.text_input("OpenAI API Key", type="password")
-    if len(msgs.messages) == 0 or st.button("Clear message history"):
-        msgs.clear()
-        msgs.add_ai_message("""
-        Hi, your uploaded document(s) had been analyzed.
-        Feel free to ask me any questions. For example: you can start by asking me `'What is this book about?` or `Tell me about the content of this book!`'
-        """)
-with documents_tab:
-    uploaded_files = st.file_uploader(
-        label="Select files",
-        type=["pdf", "txt", "docx"],
-        accept_multiple_files=True,
-        disabled=(not openai_api_key),
-    )
-with chat_tab:
-    if uploaded_files:
-        result_retriever = configure_retriever(uploaded_files)
-        if result_retriever is not None:
-            memory = ConversationBufferMemory(
-                memory_key="chat_history",
-                chat_memory=msgs,
-                return_messages=True,
-            )
-            # Setup LLM and QA chain
-            llm = ChatOpenAI(
-                model=LLM_MODEL,
-                api_key=openai_api_key,
-                temperature=0,
-                streaming=True,
-            )
-            chain = ConversationalRetrievalChain.from_llm(
-                llm,
-                retriever=result_retriever,
-                memory=memory,
-                verbose=False,
-                max_tokens_limit=4000,
-            )
-            avatars = {
-                ChatProfileRoleEnum.HUMAN: "user",
-                ChatProfileRoleEnum.AI: "assistant",
-            }
-            for msg in msgs.messages:
-                st.chat_message(avatars[msg.type]).write(msg.content)
-if not openai_api_key:
-    st.caption("🔑 Add your **OpenAI API key** on the `Settings` to continue.")
 if user_query := st.chat_input(
     placeholder="Ask me anything!",
-    disabled=(not openai_api_key),
 ):
     st.chat_message("user").write(user_query)

 import streamlit as st
 from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
+from langchain.chains.retrieval_qa.base import RetrievalQA
 from langchain.memory import ConversationBufferMemory
 from langchain_community.chat_message_histories.streamlit import (
     StreamlitChatMessageHistory,
 # Setup memory for contextual conversation
 msgs = StreamlitChatMessageHistory()
+with st.sidebar:
+    with st.container():
+        col1, col2 = st.columns([0.2, 0.8])
+        with col1:
+            st.image(
+                "./assets/app_icon.png",
+                use_column_width="always",
+                output_format="PNG",
+            )
+        with col2:
+            st.header(":books: InkChatGPT")
+    # chat_tab,
+    documents_tab, settings_tab = st.tabs(
+        [
+            # "Chat",
+            "Documents",
+            "Settings",
+        ]
+    )
+    with settings_tab:
+        openai_api_key = st.text_input("OpenAI API Key", type="password")
+        if len(msgs.messages) == 0 or st.button("Clear message history"):
+            msgs.clear()
+            msgs.add_ai_message("""
+            Hi, your uploaded document(s) had been analyzed.
+            Feel free to ask me any questions. For example: you can start by asking me `'What is this book about?` or `Tell me about the content of this book!`'
+            """)
+    with documents_tab:
+        uploaded_files = st.file_uploader(
+            label="Select files",
+            type=["pdf", "txt", "docx"],
+            accept_multiple_files=True,
+            disabled=(not openai_api_key),
         )
+if not openai_api_key:
+    st.info("🔑 Please Add your **OpenAI API key** on the `Settings` to continue.")
+if uploaded_files:
+    result_retriever = configure_retriever(uploaded_files)
+    if result_retriever is not None:
+        memory = ConversationBufferMemory(
+            memory_key="chat_history",
+            chat_memory=msgs,
+            return_messages=True,
+        )
+        # Setup LLM and QA chain
+        llm = ChatOpenAI(
+            model=LLM_MODEL,
+            api_key=openai_api_key,
+            temperature=0,
+            streaming=True,
+        )
+        chain = ConversationalRetrievalChain.from_llm(
+            llm,
+            retriever=result_retriever,
+            memory=memory,
+            verbose=False,
+            max_tokens_limit=4000,
+        )
+        avatars = {
+            ChatProfileRoleEnum.HUMAN: "user",
+            ChatProfileRoleEnum.AI: "assistant",
+        }
+        for msg in msgs.messages:
+            st.chat_message(avatars[msg.type]).write(msg.content)
 if user_query := st.chat_input(
     placeholder="Ask me anything!",
+    disabled=(not openai_api_key and not result_retriever),
 ):
     st.chat_message("user").write(user_query)

document_retriever.py CHANGED Viewed

@@ -3,7 +3,8 @@ import tempfile
 import streamlit as st
 from langchain.retrievers import ContextualCompressionRetriever
-from langchain.retrievers.document_compressors import EmbeddingsFilter
 from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader, TextLoader
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import DocArrayInMemorySearch
@@ -53,10 +54,8 @@ def configure_retriever(files, use_compression=False):
     if not use_compression:
         return retriever
-    embeddings_filter = EmbeddingsFilter(
-        embeddings=embeddings, similarity_threshold=0.76
-    )
     return ContextualCompressionRetriever(
-        base_compressor=embeddings_filter, base_retriever=retriever
     )

 import streamlit as st
 from langchain.retrievers import ContextualCompressionRetriever
+from langchain_cohere import CohereRerank
 from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader, TextLoader
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import DocArrayInMemorySearch
     if not use_compression:
         return retriever
+    compressor = CohereRerank()
     return ContextualCompressionRetriever(
+        base_compressor=compressor,
+        base_retriever=retriever,
     )

requirements.txt CHANGED Viewed

@@ -2,6 +2,7 @@ openai
 sentence-transformers
 docarray
 langchain
 streamlit
 streamlit_chat
 streamlit-extras

 sentence-transformers
 docarray
 langchain
+langchain_cohere
 streamlit
 streamlit_chat
 streamlit-extras