Spaces:

Sandaruth
/

StockGPT

Sleeping

App Files Files Community

Sandaruth committed on Jun 1, 2024

Commit

ee0f24f

verified ·

1 Parent(s): 72c011d

Upload 8 files

Browse files

Files changed (8) hide show

MultiQueryRetriever.py +216 -0
README.md +12 -12
Retrieval.py +33 -0
app.py +78 -0
htmlTemplates.py +51 -0
model.py +62 -0
prompts.py +60 -0
requirements.txt +85 -0

MultiQueryRetriever.py ADDED Viewed

	@@ -0,0 +1,216 @@

+import asyncio
+import logging
+from typing import List, Optional, Sequence
+from langchain_core.callbacks import (
+    AsyncCallbackManagerForRetrieverRun,
+    CallbackManagerForRetrieverRun,
+)
+from langchain_core.documents import Document
+from langchain_core.language_models import BaseLanguageModel
+from langchain_core.output_parsers import BaseOutputParser
+from langchain_core.prompts.prompt import PromptTemplate
+from langchain_core.retrievers import BaseRetriever
+from langchain.chains.llm import LLMChain
+logger = logging.getLogger(__name__)
+class LineListOutputParser(BaseOutputParser[List[str]]):
+    """Output parser for a list of lines."""
+    def parse(self, text: str) -> List[str]:
+        lines = text.strip().split("\n")
+        return lines
+# Default prompt for MultiQueryRetriever.from_llm. It asks the LLM for 3
+# rephrasings of the user question, separated by newlines, so that
+# LineListOutputParser can split the completion into individual queries.
+# The only template variable is {question}.
+DEFAULT_QUERY_PROMPT = PromptTemplate(
+    input_variables=["question"],
+    template="""You are an AI language model assistant. Your task is
+    to generate 3 different versions of the given user
+    question to retrieve relevant documents from a vector  database.
+    By generating multiple perspectives on the user question,
+    your goal is to help the user overcome some of the limitations
+    of distance-based similarity search. Provide these alternative
+    questions separated by newlines. Original question: {question}""",
+)
+def _unique_documents(documents: Sequence[Document]) -> List[Document]:
+    return [doc for i, doc in enumerate(documents) if doc not in documents[:i]][:4]
+class MultiQueryRetriever(BaseRetriever):
+    """Given a query, use an LLM to write a set of queries.
+    Retrieve docs for each query. Return the unique union of all retrieved docs.
+    """
+    retriever: BaseRetriever
+    llm_chain: LLMChain
+    verbose: bool = True
+    parser_key: str = "lines"
+    """DEPRECATED. parser_key is no longer used and should not be specified."""
+    include_original: bool = False
+    """Whether to include the original query in the list of generated queries."""
+    @classmethod
+    def from_llm(
+        cls,
+        retriever: BaseRetriever,
+        llm: BaseLanguageModel,
+        prompt: PromptTemplate = DEFAULT_QUERY_PROMPT,
+        parser_key: Optional[str] = None,
+        include_original: bool = False,
+    ) -> "MultiQueryRetriever":
+        """Initialize from llm using default template.
+        Args:
+            retriever: retriever to query documents from
+            llm: llm for query generation using DEFAULT_QUERY_PROMPT
+            include_original: Whether to include the original query in the list of
+                generated queries.
+        Returns:
+            MultiQueryRetriever
+        """
+        output_parser = LineListOutputParser()
+        llm_chain = LLMChain(llm=llm, prompt=prompt, output_parser=output_parser)
+        return cls(
+            retriever=retriever,
+            llm_chain=llm_chain,
+            include_original=include_original,
+        )
+    async def _aget_relevant_documents(
+        self,
+        query: str,
+        *,
+        run_manager: AsyncCallbackManagerForRetrieverRun,
+    ) -> List[Document]:
+        """Get relevant documents given a user query.
+        Args:
+            question: user query
+        Returns:
+            Unique union of relevant documents from all generated queries
+        """
+        queries = await self.agenerate_queries(query, run_manager)
+        if self.include_original:
+            queries.append(query)
+        documents = await self.aretrieve_documents(queries, run_manager)
+        return self.unique_union(documents)
+    async def agenerate_queries(
+        self, question: str, run_manager: AsyncCallbackManagerForRetrieverRun
+    ) -> List[str]:
+        """Generate queries based upon user input.
+        Args:
+            question: user query
+        Returns:
+            List of LLM generated queries that are similar to the user input
+        """
+        response = await self.llm_chain.acall(
+            inputs={"question": question}, callbacks=run_manager.get_child()
+        )
+        lines = response["text"]
+        if self.verbose:
+            logger.info(f"Generated queries: {lines}")
+        return lines
+    async def aretrieve_documents(
+        self, queries: List[str], run_manager: AsyncCallbackManagerForRetrieverRun
+    ) -> List[Document]:
+        """Run all LLM generated queries.
+        Args:
+            queries: query list
+        Returns:
+            List of retrieved Documents
+        """
+        document_lists = await asyncio.gather(
+            *(
+                self.retriever.aget_relevant_documents(
+                    query, callbacks=run_manager.get_child()
+                )
+                for query in queries
+            )
+        )
+        return [doc for docs in document_lists for doc in docs]
+    def _get_relevant_documents(
+        self,
+        query: str,
+        *,
+        run_manager: CallbackManagerForRetrieverRun,
+    ) -> List[Document]:
+        """Get relevant documents given a user query.
+        Args:
+            question: user query
+        Returns:
+            Unique union of relevant documents from all generated queries
+        """
+        queries = self.generate_queries(query, run_manager)
+        if self.include_original:
+            queries.append(query)
+        documents = self.retrieve_documents(queries, run_manager)
+        return self.unique_union(documents)
+    def generate_queries(
+        self, question: str, run_manager: CallbackManagerForRetrieverRun
+    ) -> List[str]:
+        """Generate queries based upon user input.
+        Args:
+            question: user query
+        Returns:
+            List of LLM generated queries that are similar to the user input
+        """
+        response = self.llm_chain(
+            {"question": question}, callbacks=run_manager.get_child()
+        )
+        lines = response["text"]
+        if self.verbose:
+            logger.info(f"Generated queries: {lines}")
+        return lines
+    def retrieve_documents(
+        self, queries: List[str], run_manager: CallbackManagerForRetrieverRun
+    ) -> List[Document]:
+        """Run all LLM generated queries.
+        Args:
+            queries: query list
+        Returns:
+            List of retrieved Documents
+        """
+        documents = []
+        for query in queries:
+            docs = self.retriever.get_relevant_documents(
+                query, callbacks=run_manager.get_child()
+            )
+            documents.extend(docs)
+        print("retrieve documents--", len(documents))
+        return documents
+    def unique_union(self, documents: List[Document]) -> List[Document]:
+        """Get unique Documents.
+        Args:
+            documents: List of retrieved Documents
+        Returns:
+            List of unique retrieved Documents
+        """
+        print("unique union--", len(documents))
+        return _unique_documents(documents)

README.md CHANGED Viewed

@@ -1,12 +1,12 @@
----
-title: StockGPT
-emoji: 📚
-colorFrom: red
-colorTo: red
-sdk: streamlit
-sdk_version: 1.35.0
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: StockGPT
+emoji: 📚
+colorFrom: red
+colorTo: red
+sdk: streamlit
+sdk_version: 1.35.0
+app_file: app.py
+pinned: false
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

Retrieval.py ADDED Viewed

	@@ -0,0 +1,33 @@

+import logging
+from model import llm, vectorstore, splitter, embedding, QA_PROMPT
+# Chain for Web
+from langchain.chains import RetrievalQA
+bsic_chain = RetrievalQA.from_chain_type(
+    llm=llm,
+    chain_type="stuff",
+    retriever = vectorstore.as_retriever(search_kwargs={"k": 4}),
+    return_source_documents= True,
+    input_key="question",
+    chain_type_kwargs={"prompt": QA_PROMPT},
+)
+from MultiQueryRetriever import MultiQueryRetriever
+retriever_from_llm = MultiQueryRetriever.from_llm(
+    retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
+    llm=llm,
+)
+multiQuery_chain = RetrievalQA.from_chain_type(
+    llm=llm,
+    chain_type="stuff",
+    retriever = retriever_from_llm,
+    return_source_documents= True,
+    input_key="question",
+    chain_type_kwargs={"prompt": QA_PROMPT},
+)

app.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import streamlit as st
+from Retrieval import bsic_chain, multiQuery_chain
+import time
+from htmlTemplates import css, bot_template, user_template, source_template
+st.set_page_config(page_title="Chat with StockGPT", page_icon=":currency_exchange:")
+st.write(css, unsafe_allow_html=True)
+def main():
+    # Set up the layout --------------------------------------------------------------
+    st.sidebar.title("Guideline")
+    st.sidebar.markdown("""
+        1. Type your message in the chat box on the right.
+        2. Hit Enter or click the send button to send your message.
+        3. Chat bot responses will appear below.
+        4. Source documents will be displayed in the sidebar.
+    """)
+    # Dropdown to select model --------------------------------------------------------
+    model_selection = st.sidebar.selectbox("Select Model", ["Basic", "MultiQuery"])
+    print(model_selection)
+    # Button to connect to Google link ------------------------------------------------
+    st.sidebar.markdown('<a href="https://drive.google.com/drive/folders/13v6LsaYH9wEwvqVtlLG1U4OiUHgZ7hY4?usp=sharing" target="_blank" style="display: inline-block;'
+                        'background-color: #475063; color: white; padding: 10px 20px; text-align: center;border: 1px solid white;'
+                        'text-decoration: none; cursor: pointer; border-radius: 5px;">Sources</a>',
+                        unsafe_allow_html=True)
+    st.title("StockGPT Chat App")
+    # Chat area -----------------------------------------------------------------------
+    user_input = st.text_input("", key="user_input",placeholder="Type your question here...")
+    # JavaScript code to submit the form on Enter key press
+    js_submit = f"""
+        document.addEventListener("keydown", function(event) {{
+            if (event.code === "Enter" && !event.shiftKey) {{
+                document.querySelector(".stTextInput").dispatchEvent(new Event("submit"));
+            }}
+        }});
+    """
+    st.markdown(f'<script>{js_submit}</script>', unsafe_allow_html=True)
+    if st.button("Send"):
+        if user_input:
+            with st.spinner('Waiting for response...'):
+                # Add bot response here (you can replace this with your bot logic)
+                response, metadata, source_documents = generate_bot_response(user_input, model_selection)
+                st.write(user_template.replace("{{MSG}}", user_input), unsafe_allow_html=True)
+                st.write(bot_template.replace("{{MSG}}", response), unsafe_allow_html=True)
+                # Source documents
+                st.sidebar.title("Source Documents")
+                for i, doc in enumerate(source_documents, 1):
+                    tit = metadata[i-1]["source"].split("\\")[-1]
+                    with st.sidebar.expander(f"{tit}"):
+                        st.write(doc)  # Assuming the Document object can be directly written to display its content
+def generate_bot_response(user_input, model):
+    # Simple bot logic (replace with your actual bot logic)
+    start_time = time.time()
+    print(f"User Input: {user_input}")
+    if model == "Basic":
+        res = bsic_chain(user_input)
+    elif model == "MultiQuery":
+        res = multiQuery_chain(user_input)
+    response = res['result']
+    metadata = [i.metadata for i in res.get("source_documents", [])]
+    end_time = time.time()
+    response_time = end_time - start_time
+    print(f"Response Time: {response_time} seconds")
+    return response, metadata, res.get('source_documents', [])
+if __name__ == "__main__":
+    main()

htmlTemplates.py ADDED Viewed

	@@ -0,0 +1,51 @@

+css = '''
+<style>
+.chat-message {
+    padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
+}
+.chat-message.user {
+    background-color: #2b313e
+}
+.chat-message.bot {
+    background-color: #475063
+}
+.chat-message .avatar {
+  width: 20%;
+}
+.chat-message .avatar img {
+  max-width: 78px;
+  max-height: 78px;
+  border-radius: 50%;
+  object-fit: cover;
+}
+.chat-message .message {
+  width: 80%;
+  padding: 0 1.5rem;
+  color: #fff;
+}
+'''
+# Chat bubble templates. Each one contains a literal {{MSG}} placeholder that the
+# caller substitutes with str.replace before rendering.
+# Bubble for the assistant's answer.
+bot_template = '''
+<div class="chat-message bot">
+    <div class="avatar">
+        <img src="https://cdn-icons-png.flaticon.com/128/4712/4712038.png">
+    </div>
+    <div class="message">{{MSG}}</div>
+</div>
+'''
+# Bubble for the user's question.
+user_template = '''
+<div class="chat-message user">
+    <div class="avatar">
+        <img src="https://cdn-icons-png.flaticon.com/512/1177/1177568.png">
+    </div>
+    <div class="message">{{MSG}}</div>
+</div>
+'''
+# Bubble for a source document; styled like the bot bubble, with a PDF icon.
+source_template = '''
+<div class="chat-message bot">
+ <div class="avatar">
+        <img src="https://st.depositphotos.com/1427101/4468/v/950/depositphotos_44680417-stock-illustration-pdf-paper-sheet-icons.jpg">
+    </div>
+    <div class="message">{{MSG}}</div>
+</div>
+'''

model.py ADDED Viewed

	@@ -0,0 +1,62 @@

+# Builds, at import time, the shared objects used by the retrieval chains:
+# the chat LLM, the embedding model, the text splitter, the FAISS vector store
+# and the QA prompt.
+import os
+from dotenv import load_dotenv
+from prompts import qa_template_V0, qa_template_V1, qa_template_V2
+# Load environment variables from .env file
+load_dotenv()
+from langchain.chat_models import ChatAnyscale
+# API token for the Anyscale endpoint; os.environ.get yields None when it is unset.
+ANYSCALE_ENDPOINT_TOKEN=os.environ.get("ANYSCALE_ENDPOINT_TOKEN")
+anyscale_api_key =ANYSCALE_ENDPOINT_TOKEN
+# Chat model: temperature 0, streaming disabled.
+llm=ChatAnyscale(anyscale_api_key=anyscale_api_key,temperature=0, model_name='mistralai/Mistral-7B-Instruct-v0.1', streaming=False)
+## Create embeddings and splitter
+from langchain.embeddings import HuggingFaceBgeEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+# Create Embeddings
+model_name = "BAAI/bge-large-en"
+embedding = HuggingFaceBgeEmbeddings(
+    model_name = model_name,
+    # model_kwargs = {'device':'cuda'},
+    encode_kwargs = {'normalize_embeddings': True}
+)
+# Create Splitter
+splitter = RecursiveCharacterTextSplitter(
+    chunk_size=1000,
+    chunk_overlap=100,
+)
+from langchain_community.vectorstores import FAISS
+# Directory holding the prebuilt FAISS index, relative to the working directory.
+# persits_directory="./faiss_V04_C500_BGE_large_web_doc_with_split-final"
+persits_directory="./faiss_V06_C500_BGE_large-Final"
+# NOTE(review): newer langchain-community releases appear to require
+# allow_dangerous_deserialization=True here - confirm before upgrading the pin.
+vectorstore= FAISS.load_local(persits_directory, embedding)
+# Define a custom prompt for the user manual
+from langchain.prompts import PromptTemplate
+# qa_template_V2 is the template in use; V0 and V1 are imported but not referenced here.
+QA_PROMPT = PromptTemplate(input_variables=["context", "question"],template=qa_template_V2,)

prompts.py ADDED Viewed

	@@ -0,0 +1,60 @@

+# Question-answering prompt templates. Each one has two placeholders:
+# {context} for the retrieved documents and {question} for the user question.
+# V0: instruction-style prompt; asks the model to say it does not know rather
+# than make up an answer.
+qa_template_V0 = ("""
+    You are the AI assistant of the IronOne Technologies which provide services for companies members and  novice users with learning with ATrad Aplication .
+    You have provided context information below related to learning material.
+    Context: {context}
+    Given this information, please answer the question with the latest information.
+    If you dont know the answer say you dont know, dont try to makeup answers.
+    if context is not enough to answer the question, ask for more information.
+    if context is not related to the question, say I dont know.
+    give the answer with very clear structure and clear language.
+    each answer Must start with code word ATrad Ai(QA):
+    Question: {question}
+    answer: let me think about it...""")
+# V1: the same task phrased as a welcome message from the assistant.
+qa_template_V1 = ("""
+    Welcome to IronOne Technologies' AI Assistant, designed to assist you in learning with the ATrad Application.
+    Context: {context}
+    As your AI assistant, I'm here to help you navigate through learning materials and provide guidance.
+    Please provide me with any questions or concerns you have regarding the ATrad Application.
+    If you're unsure about something or need more information, feel free to ask.
+    Important:-No need to mention provided document. give strictly answers.
+              -Give answers in a very structured manner to understand easily.
+    each answer Must start with code word ATrad Ai(QA):
+    Question: {question}
+    ATrad Ai(QA): Let me think about it...""")
+# V2: system instructions inside <<SYS>> tags followed by an [INST] section
+# that holds the documents and the question.
+qa_template_V2= ("""
+<<SYS>>
+You are the AI assistant for ATrad, which offers services to members and novice users through the ATrad Application and Online Invest platform.
+ATrad is a specialized trading and analytics platform focusing on Emerging Markets, with a strong emphasis on ESG (Environmental, Social, and Governance) investments. It holds a dominant market share of 75% among member firms of the Colombo Stock Exchange in Sri Lanka.
+Please refrain from providing fabricated answers. If you're unsure, simply state that you don't know and avoid adding any information not mentioned in the provided context.
+If the context is unclear, ask for clarification rather than making assumptions. If a question falls outside the scope of your domain, politely indicate so.
+Important : No need to mention provided document. give strictly answers. Give answers in a very structured manner to understand easily.
+Important and Urgent : Explain the answers point by point and give the answer in a very structured manner to understand easily.
+Ensure your responses are polite, concise, and straightforward.
+If the question pertains to topics beyond ATrad Application and Online Invest platform, indicate that it's outside your area of expertise.
+<</SYS>>
+[INST]
+<DOCUMENTS>
+{context}
+</DOCUMENTS>
+Question : {question}[/INST]"""
+)

requirements.txt ADDED Viewed

	@@ -0,0 +1,85 @@

+aiohttp==3.9.3
+aiosignal==1.3.1
+annotated-types==0.6.0
+anyio==4.2.0
+async-timeout==4.0.3
+attrs==23.2.0
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+dataclasses-json==0.6.4
+distro==1.9.0
+faiss-cpu==1.7.4
+filelock==3.13.1
+frozenlist==1.4.1
+fsspec==2024.2.0
+greenlet==3.0.3
+h11==0.14.0
+httpcore==1.0.3
+httpx==0.26.0
+huggingface-hub==0.20.3
+idna==3.6
+Jinja2==3.1.3
+joblib==1.3.2
+jsonpatch==1.33
+jsonpointer==2.4
+langchain==0.1.7
+langchain-community==0.0.20
+langchain-core==0.1.23
+langchain-openai==0.0.6
+langsmith==0.0.87
+MarkupSafe==2.1.5
+marshmallow==3.20.2
+mpmath==1.3.0
+multidict==6.0.5
+mypy-extensions==1.0.0
+networkx==3.2.1
+nltk==3.8.1
+numpy==1.26.4
+openai==1.12.0
+pillow==10.2.0
+pydantic==2.6.1
+pydantic_core==2.16.2
+python-dotenv==1.0.1
+# pywin32==305.1
+PyYAML==6.0.1
+regex==2023.12.25
+requests==2.31.0
+safetensors==0.4.2
+scikit-learn==1.4.1.post1
+scipy==1.12.0
+sentence-transformers==2.3.1
+sentencepiece==0.1.99
+sniffio==1.3.0
+SQLAlchemy==2.0.27
+sympy==1.12
+tenacity==8.2.3
+threadpoolctl==3.3.0
+tiktoken==0.6.0
+tokenizers==0.15.2
+torch==2.2.0
+torchaudio==2.2.0
+torchvision==0.17.0
+tqdm==4.66.2
+transformers==4.37.2
+typing-inspect==0.9.0
+yarl==1.9.4