parambharat committed • Commit 049ff35
1 Parent(s): dbb0a0b

chore: improve rag pipeline

Files changed:
- app.py +17 -8
- rag/rag.py +72 -22
app.py
CHANGED
@@ -1,15 +1,21 @@
 import os
-os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
 
-
+os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
 
 import streamlit as st
+import weave
 from rag.rag import SimpleRAGPipeline
 
-st.set_page_config(…
+st.set_page_config(
+    page_title="Chat with the Llama 3 paper!",
+    page_icon="🦙",
+    layout="centered",
+    initial_sidebar_state="auto",
+    menu_items=None,
+)
 
-wandb_api_key = st.sidebar.text_input(…
-if len(wandb_api_key)>=10:
+wandb_api_key = st.sidebar.text_input("WANDB_API_KEY", type="password")
+if len(wandb_api_key) >= 10:
     os.environ["WANDB_API_KEY"] = wandb_api_key
 else:
     st.stop()
@@ -20,6 +26,7 @@ weave.init(f"{WANDB_PROJECT}")
 
 st.title("Chat with the Llama 3 paper 💬🦙")
 
+
 @st.cache_resource(show_spinner=False)
 def load_rag_pipeline():
     rag_pipeline = SimpleRAGPipeline()
@@ -27,6 +34,7 @@ def load_rag_pipeline():
 
     return rag_pipeline
 
+
 if "rag_pipeline" not in st.session_state.keys():
     st.session_state.rag_pipeline = load_rag_pipeline()
 
@@ -37,8 +45,9 @@ def generate_response(query):
     response = rag_pipeline.predict(query)
     st.write_stream(response.response_gen)
 
-…
-…
-…
+
+with st.form("my_form"):
+    query = st.text_area("Ask your question about the Llama 3 paper here:")
+    submitted = st.form_submit_button("Submit")
 if submitted:
     generate_response(query)
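The reworked app.py follows a standard Streamlit pattern: build the expensive pipeline once behind @st.cache_resource, collect the query in an st.form, and stream tokens to the page with st.write_stream. A minimal self-contained sketch of that pattern, runnable without the RAG backend (fake_token_stream is a hypothetical stand-in for response.response_gen):

    import time

    import streamlit as st

    @st.cache_resource(show_spinner=False)
    def load_pipeline():
        # Built once per process; reused across script reruns.
        return object()  # stand-in for SimpleRAGPipeline()

    pipeline = load_pipeline()

    def fake_token_stream(query: str):
        # Hypothetical stand-in for response.response_gen:
        # st.write_stream accepts any generator of strings.
        for token in f"You asked: {query}".split():
            yield token + " "
            time.sleep(0.05)

    with st.form("demo_form"):
        query = st.text_area("Ask a question:")
        submitted = st.form_submit_button("Submit")

    if submitted:
        # Renders tokens incrementally as the generator yields them.
        st.write_stream(fake_token_stream(query))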
rag/rag.py
CHANGED
@@ -2,42 +2,89 @@ from dotenv import load_dotenv
 
 load_dotenv()
 
-import weave
-import pathlib
 import pickle
 
-
+import weave
+from llama_index.core import PromptTemplate, VectorStoreIndex, get_response_synthesizer
 from llama_index.core.node_parser import MarkdownNodeParser
-from llama_index.core import VectorStoreIndex
-from llama_index.core.retrievers import VectorIndexRetriever
 from llama_index.core.query_engine import RetrieverQueryEngine
-from llama_index.core import …
-from llama_index.llms.openai import OpenAI
+from llama_index.core.retrievers import VectorIndexRetriever
 from llama_index.embeddings.openai import OpenAIEmbedding
-from llama_index.…
+from llama_index.llms.openai import OpenAI
 
 data_dir = "data/raw_docs/documents.pkl"
 with open(data_dir, "rb") as file:
     docs_files = pickle.load(file)
 
-
+for i, doc in enumerate(docs_files[:], 1):
+    doc.metadata["page"] = i
 
 SYSTEM_PROMPT_TEMPLATE = """
-Answer …
-
-
-Context: {context_str}
-Answer:
+Answer the following question about the newly released Llama 3 405 billion parameter model based on provided snippets from the research paper.
+Provide helpful, complete, and accurate answers to the question using only the information contained in these snippets.
+
+Here are the relevant snippets from the Llama 3 405B model research paper:
+
+<snippets>
+{context_str}
+</snippets>
+
+<question>
+{query_str}
+</question>
+
+To answer this question:
+
+1. Carefully read and analyze the provided snippets.
+2. Identify information that is directly relevant to the user's question.
+3. Formulate a comprehensive answer based solely on the information in the snippets.
+4. Do not include any information or claims that are not supported by the provided snippets.
+
+Guidelines for your answer:
+
+1. Be technical and informative, providing as much detail as the snippets allow.
+2. If the snippets do not contain enough information to fully answer the question, state this clearly and provide what information you can based on the available snippets.
+3. Do not make up or infer information beyond what is explicitly stated in the snippets.
+4. If the question cannot be answered at all based on the provided snippets, state this clearly and explain why.
+5. Use appropriate technical language and terminology as used in the snippets.
+6. Cite the relevant sentences from the snippets and their page numbers to support your answer.
+7. Answer in MFAQ format (Minimal Facts Answerable Question), providing the most concise and accurate response possible.
+8. Use Markdown to format your response and include citations to indicate the snippets and the page number used to derive your answer.
+
+Here's an example of a question and an answer. You must use this as a template to format your response:
+
+<example>
+Question: What was the main mix of the training data? How much data was used to train the model?
+
+### Answer
+The main mix of the training data for the Llama 3 405 billion parameter model is as follows:
+
+- **General knowledge**: 50%
+- **Mathematical and reasoning tokens**: 25%
+- **Code tokens**: 17%
+- **Multilingual tokens**: 8%[^1^].
+
+Regarding the amount of data used to train the model, the snippets do not provide a specific total volume of data in terms of tokens or bytes. However, they do mention that the model was pre-trained on a large dataset containing knowledge until the end of 2023[^2^]. Additionally, the training process involved pre-training on 2.87 trillion tokens before further adjustments[^3^].
+
+### References
+
+[^1^]: "Scaling Laws for Data Mix," page 6.
+[^2^]: "Pre-Training Data," page 4.
+[^3^]: "Initial Pre-Training," page 14.
+
+</example>
+
+Remember, your role is to accurately convey the information from the research paper snippets, not to speculate or provide information from other sources.
+
+Answer:
 """
 
 
 class SimpleRAGPipeline(weave.Model):
-    chat_llm: str = "gpt-…
+    chat_llm: str = "gpt-4o"
     embedding_model: str = "text-embedding-3-small"
-    temperature: float = 0.…
-    similarity_top_k: int = …
+    temperature: float = 0.1
+    similarity_top_k: int = 15
     chunk_size: int = 512
     chunk_overlap: int = 128
     prompt_template: str = SYSTEM_PROMPT_TEMPLATE
@@ -46,7 +93,7 @@ class SimpleRAGPipeline(weave.Model):
     def _get_llm(self):
        return OpenAI(
            model=self.chat_llm,
-           temperature=…
+           temperature=self.temperature,
            max_tokens=4096,
        )
 
@@ -56,9 +103,9 @@ class SimpleRAGPipeline(weave.Model):
     def _get_text_qa_template(self):
        return PromptTemplate(self.prompt_template)
 
-    def _load_documents_and_chunk(self, …
+    def _load_documents_and_chunk(self, documents: list):
        parser = MarkdownNodeParser()
-       nodes = parser.get_nodes_from_documents(…
+       nodes = parser.get_nodes_from_documents(documents)
        return nodes
 
     def _create_vector_index(self, nodes):
@@ -109,5 +156,8 @@ if __name__ == "__main__":
     rag_pipeline = SimpleRAGPipeline()
     rag_pipeline.build_query_engine()
 
-    response = rag_pipeline.predict(…
-    …
+    response = rag_pipeline.predict(
+        "How does the model perform in comparison to gpt4 model?"
+    )
+    for resp in response.response_gen:
+        print(resp, end="")
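The hunks above change imports and defaults but never show the bodies of _create_vector_index and build_query_engine. For orientation, here is a minimal sketch of how the imported pieces typically compose in llama-index, using the defaults from the diff; the function body and wiring are assumptions, not the commit's actual code:

    from llama_index.core import VectorStoreIndex, get_response_synthesizer
    from llama_index.core.query_engine import RetrieverQueryEngine
    from llama_index.core.retrievers import VectorIndexRetriever
    from llama_index.embeddings.openai import OpenAIEmbedding
    from llama_index.llms.openai import OpenAI

    def build_query_engine(nodes, qa_template=None):
        # Assumed wiring: embed the Markdown nodes into an in-memory vector index.
        index = VectorStoreIndex(
            nodes, embed_model=OpenAIEmbedding(model="text-embedding-3-small")
        )
        # Fetch the 15 most similar chunks per query (similarity_top_k in the diff).
        retriever = VectorIndexRetriever(index=index, similarity_top_k=15)
        # streaming=True is what exposes response.response_gen to callers.
        synthesizer = get_response_synthesizer(
            llm=OpenAI(model="gpt-4o", temperature=0.1, max_tokens=4096),
            text_qa_template=qa_template,
            streaming=True,
        )
        return RetrieverQueryEngine(retriever=retriever, response_synthesizer=synthesizer)

With wiring like this, query_engine.query("...") returns a streaming response whose response_gen yields text deltas, which is what both the __main__ block and the Streamlit app iterate over.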