Daniel Foley committed on
Commit
26a5c64
·
1 Parent(s): 89e7a42

Added query alignment to retrieval

Browse files
Files changed (1) hide show
  1. RAG.py +48 -6
RAG.py CHANGED
@@ -87,6 +87,19 @@ def rerank(documents: List[Document], query: str) -> List[Document]:
87
  reranked_docs = reranker.invoke(query)
88
  return reranked_docs
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  def parse_xml_and_check(xml_string: str) -> str:
91
  """Parse XML-style tags and handle validation."""
92
  if not xml_string:
@@ -105,12 +118,42 @@ def RAG(llm: Any, query: str, index_name: str, embeddings: Any, top: int = 10, k
105
  """Main RAG function with improved error handling and validation."""
106
  try:
107
  # Retrieve initial documents
108
- retrieved, _ = retrieve(index_name=index_name, query=query, embeddings=embeddings, k=k)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  if not retrieved:
110
  return "No documents found for your query.", []
111
 
112
  # Rerank documents
113
- reranked = rerank(documents=retrieved, query=query)
114
  if not reranked:
115
  return "Unable to process the retrieved documents.", []
116
 
@@ -120,7 +163,7 @@ def RAG(llm: Any, query: str, index_name: str, embeddings: Any, top: int = 10, k
120
  return "No relevant content found in the documents.", []
121
 
122
  # Prepare prompt
123
- prompt_template = PromptTemplate.from_template(
124
  """Pretend you are a professional librarian. Please Summarize The Following Context as though you had retrieved it for a patron:
125
  Context:{context}
126
  Make sure to answer in the following format
@@ -143,9 +186,8 @@ def RAG(llm: Any, query: str, index_name: str, embeddings: Any, top: int = 10, k
143
  )
144
 
145
  # Generate response
146
- prompt = prompt_template.invoke({"context": context, "query": query})
147
- print(prompt)
148
- response = llm.invoke(prompt)
149
 
150
  # Parse and return response
151
  parsed = parse_xml_and_check(response.content)
 
87
  reranked_docs = reranker.invoke(query)
88
  return reranked_docs
89
 
90
def parse_xml_and_query(query: str, xml_string: str) -> str:
    """Parse XML-style tags from an LLM response and return the rephrased query.

    The LLM is expected to emit <STATEMENT>...</STATEMENT> and
    <VALID>YES|NO</VALID> tags. Falls back to the original query whenever
    the response is empty, marked invalid, or missing a statement, so the
    caller always receives usable search text for retrieval.

    Args:
        query: The user's original query, used as the fallback.
        xml_string: Raw LLM response text containing XML-style tags.

    Returns:
        The rephrased query statement, or the original query on failure.
    """
    # Bug fix: an empty LLM response previously returned the literal
    # "No response generated.", which the caller would then use as the
    # retrieval query. Fall back to the original query instead, matching
    # the VALID == 'NO' path below.
    if not xml_string:
        return query

    # Capture <TAG>content</TAG> pairs; DOTALL lets content span lines.
    pattern = r"<(\w+)>(.*?)</\1>"
    matches = re.findall(pattern, xml_string, re.DOTALL)
    parsed_response = dict(matches)

    # The model judged its own rephrasing invalid -> keep the original query.
    if parsed_response.get('VALID') == 'NO':
        return query
    return parsed_response.get('STATEMENT', query)
101
+
102
+
103
  def parse_xml_and_check(xml_string: str) -> str:
104
  """Parse XML-style tags and handle validation."""
105
  if not xml_string:
 
118
  """Main RAG function with improved error handling and validation."""
119
  try:
120
  # Retrieve initial documents
121
+ query_template = PromptTemplate.from_template(
122
+ """
123
+ Your job is to think about a query and then generate a statement that only includes information from the query that would answer the query.
124
+ You will be provided with a query in <QUERY></QUERY> tags.
125
+ Then you will think about what kind of information the query is looking for between <REASONING></REASONING> tags.
126
+ Then, based on the reasoning, you will generate a sample response to the query that only includes information from the query between <STATEMENT></STATEMENT> tags.
127
+ Afterwards, you will determine and reason about whether or not the statement you generated only includes information from the original query and would answer the query between <DETERMINATION></DETERMINATION> tags.
128
+ Finally, you will return a YES, or NO response between <VALID></VALID> tags based on whether or not you determined the statment to be valid.
129
+ Let me provide you with an exmaple:
130
+
131
+ <QUERY>I would really like to learn more about Bermudan geography<QUERY>
132
+
133
+ <REASONING>This query is interested in geograph as it relates to Bermuda. Some things they might be interested in are Bermudan climate, towns, cities, and geography</REASONING>
134
+
135
+ <STATEMENT>Bermuda's Climate is [blank]. Some of Bermuda's cities and towns are [blank]. Other points of interested about Bermuda's geography are [blank].</STATEMENT>
136
+
137
+ <DETERMINATION>The query originally only mentions bermuda and geography. The answers do not provide any false information, instead replacing meaningful responses with a placeholder [blank]. If it had hallucinated, it would not be valid. Because the statements do not hallucinate anything, this is a valid statement.</DETERMINATION>
138
+
139
+ <VALID>YES</VALID>
140
+
141
+ Now it's your turn! Remember not to hallucinate:
142
+
143
+ <QUERY>{query}</QUERY>
144
+ """
145
+
146
+ )
147
+ query_prompt = query_template.invoke({"query":query})
148
+ query_response = llm.invoke(query_prompt)
149
+ new_query = parse_xml_and_query(query_response.content)
150
+
151
+ retrieved, _ = retrieve(index_name=index_name, query=new_query, embeddings=embeddings, k=k)
152
  if not retrieved:
153
  return "No documents found for your query.", []
154
 
155
  # Rerank documents
156
+ reranked = rerank(documents=retrieved, query=new_query)
157
  if not reranked:
158
  return "Unable to process the retrieved documents.", []
159
 
 
163
  return "No relevant content found in the documents.", []
164
 
165
  # Prepare prompt
166
+ answer_template = PromptTemplate.from_template(
167
  """Pretend you are a professional librarian. Please Summarize The Following Context as though you had retrieved it for a patron:
168
  Context:{context}
169
  Make sure to answer in the following format
 
186
  )
187
 
188
  # Generate response
189
+ ans_prompt = answer_template.invoke({"context": context, "query": query})
190
+ response = llm.invoke(ans_prompt)
 
191
 
192
  # Parse and return response
193
  parsed = parse_xml_and_check(response.content)