Spaces:
Sleeping
Sleeping
Daniel Foley
committed on
Commit
·
3e602d9
1
Parent(s):
ea5cbee
more robust time analysis for reranking
Browse files
RAG.py
CHANGED
@@ -68,6 +68,7 @@ def rerank(documents: List[Document], query: str) -> List[Document]:
|
|
68 |
return []
|
69 |
|
70 |
full_docs = []
|
|
|
71 |
for doc in documents:
|
72 |
if not doc.metadata.get('source'):
|
73 |
continue
|
@@ -79,7 +80,7 @@ def rerank(documents: List[Document], query: str) -> List[Document]:
|
|
79 |
text_content = extract_text_from_json(json_data)
|
80 |
if text_content: # Only add documents with actual content
|
81 |
full_docs.append(Document(page_content=text_content, metadata={"source":doc.metadata['source'],"field":doc.metadata['field'],"URL":url}))
|
82 |
-
|
83 |
# If no valid documents were processed, return empty list
|
84 |
if not full_docs:
|
85 |
return []
|
@@ -150,7 +151,7 @@ def RAG(llm: Any, query: str,vectorstore:PineconeVectorStore, top: int = 10, k:
|
|
150 |
query_prompt = query_template.invoke({"query":query})
|
151 |
query_response = llm.invoke(query_prompt)
|
152 |
new_query = parse_xml_and_query(query=query,xml_string=query_response.content)
|
153 |
-
|
154 |
|
155 |
retrieved, _ = retrieve(query=new_query, vectorstore=vectorstore, k=k)
|
156 |
if not retrieved:
|
|
|
68 |
return []
|
69 |
|
70 |
full_docs = []
|
71 |
+
meta_start = time.time()
|
72 |
for doc in documents:
|
73 |
if not doc.metadata.get('source'):
|
74 |
continue
|
|
|
80 |
text_content = extract_text_from_json(json_data)
|
81 |
if text_content: # Only add documents with actual content
|
82 |
full_docs.append(Document(page_content=text_content, metadata={"source":doc.metadata['source'],"field":doc.metadata['field'],"URL":url}))
|
83 |
+
logging.info(f"Took {time.time()-meta_start} seconds to retrieve all metadata")
|
84 |
# If no valid documents were processed, return empty list
|
85 |
if not full_docs:
|
86 |
return []
|
|
|
151 |
query_prompt = query_template.invoke({"query":query})
|
152 |
query_response = llm.invoke(query_prompt)
|
153 |
new_query = parse_xml_and_query(query=query,xml_string=query_response.content)
|
154 |
+
logging.info(f"Old_Query: {query},New_Query: {new_query}")
|
155 |
|
156 |
retrieved, _ = retrieve(query=new_query, vectorstore=vectorstore, k=k)
|
157 |
if not retrieved:
|