Daniel Foley committed on
Commit
3e602d9
·
1 Parent(s): ea5cbee

more robust time analysis for reranking

Browse files
Files changed (1) hide show
  1. RAG.py +3 -2
RAG.py CHANGED
@@ -68,6 +68,7 @@ def rerank(documents: List[Document], query: str) -> List[Document]:
68
  return []
69
 
70
  full_docs = []
 
71
  for doc in documents:
72
  if not doc.metadata.get('source'):
73
  continue
@@ -79,7 +80,7 @@ def rerank(documents: List[Document], query: str) -> List[Document]:
79
  text_content = extract_text_from_json(json_data)
80
  if text_content: # Only add documents with actual content
81
  full_docs.append(Document(page_content=text_content, metadata={"source":doc.metadata['source'],"field":doc.metadata['field'],"URL":url}))
82
-
83
  # If no valid documents were processed, return empty list
84
  if not full_docs:
85
  return []
@@ -150,7 +151,7 @@ def RAG(llm: Any, query: str,vectorstore:PineconeVectorStore, top: int = 10, k:
150
  query_prompt = query_template.invoke({"query":query})
151
  query_response = llm.invoke(query_prompt)
152
  new_query = parse_xml_and_query(query=query,xml_string=query_response.content)
153
- print(f"New_Query: {new_query}")
154
 
155
  retrieved, _ = retrieve(query=new_query, vectorstore=vectorstore, k=k)
156
  if not retrieved:
 
68
  return []
69
 
70
  full_docs = []
71
+ meta_start = time.time()
72
  for doc in documents:
73
  if not doc.metadata.get('source'):
74
  continue
 
80
  text_content = extract_text_from_json(json_data)
81
  if text_content: # Only add documents with actual content
82
  full_docs.append(Document(page_content=text_content, metadata={"source":doc.metadata['source'],"field":doc.metadata['field'],"URL":url}))
83
+ logging.info(f"Took {time.time()-meta_start} seconds to retrieve all metadata")
84
  # If no valid documents were processed, return empty list
85
  if not full_docs:
86
  return []
 
151
  query_prompt = query_template.invoke({"query":query})
152
  query_response = llm.invoke(query_prompt)
153
  new_query = parse_xml_and_query(query=query,xml_string=query_response.content)
154
+ logging.info(f"Old_Query: {query},New_Query: {new_query}")
155
 
156
  retrieved, _ = retrieve(query=new_query, vectorstore=vectorstore, k=k)
157
  if not retrieved: