Daniel Foley committed
Commit 07f86c6 · 1 Parent(s): 1d172f1

moved length checking upstream to retrieval

Files changed (1):
  RAG.py  +3 -3
RAG.py CHANGED
@@ -70,6 +70,9 @@ def retrieve(query: str,vectorstore:PineconeVectorStore, k: int = 1000) -> Tuple
     documents = []
     scores = []
     for res, score in results:
+        # check to make sure response isn't too long for context window of 4o-mini
+        if len(res.page_content) > 4000:
+            res.page_content = res.page_content[:4000]
         documents.append(res)
         scores.append(score)
     logging.info(f"Finished Retrieval: {time.time() - start}")
@@ -233,9 +236,6 @@ def RAG(llm: Any, query: str,vectorstore:PineconeVectorStore, top: int = 10, k:
 
     # Generate response
     ans_prompt = answer_template.invoke({"context": context, "query": query})
-    # Max input tokens is 10,000 for 4o-mini. This is a quick and dirty solution
-    if len(ans_prompt) > 30000:
-        ans_prompt = ans_prompt[:30000]
     response = llm.invoke(ans_prompt)
 
     # Parse and return response
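
For reference, the hunks above only show a few lines of retrieve. A minimal sketch of the whole retrieval path with the new upstream length check might look like the code below; the similarity_search_with_score call, the imports, and the (documents, scores) return shape are assumptions based on typical LangChain usage and are not visible in this diff.

import logging
import time
from typing import List, Tuple

from langchain_core.documents import Document
from langchain_pinecone import PineconeVectorStore


def retrieve(query: str, vectorstore: PineconeVectorStore,
             k: int = 1000) -> Tuple[List[Document], List[float]]:
    """Sketch: truncate each retrieved chunk before it reaches prompt assembly."""
    start = time.time()
    # Assumed retrieval call; the actual call in RAG.py is above the hunk shown.
    results = vectorstore.similarity_search_with_score(query, k=k)

    documents = []
    scores = []
    for res, score in results:
        # Cap each chunk so the assembled context stays within 4o-mini's
        # input limit (a rough character-based proxy for the token limit).
        if len(res.page_content) > 4000:
            res.page_content = res.page_content[:4000]
        documents.append(res)
        scores.append(score)
    logging.info(f"Finished Retrieval: {time.time() - start}")
    return documents, scores

Capping length per retrieved document, rather than slicing the final prompt as the removed lines did, keeps the query and the answer template intact and only trims the retrieved context.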