Spaces:
Running
Running
Daniel Foley
committed on
Commit
·
07f86c6
1
Parent(s):
1d172f1
moved length checking upstream to retrieval
Browse files
RAG.py
CHANGED
@@ -70,6 +70,9 @@ def retrieve(query: str,vectorstore:PineconeVectorStore, k: int = 1000) -> Tuple
|
|
70 |
documents = []
|
71 |
scores = []
|
72 |
for res, score in results:
|
|
|
|
|
|
|
73 |
documents.append(res)
|
74 |
scores.append(score)
|
75 |
logging.info(f"Finished Retrieval: {time.time() - start}")
|
@@ -233,9 +236,6 @@ def RAG(llm: Any, query: str,vectorstore:PineconeVectorStore, top: int = 10, k:
|
|
233 |
|
234 |
# Generate response
|
235 |
ans_prompt = answer_template.invoke({"context": context, "query": query})
|
236 |
-
# Max input tokens is 10,000 for 4o-mini. This is a quick and dirty solution
|
237 |
-
if len(ans_prompt) > 30000:
|
238 |
-
ans_prompt = ans_prompt[:30000]
|
239 |
response = llm.invoke(ans_prompt)
|
240 |
|
241 |
# Parse and return response
|
|
|
70 |
documents = []
|
71 |
scores = []
|
72 |
for res, score in results:
|
73 |
+
# check to make sure response isn't too long for context window of 4o-mini
|
74 |
+
if len(res.page_content) > 4000:
|
75 |
+
res.page_content = res.page_content[:4000]
|
76 |
documents.append(res)
|
77 |
scores.append(score)
|
78 |
logging.info(f"Finished Retrieval: {time.time() - start}")
|
|
|
236 |
|
237 |
# Generate response
|
238 |
ans_prompt = answer_template.invoke({"context": context, "query": query})
|
|
|
|
|
|
|
239 |
response = llm.invoke(ans_prompt)
|
240 |
|
241 |
# Parse and return response
|