Update utils.py
utils.py (CHANGED)
@@ -320,18 +320,20 @@ def rag_chain(llm, prompt, retriever):
     relevant_docs=[]
     most_relevant_docs=[]
     relevant_docs = retriever.get_relevant_documents(prompt)
+    extracted_docs = extract_document_info(relevant_docs)
+

     print("releant docs1......................")
-    if (len(relevant_docs)>0):
+    if (len(extracted_docs)>0):
     print("releant docs2......................")
-    print(relevant_docs)
+    print(extracted_docs)
     #llm_chain = LLMChain(llm = llm, prompt = RAG_CHAIN_PROMPT)
     #result = llm_chain.run({"context": relevant_docs, "question": prompt})
     # Create a PromptTemplate with placeholders for context and question
     #RAG_CHAIN_PROMPT = PromptTemplate(template="Context: {context}\n\nQuestion: {question}\n\nAnswer:")

     # Retrieve the contents of the relevant documents
-    doc_contents = [doc["content"] for doc in relevant_docs]
+    doc_contents = [doc["content"] for doc in extracted_docs]

     # Compute the similarities and find the most relevant document
     question_embedding = embedder_modell.encode(prompt, convert_to_tensor=True)
@@ -340,7 +342,7 @@ def rag_chain(llm, prompt, retriever):
     most_relevant_doc_indices = similarity_scores.argsort(descending=True).squeeze().tolist()

     # Create a list of the most relevant documents
-    most_relevant_docs = [relevant_docs[i] for i in most_relevant_doc_indices]
+    most_relevant_docs = [extracted_docs[i] for i in most_relevant_doc_indices]

     # Combine the contents of all relevant documents
     combined_content = " ".join([doc["content"] for doc in most_relevant_docs])
@@ -370,6 +372,19 @@ def rag_chain(llm, prompt, retriever):
     return result


+# Function to build the list of dictionaries
+def extract_document_info(documents):
+    extracted_info = []
+    for doc in documents:
+        info = {
+            'content': doc.page_content,
+            'page': doc.metadata['page'],
+            'path': doc.metadata['source']
+        }
+        extracted_info.append(info)
+    return extracted_info
+
+
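The new extract_document_info helper flattens LangChain Document objects into plain dicts of content, page, and path. As written it indexes doc.metadata['page'] and doc.metadata['source'] directly, which raises KeyError for loaders that do not set those keys. A minimal, more defensive sketch; the .get() fallbacks and the _safe name are illustrative assumptions, not part of this commit:

def extract_document_info_safe(documents):
    # Flatten LangChain Document objects into plain dicts, tolerating
    # loaders that set no 'page' or 'source' metadata (assumed fallbacks).
    extracted_info = []
    for doc in documents:
        extracted_info.append({
            'content': doc.page_content,
            'page': doc.metadata.get('page', -1),           # -1 = page unknown
            'path': doc.metadata.get('source', 'unknown'),
        })
    return extracted_info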
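The hunks reference embedder_modell and similarity_scores, but the lines that actually compute the scores (around old lines 338-339) fall outside the shown context. A sketch of how that step is typically wired with sentence-transformers, assuming embedder_modell is a SentenceTransformer instance; the model name and the rank_by_similarity helper are placeholders, not the commit's code:

from sentence_transformers import SentenceTransformer, util

embedder_modell = SentenceTransformer("all-MiniLM-L6-v2")  # assumed model

def rank_by_similarity(prompt, doc_contents):
    # Encode the question and the extracted document contents as tensors.
    question_embedding = embedder_modell.encode(prompt, convert_to_tensor=True)
    doc_embeddings = embedder_modell.encode(doc_contents, convert_to_tensor=True)
    # Cosine similarity of the question against every document, shape (1, N).
    similarity_scores = util.cos_sim(question_embedding, doc_embeddings)
    # Document indices, best match first.
    return similarity_scores.argsort(descending=True).squeeze().tolist()

One caveat in the committed code: with a single retrieved document, squeeze().tolist() yields a plain int rather than a list, so the comprehension [extracted_docs[i] for i in most_relevant_doc_indices] would fail; flatten().tolist() avoids that.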