# Import modules and classes
from llama_index.core import VectorStoreIndex, StorageContext, load_index_from_storage
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings, NVIDIARerank
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.core.embeddings.utils import resolve_embed_model
from llama_index.core.query_engine import TransformQueryEngine
from langchain_core.documents import Document as LangDocument
from llama_index.core import Document as LlamaDocument
from llama_index.core import Settings
from llama_parse import LlamaParse
import streamlit as st
import os
# Read API keys from environment variables
nvidia_api_key = os.getenv("NVIDIA_KEY")
llamaparse_api_key = os.getenv("PARSE_KEY")
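# Both keys must be present in the environment before launch, e.g.
# `export NVIDIA_KEY=...` and `export PARSE_KEY=...` in the shell
# (the variable names here simply match the os.getenv calls above)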
# Initialize ChatNVIDIA, NVIDIARerank, and NVIDIAEmbeddings
client = ChatNVIDIA(
    model="meta/llama-3.1-8b-instruct",
    api_key=nvidia_api_key,
    temperature=0.2,
    top_p=0.7,
    max_tokens=1024
)
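# Optional sanity check (illustrative, not part of the original flow):
# ChatNVIDIA is a LangChain chat model, so a one-off test call could be
#   print(client.invoke("Hello").content)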
# Custom resolve_embed_model that skips callback_manager wiring for the
# LangChain NVIDIAEmbeddings object, which does not expose one
def custom_resolve_embed_model(embed_model):
    # NVIDIAEmbeddings comes from langchain_nvidia_ai_endpoints; return as-is
    if isinstance(embed_model, NVIDIAEmbeddings):
        return embed_model
    # Anything else goes through LlamaIndex's resolver, then gets the global
    # callback manager attached when it supports one
    embed_model = resolve_embed_model(embed_model)
    if hasattr(embed_model, 'callback_manager'):
        embed_model.callback_manager = Settings.callback_manager
    return embed_model
embed_model = NVIDIAEmbeddings(
    model="nvidia/nv-embedqa-e5-v5",
    api_key=nvidia_api_key,
    truncate="NONE"
)
reranker = NVIDIARerank(
    model="nvidia/nv-rerankqa-mistral-4b-v3",
    api_key=nvidia_api_key,
)
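# The reranker scores each candidate passage jointly with the query and
# returns the passages sorted by that relevance score; it is applied after
# vector retrieval inside query_model_with_context below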
# Set the NVIDIA models globally
Settings.embed_model = custom_resolve_embed_model(embed_model)
Settings.llm = client
# Parse the local PDF document
parser = LlamaParse(
    api_key=llamaparse_api_key,
    result_type="markdown",
    verbose=True
)
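# result_type="markdown" asks LlamaParse to return the PDF as markdown,
# which tends to preserve headings and tables better than plain text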
documents = parser.load_data("C:\\Users\\user\\Documents\\Jan 2024\\Projects\\RAGs\\Files\\PhilDataset.pdf")
print("Document Parsed")
# Split parsed text into chunks sized for the embedding model
def split_text(text, max_tokens=512):
    words = text.split()
    chunks = []
    current_chunk = []
    current_length = 0
    for word in words:
        word_length = len(word)
        # Lengths are counted in characters (plus one per joining space),
        # a rough proxy for tokens rather than a true token count
        if current_length + word_length + 1 > max_tokens:
            chunks.append(" ".join(current_chunk))
            current_chunk = [word]
            current_length = word_length + 1
        else:
            current_chunk.append(word)
            current_length += word_length + 1
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks
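# Illustrative example (assumed inputs): split_text("a b c", max_tokens=4)
# returns ["a b", "c"], because character counts plus joining spaces are
# what is measured, not model tokens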
# Generate embeddings for the document chunks (kept for inspection; the
# index re-embeds the chunks itself when it is built below)
all_embeddings = []
all_documents = []
for doc in documents:
    text_chunks = split_text(doc.text)
    for chunk in text_chunks:
        embedding = embed_model.embed_query(chunk)
        all_embeddings.append(embedding)
        all_documents.append(LlamaDocument(text=chunk))
print("Embeddings generated")
# Create and persist the index; VectorStoreIndex.from_documents does not take
# precomputed embeddings, so only the documents and embed_model are passed
index = VectorStoreIndex.from_documents(all_documents, embed_model=embed_model)
index.set_index_id("vector_index")
index.storage_context.persist("./storage")
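# persist() writes the docstore, index store, and vector store as JSON files
# under ./storage; the load step below reads that same directory back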
print("Index created")
# Load index from storage
storage_context = StorageContext.from_defaults(persist_dir="storage")
index = load_index_from_storage(storage_context, index_id="vector_index")
print("Index loaded")
# Initialize HyDEQueryTransform and TransformQueryEngine
hyde = HyDEQueryTransform(include_original=True)
query_engine = index.as_query_engine()
hyde_query_engine = TransformQueryEngine(query_engine, hyde)
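# HyDE (Hypothetical Document Embeddings) has the LLM draft a hypothetical
# answer to the query and retrieves against that draft, which typically lands
# closer to the relevant passages in embedding space than the short question;
# include_original=True keeps the original query text alongside the draft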
# Query the index with HyDE and use the output as LLM context
def query_model_with_context(question):
    # Generate a hypothetical document for the question using HyDE
    hyde_response = hyde_query_engine.query(question)
    print(f"HyDE Response: {hyde_response}")
    if isinstance(hyde_response, str):
        hyde_query = hyde_response
    else:
        hyde_query = hyde_response.response
    # Use the hypothetical document to retrieve relevant chunks
    retriever = index.as_retriever(similarity_top_k=3)
    nodes = retriever.retrieve(hyde_query)
    for node in nodes:
        print(node)
    # Rerank the retrieved chunks against the original question
    ranked_documents = reranker.compress_documents(
        query=question,
        documents=[LangDocument(page_content=node.text) for node in nodes]
    )
    # Print the most relevant node
    print(f"Most relevant node: {ranked_documents[0].page_content}")
    # Use the most relevant node as context
    context = ranked_documents[0].page_content
    # Send the context and question to the client (NVIDIA Llama 3.1 8B model)
    messages = [
        {"role": "system", "content": context},
        {"role": "user", "content": str(question)}
    ]
    completion = client.stream(messages)
    # Accumulate the streamed chunks into the final response text
    response_text = ""
    for chunk in completion:
        if chunk.content is not None:
            response_text += chunk.content
    return response_text
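# Illustrative direct call outside the Streamlit UI (hypothetical question):
#   print(query_model_with_context("What topics does the dataset cover?"))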
# Streamlit UI
st.title("Chat with HyDE + Rerank RAG App")
question = st.text_input("Enter your question:")
if st.button("Submit"):
    if question:
        st.write("**RAG Response:**")
        response = query_model_with_context(question)
        st.write(response)
    else:
        st.warning("Please enter a question.")
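# To launch the app (assuming this file is saved as app.py):
#   streamlit run app.py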