# Multi-model RAG demo: index PDF documents into ChromaDB, then answer
# questions with both DeepSeek-R1 and Llama-3 (served via Groq) for comparison.
import os

from dotenv import load_dotenv
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA

# Load environment variables from a local .env file (expects GROQ_API_KEY).
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail fast with a clear message instead of the opaque TypeError that
    # assigning None into os.environ would raise below.
    raise RuntimeError(
        "GROQ_API_KEY is not set; add it to your environment or .env file."
    )
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Resolve paths relative to this file so the script works from any CWD.
working_dir = os.path.dirname(os.path.abspath(__file__))

# Embedding model shared by indexing and querying (uses the library's
# default sentence-transformers model — pin an explicit model_name if
# reproducibility across library versions matters).
embedding = HuggingFaceEmbeddings()

# DeepSeek-R1 distilled 70B served by Groq; temperature=0 for deterministic output.
deepseek_llm = ChatGroq(
    model="deepseek-r1-distill-llama-70b",
    temperature=0,
)

# Llama-3.3 70B served by Groq; temperature=0 for deterministic output.
llama3_llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0,
)
def process_document_to_chromadb(file_name):
    """Load a PDF, split it into chunks, and persist embeddings in ChromaDB.

    Args:
        file_name: Name of a PDF file located next to this script.

    Returns:
        A confirmation message string.
    """
    loader = UnstructuredPDFLoader(os.path.join(working_dir, file_name))
    documents = loader.load()
    # Overlapping chunks preserve context across split boundaries.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
    texts = text_splitter.split_documents(documents)
    # from_documents both embeds and persists to disk; the returned handle
    # is not needed here (answer_question reopens the store by path).
    Chroma.from_documents(
        documents=texts,
        embedding=embedding,
        persist_directory=os.path.join(working_dir, "doc_vectorstore"),
    )
    return "Document successfully processed and stored."
def _run_qa_chain(llm, retriever, user_question):
    """Run a 'stuff' RetrievalQA chain for one LLM and return its answer text."""
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )
    response = chain.invoke({"query": user_question})
    return response["result"]


def answer_question(user_question):
    """Answer a question from the persisted vector store with both models.

    Args:
        user_question: Natural-language question to answer from the
            previously indexed documents.

    Returns:
        Dict with keys "answer_deepseek" and "answer_llama3" mapping to
        each model's answer string.
    """
    # Reopen the on-disk vector store created by process_document_to_chromadb.
    vectordb = Chroma(
        persist_directory=os.path.join(working_dir, "doc_vectorstore"),
        embedding_function=embedding,
    )
    retriever = vectordb.as_retriever()
    # Identical chain setup for both models — only the LLM differs.
    return {
        "answer_deepseek": _run_qa_chain(deepseek_llm, retriever, user_question),
        "answer_llama3": _run_qa_chain(llama3_llm, retriever, user_question),
    }