import os

from dotenv import load_dotenv
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA

# Load environment variables and fail early if the Groq key is missing,
# rather than crashing later with an opaque TypeError.
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError("GROQ_API_KEY is not set; add it to your .env file.")
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

working_dir = os.path.dirname(os.path.abspath(__file__))

# Initialize the embedding model (uses the default HuggingFace
# sentence-transformers model)
embedding = HuggingFaceEmbeddings()

# Initialize the DeepSeek-R1 model (distilled into Llama 70B) via Groq
deepseek_llm = ChatGroq(
    model="deepseek-r1-distill-llama-70b",
    temperature=0
)

# Initialize the Llama-3.3 70B model via Groq
llama3_llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0
)


def process_document_to_chromadb(file_name):
    """Processes a PDF document and stores embeddings in ChromaDB."""
    loader = UnstructuredPDFLoader(os.path.join(working_dir, file_name))
    documents = loader.load()

    # Split the document into overlapping chunks for retrieval
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
    texts = text_splitter.split_documents(documents)

    # Embed the chunks and persist them to a local Chroma store
    vectordb = Chroma.from_documents(
        documents=texts,
        embedding=embedding,
        persist_directory=os.path.join(working_dir, "doc_vectorstore")
    )
    return "Document successfully processed and stored."


def answer_question(user_question):
    """Retrieves answers from stored documents using DeepSeek-R1 and Llama-3.3."""
    # Reopen the persisted vector store and expose it as a retriever
    vectordb = Chroma(
        persist_directory=os.path.join(working_dir, "doc_vectorstore"),
        embedding_function=embedding
    )
    retriever = vectordb.as_retriever()

    # DeepSeek-R1 response
    qa_chain_deepseek = RetrievalQA.from_chain_type(
        llm=deepseek_llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True
    )
    response_deepseek = qa_chain_deepseek.invoke({"query": user_question})
    answer_deepseek = response_deepseek["result"]

    # Llama-3.3 response
    qa_chain_llama3 = RetrievalQA.from_chain_type(
        llm=llama3_llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True
    )
    response_llama3 = qa_chain_llama3.invoke({"query": user_question})
    answer_llama3 = response_llama3["result"]

    return {"answer_deepseek": answer_deepseek, "answer_llama3": answer_llama3}
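
# --- Usage sketch (an assumption, not part of the original script) ---
# A minimal way to exercise the two functions above when the script is run
# directly: ingest one PDF, then ask the same question of both models.
# "sample_paper.pdf" is a hypothetical file name; point it at any PDF placed
# next to this script.
if __name__ == "__main__":
    print(process_document_to_chromadb("sample_paper.pdf"))  # hypothetical PDF
    answers = answer_question("What problem does this document try to solve?")
    print("DeepSeek-R1 answer:\n", answers["answer_deepseek"])
    print("Llama-3.3 answer:\n", answers["answer_llama3"])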