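"""RAG pipeline: index a PDF into a local ChromaDB store, then answer questions
with DeepSeek-R1 and Llama-3 (both served via Groq) for side-by-side comparison."""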
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
# Load environment variables and make sure the Groq API key is available
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise EnvironmentError("GROQ_API_KEY is not set; add it to a .env file or the environment.")
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
working_dir = os.path.dirname(os.path.abspath(__file__))
# Initialize the embedding model (defaults to sentence-transformers/all-mpnet-base-v2)
embedding = HuggingFaceEmbeddings()
# Initialize the DeepSeek-R1 Distill Llama 70B model
deepseek_llm = ChatGroq(
    model="deepseek-r1-distill-llama-70b",
    temperature=0
)
# Initialize the Llama-3.3 70B model
llama3_llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0
)
def process_document_to_chromadb(file_name):
    """Processes a PDF document and stores its embeddings in ChromaDB."""
    # Load the PDF and split it into overlapping chunks
    loader = UnstructuredPDFLoader(os.path.join(working_dir, file_name))
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
    texts = text_splitter.split_documents(documents)
    # Embed the chunks and persist them to a local Chroma store
    Chroma.from_documents(
        documents=texts,
        embedding=embedding,
        persist_directory=os.path.join(working_dir, "doc_vectorstore")
    )
    return "Document successfully processed and stored."
def answer_question(user_question):
    """Retrieves answers from the stored documents using DeepSeek-R1 and Llama-3."""
    # Reopen the persisted vector store and expose it as a retriever
    vectordb = Chroma(
        persist_directory=os.path.join(working_dir, "doc_vectorstore"),
        embedding_function=embedding
    )
    retriever = vectordb.as_retriever()
    # DeepSeek-R1 response: the "stuff" chain packs the retrieved chunks into a single prompt
    qa_chain_deepseek = RetrievalQA.from_chain_type(
        llm=deepseek_llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True
    )
    response_deepseek = qa_chain_deepseek.invoke({"query": user_question})
    answer_deepseek = response_deepseek["result"]
    # Llama-3 response over the same retriever, for a side-by-side comparison
    qa_chain_llama3 = RetrievalQA.from_chain_type(
        llm=llama3_llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True
    )
    response_llama3 = qa_chain_llama3.invoke({"query": user_question})
    answer_llama3 = response_llama3["result"]
    return {"answer_deepseek": answer_deepseek, "answer_llama3": answer_llama3}