import os import streamlit as st from langchain_groq import ChatGroq from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.chains.combine_documents import create_stuff_documents_chain from langchain_core.prompts import ChatPromptTemplate from langchain.chains import create_retrieval_chain from langchain_community.vectorstores import FAISS from langchain_community.document_loaders import PyPDFLoader from langchain_google_genai import GoogleGenerativeAIEmbeddings import tempfile from dotenv import load_dotenv load_dotenv() ## Load the GROQ and Google API key groq_api_key = os.getenv('GROQ_API_KEY') os.environ["GOOGLE_API_KEY"] = os.getenv('GOOGLE_API_KEY') st.title("Gemma Model Document Q&A") llm = ChatGroq(groq_api_key=groq_api_key, model_name="gemma2-9b-it") prompt = ChatPromptTemplate.from_template( """ Answer the questions based on the provided context only. Please provide the most accurate response based on the question {context} Questions: {input} """ ) def vector_embedding(pdf_files): st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001") docs = [] for pdf_file in pdf_files: with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: temp_file.write(pdf_file.read()) temp_file_path = temp_file.name loader = PyPDFLoader(temp_file_path) docs.extend(loader.load()) text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) final_documents = text_splitter.split_documents(docs[:20]) st.session_state.vectors = FAISS.from_documents(final_documents, st.session_state.embeddings) # File uploader uploaded_files = st.file_uploader("Upload PDF files", accept_multiple_files=True, type=["pdf"]) if uploaded_files and st.button("Process Uploaded Files"): vector_embedding(uploaded_files) st.write("Vector Store DB is Ready") prompt1 = st.text_input("What do you want to ask from the documents?") import time if prompt1: if "vectors" in st.session_state: document_chain = create_stuff_documents_chain(llm, prompt) retriever = st.session_state.vectors.as_retriever() retrieval_chain = create_retrieval_chain(retriever, document_chain) start = time.process_time() response = retrieval_chain.invoke({'input': prompt1}) st.write(response['answer']) # With a Streamlit expander with st.expander("Document Similarity Search"): # Find the relevant chunks for i, doc in enumerate(response["context"]): st.write(doc.page_content) st.write("--------------------------------") else: st.write("Please upload and process PDF files first.")