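"""Streamlit chatbot for asking questions about the 2024-25 Budget PDFs with Gemini.

The PDFs are split into chunks, embedded with Google Generative AI embeddings,
stored in a local FAISS index, and queried through a LangChain "stuff" QA chain.
"""
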
import os

import google.generativeai as genai
import streamlit as st
from dotenv import load_dotenv
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_google_genai import (ChatGoogleGenerativeAI,
                                    GoogleGenerativeAIEmbeddings)
from PyPDF2 import PdfReader

load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

DEFAULT_PDF_FILES = ["2024_25_Annex_Budget.pdf", "2024_25_Budget_Speech.pdf"]


def get_pdf_text(pdf_docs):
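    """Read every page of each PDF and concatenate the extracted text."""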
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() can return None for pages with no text layer.
            text += page.extract_text() or ""
    return text


def get_text_chunks(text):
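    """Split raw text into overlapping chunks sized for the embedding model."""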
    splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
    chunks = splitter.split_text(text)
    return chunks


def get_vector_store(chunks):
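    """Embed the chunks and persist them in a local FAISS index ("faiss_index")."""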
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    vector_store.save_local("faiss_index")


def get_conversational_chain():
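    """Build a "stuff" QA chain that answers only from the retrieved context."""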
    prompt_template = """
    You are an economist. Answer the question in as much detail as possible from the
    provided context, making sure to include all relevant details. If the answer is
    not in the provided context, just say "answer is not available in the context";
    do not provide a wrong answer.

    Context:
    {context}

    Question:
    {question}

    Answer:
    """

    # The API key configured above is read from the environment automatically.
    model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.3)
    prompt = PromptTemplate(template=prompt_template,
                            input_variables=["context", "question"])
    chain = load_qa_chain(llm=model, chain_type="stuff", prompt=prompt)
    return chain


def clear_chat_history():
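    """Reset the conversation to the initial assistant greeting."""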
    st.session_state.messages = [
        {"role": "assistant", "content": "Upload some PDFs and ask me a question."}]


def user_input(user_question):
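    """Retrieve the chunks relevant to the question and run the QA chain on them."""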
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    # allow_dangerous_deserialization is required because the index is pickled;
    # only load indexes created locally by this app.
    new_db = FAISS.load_local("faiss_index", embeddings,
                              allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()
    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True)
    return response


def main():
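    """Streamlit entry point: sidebar for indexing PDFs, main pane for chat."""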
    st.set_page_config(
        page_title="Gemini PDF Chatbot",
        page_icon="🤖"
    )

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader(
            "Upload your PDF Files and Click on the Submit & Process Button",
            accept_multiple_files=True)
        if st.button("Submit & Process"):
            with st.spinner("Processing..."):
                # Fall back to the bundled budget PDFs when nothing is uploaded.
                if not pdf_docs:
                    for file_name in DEFAULT_PDF_FILES:
                        if not os.path.exists(file_name):
                            st.error(f"Default file '{file_name}' not found!")
                            return
                    pdf_docs = DEFAULT_PDF_FILES
                raw_text = get_pdf_text(pdf_docs)
                text_chunks = get_text_chunks(raw_text)
                get_vector_store(text_chunks)
                st.success("Done")

    st.title("Chat with the Budget 2024-2025 using Gemini🤖")
    st.write("Welcome to the Budget 2024-2025 chatbot!")
    st.sidebar.button('Clear Chat History', on_click=clear_chat_history)

    # Seed the chat history on first load.
    if "messages" not in st.session_state:
        st.session_state.messages = [
            {"role": "assistant", "content": "Upload some PDFs and ask me a question."}]

    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])

    if prompt := st.chat_input():
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.write(prompt)

    if st.session_state.messages[-1]["role"] != "assistant":
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                response = user_input(prompt)
                placeholder = st.empty()
                full_response = ''
                # Reveal the answer character by character for a typing effect.
                for item in response['output_text']:
                    full_response += item
                    placeholder.markdown(full_response)
                placeholder.markdown(full_response)
        message = {"role": "assistant", "content": full_response}
        st.session_state.messages.append(message)


if __name__ == "__main__":
    main()