import streamlit as st
from streamlit_chat import message

from langchain import HuggingFacePipeline
from langchain.chains import ConversationalRetrievalChain

from helper import pdf_loader, splitDoc, makeEmbeddings, create_flan_t5_base
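# NOTE: helper.py is not shown here. From the call sites below, the helpers
# are assumed to behave roughly as follows:
#   pdf_loader(path)         -> loads a PDF into LangChain Documents
#                               (e.g. via PyPDFLoader)
#   splitDoc(docs)           -> splits the documents into chunks
#                               (e.g. via CharacterTextSplitter)
#   makeEmbeddings(docs)     -> builds a FAISS vector store over the chunks
#                               (e.g. via HuggingFaceEmbeddings)
#   create_flan_t5_base(...) -> returns a transformers text2text-generation
#                               pipeline, presumably wrapping flan-t5-base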


def conversational_chat(chain, query):
    # ConversationalRetrievalChain expects the new question together with the
    # running chat history, and returns a dict whose "answer" key holds the
    # generated response.
    result = chain({"question": query,
                    "chat_history": st.session_state['history']})
    st.session_state['history'].append((query, result["answer"]))

    return result["answer"]


def ui():
    st.title('PDF Question Answer Bot')

    # create_flan_t5_base is assumed to return a transformers pipeline, which
    # HuggingFacePipeline wraps so LangChain chains can call it as an LLM.
    llm = create_flan_t5_base(load_in_8bit=False)
    hf_llm = HuggingFacePipeline(pipeline=llm)

    uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"])

    if uploaded_file is not None:
        # Persist the upload to disk so the PDF loader can read it by path.
        save_path = "./uploaded_file.pdf"
        with open(save_path, "wb") as f:
            f.write(uploaded_file.read())

        pdf_doc = pdf_loader(save_path)
        pdf_doc = splitDoc(pdf_doc)
        vector_database = makeEmbeddings(pdf_doc)

        # Fetch the 10 most similar chunks per question and wire that same
        # retriever into the conversational chain.
        retriever = vector_database.as_retriever(search_kwargs={"k": 10})
        qa_chain = ConversationalRetrievalChain.from_llm(llm=hf_llm,
                                                         retriever=retriever)

        st.text("The file has been uploaded successfully")

        # The chat interface needs qa_chain, so it is only shown once a PDF
        # has been uploaded and indexed.
        show_chat_interface(qa_chain)


def show_chat_interface(qa_chain):
    # Initialise the conversation state on first run.
    if 'history' not in st.session_state:
        st.session_state['history'] = []

    if 'generated' not in st.session_state:
        st.session_state['generated'] = ["Hello! Ask me anything about the uploaded PDF 🤖"]

    if 'past' not in st.session_state:
        st.session_state['past'] = ["Hey! 👋"]

    # Responses are rendered above the input form.
    response_container = st.container()
    container = st.container()

    with container:
        with st.form(key='my_form', clear_on_submit=True):
            user_input = st.text_input("Query:", placeholder="Talk about your PDF data here (:", key='input')
            submit_button = st.form_submit_button(label='Send')

            if submit_button and user_input:
                output = conversational_chat(qa_chain, user_input)

                st.session_state['past'].append(user_input)
                st.session_state['generated'].append(output)

    if st.session_state['generated']:
        with response_container:
            for i in range(len(st.session_state['generated'])):
                message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="big-smile")
                message(st.session_state["generated"][i], key=str(i), avatar_style="thumbs")


if __name__ == '__main__':
    ui()
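# To launch the app locally (assuming this file is saved as app.py):
#   streamlit run app.py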