"""Streamlit app: upload a PDF and ask questions about its content."""

import os
import pickle

import streamlit as st
from dotenv import load_dotenv
from langchain import HuggingFaceHub
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader
from streamlit_extras.add_vertical_space import add_vertical_space

# Sidebar "About" text. The link target must not be quoted: in Markdown the
# quotes would become part of the URL and break the link.
_ABOUT_MARKDOWN = """
## About
- This is LLM power chatbot
- By [Prathamesh Shete](https://www.linkedin.com/in/prathameshshete)
"""


def _extract_text(pdf):
    """Return the concatenated text of every page of the uploaded PDF."""
    reader = PdfReader(pdf)
    # `or ""` guards against a page yielding None instead of a string.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _split_into_chunks(text):
    """Split ``text`` into overlapping chunks (1000 chars, 200 overlap)."""
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(text=text)


def _load_or_build_store(pdf):
    """Return the vector store for ``pdf``, or ``None`` if it has no text.

    The store is cached as ``<pdf base name>.pkl`` in the current working
    directory. When the cache file exists it is loaded directly and the PDF
    is not parsed again; otherwise the PDF is read, chunked, embedded and
    the resulting store is written to the cache file.
    """
    # Use only the base name without extension so the cache file always lands
    # in the working directory, whatever the uploaded name looks like.
    # NOTE(review): the cache is keyed on the file name only, so two different
    # PDFs with the same name share one index.
    store_name = os.path.splitext(os.path.basename(pdf.name))[0]
    store_path = f'{store_name}.pkl'

    if os.path.exists(store_path):
        # SECURITY: pickle.load executes arbitrary code from the file. Only
        # safe while the .pkl files in this directory are written by this app.
        with open(store_path, 'rb') as f:
            return pickle.load(f)

    chunks = _split_into_chunks(_extract_text(pdf))
    if not chunks:
        # Nothing to embed (e.g. an image-only PDF); let the caller report it
        # instead of handing FAISS an empty list.
        return None

    embeddings = HuggingFaceEmbeddings()
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    with open(store_path, 'wb') as f:
        pickle.dump(vector_store, f)
    return vector_store


def main(pdf):
    """Render the chat page for an uploaded PDF.

    Args:
        pdf: The object returned by ``st.file_uploader`` (``None`` when no
            file has been uploaded yet). It must expose ``name`` and be
            readable by ``PdfReader``.
    """
    st.header('Chat With PDF')
    if pdf is None:
        return

    vector_store = _load_or_build_store(pdf)
    if vector_store is None:
        st.warning('No extractable text was found in this PDF.')
        return

    ask_query = st.text_input('Ask question about PDF: ')
    if not ask_query:
        return

    # Retrieve the three chunks most similar to the question and let the LLM
    # answer from them ("stuff" chain: all documents go into one prompt).
    docs = vector_store.similarity_search(query=ask_query, k=3)
    llm = HuggingFaceHub(
        repo_id="Salesforce/xgen-7b-8k-base",
        model_kwargs={"temperature": 0, "max_length": 64},
    )
    chain = load_qa_chain(llm=llm, chain_type='stuff')
    response = chain.run(input_documents=docs, question=ask_query)
    st.write(response)


if __name__ == "__main__":
    load_dotenv()
    st.sidebar.title('LLM PDF Chats')
    st.sidebar.markdown(_ABOUT_MARKDOWN)
    # add_vertical_space writes to the current container, so it has to run
    # inside the sidebar context to space the sidebar rather than the page.
    with st.sidebar:
        add_vertical_space(5)
    st.sidebar.write('Made By Prathamesh')
    pdf = st.file_uploader('Upload Your PDF', type='pdf')
    main(pdf)