# Spaces: Runtime error
import streamlit as st | |
from dotenv import load_dotenv | |
import pickle | |
from PyPDF2 import PdfReader | |
from streamlit_extras.add_vertical_space import add_vertical_space | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.vectorstores import FAISS | |
from langchain.llms import OpenAI | |
from langchain.chains.question_answering import load_qa_chain | |
from langchain.callbacks import get_openai_callback | |
import os | |
# Sidebar: static "about" panel describing the app and its processing steps.
# Written with st.sidebar's object notation; each call renders into the sidebar.
st.sidebar.title('PDF Chat App')
st.sidebar.markdown('''
## About
This app is an LLM-powered PDF chatbot built using:
- [Streamlit](https://streamlit.io/)
- [LangChain](https://python.langchain.com/)
- [OpenAI](https://platform.openai.com/docs/models) LLM model
## How it works
- Load up a PDF file
- Extract the text from the PDF file
- Split the text into chunks
- Create embeddings using OpenAI, which are vectors of floating-point numbers that measure the relatedness of text strings
- Save these embeddings as vectors in a vector store, such as FAISS
- Use a similarity search to ask a question
- Get the answer and tokens used from OpenAI
''')
st.sidebar.write('Made with 🤖 by [Cazimir Roman](https://cazimir.dev)')
def _extract_pdf_text(pdf):
    """Return the text of every page of *pdf*, concatenated in page order."""
    # Defensive: a page without a text layer (e.g. a scanned image) may yield
    # an empty or missing result; `or ""` keeps the join from failing on it.
    return "".join(page.extract_text() or "" for page in PdfReader(pdf).pages)


def _load_or_create_vector_store(cache_path, chunks):
    """Load the pickled vector store at *cache_path*, or build and cache it.

    When the cache file does not exist, the chunks are embedded with
    ``OpenAIEmbeddings``, indexed with FAISS, and the resulting store is
    pickled to *cache_path* for later runs.
    """
    if os.path.exists(cache_path):
        # SECURITY: pickle.load can execute arbitrary code. This is only
        # acceptable because the file is a cache this app wrote itself;
        # never point it at a file obtained from an untrusted source.
        with open(cache_path, "rb") as f:
            vector_store = pickle.load(f)
        st.success('Text embeddings loaded from disk')
        return vector_store

    with st.spinner("Creating vector store embeddings..."):
        vector_store = FAISS.from_texts(chunks, OpenAIEmbeddings())
        with open(cache_path, "wb") as f:
            pickle.dump(vector_store, f)
    st.success('Embeddings computation completed')
    return vector_store


def load_app():
    """Render the PDF upload + question-answering part of the page.

    Flow: upload a PDF, extract its text, split it into overlapping chunks,
    load or compute the embeddings vector store, then answer the user's
    question from the most similar chunks and show the tokens consumed.

    Returns nothing; all output goes to the Streamlit page. Returns early
    when no file has been uploaded, when the PDF has no extractable text,
    or when no question has been entered yet.
    """
    import hashlib  # local import: only needed for the cache key below

    # upload a PDF file
    pdf = st.file_uploader("Upload your PDF", type='pdf')
    if pdf is None:
        return

    text = _extract_pdf_text(pdf)
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text=text)
    if not chunks:
        # Building a FAISS index from zero texts fails, so stop here with a
        # readable message instead of an exception.
        st.warning("No extractable text was found in this PDF.")
        return

    # Cache key = file stem + digest of the file's bytes. Keying on the name
    # alone would silently reuse stale embeddings when a different document
    # with the same file name is uploaded. basename() guards against a
    # client-supplied name containing path separators.
    store_name = os.path.splitext(os.path.basename(pdf.name))[0]
    digest = hashlib.sha256(pdf.getvalue()).hexdigest()[:16]
    vector_store = _load_or_create_vector_store(
        f"{store_name}-{digest}.pkl", chunks
    )

    # Accept user question/query
    st.divider()
    query = st.text_input("Ask a question about your PDF file")
    if not query:
        return

    st.write(f"You asked: {query}")
    with st.spinner("Thinking..."):
        # Chunks most similar to the query; no `k` is passed, so the vector
        # store's default result count applies (4 in LangChain).
        docs = vector_store.similarity_search(query)
        llm = OpenAI(temperature=0)
        chain = load_qa_chain(llm=llm, chain_type="stuff")
        # The callback records OpenAI token usage for calls made inside it.
        with get_openai_callback() as cb:
            response = chain.run(input_documents=docs, question=query)
    st.write(response)
    # The sidebar promises "tokens used"; surface what the callback recorded.
    st.caption(f"Tokens used: {cb.total_tokens}")
def main():
    """Render the page header and API-key form, then start the PDF chat UI.

    The key is taken from the ``OPENAI_API_KEY`` environment variable when
    set; otherwise the user enters one and presses Submit. A submitted key
    that passes the sanity check is stored in ``os.environ`` so that later
    Streamlit reruns (and the OpenAI client) can read it.
    """
    print("Main called")
    st.header("Chat with your PDF")

    open_ai_key = os.getenv("OPENAI_API_KEY")
    container = st.container()
    with container:
        api_key = container.text_input(
            "Enter your OpenAI API key",
            type="password",
            value=open_ai_key or "",
        )
        # You can find it here: https://platform.openai.com/account/api-keys
        submit = container.button("Submit")

    # submit button is pressed
    if submit:
        # Pasted keys often carry stray whitespace; ignore it.
        candidate = api_key.strip()
        # Heuristic sanity check only: the original required exactly 51
        # characters, which rejects longer key formats. Treat 51 as a
        # minimum; the OpenAI API itself is the real validation.
        if len(candidate) >= 51:
            # NOTE(review): os.environ is process-wide, so on a shared
            # deployment a submitted key is visible to every session —
            # consider per-session storage instead.
            os.environ["OPENAI_API_KEY"] = candidate
            open_ai_key = candidate
        else:
            st.error("Api key is not correct")

    # Call load_app() at most once per script run. The original called it
    # from both the env-key branch and the submit branch, so pressing Submit
    # while the env key was already set created every widget twice.
    if open_ai_key:
        load_app()
# Script entry point: build the page only when this file is executed directly.
if __name__ == "__main__":
    main()