# pdfchat / app.py — Streamlit "chat with your PDFs" app (Together AI + FAISS)
# (Hugging Face Spaces header: uploaded by ogegadavis254, commit 399202c verified)
import streamlit as st
import requests
import os
import json
from dotenv import load_dotenv
import PyPDF2
import io
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
load_dotenv()  # pull TOGETHER_API_KEY (and any other vars) from a local .env file

# Initialize session state variables so Streamlit reruns don't lose state.
if "vectorstore" not in st.session_state:
    st.session_state.vectorstore = None
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
def reset_conversation():
    """Drop the indexed documents and wipe the chat transcript."""
    st.session_state["vectorstore"] = None
    st.session_state["chat_history"] = []
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page in every uploaded PDF.

    Args:
        pdf_docs: iterable of file-like objects (e.g. Streamlit UploadedFile)
            that PyPDF2.PdfReader can read.

    Returns:
        str: all page text joined together; "" when there are no PDFs/pages.
    """
    parts = []
    for pdf in pdf_docs:
        pdf_reader = PyPDF2.PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() returns None for pages with no extractable text
            # (e.g. scanned images); guard so we don't crash on `str + None`.
            parts.append(page.extract_text() or "")
    # join() instead of repeated += avoids quadratic string concatenation.
    return "".join(parts)
def get_text_chunks(text):
    """Split *text* into overlapping ~1000-char chunks for embedding/retrieval."""
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(text)
def get_vectorstore(text_chunks):
    """Embed the text chunks with HuggingFace embeddings and index them in FAISS."""
    return FAISS.from_texts(
        texts=text_chunks,
        embedding=HuggingFaceEmbeddings(),
    )
def get_together_response(prompt, history):
    """Send the conversation (history + current prompt) to Together AI.

    Args:
        prompt: the user's current question (already augmented with PDF context).
        history: list of (human, ai) string pairs from earlier turns.

    Returns:
        str: the assistant's reply, or an "Error: ..." string on any failure
        (network error, HTTP error, timeout, or malformed response body).
    """
    url = "https://api.together.xyz/v1/chat/completions"
    model_link = "NousResearch/Nous-Hermes-2-Yi-34B"
    messages = [{"role": "system", "content": "You are an AI assistant that helps users understand the content of their PDFs. Provide concise and relevant answers based on the information in the documents."}]
    # Replay the prior turns so the model keeps conversational context.
    for human, ai in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})
    messages.append({"role": "user", "content": prompt})
    payload = {
        "model": model_link,
        "messages": messages,
        "temperature": 0.7,
        "top_p": 0.95,
        "top_k": 50,
        "repetition_penalty": 1,
        "max_tokens": 1024
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "Authorization": f"Bearer {os.getenv('TOGETHER_API_KEY')}"
    }
    try:
        # A timeout prevents the Streamlit worker from hanging forever if the
        # API stalls; Timeout is a RequestException, so it is caught below.
        response = requests.post(url, json=payload, headers=headers, timeout=60)
        response.raise_for_status()
        return response.json()['choices'][0]['message']['content']
    except requests.exceptions.RequestException as e:
        return f"Error: {str(e)}"
    except (KeyError, IndexError, ValueError) as e:
        # Malformed/unexpected response body (invalid JSON or missing fields)
        # previously propagated as an unhandled exception; surface it the same
        # way other failures are reported.
        return f"Error: unexpected API response ({e})"
def handle_userinput(user_question):
    """Answer *user_question* from the indexed PDFs and record the exchange.

    Returns the model's answer, or an instruction to upload and process PDFs
    first when no vector store has been built yet.
    """
    vectorstore = st.session_state.vectorstore
    if not vectorstore:
        return "Please upload and process PDF documents first."
    # Retrieve the most relevant chunks and pack them into the prompt.
    matches = vectorstore.similarity_search(user_question)
    context = "\n".join(doc.page_content for doc in matches)
    prompt = f"Context from PDFs:\n{context}\n\nQuestion: {user_question}\nAnswer:"
    answer = get_together_response(prompt, st.session_state.chat_history)
    st.session_state.chat_history.append((user_question, answer))
    return answer
# Streamlit application
st.set_page_config(page_title="Chat with your PDFs", page_icon=":books:")
st.header("Chat with your PDFs :books:")

# Sidebar: upload/process PDFs and reset the conversation.
with st.sidebar:
    st.subheader("Your documents")
    pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
    if st.button("Process"):
        if not pdf_docs:
            # Guard: with no files uploaded, an empty chunk list would reach
            # FAISS.from_texts and error out; tell the user instead.
            st.warning("Please upload at least one PDF before processing.")
        else:
            with st.spinner("Processing"):
                # Get PDF text
                raw_text = get_pdf_text(pdf_docs)
                # Get the text chunks
                text_chunks = get_text_chunks(raw_text)
                # Create vector store
                st.session_state.vectorstore = get_vectorstore(text_chunks)
            st.success("PDFs processed successfully!")
    st.button('Reset Chat', on_click=reset_conversation)

# Main chat interface
if st.session_state.vectorstore is None:
    st.write("Please upload PDF documents and click 'Process' to start chatting.")
else:
    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        response = handle_userinput(user_question)
        st.write("Human: " + user_question)
        st.write("AI: " + response)
    # Display chat history
    st.subheader("Chat History")
    for human, ai in st.session_state.chat_history:
        st.write("Human: " + human)
        st.write("AI: " + ai)
        st.write("---")