pdfchat

Sleeping

File size: 4,360 Bytes

import streamlit as st
import requests
import os
import json
from dotenv import load_dotenv
import PyPDF2
import io
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

load_dotenv()

# Seed the per-session defaults on the first script run only; later reruns
# find the keys already present and leave the stored values untouched.
for _state_key, _make_default in (("vectorstore", lambda: None), ("chat_history", list)):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()

def reset_conversation():
    """Clear the stored vector store and chat history for this session.

    Used as the ``on_click`` callback of the 'Reset Chat' button.
    """
    st.session_state["chat_history"] = []
    st.session_state["vectorstore"] = None

def get_pdf_text(pdf_docs):
    """Return the concatenated text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of file-like objects (or paths) that
            ``PyPDF2.PdfReader`` can open.

    Returns:
        A single string with the page texts joined in document/page order,
        or ``""`` when ``pdf_docs`` is empty.
    """
    page_texts = []
    for pdf in pdf_docs:
        pdf_reader = PyPDF2.PdfReader(pdf)
        for page in pdf_reader.pages:
            # Pages without a text layer may yield nothing; coerce to "" so a
            # None result cannot break the concatenation below.
            page_texts.append(page.extract_text() or "")
    # Join once instead of growing a string with += inside the loop.
    return "".join(page_texts)

def get_text_chunks(text):
    """Split ``text`` into overlapping chunks for embedding.

    Splitting happens on newlines, with chunks of up to 1000 characters
    (measured with ``len``) and a 200-character overlap between neighbours.

    Returns:
        The list of chunk strings produced by ``CharacterTextSplitter``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(text)

def get_vectorstore(text_chunks):
    """Embed ``text_chunks`` and index them in a FAISS vector store.

    Uses ``HuggingFaceEmbeddings`` with its default settings.

    Returns:
        The FAISS vector store built from the given chunks.
    """
    return FAISS.from_texts(texts=text_chunks, embedding=HuggingFaceEmbeddings())

def get_together_response(prompt, history, *, timeout=60):
    """Send a chat-completion request to the Together API and return the reply.

    Args:
        prompt: The user message for the current turn.
        history: Iterable of ``(human, ai)`` string pairs from earlier turns;
            they are replayed as alternating user/assistant messages.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The assistant's reply text, or a string starting with ``"Error: "``
        when the API key is missing, the request fails, or the response body
        does not have the expected shape. This function does not raise for
        those cases.
    """
    api_key = os.getenv("TOGETHER_API_KEY")
    if not api_key:
        # Without this check the header would literally be "Bearer None".
        return "Error: TOGETHER_API_KEY is not set."

    url = "https://api.together.xyz/v1/chat/completions"
    model_link = "NousResearch/Nous-Hermes-2-Yi-34B"

    messages = [{"role": "system", "content": "You are an AI assistant that helps users understand the content of their PDFs. Provide concise and relevant answers based on the information in the documents."}]

    for human, ai in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})

    messages.append({"role": "user", "content": prompt})

    payload = {
        "model": model_link,
        "messages": messages,
        "temperature": 0.7,
        "top_p": 0.95,
        "top_k": 50,
        "repetition_penalty": 1,
        "max_tokens": 1024
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    try:
        # A timeout keeps a stalled connection from hanging the app forever.
        response = requests.post(url, json=payload, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()['choices'][0]['message']['content']
    except requests.exceptions.RequestException as e:
        return f"Error: {str(e)}"
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # The body was not JSON or lacked choices[0].message.content.
        return f"Error: unexpected response format ({e!r})"

def handle_userinput(user_question):
    """Answer ``user_question`` using the processed PDFs as context.

    Looks up the chunks most similar to the question in the session's vector
    store, sends them together with the question to the model, and records
    the ``(question, answer)`` pair in the session's chat history.

    Returns:
        The model's answer, or a prompt to process documents first when no
        vector store is available.
    """
    store = st.session_state.vectorstore
    if not store:
        return "Please upload and process PDF documents first."

    matches = store.similarity_search(user_question)
    context = "\n".join(match.page_content for match in matches)
    prompt = f"Context from PDFs:\n{context}\n\nQuestion: {user_question}\nAnswer:"

    answer = get_together_response(prompt, st.session_state.chat_history)
    st.session_state.chat_history.append((user_question, answer))
    return answer

# Streamlit application
st.set_page_config(page_title="Chat with your PDFs", page_icon=":books:")

st.header("Chat with your PDFs :books:")

# Sidebar: upload PDFs, build the vector store, and reset the session.
with st.sidebar:
    st.subheader("Your documents")
    # Restrict uploads to PDFs so other file types never reach the PDF reader.
    pdf_docs = st.file_uploader(
        "Upload your PDFs here and click on 'Process'",
        type="pdf",
        accept_multiple_files=True,
    )
    if st.button("Process"):
        if not pdf_docs:
            # Nothing uploaded: there is no text to index.
            st.warning("Please upload at least one PDF before processing.")
        else:
            with st.spinner("Processing"):
                # Get PDF text
                raw_text = get_pdf_text(pdf_docs)

                # Get the text chunks
                text_chunks = get_text_chunks(raw_text)

                if not text_chunks:
                    # E.g. scanned PDFs with no text layer; an index cannot
                    # be built from zero chunks, so stop here.
                    st.error("No extractable text was found in the uploaded PDFs.")
                else:
                    # Create vector store
                    st.session_state.vectorstore = get_vectorstore(text_chunks)

                    st.success("PDFs processed successfully!")

    st.button('Reset Chat', on_click=reset_conversation)

# Main chat interface
if st.session_state.vectorstore is None:
    st.write("Please upload PDF documents and click 'Process' to start chatting.")
else:
    # A form submits exactly once per click/Enter and clears the input
    # afterwards. A bare text_input keeps its value across reruns, so any
    # unrelated rerun (e.g. clicking 'Process' again) would re-send the same
    # question and append a duplicate turn to the history.
    with st.form("question_form", clear_on_submit=True):
        user_question = st.text_input("Ask a question about your documents:")
        submitted = st.form_submit_button("Ask")

    if submitted and user_question.strip():
        turns_before = len(st.session_state.chat_history)
        response = handle_userinput(user_question)
        # Recorded turns are rendered by the history section below; only show
        # the response here if it was not added to the history.
        if len(st.session_state.chat_history) == turns_before:
            st.write("AI: " + response)

# Display chat history
st.subheader("Chat History")
for human, ai in st.session_state.chat_history:
    st.write("Human: " + human)
    st.write("AI: " + ai)
    st.write("---")