File size: 4,360 Bytes
9f54a3b
26d6aae
dde565b
0e00146
dde565b
1110d7a
 
 
 
 
dde565b
 
a9c7401
1110d7a
399202c
 
1110d7a
 
26d6aae
dde565b
399202c
1110d7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399202c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1110d7a
 
399202c
 
 
 
 
 
 
 
 
 
 
866f7a8
26d6aae
1110d7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399202c
 
 
1110d7a
 
 
 
399202c
1110d7a
 
 
 
399202c
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import streamlit as st
import requests
import os
import json
from dotenv import load_dotenv
import PyPDF2
import io
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Load variables from a .env file (if any) into the process environment.
load_dotenv()

# Seed the per-session state. Only missing keys are set, so values that are
# already stored in the session are left untouched.
for _state_key, _initial_value in (("vectorstore", None), ("chat_history", [])):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

def reset_conversation():
    """Clear the session: drop the vector store and empty the chat history."""
    session = st.session_state
    session.vectorstore = None
    session.chat_history = []

def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page of the given PDFs.

    Args:
        pdf_docs: Iterable of objects that ``PyPDF2.PdfReader`` can open
            (in this app, the files returned by the Streamlit uploader).
            ``None`` or an empty iterable is treated as "no documents".

    Returns:
        str: The page texts joined in document order, then page order, with
        no separator between them. Empty string when there is nothing to read.
    """
    page_texts = []
    # `or ()` lets callers pass None (nothing uploaded) without a TypeError.
    for pdf in pdf_docs or ():
        pdf_reader = PyPDF2.PdfReader(pdf)
        # A page without extractable text may yield None/""; normalise to ""
        # so the join below never sees a non-string.
        page_texts.extend(page.extract_text() or "" for page in pdf_reader.pages)
    # Single join instead of repeated `+=` on a growing string.
    return "".join(page_texts)

def get_text_chunks(text):
    """Split ``text`` into overlapping chunks.

    The text is split on newlines with a target chunk size of 1000 and an
    overlap of 200, both measured with ``len``.

    Args:
        text: The full text to split.

    Returns:
        The chunks produced by ``CharacterTextSplitter.split_text``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(text)

def get_vectorstore(text_chunks):
    """Build a FAISS vector store from ``text_chunks``.

    Embeddings come from a ``HuggingFaceEmbeddings`` instance created with
    its default settings.

    Args:
        text_chunks: The texts to index.

    Returns:
        The store returned by ``FAISS.from_texts``.
    """
    embedding_model = HuggingFaceEmbeddings()
    return FAISS.from_texts(texts=text_chunks, embedding=embedding_model)

def get_together_response(prompt, history, timeout=60):
    """Ask the Together chat-completions API for a reply to ``prompt``.

    The request contains a fixed system message, every earlier turn from
    ``history`` and finally ``prompt`` as the newest user message.

    Args:
        prompt: Text of the new user message.
        history: Iterable of ``(human, ai)`` pairs for the earlier turns,
            in the order they should be replayed to the model.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        str: The assistant message content on success. On any failure
        (missing API key, network/HTTP error, unexpected response shape) a
        string starting with ``"Error: "`` is returned instead of raising.
    """
    # Fail fast instead of sending a literal "Bearer None" header.
    api_key = os.getenv("TOGETHER_API_KEY")
    if not api_key:
        return "Error: TOGETHER_API_KEY environment variable is not set"

    url = "https://api.together.xyz/v1/chat/completions"
    model_link = "NousResearch/Nous-Hermes-2-Yi-34B"

    messages = [{"role": "system", "content": "You are an AI assistant that helps users understand the content of their PDFs. Provide concise and relevant answers based on the information in the documents."}]

    # Replay earlier turns so the model sees the whole conversation.
    for human, ai in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})

    messages.append({"role": "user", "content": prompt})

    payload = {
        "model": model_link,
        "messages": messages,
        "temperature": 0.7,
        "top_p": 0.95,
        "top_k": 50,
        "repetition_penalty": 1,
        "max_tokens": 1024,
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.post(url, json=payload, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()['choices'][0]['message']['content']
    except requests.exceptions.RequestException as e:
        return f"Error: {str(e)}"
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # A 2xx response whose body is not the expected JSON structure.
        return f"Error: unexpected response from API ({e!r})"

def handle_userinput(user_question):
    """Answer ``user_question`` using the processed PDFs as context.

    When a vector store is available, the documents most similar to the
    question are looked up, their contents are joined into a context block,
    and the resulting prompt is sent to ``get_together_response`` together
    with the current chat history. The ``(question, response)`` pair is then
    appended to the chat history.

    Args:
        user_question: The question typed by the user.

    Returns:
        str: The response text, or a reminder to process PDFs first when no
        vector store is available (in which case the history is unchanged).
    """
    store = st.session_state.vectorstore
    if not store:
        return "Please upload and process PDF documents first."

    matches = store.similarity_search(user_question)
    context = "\n".join(match.page_content for match in matches)
    prompt = f"Context from PDFs:\n{context}\n\nQuestion: {user_question}\nAnswer:"

    answer = get_together_response(prompt, st.session_state.chat_history)
    st.session_state.chat_history.append((user_question, answer))
    return answer

# Streamlit application
st.set_page_config(page_title="Chat with your PDFs", page_icon=":books:")

st.header("Chat with your PDFs :books:")

# Sidebar: upload, processing and reset controls
with st.sidebar:
    st.subheader("Your documents")
    pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
    if st.button("Process"):
        if not pdf_docs:
            # Nothing uploaded: there would be zero chunks to index.
            st.warning("Please upload at least one PDF before clicking 'Process'.")
        else:
            with st.spinner("Processing"):
                # Get PDF text
                raw_text = get_pdf_text(pdf_docs)

                # Get the text chunks
                text_chunks = get_text_chunks(raw_text)

                if text_chunks:
                    # Create vector store
                    st.session_state.vectorstore = get_vectorstore(text_chunks)
                    st.success("PDFs processed successfully!")
                else:
                    # e.g. scanned PDFs without a text layer
                    st.error("No extractable text was found in the uploaded PDFs.")

    st.button('Reset Chat', on_click=reset_conversation)

# Snapshot taken before answering, so the exchange rendered in the main area
# during this run is not repeated in the history section below.
earlier_exchanges = list(st.session_state.chat_history)

# Main chat interface
if st.session_state.vectorstore is None:
    st.write("Please upload PDF documents and click 'Process' to start chatting.")
else:
    # A form submits the question only when the button is pressed. A bare
    # text_input keeps its value across reruns, so any other interaction
    # (e.g. clicking 'Process' again) would re-send the same question.
    with st.form("question_form", clear_on_submit=True):
        user_question = st.text_input("Ask a question about your documents:")
        submitted = st.form_submit_button("Ask")

    if submitted and user_question.strip():
        response = handle_userinput(user_question)

        st.write("Human: " + user_question)
        st.write("AI: " + response)

# Display chat history (exchanges from before this run)
st.subheader("Chat History")
for human, ai in earlier_exchanges:
    st.write("Human: " + human)
    st.write("AI: " + ai)
    st.write("---")