# Streamlit app: chat with uploaded PDFs via Together AI chat completions + FAISS retrieval.
import streamlit as st
import requests
import os
import json
from dotenv import load_dotenv
import PyPDF2
import io
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
load_dotenv()
# Ensure session-scoped state exists before any widget callback touches it:
# "vectorstore" holds the FAISS index, "chat_history" the (question, answer) pairs.
for _key, _default in (("vectorstore", None), ("chat_history", [])):
    if _key not in st.session_state:
        st.session_state[_key] = _default
def reset_conversation():
    """Forget the indexed documents and wipe the conversation transcript."""
    st.session_state["vectorstore"] = None
    st.session_state["chat_history"] = []
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page of every uploaded PDF.

    Args:
        pdf_docs: iterable of file-like objects accepted by PyPDF2.PdfReader
            (e.g. Streamlit UploadedFile instances).

    Returns:
        All extracted page text joined into a single string ("" for no docs).
    """
    pages = []
    for pdf in pdf_docs:
        pdf_reader = PyPDF2.PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() can return None for pages with no extractable
            # text (e.g. scanned images) — guard against a TypeError, and
            # join once at the end instead of quadratic `+=` concatenation.
            pages.append(page.extract_text() or "")
    return "".join(pages)
def get_text_chunks(text):
    """Split raw document text into overlapping chunks for embedding.

    Chunks are up to 1000 characters with a 200-character overlap,
    preferring newline boundaries.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(text)
def get_vectorstore(text_chunks):
    """Embed the text chunks with the default HuggingFace model and index them in FAISS."""
    return FAISS.from_texts(
        texts=text_chunks,
        embedding=HuggingFaceEmbeddings(),
    )
def get_together_response(prompt, history):
    """Send the prompt plus prior turns to the Together chat-completions API.

    Args:
        prompt: the user's current message (already includes PDF context).
        history: list of (human, ai) string pairs from earlier turns.

    Returns:
        The assistant's reply text, or a string starting with "Error:" on
        any network or response-format failure (callers display it as-is).
    """
    url = "https://api.together.xyz/v1/chat/completions"
    model_link = "NousResearch/Nous-Hermes-2-Yi-34B"
    messages = [{"role": "system", "content": "You are an AI assistant that helps users understand the content of their PDFs. Provide concise and relevant answers based on the information in the documents."}]
    # Replay the conversation so the model sees the full dialogue context.
    for human, ai in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})
    messages.append({"role": "user", "content": prompt})
    payload = {
        "model": model_link,
        "messages": messages,
        "temperature": 0.7,
        "top_p": 0.95,
        "top_k": 50,
        "repetition_penalty": 1,
        "max_tokens": 1024
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "Authorization": f"Bearer {os.getenv('TOGETHER_API_KEY')}"
    }
    try:
        # A timeout keeps the Streamlit script from hanging forever if the
        # API stalls (requests has no default timeout).
        response = requests.post(url, json=payload, headers=headers, timeout=60)
        response.raise_for_status()
        return response.json()['choices'][0]['message']['content']
    except requests.exceptions.RequestException as e:
        return f"Error: {str(e)}"
    except (KeyError, IndexError, ValueError) as e:
        # Malformed JSON or an unexpected response shape — honor the
        # function's contract of returning an error string, not raising.
        return f"Error: unexpected API response ({e})"
def handle_userinput(user_question):
    """Answer a question against the indexed PDFs and record the exchange.

    Retrieves the most similar chunks from the vector store, builds a
    context-grounded prompt, queries the model, and appends the
    (question, answer) pair to the session chat history.
    """
    # Guard clause: nothing indexed yet.
    if not st.session_state.vectorstore:
        return "Please upload and process PDF documents first."
    matches = st.session_state.vectorstore.similarity_search(user_question)
    context = "\n".join(doc.page_content for doc in matches)
    prompt = (
        f"Context from PDFs:\n{context}\n\n"
        f"Question: {user_question}\nAnswer:"
    )
    answer = get_together_response(prompt, st.session_state.chat_history)
    st.session_state.chat_history.append((user_question, answer))
    return answer
# Streamlit application: page chrome and the document-upload sidebar.
st.set_page_config(page_title="Chat with your PDFs", page_icon=":books:")
st.header("Chat with your PDFs :books:")
# Sidebar: upload, process, and reset controls
with st.sidebar:
    st.subheader("Your documents")
    pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
    if st.button("Process"):
        if not pdf_docs:
            # Guard: with no files, the pipeline would feed an empty chunk
            # list to FAISS.from_texts, which raises. Warn instead.
            st.warning("Please upload at least one PDF before processing.")
        else:
            with st.spinner("Processing"):
                # Extract raw text, split it into chunks, and index them.
                raw_text = get_pdf_text(pdf_docs)
                text_chunks = get_text_chunks(raw_text)
                st.session_state.vectorstore = get_vectorstore(text_chunks)
            st.success("PDFs processed successfully!")
    st.button('Reset Chat', on_click=reset_conversation)
# Main chat interface: prompt for questions once a vector store exists.
# (Also removes a stray trailing "|" scrape artifact that broke the syntax
# of the final line.)
if st.session_state.vectorstore is None:
    st.write("Please upload PDF documents and click 'Process' to start chatting.")
else:
    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        # Answer the new question and echo the current exchange.
        response = handle_userinput(user_question)
        st.write("Human: " + user_question)
        st.write("AI: " + response)
    # Display the full chat history (includes the exchange just appended).
    st.subheader("Chat History")
    for human, ai in st.session_state.chat_history:
        st.write("Human: " + human)
        st.write("AI: " + ai)
        st.write("---")