import io
import json
import os

import PyPDF2
import requests
import streamlit as st
from dotenv import load_dotenv
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

load_dotenv()

# Initialize session state variables
if "vectorstore" not in st.session_state:
    st.session_state.vectorstore = None
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []


def reset_conversation():
    """Clear both the stored vector index and the chat history.

    Note that this discards the processed documents as well as the
    conversation, so PDFs have to be processed again afterwards.
    """
    st.session_state.vectorstore = None
    st.session_state.chat_history = []


def get_pdf_text(pdf_docs):
    """Return the concatenated text of every page of every PDF in *pdf_docs*.

    Each item of *pdf_docs* is handed to ``PyPDF2.PdfReader``. Pages are
    joined with a newline so that the last word of one page does not fuse
    with the first word of the next (the splitter below splits on "\\n").
    """
    page_texts = []
    for pdf in pdf_docs:
        pdf_reader = PyPDF2.PdfReader(pdf)
        for page in pdf_reader.pages:
            # Guard against extract_text() yielding None for a page with no
            # text layer; the original `text += None` would raise TypeError.
            page_texts.append(page.extract_text() or "")
    return "\n".join(page_texts)


def get_text_chunks(text):
    """Split *text* on newlines into chunks of ~1000 chars with 200 overlap."""
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return text_splitter.split_text(text)


def get_vectorstore(text_chunks):
    """Embed *text_chunks* with HuggingFaceEmbeddings and index them in FAISS.

    Raises:
        ValueError: if *text_chunks* is empty, since there is nothing to index.
    """
    if not text_chunks:
        raise ValueError("Cannot build a vector store from zero text chunks.")
    embeddings = HuggingFaceEmbeddings()
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)


def get_together_response(prompt, history, timeout=60):
    """Send *prompt* plus prior *history* to the Together chat-completions API.

    Args:
        prompt: The user message for this turn.
        history: Iterable of ``(human, ai)`` string pairs from earlier turns;
            they are replayed as alternating user/assistant messages.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The assistant's reply text, or a string starting with ``"Error: "``
        when the key is missing, the request fails, or the response body
        does not have the expected shape.
    """
    url = "https://api.together.xyz/v1/chat/completions"
    model_link = "NousResearch/Nous-Hermes-2-Yi-34B"

    api_key = os.getenv("TOGETHER_API_KEY")
    if not api_key:
        # Avoid sending a literal "Bearer None" header.
        return "Error: TOGETHER_API_KEY is not set."

    messages = [
        {
            "role": "system",
            "content": (
                "You are an AI assistant that helps users understand the "
                "content of their PDFs. Provide concise and relevant answers "
                "based on the information in the documents."
            ),
        }
    ]
    for human, ai in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})
    messages.append({"role": "user", "content": prompt})

    payload = {
        "model": model_link,
        "messages": messages,
        "temperature": 0.7,
        "top_p": 0.95,
        "top_k": 50,
        "repetition_penalty": 1,
        "max_tokens": 1024,
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    try:
        # Without a timeout, requests may block indefinitely on a stalled
        # connection and freeze the app.
        response = requests.post(
            url, json=payload, headers=headers, timeout=timeout
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except requests.exceptions.RequestException as e:
        return f"Error: {str(e)}"
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # The body was not JSON or lacked choices[0].message.content.
        return f"Error: unexpected response format from API ({e!r})"


def handle_userinput(user_question):
    """Answer *user_question* using the indexed PDFs and record the exchange.

    Retrieves similar chunks from the session's vector store, builds a
    context-augmented prompt, queries the model, and appends
    ``(user_question, response)`` to ``st.session_state.chat_history``.
    Returns the response text, or a hint to process documents first when no
    vector store exists yet.
    """
    if st.session_state.vectorstore is None:
        return "Please upload and process PDF documents first."

    docs = st.session_state.vectorstore.similarity_search(user_question)
    context = "\n".join([doc.page_content for doc in docs])
    prompt = (
        f"Context from PDFs:\n{context}\n\nQuestion: {user_question}\nAnswer:"
    )
    response = get_together_response(prompt, st.session_state.chat_history)
    st.session_state.chat_history.append((user_question, response))
    return response
# Streamlit application
st.set_page_config(page_title="Chat with your PDFs", page_icon=":books:")
st.header("Chat with your PDFs :books:")

# Sidebar: document upload/processing and the reset control.
with st.sidebar:
    st.subheader("Your documents")
    pdf_docs = st.file_uploader(
        "Upload your PDFs here and click on 'Process'",
        accept_multiple_files=True,
    )
    if st.button("Process"):
        if not pdf_docs:
            # Nothing uploaded: building an index from zero chunks would fail.
            st.warning("Please upload at least one PDF before processing.")
        else:
            with st.spinner("Processing"):
                # Get PDF text
                raw_text = get_pdf_text(pdf_docs)
                # Get the text chunks
                text_chunks = get_text_chunks(raw_text)
                # Create vector store (only when there is something to index)
                if text_chunks:
                    st.session_state.vectorstore = get_vectorstore(text_chunks)
            if text_chunks:
                st.success("PDFs processed successfully!")
            else:
                st.error("No extractable text was found in the uploaded PDFs.")
    st.button('Reset Chat', on_click=reset_conversation)

# Main chat interface
if st.session_state.vectorstore is None:
    st.write("Please upload PDF documents and click 'Process' to start chatting.")
else:
    # A form with clear_on_submit ensures a question is sent exactly once.
    # A bare text_input keeps its value across reruns, so any unrelated
    # rerun (e.g. clicking "Process" again) would re-ask the same question
    # and append a duplicate entry to the history.
    with st.form("question_form", clear_on_submit=True):
        user_question = st.text_input("Ask a question about your documents:")
        submitted = st.form_submit_button("Ask")
    if submitted and user_question.strip():
        response = handle_userinput(user_question)
        st.write("Human: " + user_question)
        st.write("AI: " + response)

# Display chat history
st.subheader("Chat History")
for human, ai in st.session_state.chat_history:
    st.write("Human: " + human)
    st.write("AI: " + ai)
    st.write("---")