"""Streamlit page: index a web page's content into the shared vector store.

Presents a form that accepts a URL, loads and chunks the page, adds the
chunks to the project-wide ``vectorstore``, and stashes the store in
``st.session_state`` for other pages to reuse.
"""

import streamlit as st
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

from src import vectorstore

print("Loading Index Page!!")


def _text_splitter(doc):
    """Split loaded documents into overlapping chunks for embedding.

    Args:
        doc: sequence of LangChain Documents to split.

    Returns:
        The transformed (chunked) documents.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=600,
        chunk_overlap=50,
        length_function=len,
    )
    return text_splitter.transform_documents(doc)


def _load_docs(path: str):
    """Fetch the web page at *path* and return it already split into chunks.

    Note: the result is chunked — callers must NOT split it again.
    """
    load_doc = WebBaseLoader(path).load()
    return _text_splitter(load_doc)


with st.form("Index documents to Vector Store"):
    file_path = st.text_input(
        label="Enter the web link",
        value="",
        placeholder="",
        label_visibility="visible",
        disabled=False,
    )
    print("file_path ", file_path)
    submitted = st.form_submit_button("Submit")
    if submitted:
        st.write("Submitted web link: " + file_path)
        # BUG FIX: _load_docs already chunks the page via _text_splitter;
        # the previous code split the result a second time, needlessly
        # re-processing already-sized chunks.
        webpage_chunks = _load_docs(file_path)

        # Persist embeddings in the vector store; serialized length is a
        # cheap before/after sanity check that documents were added.
        print("vectorstore length before addition, ",
              len(vectorstore.serialize_to_bytes()))
        vectorstore.add_documents(webpage_chunks)
        print("vectorstore length after addition, ",
              len(vectorstore.serialize_to_bytes()))

        # Share the populated store with the rest of the Streamlit session.
        st.session_state['vectorstore'] = vectorstore
        st.markdown(
            'Document loaded to vector store successfully!!',
            unsafe_allow_html=True,
        )