Spaces:

MOHAMMED-N
/

Chat_with_NDMO

Running

MOHAMMED-N committed 4 days ago

Commit

7867b31

verified ·

1 Parent(s): cca3d6b

Create vectorstore.py

Files changed (1) hide show

vectorstore.py ADDED Viewed

+# vectorstore.py
+import os
+from langchain_community.document_loaders import PyPDFLoader
+from langchain_experimental.text_splitter import SemanticChunker
+from langchain_community.vectorstores import FAISS
+def load_or_build_vectorstore(local_file: str, index_folder: str, embeddings):
+    """
+    Loads a local FAISS index if it exists; otherwise,
+    builds a new index from the specified PDF file.
+    """
+    if os.path.exists(index_folder):
+        print("Loading existing FAISS index from disk...")
+        vectorstore = FAISS.load_local(index_folder, embeddings, allow_dangerous_deserialization=True)
+    else:
+        print("Building a new FAISS index...")
+        loader = PyPDFLoader(local_file)
+        documents = loader.load()
+        text_splitter = SemanticChunker(
+            embeddings=embeddings,
+            breakpoint_threshold_type='percentile',
+            breakpoint_threshold_amount=90
+        )
+        chunked_docs = text_splitter.split_documents(documents)
+        print(f"Document split into {len(chunked_docs)} chunks.")
+        vectorstore = FAISS.from_documents(chunked_docs, embeddings)
+        vectorstore.save_local(index_folder)
+    return vectorstore