gauravgulati619 aryanxxvii committed on
Commit
b72fb48
·
verified ·
1 Parent(s): 329e03a

Update app.py (#3)

Browse files

- Update app.py (3b889f09a94995033c8a5d8899a6b88e7f3b0347)


Co-authored-by: Aryan Wadhawan <aryanxxvii@users.noreply.huggingface.co>

Files changed (1) hide show
  1. app.py +49 -48
app.py CHANGED
@@ -38,56 +38,57 @@ embeddings = SentenceTransformerEmbeddings(
38
  device=device
39
  )
40
 
41
- # Define vectorstore paths consistently
42
- VECTORSTORE_DIR = "vectorstore/db_faiss"
43
- vectorstore_path = pathlib.Path(VECTORSTORE_DIR)
44
-
45
- # Create vectorstore directory if it doesn't exist
46
- vectorstore_path.mkdir(parents=True, exist_ok=True)
47
-
48
- if not (vectorstore_path / "index.faiss").exists():
49
- print("Creating new vectorstore...")
50
- # Load and split the PDF
51
- loader = PyPDFLoader("medical.pdf")
52
- documents = loader.load()
53
 
54
- # Split documents into chunks
55
- text_splitter = RecursiveCharacterTextSplitter(
56
- chunk_size=2000,
57
- chunk_overlap=100,
58
- length_function=len,
59
- )
60
- texts = text_splitter.split_documents(documents)
61
 
62
- # Create and save the vectorstore
63
- vectorstore = FAISS.from_documents(texts, embeddings)
64
-
65
- # If CUDA is available, convert index to GPU
66
- if device == "cuda":
67
- res = faiss.StandardGpuResources() # Initialize GPU resources
68
- index = vectorstore.index
69
- gpu_index = faiss.index_cpu_to_gpu(res, 0, index) # Move to GPU
70
- vectorstore.index = gpu_index
71
-
72
- # Save the vectorstore
73
- vectorstore.save_local(VECTORSTORE_DIR)
74
- print("Vectorstore created and saved successfully.")
75
- else:
76
- print("Loading existing vectorstore...")
77
- # Load existing vectorstore
78
- vectorstore = FAISS.load_local(
79
- folder_path=VECTORSTORE_DIR,
80
- embeddings=embeddings,
81
- allow_dangerous_deserialization=True
82
- )
83
-
84
- # If CUDA is available, convert loaded index to GPU
85
- if device == "cuda":
86
- res = faiss.StandardGpuResources() # Initialize GPU resources
87
- index = vectorstore.index
88
- gpu_index = faiss.index_cpu_to_gpu(res, 0, index) # Move to GPU
89
- vectorstore.index = gpu_index
90
- print("Vectorstore loaded successfully.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
  def get_relevant_context(query):
93
  try:
 
38
  device=device
39
  )
40
 
41
def create_vectorstore():
    """Build the FAISS vectorstore from ``medical.pdf``, or load it from disk.

    The store lives in ``vectorstore/db_faiss``. When that directory does not
    yet contain ``index.faiss``, the PDF is loaded, split into chunks,
    embedded with the module-level ``embeddings`` object, and the resulting
    store is saved there. Otherwise the saved store is loaded.

    When the module-level ``device`` equals ``"cuda"``, the index is moved to
    GPU 0 after any saving has been done.

    Returns:
        The FAISS vectorstore. The caller must bind the result (for example
        ``vectorstore = create_vectorstore()``); nothing is assigned at
        module level by this function.
    """
    vectorstore_dir = "vectorstore/db_faiss"
    vectorstore_path = pathlib.Path(vectorstore_dir)

    # Create the vectorstore directory if it doesn't exist.
    vectorstore_path.mkdir(parents=True, exist_ok=True)

    if (vectorstore_path / "index.faiss").exists():
        print("Loading existing vectorstore...")
        # SECURITY NOTE: load_local unpickles the docstore, so this flag is
        # only acceptable because the directory is produced by this same
        # function. Never point it at files obtained from an untrusted source.
        vectorstore = FAISS.load_local(
            folder_path=vectorstore_dir,
            embeddings=embeddings,
            allow_dangerous_deserialization=True,
        )
        done_message = "Vectorstore loaded successfully."
    else:
        print("Creating new vectorstore...")
        # Load the PDF and split it into chunks.
        documents = PyPDFLoader("medical.pdf").load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=2000,
            chunk_overlap=100,
            length_function=len,
        )
        texts = text_splitter.split_documents(documents)

        vectorstore = FAISS.from_documents(texts, embeddings)

        # Save while the index is still on the CPU: a GPU index cannot be
        # written to disk directly, so saving must precede the GPU transfer.
        vectorstore.save_local(vectorstore_dir)
        done_message = "Vectorstore created and saved successfully."

    # If CUDA is available, move the (created or loaded) index to GPU 0.
    if device == "cuda":
        gpu_resources = faiss.StandardGpuResources()
        vectorstore.index = faiss.index_cpu_to_gpu(
            gpu_resources, 0, vectorstore.index
        )

    print(done_message)
    return vectorstore
92
 
93
  def get_relevant_context(query):
94
  try: