alexkueck committed on
Commit
9faf264
·
verified ·
1 Parent(s): 29813fe

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +14 -6
utils.py CHANGED
@@ -390,17 +390,17 @@ def document_storage_chroma(splits):
390
  embedding_fn = HuggingFaceEmbeddings(model_name=EMBEDDING_MODELL, model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
391
 
392
  # Vectorstore initialisieren und Dokumente hinzufügen
393
- vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_fn) #, persist_directory = PATH_WORK + CHROMA_DIR)
394
 
395
  return vectorstore
396
 
397
  ########################################################
398
  #Vektorstore speichern - bzw. laden
399
- def save_vectorstore(vectorstore, filename="vectorstore.pkl"):
400
  with open(filename, "wb") as f:
401
- pickle.dump(vectorstore, f)
402
 
403
- def load_vectorstore(filename="vectorstore.pkl"):
404
  if os.path.exists(filename):
405
  with open(filename, "rb") as f:
406
  return pickle.load(f)
@@ -413,11 +413,19 @@ def create_vectorstore():
413
  if PREPROCESSED_SPLITS:
414
  # Vektordatenbank zu den Splits erstellen
415
  vektordatenbank = document_storage_chroma(PREPROCESSED_SPLITS)
416
- # Speichern des Vektorstores
417
- save_vectorstore(vektordatenbank)
418
  return vektordatenbank
419
  else:
420
  return None
 
 
 
 
 
 
 
 
421
 
422
  # Beispiel-Upload-Funktion
423
  def upload_file_to_huggingface(file_path, upload_path):
 
390
  embedding_fn = HuggingFaceEmbeddings(model_name=EMBEDDING_MODELL, model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
391
 
392
  # Vectorstore initialisieren und Dokumente hinzufügen
393
+ vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_fn)
394
 
395
  return vectorstore
396
 
397
  ########################################################
398
  # Save / load the splits and metadata (the vector store is rebuilt from them)
399
def save_splits_and_metadata(splits, filename="splits_and_metadata.pkl"):
    """Pickle *splits* to *filename* without clobbering an existing file on failure.

    The payload is first written to ``<filename>.tmp`` and then moved over the
    target with ``os.replace``. If pickling or writing fails, the previous
    contents of *filename* (if any) stay intact and the exception propagates.

    Args:
        splits: Any picklable object; it is passed to ``pickle.dump`` unchanged.
        filename: Destination path of the pickle file.

    Raises:
        Whatever ``open``, ``pickle.dump`` or ``os.replace`` raise
        (e.g. ``OSError``, ``pickle.PicklingError``).
    """
    tmp_filename = f"{filename}.tmp"
    try:
        with open(tmp_filename, "wb") as f:
            pickle.dump(splits, f)
        # Swap the finished file into place only after a complete write.
        os.replace(tmp_filename, filename)
    finally:
        # After a successful replace the temp file is gone; this only cleans
        # up the partial file left behind by a failed write.
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)
402
 
403
+ def load_splits_and_metadata(filename="splits_and_metadata.pkl"):
404
  if os.path.exists(filename):
405
  with open(filename, "rb") as f:
406
  return pickle.load(f)
 
413
  if PREPROCESSED_SPLITS:
414
  # Vektordatenbank zu den Splits erstellen
415
  vektordatenbank = document_storage_chroma(PREPROCESSED_SPLITS)
416
+ # Speichern der Splits und Metadaten
417
+ save_splits_and_metadata((PREPROCESSED_SPLITS, SPLIT_TO_ORIGINAL_MAPPING))
418
  return vektordatenbank
419
  else:
420
  return None
421
+
422
+ #Laden des Vektorstores - aus den gespeicherten splits
423
def load_vectorstore():
    """Rebuild the vector store from the previously pickled splits.

    Loads the payload through ``load_splits_and_metadata()`` (using its
    default file name), unpacks it as a two-item
    ``(splits, split_to_original_mapping)`` pair, and passes the splits to
    ``document_storage_chroma``.

    Returns:
        The result of ``document_storage_chroma(splits)``, or ``None`` when
        ``load_splits_and_metadata()`` returns ``None``.

    Raises:
        TypeError, ValueError: If the loaded payload cannot be unpacked into
            exactly two items.
    """
    splits_and_metadata = load_splits_and_metadata()
    if splits_and_metadata is None:
        return None

    # NOTE(review): the earlier version unpacked into names spelled
    # PREPROCESSED_SPLITS / SPLIT_TO_ORIGINAL_MAPPING without a `global`
    # statement, so it only ever bound function locals. Plain local names
    # make that explicit. If the loaded mapping is supposed to be restored
    # for other code, that has to be done deliberately - TODO confirm.
    splits, _split_to_original_mapping = splits_and_metadata
    return document_storage_chroma(splits)
429
 
430
  # Beispiel-Upload-Funktion
431
  def upload_file_to_huggingface(file_path, upload_path):