Update utils.py
Browse files
utils.py
CHANGED
@@ -390,17 +390,17 @@ def document_storage_chroma(splits):
|
|
390 |
embedding_fn = HuggingFaceEmbeddings(model_name=EMBEDDING_MODELL, model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
|
391 |
|
392 |
# Vectorstore initialisieren und Dokumente hinzufügen
|
393 |
-
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_fn)
|
394 |
|
395 |
return vectorstore
|
396 |
|
397 |
########################################################
|
398 |
#Vektorstore speichern - bzw. laden
|
399 |
-
def
|
400 |
with open(filename, "wb") as f:
|
401 |
-
pickle.dump(
|
402 |
|
403 |
-
def
|
404 |
if os.path.exists(filename):
|
405 |
with open(filename, "rb") as f:
|
406 |
return pickle.load(f)
|
@@ -413,11 +413,19 @@ def create_vectorstore():
|
|
413 |
if PREPROCESSED_SPLITS:
|
414 |
# Vektordatenbank zu den Splits erstellen
|
415 |
vektordatenbank = document_storage_chroma(PREPROCESSED_SPLITS)
|
416 |
-
# Speichern
|
417 |
-
|
418 |
return vektordatenbank
|
419 |
else:
|
420 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
421 |
|
422 |
# Beispiel-Upload-Funktion
|
423 |
def upload_file_to_huggingface(file_path, upload_path):
|
|
|
390 |
embedding_fn = HuggingFaceEmbeddings(model_name=EMBEDDING_MODELL, model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
|
391 |
|
392 |
# Vectorstore initialisieren und Dokumente hinzufügen
|
393 |
+
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_fn)
|
394 |
|
395 |
return vectorstore
|
396 |
|
397 |
########################################################
|
398 |
# Save / load the splits and their metadata (pickled to disk)
|
399 |
+
def save_splits_and_metadata(splits, filename="splits_and_metadata.pkl"):
    """Pickle ``splits`` into the file at ``filename``.

    The file is opened in binary write mode, so an existing file at that
    path is overwritten.

    Args:
        splits: The object to serialize with ``pickle``.
        filename: Path of the pickle file to write.
    """
    with open(filename, "wb") as pickle_file:
        # Same serialization as pickle.dump(), expressed via an explicit Pickler.
        pickle.Pickler(pickle_file).dump(splits)
|
402 |
|
403 |
+
def load_splits_and_metadata(filename="splits_and_metadata.pkl"):
    """Load a previously pickled object from ``filename``.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        The unpickled object, or ``None`` if the file does not exist.
    """
    # Open directly instead of checking os.path.exists() first: the file
    # could disappear between the check and the open.
    try:
        f = open(filename, "rb")
    except FileNotFoundError:
        return None
    with f:
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file; only load files that this application wrote itself.
        return pickle.load(f)
|
|
|
413 |
if PREPROCESSED_SPLITS:
|
414 |
# Vektordatenbank zu den Splits erstellen
|
415 |
vektordatenbank = document_storage_chroma(PREPROCESSED_SPLITS)
|
416 |
+
# Speichern der Splits und Metadaten
|
417 |
+
save_splits_and_metadata((PREPROCESSED_SPLITS, SPLIT_TO_ORIGINAL_MAPPING))
|
418 |
return vektordatenbank
|
419 |
else:
|
420 |
return None
|
421 |
+
|
422 |
+
# Load the vector store by rebuilding it from the saved splits
|
423 |
+
def load_vectorstore():
    """Rebuild the vector store from the pickled splits.

    Returns:
        The result of ``document_storage_chroma`` for the loaded splits, or
        ``None`` when ``load_splits_and_metadata`` returns ``None``.
    """
    stored = load_splits_and_metadata()
    if stored is None:
        return None

    # The stored object is unpacked as a two-element (splits, mapping) pair;
    # only the splits are used to build the vector store.
    # NOTE(review): the loaded mapping is discarded here and no module-level
    # state is updated - confirm whether callers expect it to be restored.
    loaded_splits, _loaded_mapping = stored
    return document_storage_chroma(loaded_splits)
|
429 |
|
430 |
# Beispiel-Upload-Funktion
|
431 |
def upload_file_to_huggingface(file_path, upload_path):
|