alexkueck committed on
Commit
9d8843e
·
verified ·
1 Parent(s): 28aefb0

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +10 -10
utils.py CHANGED
@@ -335,8 +335,8 @@ def document_loading_splitting():
335
 
336
  # Dateien im Hugging Face Space auflisten
337
  files_in_repo = list_repo_files(repo_id=STORAGE_REPO_ID, repo_type="space", token=hf_token)
338
- pdf_files = [f for f in files_in_repo if f.endswith('.pdf') and f.startswith("chroma/kkg/pdf/")]
339
- word_files = [f for f in files_in_repo if f.endswith('.docx') and f.startswith("chroma/kkg/word/")]
340
 
341
 
342
  # Erstellen von DirectoryLoader für jeden Dateityp
@@ -403,7 +403,7 @@ def document_storage_chroma(splits):
403
  ########################################################
404
  #Splits für den Vektorstore speichern - bzw. laden
405
  ########################################################
406
- def save_splits(preprocessed_splits, original_splits, directory="chroma/kkg", preprocessed_filename="preprocessed_splits.pkl", original_filename="original_splits.pkl"):
407
  # Erstellen des Verzeichnisses, falls es nicht existiert
408
  if not os.path.exists(directory):
409
  os.makedirs(directory)
@@ -422,7 +422,7 @@ def save_splits(preprocessed_splits, original_splits, directory="chroma/kkg", pr
422
  upload_file_to_huggingface(preprocessed_filepath, f"{directory}/{preprocessed_filename}")
423
  upload_file_to_huggingface(original_filepath, f"{directory}/{original_filename}")
424
 
425
- def load_splits(directory="chroma/kkg", preprocessed_filename="preprocessed_splits.pkl", original_filename="original_splits.pkl"):
426
  preprocessed_splits = None
427
  original_splits = None
428
 
@@ -457,7 +457,7 @@ def load_splits(directory="chroma/kkg", preprocessed_filename="preprocessed_spli
457
  ########################################
458
  #das Mapping der original-Splits und der preprocessed Splits speichern - und laden
459
  ########################################
460
- def save_split_to_original_mapping(mapping, directory="chroma/kkg", filename="mapping.pkl"):
461
  # Erstellen des Verzeichnisses, falls es nicht existiert
462
  if not os.path.exists(directory):
463
  os.makedirs(directory)
@@ -471,7 +471,7 @@ def save_split_to_original_mapping(mapping, directory="chroma/kkg", filename="ma
471
  upload_file_to_huggingface(filepath, f"{directory}/{filename}")
472
 
473
 
474
- def load_split_to_original_mapping(directory="chroma/kkg", filename="mapping.pkl"):
475
  try:
476
  # Laden des Mappings aus dem Hugging Face Repository
477
  file_path = hf_hub_download(
@@ -739,9 +739,9 @@ def download_link(doc):
739
 
740
  # Bestimmen des Dokumenttyps und Anpassen des Pfads
741
  if doc_path.lower().endswith('.pdf'):
742
- file_url = f"{base_url}/chroma/kkg/pdf/{quote(title)}?token={hf_token}"
743
  elif doc_path.lower().endswith('.docx'):
744
- file_url = f"{base_url}/chroma/kkg/word/{quote(title)}?token={hf_token}"
745
  else:
746
  # Fallback für andere Dateitypen
747
  file_url = f"{base_url}/{quote(doc_path)}?token={hf_token}"
@@ -760,7 +760,7 @@ def display_files():
760
 
761
  # PDF-Dateien
762
  files_table += "<tr style='background-color: #930BBA; color: white; font-weight: bold; font-size: larger;'><th>Dateiname - PDF-Ordner</th></tr>"
763
- pdf_files = [f for f in list_repo_files(repo_id=STORAGE_REPO_ID, repo_type="space", token=hf_token) if f.endswith('.pdf') and f.startswith("chroma/kkg/pdf/")]
764
  for i, file in enumerate(pdf_files):
765
  row_color = "#4f4f4f" if i % 2 == 0 else "#3a3a3a"
766
  files_table += f"<tr style='background-color: {row_color}; border-bottom: 1px solid #ddd;'>"
@@ -768,7 +768,7 @@ def display_files():
768
 
769
  # Word-Dateien
770
  files_table += "<tr style='background-color: #930BBA; color: white; font-weight: bold; font-size: larger;'><th>Dateiname - Word-Ordner</th></tr>"
771
- word_files = [f for f in list_repo_files(repo_id=STORAGE_REPO_ID, repo_type="space", token=hf_token) if f.endswith('.docx') and f.startswith("chroma/kkg/word/")]
772
  for i, file in enumerate(word_files):
773
  row_color = "#4f4f4f" if i % 2 == 0 else "#3a3a3a"
774
  files_table += f"<tr style='background-color: {row_color}; border-bottom: 1px solid #ddd;'>"
 
335
 
336
  # Dateien im Hugging Face Space auflisten
337
  files_in_repo = list_repo_files(repo_id=STORAGE_REPO_ID, repo_type="space", token=hf_token)
338
+ pdf_files = [f for f in files_in_repo if f.endswith('.pdf') and f.startswith("chroma/demo/pdf/")]
339
+ word_files = [f for f in files_in_repo if f.endswith('.docx') and f.startswith("chroma/demo/word/")]
340
 
341
 
342
  # Erstellen von DirectoryLoader für jeden Dateityp
 
403
  ########################################################
404
  #Splits für den Vektorstore speichern - bzw. laden
405
  ########################################################
406
+ def save_splits(preprocessed_splits, original_splits, directory="chroma/demo", preprocessed_filename="preprocessed_splits.pkl", original_filename="original_splits.pkl"):
407
  # Erstellen des Verzeichnisses, falls es nicht existiert
408
  if not os.path.exists(directory):
409
  os.makedirs(directory)
 
422
  upload_file_to_huggingface(preprocessed_filepath, f"{directory}/{preprocessed_filename}")
423
  upload_file_to_huggingface(original_filepath, f"{directory}/{original_filename}")
424
 
425
+ def load_splits(directory="chroma/demo", preprocessed_filename="preprocessed_splits.pkl", original_filename="original_splits.pkl"):
426
  preprocessed_splits = None
427
  original_splits = None
428
 
 
457
  ########################################
458
  #das Mapping der original-Splits und der preprocessed Splits speichern - und laden
459
  ########################################
460
+ def save_split_to_original_mapping(mapping, directory="chroma/demo", filename="mapping.pkl"):
461
  # Erstellen des Verzeichnisses, falls es nicht existiert
462
  if not os.path.exists(directory):
463
  os.makedirs(directory)
 
471
  upload_file_to_huggingface(filepath, f"{directory}/{filename}")
472
 
473
 
474
+ def load_split_to_original_mapping(directory="chroma/demo", filename="mapping.pkl"):
475
  try:
476
  # Laden des Mappings aus dem Hugging Face Repository
477
  file_path = hf_hub_download(
 
739
 
740
  # Bestimmen des Dokumenttyps und Anpassen des Pfads
741
  if doc_path.lower().endswith('.pdf'):
742
+ file_url = f"{base_url}/chroma/demo/pdf/{quote(title)}?token={hf_token}"
743
  elif doc_path.lower().endswith('.docx'):
744
+ file_url = f"{base_url}/chroma/demo/word/{quote(title)}?token={hf_token}"
745
  else:
746
  # Fallback für andere Dateitypen
747
  file_url = f"{base_url}/{quote(doc_path)}?token={hf_token}"
 
760
 
761
  # PDF-Dateien
762
  files_table += "<tr style='background-color: #930BBA; color: white; font-weight: bold; font-size: larger;'><th>Dateiname - PDF-Ordner</th></tr>"
763
+ pdf_files = [f for f in list_repo_files(repo_id=STORAGE_REPO_ID, repo_type="space", token=hf_token) if f.endswith('.pdf') and f.startswith("chroma/demo/pdf/")]
764
  for i, file in enumerate(pdf_files):
765
  row_color = "#4f4f4f" if i % 2 == 0 else "#3a3a3a"
766
  files_table += f"<tr style='background-color: {row_color}; border-bottom: 1px solid #ddd;'>"
 
768
 
769
  # Word-Dateien
770
  files_table += "<tr style='background-color: #930BBA; color: white; font-weight: bold; font-size: larger;'><th>Dateiname - Word-Ordner</th></tr>"
771
+ word_files = [f for f in list_repo_files(repo_id=STORAGE_REPO_ID, repo_type="space", token=hf_token) if f.endswith('.docx') and f.startswith("chroma/demo/word/")]
772
  for i, file in enumerate(word_files):
773
  row_color = "#4f4f4f" if i % 2 == 0 else "#3a3a3a"
774
  files_table += f"<tr style='background-color: {row_color}; border-bottom: 1px solid #ddd;'>"