alexkueck committed on
Commit
8b46265
·
verified ·
1 Parent(s): 4a6e453

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +17 -12
utils.py CHANGED
@@ -321,7 +321,11 @@ def load_word_with_metadata(file_path):
321
  ################################################
322
  #Vektorstore
323
  ################################################
324
-
 
 
 
 
325
  ################################################
326
  # Document Splitting
327
  ################################################
@@ -353,21 +357,25 @@ def document_loading_splitting():
353
 
354
 
355
  # Verzeichnis für heruntergeladene Dateien
356
- download_dir = "downloaded_files"
357
- os.makedirs(download_dir, exist_ok=True)
358
 
359
  # Dateien im Hugging Face Space auflisten
360
  files_in_repo = list_files_in_hf_repo(STORAGE_REPO_ID)
361
 
362
- # Dateien aus dem Hugging Face Space herunterladen
363
  for file_name in files_in_repo:
364
- if file_name.endswith('.pdf') or file_name.endswith('.docx'):
365
- local_file_path = os.path.join(download_dir, os.path.basename(file_name))
366
  download_file_from_hf(file_name, local_file_path)
367
-
 
 
 
 
368
  # Erstellen von DirectoryLoader für jeden Dateityp
369
- pdf_loader = create_directory_loader('.pdf', download_dir)
370
- word_loader = create_directory_loader('.word', download_dir)
371
 
372
 
373
 
@@ -531,9 +539,6 @@ def download_file_from_hf(file_name, save_path):
531
  file.write(response.content)
532
  return save_path
533
 
534
- def list_files_in_hf_repo(repo_id):
535
- repo_info = api.list_repo_files(repo_id=repo_id, repo_type=REPO_TYPE)
536
- return repo_info
537
 
538
 
539
 
 
321
  ################################################
322
  #Vektorstore
323
  ################################################
324
+ #liste aller files in einem dir...
325
+ def list_files_in_hf_repo(repo_id):
326
+ repo_info = api.list_repo_files(repo_id=repo_id, repo_type=REPO_TYPE)
327
+ return repo_info
328
+
329
  ################################################
330
  # Document Splitting
331
  ################################################
 
357
 
358
 
359
  # Verzeichnis für heruntergeladene Dateien
360
+ #download_dir = CHROMA_PDF
361
+ #os.makedirs(download_dir, exist_ok=True)
362
 
363
  # Dateien im Hugging Face Space auflisten
364
  files_in_repo = list_files_in_hf_repo(STORAGE_REPO_ID)
365
 
366
+ # Dateien aus dem Hugging Face Space mit der STORAGE_REPO_ID herunterladen
367
  for file_name in files_in_repo:
368
+ if file_name.endswith('.pdf'):
369
+ local_file_path = os.path.join(CHROMA_PDF, os.path.basename(file_name))
370
  download_file_from_hf(file_name, local_file_path)
371
+ if file_name.endswith('.docx'):
372
+ local_file_path = os.path.join(CHROMA_WORD, os.path.basename(file_name))
373
+ download_file_from_hf(file_name, local_file_path)
374
+ print("file_name..................."+str(file_name))
375
+ print("local_file_path..................."+str(local_file_path))
376
  # Erstellen von DirectoryLoader für jeden Dateityp
377
+ pdf_loader = create_directory_loader('.pdf', CHROMA_PDF)
378
+ word_loader = create_directory_loader('.word', CHROMA_WORD)
379
 
380
 
381
 
 
539
  file.write(response.content)
540
  return save_path
541
 
 
 
 
542
 
543
 
544