Update utils.py
Browse files
utils.py
CHANGED
@@ -321,7 +321,11 @@ def load_word_with_metadata(file_path):
|
|
321 |
################################################
|
322 |
#Vektorstore
|
323 |
################################################
|
324 |
-
|
|
|
|
|
|
|
|
|
325 |
################################################
|
326 |
# Document Splitting
|
327 |
################################################
|
@@ -353,21 +357,25 @@ def document_loading_splitting():
|
|
353 |
|
354 |
|
355 |
# Verzeichnis für heruntergeladene Dateien
|
356 |
-
download_dir =
|
357 |
-
os.makedirs(download_dir, exist_ok=True)
|
358 |
|
359 |
# Dateien im Hugging Face Space auflisten
|
360 |
files_in_repo = list_files_in_hf_repo(STORAGE_REPO_ID)
|
361 |
|
362 |
-
# Dateien aus dem Hugging Face Space herunterladen
|
363 |
for file_name in files_in_repo:
|
364 |
-
if file_name.endswith('.pdf')
|
365 |
-
local_file_path = os.path.join(
|
366 |
download_file_from_hf(file_name, local_file_path)
|
367 |
-
|
|
|
|
|
|
|
|
|
368 |
# Erstellen von DirectoryLoader für jeden Dateityp
|
369 |
-
pdf_loader = create_directory_loader('.pdf',
|
370 |
-
word_loader = create_directory_loader('.word',
|
371 |
|
372 |
|
373 |
|
@@ -531,9 +539,6 @@ def download_file_from_hf(file_name, save_path):
|
|
531 |
file.write(response.content)
|
532 |
return save_path
|
533 |
|
534 |
-
def list_files_in_hf_repo(repo_id):
|
535 |
-
repo_info = api.list_repo_files(repo_id=repo_id, repo_type=REPO_TYPE)
|
536 |
-
return repo_info
|
537 |
|
538 |
|
539 |
|
|
|
321 |
################################################
|
322 |
#Vektorstore
|
323 |
################################################
|
324 |
+
#liste aller files in einem dir...
|
325 |
+
def list_files_in_hf_repo(repo_id):
|
326 |
+
repo_info = api.list_repo_files(repo_id=repo_id, repo_type=REPO_TYPE)
|
327 |
+
return repo_info
|
328 |
+
|
329 |
################################################
|
330 |
# Document Splitting
|
331 |
################################################
|
|
|
357 |
|
358 |
|
359 |
# Verzeichnis für heruntergeladene Dateien
|
360 |
+
#download_dir = CHROMA_PDF
|
361 |
+
#os.makedirs(download_dir, exist_ok=True)
|
362 |
|
363 |
# Dateien im Hugging Face Space auflisten
|
364 |
files_in_repo = list_files_in_hf_repo(STORAGE_REPO_ID)
|
365 |
|
366 |
+
# Dateien aus dem Hugging Face Space mit der STORAGE_REPO_ID herunterladen
|
367 |
for file_name in files_in_repo:
|
368 |
+
if file_name.endswith('.pdf'):
|
369 |
+
local_file_path = os.path.join(CHROMA_PDF, os.path.basename(file_name))
|
370 |
download_file_from_hf(file_name, local_file_path)
|
371 |
+
if file_name.endswith('.docx'):
|
372 |
+
local_file_path = os.path.join(CHROMA_WORD, os.path.basename(file_name))
|
373 |
+
download_file_from_hf(file_name, local_file_path)
|
374 |
+
print("file_name..................."+str(file_name))
|
375 |
+
print("local_file_path..................."+str(local_file_path))
|
376 |
# Erstellen von DirectoryLoader für jeden Dateityp
|
377 |
+
pdf_loader = create_directory_loader('.pdf', CHROMA_PDF)
|
378 |
+
word_loader = create_directory_loader('.word', CHROMA_WORD)
|
379 |
|
380 |
|
381 |
|
|
|
539 |
file.write(response.content)
|
540 |
return save_path
|
541 |
|
|
|
|
|
|
|
542 |
|
543 |
|
544 |
|