Update utils.py
Browse files
utils.py
CHANGED
@@ -101,6 +101,25 @@ HF_WRITE = os.getenv("HF_WRITE")
|
|
101 |
# HfApi-Instanz erstellen
|
102 |
api = HfApi()
|
103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
# Mapping für die Splits (Original und Preprocessed)
|
105 |
split_to_original_mapping = []
|
106 |
|
@@ -331,23 +350,6 @@ def split_documents_with_id(docs, text_splitter):
|
|
331 |
splits.append(split_doc)
|
332 |
return splits
|
333 |
|
334 |
-
#######################################
|
335 |
-
# Dokumente aus anderem Space laden
|
336 |
-
#######################################
|
337 |
-
#ein File aus dem Space mit der REPO_ID laden
|
338 |
-
def download_file_from_hf(file_name, save_path):
|
339 |
-
url = f"https://huggingface.co/{STORAGE_REPO_ID}/resolve/main/{file_name}"
|
340 |
-
response = requests.get(url)
|
341 |
-
response.raise_for_status() # Raise an error for bad status codes
|
342 |
-
with open(save_path, 'wb') as file:
|
343 |
-
file.write(response.content)
|
344 |
-
return save_path
|
345 |
-
|
346 |
-
#Liste aller Files in dem Space mit der Repo_id
|
347 |
-
def list_files_in_hf_repo(repo_id):
|
348 |
-
repo_info = api.list_repo_files(repo_id=repo_id)
|
349 |
-
return repo_info
|
350 |
-
|
351 |
|
352 |
|
353 |
########################################
|
@@ -523,6 +525,23 @@ def upload_file_to_huggingface(file_path, upload_path):
|
|
523 |
repo_id=STORAGE_REPO_ID,
|
524 |
repo_type=REPO_TYPE,
|
525 |
token=HF_WRITE
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
526 |
)
|
527 |
|
528 |
|
|
|
101 |
# HfApi-Instanz erstellen
|
102 |
api = HfApi()
|
103 |
|
104 |
+
|
105 |
+
|
106 |
+
# Authenticate this process against the Hugging Face Hub.
# NOTE(review): `hf_token` is not assigned anywhere in the visible part of
# this file -- confirm it is defined before this line runs.
login(token=hf_token)

# Build the API client that the repository helpers in this module call.
api = HfApi()

# Startup probe: list the files of the storage repository to confirm that it
# exists and is reachable. A failure is only printed, never re-raised.
try:
    repo_info = api.list_repo_files(
        repo_id=STORAGE_REPO_ID,
        repo_type=REPO_TYPE,
    )
    print(f"Repository '{STORAGE_REPO_ID}' enthält folgende Dateien: {repo_info}")
except Exception as err:
    print(f"Fehler beim Zugriff auf das Repository: {err}")
|
118 |
+
|
119 |
+
|
120 |
+
|
121 |
+
|
122 |
+
|
123 |
# Mapping für die Splits (Original und Preprocessed)
|
124 |
split_to_original_mapping = []
|
125 |
|
|
|
350 |
splits.append(split_doc)
|
351 |
return splits
|
352 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
353 |
|
354 |
|
355 |
########################################
|
|
|
525 |
repo_id=STORAGE_REPO_ID,
|
526 |
repo_type=REPO_TYPE,
|
527 |
token=HF_WRITE
|
528 |
+
|
529 |
+
#######################################
|
530 |
+
# Dokumente aus anderem Space laden
|
531 |
+
#######################################
|
532 |
+
#ein File aus dem Space mit der REPO_ID laden
|
533 |
+
# NOTE(review): in this revision the helpers below appear between
# `token=HF_WRITE` and the closing `)` of the preceding upload call -- confirm
# that call is closed before these definitions, otherwise the module will not
# parse.
def download_file_from_hf(file_name, save_path, timeout=60):
    """Download one file from the storage repository and save it locally.

    Args:
        file_name: Path of the file inside the repository; it is appended to
            the ``resolve/main/`` URL of ``STORAGE_REPO_ID``.
        save_path: Local path the downloaded bytes are written to.
        timeout: Seconds to wait for the server before giving up. Passed to
            ``requests.get`` so a stalled connection cannot block forever.

    Returns:
        ``save_path``, unchanged.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # NOTE(review): the URL carries no repo-type prefix although REPO_TYPE is
    # passed to the API calls elsewhere in this file; Hub URLs for datasets
    # and spaces are normally prefixed with "datasets/" / "spaces/" -- confirm
    # this resolves for the configured repository.
    url = f"https://huggingface.co/{STORAGE_REPO_ID}/resolve/main/{file_name}"

    # Only send credentials when a read token is configured; formatting an
    # unset token would send the literal header "Bearer None".
    headers = {"Authorization": f"Bearer {HF_READ}"} if HF_READ else {}

    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()  # surface 4xx/5xx instead of saving an error page

    with open(save_path, "wb") as out_file:
        out_file.write(response.content)
    return save_path
|
541 |
+
|
542 |
+
def list_files_in_hf_repo(repo_id):
    """Return the file listing of a Hub repository.

    Calls ``api.list_repo_files`` for ``repo_id`` with the module-level
    ``REPO_TYPE`` and hands its result back unchanged.
    """
    return api.list_repo_files(repo_id=repo_id, repo_type=REPO_TYPE)
|
545 |
)
|
546 |
|
547 |
|