Spaces:
Sleeping
Sleeping
import os, shutil | |
os.environ['HF_HUB_DISABLE_PROGRESS_BARS'] = '1' | |
import time | |
from fastcore.utils import * | |
from datetime import datetime | |
from huggingface_hub import snapshot_download, upload_folder, create_repo, repo_exists, whoami | |
__all__ = ['download', 'upload', 'setup_hf_backup'] | |
def _token():
    "Return the Hugging Face access token from the `HF_TOKEN` environment variable, or `None` if unset."
    return os.environ.get("HF_TOKEN")
def get_cfg():
    "Load `config.ini` from the current directory, passing the defaults below as `create` values."
    # Declared type of each setting, passed to `Config` as `types`.
    setting_types = dict(dataset_id=str, db_dir=str, private_backup=bool, interval=int)
    # Default values, passed to `Config` as `create`.
    setting_defaults = dict(dataset_id='space-backup', db_dir='data', private_backup=True, interval=15)
    return Config('.', 'config.ini', types=setting_types, create=setting_defaults)
def get_dataset_id(cfg):
    """Return the dataset repo id to use for backups.

    `cfg.dataset_id` is returned unchanged when it already contains a `/`
    or when no token is available; otherwise it is prefixed with the
    `name` reported by `whoami` for the current token.
    """
    repo_name = cfg.dataset_id
    # Already namespaced: nothing to resolve, and no token lookup is needed.
    if "/" in repo_name:
        return repo_name
    access_token = _token()
    if access_token is None:
        return repo_name
    owner = whoami(access_token)['name']
    return f"{owner}/{repo_name}"
def download():
    """Start the periodic backup loop and restore the latest backup into the data directory.

    The backup loop (`upload_on_schedule`) is launched on a daemon thread so
    that this function can continue and return. The restore only happens when
    the `SPACE_ID` environment variable is set and the dataset repo exists:
    a snapshot of the dataset is downloaded and copied over `cfg.db_dir`.
    """
    import threading  # function-scope import: the file's import block does not provide it

    cfg = get_cfg()
    did = get_dataset_id(cfg)
    # `upload_on_schedule` loops forever. Calling it inline would block here,
    # so the restore below would never run and this startup handler would
    # never return. A daemon thread also does not keep the process alive at exit.
    threading.Thread(target=upload_on_schedule, daemon=True).start()
    if os.getenv("SPACE_ID") and repo_exists(did, repo_type="dataset", token=_token()):
        cache_path = snapshot_download(repo_id=did, repo_type='dataset', token=_token())
        # Merge the snapshot into the (possibly existing) data directory.
        shutil.copytree(cache_path, cfg.db_dir, dirs_exist_ok=True)
def upload():
    """Upload the configured data directory to the backup dataset repo.

    Does nothing unless the `SPACE_ID` environment variable is set. The
    dataset repo is created if needed (`exist_ok=True`), with privacy taken
    from `private_backup`, and the folder is uploaded with a timestamped
    commit message.
    """
    settings = get_cfg()
    if os.getenv("SPACE_ID"):
        repo = get_dataset_id(settings)
        create_repo(repo, token=_token(), private=settings.private_backup,
                    repo_type='dataset', exist_ok=True)
        upload_folder(
            folder_path=settings.db_dir,
            token=_token(),
            repo_id=repo,
            repo_type='dataset',
            commit_message=f"backup {datetime.now()}",
        )
def upload_on_schedule():
    """Call `upload` repeatedly, sleeping `interval` minutes before each call.

    This loops forever and never returns.
    """
    settings = get_cfg()
    while True:
        # `interval` is expressed in minutes; `time.sleep` takes seconds.
        pause_seconds = settings.interval * 60
        time.sleep(pause_seconds)
        upload()
def setup_hf_backup(app):
    "Register `download` as a startup handler and `upload` as a shutdown handler on `app`."
    handlers = (("startup", download), ("shutdown", upload))
    for event_name, handler in handlers:
        app.on_event(event_name)(handler)