|
import argparse
import json
import logging
import os
import shutil
import tempfile
import zipfile
from datetime import datetime
from pathlib import Path
from typing import Optional, Sequence, Union

import huggingface_hub

from relik.common.log import get_logger
from relik.common.utils import SAPIENZANLP_DATE_FORMAT, get_md5
|
|
|
# Module-level logger shared by the functions in this file. DEBUG is requested
# here; how ``get_logger`` applies the level is defined in relik.common.log.
logger = get_logger(level=logging.DEBUG)
|
|
|
|
|
def create_info_file(tmpdir: Path):
    """Write an ``info.json`` file describing the ``model.zip`` stored in ``tmpdir``.

    The JSON document holds two keys: ``md5`` (the value ``get_md5`` returns for
    ``tmpdir / "model.zip"``) and ``upload_date`` (the current time formatted
    with ``SAPIENZANLP_DATE_FORMAT``).

    Args:
        tmpdir: Directory that contains ``model.zip``; ``info.json`` is written
            into the same directory, replacing any existing file.
    """
    logger.debug("Computing md5 of model.zip")
    archive_path = tmpdir / "model.zip"
    checksum = get_md5(archive_path)
    timestamp = datetime.now().strftime(SAPIENZANLP_DATE_FORMAT)

    logger.debug("Dumping info.json file")
    info_path = tmpdir / "info.json"
    payload = {"md5": checksum, "upload_date": timestamp}
    with info_path.open("w") as info_file:
        json.dump(payload, info_file, indent=2)
|
|
|
|
|
def zip_run(
    dir_path: Union[str, os.PathLike],
    tmpdir: Union[str, os.PathLike],
    zip_name: str = "model.zip",
) -> Path:
    """Archive every file found under ``dir_path`` into ``tmpdir / zip_name``.

    Files are stored under their path relative to ``dir_path``, so the archive
    mirrors the directory layout. Directories themselves get no entry; only the
    files they contain are written.

    Args:
        dir_path: Root directory whose files are archived (searched recursively).
        tmpdir: Existing directory in which the zip file is created.
        zip_name: File name of the archive inside ``tmpdir``.

    Returns:
        Path of the zip file that was written.
    """
    logger.debug(f"zipping {dir_path} to {tmpdir}")

    run_dir = Path(dir_path)
    # The signature accepts plain strings, so coerce before using the ``/``
    # operator (``str / str`` raises TypeError).
    zip_path = Path(tmpdir) / zip_name

    with zipfile.ZipFile(zip_path, "w") as zip_file:
        # "*" instead of "*.*": the latter only matches names containing a dot
        # and would silently leave extension-less files out of the archive.
        for file in run_dir.rglob("*"):
            if file.is_dir():
                continue

            zip_file.write(file, arcname=file.relative_to(run_dir))

    return zip_path
|
|
|
|
|
def _copy_dir_contents(src_dir: Union[str, os.PathLike], dst_dir: Union[str, os.PathLike]) -> None:
    """Copy the visible top-level entries of ``src_dir`` into ``dst_dir``, merging directories."""
    src_root = Path(src_dir)
    dst_root = Path(dst_dir)
    for entry in src_root.iterdir():
        # Mirror the shell glob ``*``: top-level dot-entries are skipped. This also
        # keeps a ``.git`` folder in the source from overwriting the clone's own.
        if entry.name.startswith("."):
            continue
        if not entry.is_dir():
            shutil.copy(entry, dst_root / entry.name)
            continue
        # Walk the sub-tree manually so directories that already exist in the
        # destination (e.g. from a previous upload) are merged, not rejected.
        for current, _subdirs, file_names in os.walk(entry):
            target_dir = dst_root / Path(current).relative_to(src_root)
            target_dir.mkdir(parents=True, exist_ok=True)
            for file_name in file_names:
                shutil.copy(Path(current) / file_name, target_dir / file_name)


def upload(
    model_dir: Union[str, os.PathLike],
    model_name: str,
    organization: Optional[str] = None,
    repo_name: Optional[str] = None,
    commit: Optional[str] = None,
    archive: bool = False,
):
    """Push the content of ``model_dir`` to a HuggingFace Hub repository.

    The repository id is ``repo_name`` (falling back to ``model_name``),
    prefixed with ``organization/`` when an organization is given. The
    repository is created if needed (``exist_ok=True``) and cloned into a
    temporary directory. The model files are then added to the clone and the
    result is pushed.

    If no HuggingFace token is stored locally, a message is printed and the
    function returns without uploading anything.

    Args:
        model_dir: Directory containing the model files.
        model_name: Name of the model; used as repository name unless
            ``repo_name`` is given.
        organization: Optional organization that owns the repository.
        repo_name: Optional repository name overriding ``model_name``.
        commit: Commit message; defaults to "Automatic push from sapienzanlp".
        archive: When true, ``model_dir`` is zipped into ``model.zip`` and an
            ``info.json`` file is written next to it; otherwise the files are
            copied as they are.

    Raises:
        OSError: In non-archive mode, if ``model_dir`` cannot be listed or one
            of its files cannot be copied.
    """
    token = huggingface_hub.HfFolder.get_token()
    if token is None:
        print(
            "No HuggingFace token found. You need to execute `huggingface-cli login` first!"
        )
        return

    repo_id = repo_name or model_name
    if organization is not None:
        repo_id = f"{organization}/{repo_id}"

    with tempfile.TemporaryDirectory() as tmpdir:
        api = huggingface_hub.HfApi()
        repo_url = api.create_repo(
            token=token,
            repo_id=repo_id,
            exist_ok=True,
        )
        repo = huggingface_hub.Repository(
            str(tmpdir), clone_from=repo_url, use_auth_token=token
        )

        tmp_path = Path(tmpdir)
        if archive:
            logger.debug(f"Zipping {model_dir} to {tmp_path}")
            zip_run(model_dir, tmp_path)
            create_info_file(tmp_path)
        else:
            logger.debug(f"Copying {model_dir} to {tmpdir}")
            # Copy with the standard library instead of shelling out to ``cp``:
            # paths with spaces or shell metacharacters are handled correctly
            # and a failed copy raises instead of being silently ignored.
            _copy_dir_contents(model_dir, tmp_path)

        # Push while the temporary clone still exists.
        repo.push_to_hub(commit_message=commit or "Automatic push from sapienzanlp")
|
|
|
|
|
def parse_args(args: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse the command-line options of the upload script.

    Args:
        args: Argument strings to parse. When ``None`` (the default) argparse
            reads them from ``sys.argv``, which is the behaviour of the
            command-line entry point; passing a list allows programmatic use.

    Returns:
        Namespace with the attributes ``model_dir``, ``model_name``,
        ``organization``, ``repo_name``, ``commit`` and ``archive``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "model_dir", help="The directory of the model you want to upload"
    )
    parser.add_argument("model_name", help="The model you want to upload")
    parser.add_argument(
        "--organization",
        help="the name of the organization where you want to upload the model",
    )
    parser.add_argument(
        "--repo_name",
        help="Optional name to use when uploading to the HuggingFace repository",
    )
    parser.add_argument(
        "--commit", help="Commit message to use when pushing to the HuggingFace Hub"
    )
    parser.add_argument(
        "--archive",
        action="store_true",
        help="""
            Whether to compress the model directory before uploading it.
            If True, the model directory will be zipped and the zip file will be uploaded.
            If False, the model directory will be uploaded as is.""",
    )
    return parser.parse_args(args)
|
|
|
|
|
def main():
    """Command-line entry point: parse the arguments and forward them to ``upload``."""
    cli_options = vars(parse_args())
    upload(**cli_options)
|
|
|
|
|
# Run the upload only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|