Spaces:
Running
Running
"""Various utilities (logger, time benchmark, args dump, numerical and stats info)""" | |
from copy import deepcopy | |
from aip_trainer import app_logger | |
from aip_trainer.utils.serialize import serialize | |
def hash_calculate(arr_or_path, is_file: bool, read_mode: str = "rb") -> str | bytes: | |
""" | |
Return computed hash from input variable (typically a numpy array). | |
Args: | |
arr: input variable | |
Returns: | |
computed hash from input variable | |
""" | |
from hashlib import sha256 | |
from base64 import b64encode | |
from numpy import ndarray as np_ndarray | |
if is_file: | |
with open(arr_or_path, read_mode) as file_to_check: | |
# read contents of the file | |
arr_or_path = file_to_check.read() | |
# # pipe contents of the file through | |
# try: | |
# return hashlib.sha256(data).hexdigest() | |
# except TypeError: | |
# app_logger.warning( | |
# f"TypeError, re-try encoding arg:{arr_or_path},type:{type(arr_or_path)}." | |
# ) | |
# return hashlib.sha256(data.encode("utf-8")).hexdigest() | |
if isinstance(arr_or_path, np_ndarray): | |
hash_fn = sha256(arr_or_path.data) | |
elif isinstance(arr_or_path, dict): | |
import json | |
serialized = serialize(arr_or_path) | |
variable_to_hash = json.dumps(serialized, sort_keys=True).encode("utf-8") | |
hash_fn = sha256(variable_to_hash) | |
elif isinstance(arr_or_path, str): | |
try: | |
hash_fn = sha256(arr_or_path) | |
except TypeError: | |
app_logger.warning( | |
f"TypeError, re-try encoding arg:{arr_or_path},type:{type(arr_or_path)}." | |
) | |
hash_fn = sha256(arr_or_path.encode("utf-8")) | |
elif isinstance(arr_or_path, bytes): | |
hash_fn = sha256(arr_or_path) | |
else: | |
raise ValueError( | |
f"variable 'arr':{arr_or_path} of type '{type(arr_or_path)}' not yet handled." | |
) | |
return b64encode(hash_fn.digest()) | |