Spaces:
Running
Running
File size: 2,015 Bytes
c1390d7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
"""Various utilities (logger, time benchmark, args dump, numerical and stats info)"""
from copy import deepcopy
from aip_trainer import app_logger
from aip_trainer.utils.serialize import serialize
def hash_calculate(arr_or_path, is_file: bool, read_mode: str = "rb") -> str | bytes:
"""
Return computed hash from input variable (typically a numpy array).
Args:
arr: input variable
Returns:
computed hash from input variable
"""
from hashlib import sha256
from base64 import b64encode
from numpy import ndarray as np_ndarray
if is_file:
with open(arr_or_path, read_mode) as file_to_check:
# read contents of the file
arr_or_path = file_to_check.read()
# # pipe contents of the file through
# try:
# return hashlib.sha256(data).hexdigest()
# except TypeError:
# app_logger.warning(
# f"TypeError, re-try encoding arg:{arr_or_path},type:{type(arr_or_path)}."
# )
# return hashlib.sha256(data.encode("utf-8")).hexdigest()
if isinstance(arr_or_path, np_ndarray):
hash_fn = sha256(arr_or_path.data)
elif isinstance(arr_or_path, dict):
import json
serialized = serialize(arr_or_path)
variable_to_hash = json.dumps(serialized, sort_keys=True).encode("utf-8")
hash_fn = sha256(variable_to_hash)
elif isinstance(arr_or_path, str):
try:
hash_fn = sha256(arr_or_path)
except TypeError:
app_logger.warning(
f"TypeError, re-try encoding arg:{arr_or_path},type:{type(arr_or_path)}."
)
hash_fn = sha256(arr_or_path.encode("utf-8"))
elif isinstance(arr_or_path, bytes):
hash_fn = sha256(arr_or_path)
else:
raise ValueError(
f"variable 'arr':{arr_or_path} of type '{type(arr_or_path)}' not yet handled."
)
return b64encode(hash_fn.digest())
|