alessandro trinca tornidor
test: update tests
c1390d7
raw
history blame
2.02 kB
"""Various utilities (logger, time benchmark, args dump, numerical and stats info)"""
from copy import deepcopy
from aip_trainer import app_logger
from aip_trainer.utils.serialize import serialize
def hash_calculate(arr_or_path, is_file: bool, read_mode: str = "rb") -> str | bytes:
"""
Return computed hash from input variable (typically a numpy array).
Args:
arr: input variable
Returns:
computed hash from input variable
"""
from hashlib import sha256
from base64 import b64encode
from numpy import ndarray as np_ndarray
if is_file:
with open(arr_or_path, read_mode) as file_to_check:
# read contents of the file
arr_or_path = file_to_check.read()
# # pipe contents of the file through
# try:
# return hashlib.sha256(data).hexdigest()
# except TypeError:
# app_logger.warning(
# f"TypeError, re-try encoding arg:{arr_or_path},type:{type(arr_or_path)}."
# )
# return hashlib.sha256(data.encode("utf-8")).hexdigest()
if isinstance(arr_or_path, np_ndarray):
hash_fn = sha256(arr_or_path.data)
elif isinstance(arr_or_path, dict):
import json
serialized = serialize(arr_or_path)
variable_to_hash = json.dumps(serialized, sort_keys=True).encode("utf-8")
hash_fn = sha256(variable_to_hash)
elif isinstance(arr_or_path, str):
try:
hash_fn = sha256(arr_or_path)
except TypeError:
app_logger.warning(
f"TypeError, re-try encoding arg:{arr_or_path},type:{type(arr_or_path)}."
)
hash_fn = sha256(arr_or_path.encode("utf-8"))
elif isinstance(arr_or_path, bytes):
hash_fn = sha256(arr_or_path)
else:
raise ValueError(
f"variable 'arr':{arr_or_path} of type '{type(arr_or_path)}' not yet handled."
)
return b64encode(hash_fn.digest())