File size: 2,015 Bytes
c1390d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
"""Various utilities (logger, time benchmark, args dump, numerical and stats info)"""

from copy import deepcopy
from aip_trainer import app_logger
from aip_trainer.utils.serialize import serialize


def hash_calculate(arr_or_path, is_file: bool, read_mode: str = "rb") -> str | bytes:
    """
    Return computed hash from input variable (typically a numpy array).

    Args:
        arr: input variable

    Returns:
        computed hash from input variable
    """
    from hashlib import sha256
    from base64 import b64encode
    from numpy import ndarray as np_ndarray

    if is_file:
        with open(arr_or_path, read_mode) as file_to_check:
            # read contents of the file
            arr_or_path = file_to_check.read()
            # # pipe contents of the file through
            # try:
            #     return hashlib.sha256(data).hexdigest()
            # except TypeError:
            #     app_logger.warning(
            #         f"TypeError, re-try encoding arg:{arr_or_path},type:{type(arr_or_path)}."
            #     )
            #     return hashlib.sha256(data.encode("utf-8")).hexdigest()

    if isinstance(arr_or_path, np_ndarray):
        hash_fn = sha256(arr_or_path.data)
    elif isinstance(arr_or_path, dict):
        import json

        serialized = serialize(arr_or_path)
        variable_to_hash = json.dumps(serialized, sort_keys=True).encode("utf-8")
        hash_fn = sha256(variable_to_hash)
    elif isinstance(arr_or_path, str):
        try:
            hash_fn = sha256(arr_or_path)
        except TypeError:
            app_logger.warning(
                f"TypeError, re-try encoding arg:{arr_or_path},type:{type(arr_or_path)}."
            )
            hash_fn = sha256(arr_or_path.encode("utf-8"))
    elif isinstance(arr_or_path, bytes):
        hash_fn = sha256(arr_or_path)
    else:
        raise ValueError(
            f"variable 'arr':{arr_or_path} of type '{type(arr_or_path)}' not yet handled."
        )
    return b64encode(hash_fn.digest())