# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/utils.ipynb.
# %% auto 0
__all__ = ['generate_TS_df', 'normalize_columns', 'remove_constant_columns', 'ReferenceArtifact', 'PrintLayer',
'get_wandb_artifacts', 'get_pickle_artifact', 'exec_with_feather', 'py_function',
'exec_with_feather_k_output', 'exec_with_and_feather_k_output', 'learner_module_leaves',
'learner_module_leaves_subtables']
# %% ../nbs/utils.ipynb 3
from .imports import *
from fastcore.all import *
import wandb
import pickle
import pandas as pd
import numpy as np
#import tensorflow as tf
import torch.nn as nn
from fastai.basics import *
# %% ../nbs/utils.ipynb 5
def generate_TS_df(rows, cols):
"Generates a dataframe containing a multivariate time series, where each column \
represents a variable and each row a time point (sample). The timestamp is in the \
index of the dataframe, and it is created with a even space of 1 second between samples"
index = np.arange(pd.Timestamp.now(),
pd.Timestamp.now() + pd.Timedelta(rows-1, 'seconds'),
pd.Timedelta(1, 'seconds'))
data = np.random.randn(len(index), cols)
return pd.DataFrame(data, index=index)
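# Usage sketch (illustrative):
# df = generate_TS_df(5, 3)  # 5 time points, 3 variables
# df.head()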
# %% ../nbs/utils.ipynb 10
def normalize_columns(df:pd.DataFrame):
"Normalize columns from `df` to have 0 mean and 1 standard deviation"
mean = df.mean()
std = df.std() + 1e-7
return (df-mean)/std
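# Usage sketch: each column ends up with ~0 mean and ~1 standard deviation;
# the 1e-7 term avoids division by zero on constant columns.
# normalized = normalize_columns(generate_TS_df(100, 2))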
# %% ../nbs/utils.ipynb 16
def remove_constant_columns(df:pd.DataFrame):
    "Remove the columns of `df` whose values are constant across all rows"
    return df.loc[:, (df != df.iloc[0]).any()]
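# Usage sketch (illustrative):
# df = generate_TS_df(10, 2)
# df['c'] = 1.0                  # add a constant column
# remove_constant_columns(df)    # 'c' is dropped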
# %% ../nbs/utils.ipynb 21
class ReferenceArtifact(wandb.Artifact):
    "This class is meant to create an artifact with a single reference to an object \
    passed as argument in the constructor. The object will be pickled, hashed and stored \
    in a specified folder."
    default_storage_path = Path('data/wandb_artifacts/') # * this path is relative to Path.home()
@delegates(wandb.Artifact.__init__)
def __init__(self, obj, name, type='object', folder=None, **kwargs):
super().__init__(type=type, name=name, **kwargs)
# pickle dumps the object and then hash it
hash_code = str(hash(pickle.dumps(obj)))
folder = Path(ifnone(folder, Path.home()/self.default_storage_path))
with open(f'{folder}/{hash_code}', 'wb') as f:
pickle.dump(obj, f)
self.add_reference(f'file://{folder}/{hash_code}')
if self.metadata is None:
self.metadata = dict()
self.metadata['ref'] = dict()
self.metadata['ref']['hash'] = hash_code
self.metadata['ref']['type'] = str(obj.__class__)
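# Usage sketch (assumes a wandb login and that `default_storage_path` exists
# under the home directory; 'my-project' is a placeholder project name):
# with wandb.init(project='my-project') as run:
#     run.log_artifact(ReferenceArtifact(obj=np.arange(10), name='example-array'))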
# %% ../nbs/utils.ipynb 24
@patch
def to_obj(self:wandb.apis.public.Artifact):
"""Download the files of a saved ReferenceArtifact and get the referenced object. The artifact must \
come from a call to `run.use_artifact` with a proper wandb run."""
    if self.metadata.get('ref') is None:
        print(f'ERROR: {self} does not come from a saved ReferenceArtifact')
        return None
original_path = ReferenceArtifact.default_storage_path/self.metadata['ref']['hash']
path = original_path if original_path.exists() else Path(self.download()).ls()[0]
with open(path, 'rb') as f:
obj = pickle.load(f)
return obj
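# Usage sketch ('example-array:latest' is a placeholder; requires an active run):
# artifact = run.use_artifact('example-array:latest')
# obj = artifact.to_obj()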
# %% ../nbs/utils.ipynb 33
class PrintLayer(nn.Module):
    "Identity layer that prints the shape of the tensor passing through it (for debugging)."
    def __init__(self):
        super().__init__()
def forward(self, x):
# Do your print / debug stuff here
print(x.shape)
return x
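# Usage sketch: interleave PrintLayer in a model to trace tensor shapes.
# model = nn.Sequential(nn.Linear(10, 4), PrintLayer(), nn.ReLU())
# _ = model(torch.randn(2, 10))  # prints torch.Size([2, 4])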
# %% ../nbs/utils.ipynb 34
@patch
def export_and_get(self:Learner, keep_exported_file=False):
"""
Export the learner into an auxiliary file, load it and return it back.
"""
    aux_path = self.path/'aux.pkl'
    self.export(fname='aux.pkl')
    aux_learn = load_learner(aux_path)
if not keep_exported_file: aux_path.unlink()
return aux_learn
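# Usage sketch: round-trip a Learner through export/load, e.g. to verify it
# deserializes cleanly (`learn` is a hypothetical trained Learner):
# learn_back = learn.export_and_get()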
# %% ../nbs/utils.ipynb 35
def get_wandb_artifacts(project_path, type=None, name=None, last_version=True):
"""
Get the artifacts logged in a wandb project.
Input:
- `project_path` (str): entity/project_name
    - `type` (str): if given, return only artifacts of this type
    - `name` (str): if given, return only artifacts whose collection has this name
    - `last_version` (bool): whether to return only the last version of each artifact
Output: List of artifacts
"""
public_api = wandb.Api()
if type is not None:
types = [public_api.artifact_type(type, project_path)]
else:
types = public_api.artifact_types(project_path)
res = L()
for kind in types:
for collection in kind.collections():
if name is None or name == collection.name:
versions = public_api.artifact_versions(
kind.type,
"/".join([kind.entity, kind.project, collection.name]),
per_page=1,
)
if last_version: res += next(versions)
else: res += L(versions)
return list(res)
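# Usage sketch ('entity/project' is a placeholder path):
# artifacts = get_wandb_artifacts('entity/project', type='dataset')
# [a.name for a in artifacts]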
# %% ../nbs/utils.ipynb 39
def get_pickle_artifact(filename):
    "Load and return the pickled object stored in `filename`."
with open(filename, "rb") as f:
df = pickle.load(f)
return df
# %% ../nbs/utils.ipynb 41
import pyarrow.feather as ft
import pickle
# %% ../nbs/utils.ipynb 42
def exec_with_feather(function, path = None, print_flag = False, *args, **kwargs):
    "Read a feather file from `path` and return the result of applying `function` to its content."
    result = None
    if path is not None:
        if print_flag: print("--> Exec with feather | reading input from ", path)
        input = ft.read_feather(path)
        if print_flag: print("--> Exec with feather | Apply function ", path)
        result = function(input, *args, **kwargs)
        if print_flag: print("Exec with feather --> ", path)
    return result
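# Usage sketch ('data.feather' is a placeholder path):
# n_rows = exec_with_feather(len, path='data.feather', print_flag=True)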
# %% ../nbs/utils.ipynb 43
def py_function(module_name, function_name, print_flag = False):
    "Get the function named `function_name`, looking first in `__main__` and then in `module_name`."
    try:
        function = getattr(__import__('__main__'), function_name)
    except AttributeError:
        module = __import__(module_name, fromlist=[''])
        function = getattr(module, function_name)
    if print_flag: print("py function: ", function_name, ": ", function)
    return function
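# Usage sketch: resolve a function by name, e.g. from the standard library.
# fn = py_function('statistics', 'mean', print_flag=True)
# fn([1, 2, 3])  # -> 2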
# %% ../nbs/utils.ipynb 46
import time
def exec_with_feather_k_output(function_name, module_name = "main", path = None, k_output = 0, print_flag = False, time_flag = False, *args, **kwargs):
    "Read a feather file from `path`, apply the function named `function_name` and return the `k_output`-th element of its output."
    result = None
    function = py_function(module_name, function_name, print_flag)
    if time_flag: t_start = time.time()
    if path is not None:
        if print_flag: print("--> Exec with feather | reading input from ", path)
        input = ft.read_feather(path)
        if print_flag: print("--> Exec with feather | Apply function ", path)
        result = function(input, *args, **kwargs)[k_output]
    if time_flag:
        t_end = time.time()
        print("Exec with feather | time: ", t_end-t_start)
    if print_flag: print("Exec with feather --> ", path)
    return result
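# Usage sketch ('my_module'/'my_func' are placeholders; `my_func` must return
# an indexable object):
# first = exec_with_feather_k_output('my_func', module_name='my_module',
#                                    path='data.feather', k_output=0)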
# %% ../nbs/utils.ipynb 48
def exec_with_and_feather_k_output(function_name, module_name = "main", path_input = None, path_output = None, k_output = 0, print_flag = False, time_flag = False, *args, **kwargs):
    "Read a feather file from `path_input`, apply the function named `function_name`, and write the `k_output`-th element of its output to `path_output`."
    result = None
    function = py_function(module_name, function_name, print_flag)
    if time_flag: t_start = time.time()
    if path_input is not None:
        if print_flag: print("--> Exec with feather | reading input from ", path_input)
        input = ft.read_feather(path_input)
        if print_flag:
            print("--> Exec with feather | Apply function ", function_name, "input type: ", type(input))
        result = function(input, *args, **kwargs)[k_output]
        ft.write_feather(result, path_output, compression = 'lz4')
    if time_flag:
        t_end = time.time()
        print("Exec with feather | time: ", t_end-t_start)
    if print_flag: print("Exec with feather --> ", path_output)
    return path_output
# %% ../nbs/utils.ipynb 52
def learner_module_leaves(learner):
    "Return a dataframe describing every leaf module of `learner`'s model (path, type, name and parameters)."
    modules = list(learner.modules())[0] # get the root module
rows = []
def find_leave_modules(module, path=[]):
for name, sub_module in module.named_children():
current_path = path + [f"{type(sub_module).__name__}"]
if not list(sub_module.children()):
leave_name = ' -> '.join(current_path)
leave_params = str(sub_module).strip()
                rows.append([
                    leave_name,
                    f"{type(sub_module).__name__}",
                    name,
                    leave_params
                ])
find_leave_modules(sub_module, current_path)
find_leave_modules(modules)
df = pd.DataFrame(rows, columns=['Path', 'Module_type', 'Module_name', 'Module'])
return df
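# Usage sketch (`learn` is a hypothetical Learner):
# learner_module_leaves(learn).head()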
# %% ../nbs/utils.ipynb 56
def learner_module_leaves_subtables(learner, print_flag = False):
    "Return two dataframes: the distinct leaf module types of `learner` and their distinct parametrizations."
    md = learner_module_leaves(learner).drop(
        'Path', axis = 1
    ).sort_values(
        by = 'Module_type'
    )
    if print_flag: print("The layers are of these types:")
md_types = pd.DataFrame(md['Module_type'].drop_duplicates())
    if print_flag:
        display(md_types)
        print("And they are called with these parameters:")
md_modules = pd.DataFrame(md['Module'].drop_duplicates())
if print_flag: display(md_modules)
return md_types, md_modules
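# Usage sketch (`learn` is a hypothetical Learner):
# types_df, modules_df = learner_module_leaves_subtables(learn, print_flag=True)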