# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/utils.ipynb.
# %% auto 0
__all__ = ['generate_TS_df', 'normalize_columns', 'remove_constant_columns', 'ReferenceArtifact', 'PrintLayer',
'get_wandb_artifacts', 'get_pickle_artifact', 'exec_with_feather', 'py_function',
'exec_with_feather_k_output', 'exec_with_and_feather_k_output', 'learner_module_leaves',
'learner_module_leaves_subtables']
# %% ../nbs/utils.ipynb 3
from .imports import *
from fastcore.all import *
import wandb
import pickle
import pandas as pd
import numpy as np
#import tensorflow as tf
import torch.nn as nn
from fastai.basics import *
# %% ../nbs/utils.ipynb 5
def generate_TS_df(rows, cols):
"Generates a dataframe containing a multivariate time series, where each column \
represents a variable and each row a time point (sample). The timestamp is in the \
index of the dataframe, and it is created with a even space of 1 second between samples"
index = np.arange(pd.Timestamp.now(),
pd.Timestamp.now() + pd.Timedelta(rows-1, 'seconds'),
pd.Timedelta(1, 'seconds'))
data = np.random.randn(len(index), cols)
return pd.DataFrame(data, index=index)
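# Usage sketch (illustrative):
# df = generate_TS_df(5, 3)  # 5 time points, 3 variables
# df.head()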
# %% ../nbs/utils.ipynb 10
def normalize_columns(df:pd.DataFrame):
"Normalize columns from `df` to have 0 mean and 1 standard deviation"
mean = df.mean()
std = df.std() + 1e-7
return (df-mean)/std
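# Usage sketch: each column ends up with ~0 mean and ~1 standard deviation;
# the 1e-7 term avoids division by zero on constant columns.
# normalized = normalize_columns(generate_TS_df(100, 2))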
# %% ../nbs/utils.ipynb 16
def remove_constant_columns(df:pd.DataFrame):
    "Remove the columns of `df` whose values are constant across all rows"
    return df.loc[:, (df != df.iloc[0]).any()]
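# Usage sketch (illustrative):
# df = generate_TS_df(10, 2)
# df['c'] = 1.0                  # add a constant column
# remove_constant_columns(df)    # 'c' is dropped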
# %% ../nbs/utils.ipynb 21
class ReferenceArtifact(wandb.Artifact):
    "This class is meant to create an artifact with a single reference to an object \
    passed as argument in the constructor. The object will be pickled, hashed and stored \
    in a specified folder."
    default_storage_path = Path('data/wandb_artifacts/') # * this path is relative to Path.home()
@delegates(wandb.Artifact.__init__)
def __init__(self, obj, name, type='object', folder=None, **kwargs):
super().__init__(type=type, name=name, **kwargs)
# pickle dumps the object and then hash it
hash_code = str(hash(pickle.dumps(obj)))
folder = Path(ifnone(folder, Path.home()/self.default_storage_path))
with open(f'{folder}/{hash_code}', 'wb') as f:
pickle.dump(obj, f)
self.add_reference(f'file://{folder}/{hash_code}')
if self.metadata is None:
self.metadata = dict()
self.metadata['ref'] = dict()
self.metadata['ref']['hash'] = hash_code
self.metadata['ref']['type'] = str(obj.__class__)
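# Usage sketch (assumes a wandb login and that `default_storage_path` exists
# under the home directory; 'my-project' is a placeholder project name):
# with wandb.init(project='my-project') as run:
#     run.log_artifact(ReferenceArtifact(obj=np.arange(10), name='example-array'))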
# %% ../nbs/utils.ipynb 24
@patch
def to_obj(self:wandb.apis.public.Artifact):
"""Download the files of a saved ReferenceArtifact and get the referenced object. The artifact must \
come from a call to `run.use_artifact` with a proper wandb run."""
    if self.metadata.get('ref') is None:
        print(f'ERROR: {self} does not come from a saved ReferenceArtifact')
        return None
original_path = ReferenceArtifact.default_storage_path/self.metadata['ref']['hash']
path = original_path if original_path.exists() else Path(self.download()).ls()[0]
with open(path, 'rb') as f:
obj = pickle.load(f)
return obj
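# Usage sketch ('example-array:latest' is a placeholder; requires an active run):
# artifact = run.use_artifact('example-array:latest')
# obj = artifact.to_obj()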
# %% ../nbs/utils.ipynb 33
class PrintLayer(nn.Module):
    "Identity layer that prints the shape of the tensor passing through it (for debugging)."
    def __init__(self):
        super().__init__()
def forward(self, x):
# Do your print / debug stuff here
print(x.shape)
return x
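# Usage sketch: interleave PrintLayer in a model to trace tensor shapes.
# model = nn.Sequential(nn.Linear(10, 4), PrintLayer(), nn.ReLU())
# _ = model(torch.randn(2, 10))  # prints torch.Size([2, 4])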
# %% ../nbs/utils.ipynb 34
@patch
def export_and_get(self:Learner, keep_exported_file=False):
"""
Export the learner into an auxiliary file, load it and return it back.
"""
    aux_path = self.path/'aux.pkl'
    self.export(fname='aux.pkl')
    aux_learn = load_learner(aux_path)
if not keep_exported_file: aux_path.unlink()
return aux_learn
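# Usage sketch: round-trip a Learner through export/load, e.g. to verify it
# deserializes cleanly (`learn` is a hypothetical trained Learner):
# learn_back = learn.export_and_get()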
# %% ../nbs/utils.ipynb 35
def get_wandb_artifacts(project_path, type=None, name=None, last_version=True):
"""
Get the artifacts logged in a wandb project.
Input:
- `project_path` (str): entity/project_name
    - `type` (str): if given, return only artifacts of this type
    - `name` (str): if given, return only artifacts whose collection has this name
    - `last_version` (bool): whether to return only the last version of each artifact
Output: List of artifacts
"""
public_api = wandb.Api()
if type is not None:
types = [public_api.artifact_type(type, project_path)]
else:
types = public_api.artifact_types(project_path)
res = L()
for kind in types:
for collection in kind.collections():
if name is None or name == collection.name:
versions = public_api.artifact_versions(
kind.type,
"/".join([kind.entity, kind.project, collection.name]),
per_page=1,
)
if last_version: res += next(versions)
else: res += L(versions)
return list(res)
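# Usage sketch ('entity/project' is a placeholder path):
# artifacts = get_wandb_artifacts('entity/project', type='dataset')
# [a.name for a in artifacts]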
# %% ../nbs/utils.ipynb 39
def get_pickle_artifact(filename):
    "Load and return the pickled object stored in `filename`."
with open(filename, "rb") as f:
df = pickle.load(f)
return df
# %% ../nbs/utils.ipynb 41
import pyarrow.feather as ft
import pickle
# %% ../nbs/utils.ipynb 42
def exec_with_feather(function, path = None, print_flag = False, *args, **kwargs):
    "Read a feather file from `path` and return the result of applying `function` to its content."
    result = None
    if path is not None:
        if print_flag: print("--> Exec with feather | reading input from ", path)
        input = ft.read_feather(path)
        if print_flag: print("--> Exec with feather | Apply function ", path)
        result = function(input, *args, **kwargs)
        if print_flag: print("Exec with feather --> ", path)
    return result
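# Usage sketch ('data.feather' is a placeholder path):
# n_rows = exec_with_feather(len, path='data.feather', print_flag=True)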
# %% ../nbs/utils.ipynb 43
def py_function(module_name, function_name, print_flag = False):
    "Get the function named `function_name`, looking first in `__main__` and then in `module_name`."
    try:
        function = getattr(__import__('__main__'), function_name)
    except AttributeError:
        module = __import__(module_name, fromlist=[''])
        function = getattr(module, function_name)
    if print_flag: print("py function: ", function_name, ": ", function)
    return function
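# Usage sketch: resolve a function by name, e.g. from the standard library.
# fn = py_function('statistics', 'mean', print_flag=True)
# fn([1, 2, 3])  # -> 2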
# %% ../nbs/utils.ipynb 46
import time
def exec_with_feather_k_output(function_name, module_name = "main", path = None, k_output = 0, print_flag = False, time_flag = False, *args, **kwargs):
    "Read a feather file from `path`, apply the function named `function_name` and return the `k_output`-th element of its output."
    result = None
    function = py_function(module_name, function_name, print_flag)
    if time_flag: t_start = time.time()
    if path is not None:
        if print_flag: print("--> Exec with feather | reading input from ", path)
        input = ft.read_feather(path)
        if print_flag: print("--> Exec with feather | Apply function ", path)
        result = function(input, *args, **kwargs)[k_output]
    if time_flag:
        t_end = time.time()
        print("Exec with feather | time: ", t_end-t_start)
    if print_flag: print("Exec with feather --> ", path)
    return result
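# Usage sketch ('my_module'/'my_func' are placeholders; `my_func` must return
# an indexable object):
# first = exec_with_feather_k_output('my_func', module_name='my_module',
#                                    path='data.feather', k_output=0)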
# %% ../nbs/utils.ipynb 48
def exec_with_and_feather_k_output(function_name, module_name = "main", path_input = None, path_output = None, k_output = 0, print_flag = False, time_flag = False, *args, **kwargs):
    "Read a feather file from `path_input`, apply the function named `function_name`, and write the `k_output`-th element of its output to `path_output`."
    result = None
    function = py_function(module_name, function_name, print_flag)
    if time_flag: t_start = time.time()
    if path_input is not None:
        if print_flag: print("--> Exec with feather | reading input from ", path_input)
        input = ft.read_feather(path_input)
        if print_flag:
            print("--> Exec with feather | Apply function ", function_name, "input type: ", type(input))
        result = function(input, *args, **kwargs)[k_output]
        ft.write_feather(result, path_output, compression = 'lz4')
    if time_flag:
        t_end = time.time()
        print("Exec with feather | time: ", t_end-t_start)
    if print_flag: print("Exec with feather --> ", path_output)
    return path_output
# %% ../nbs/utils.ipynb 52
def learner_module_leaves(learner):
    "Return a dataframe describing every leaf module of `learner`'s model (path, type, name and parameters)."
    modules = list(learner.modules())[0] # get the root module
rows = []
def find_leave_modules(module, path=[]):
for name, sub_module in module.named_children():
current_path = path + [f"{type(sub_module).__name__}"]
if not list(sub_module.children()):
leave_name = ' -> '.join(current_path)
leave_params = str(sub_module).strip()
                rows.append([
                    leave_name,
                    f"{type(sub_module).__name__}",
                    name,
                    leave_params
                ])
find_leave_modules(sub_module, current_path)
find_leave_modules(modules)
df = pd.DataFrame(rows, columns=['Path', 'Module_type', 'Module_name', 'Module'])
return df
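# Usage sketch (`learn` is a hypothetical Learner):
# learner_module_leaves(learn).head()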
# %% ../nbs/utils.ipynb 56
def learner_module_leaves_subtables(learner, print_flag = False):
    "Return two dataframes: the distinct leaf module types of `learner` and their distinct parametrizations."
    md = learner_module_leaves(learner).drop(
        'Path', axis = 1
    ).sort_values(
        by = 'Module_type'
    )
    if print_flag: print("The layers are of these types:")
md_types = pd.DataFrame(md['Module_type'].drop_duplicates())
    if print_flag:
        display(md_types)
        print("And they are called with these parameters:")
md_modules = pd.DataFrame(md['Module'].drop_duplicates())
if print_flag: display(md_modules)
return md_types, md_modules
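# Usage sketch (`learn` is a hypothetical Learner):
# types_df, modules_df = learner_module_leaves_subtables(learn, print_flag=True)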