misantamaria committed on
Commit 6d51833 · 1 Parent(s): aa710b9

Trying to fix CUDA error

dvats_xai/.ipynb_checkpoints/__init__-checkpoint.py ADDED
@@ -0,0 +1 @@
+ __version__ = "0.0.1"
dvats_xai/.ipynb_checkpoints/__init__.py ADDED
File without changes
dvats_xai/.ipynb_checkpoints/_modidx-checkpoint.py ADDED
@@ -0,0 +1,14 @@
+ # Autogenerated by nbdev
+
+ d = { 'settings': { 'branch': 'master',
+                     'doc_baseurl': '/dvats/',
+                     'doc_host': 'https://vrodriguezf.github.io',
+                     'git_url': 'https://github.com/vrodriguezf/deepvats',
+                     'lib_path': 'dvats'},
+       'syms': { 'dvats.all': {},
+                 'dvats.dr': {},
+                 'dvats.encoder': {},
+                 'dvats.imports': {},
+                 'dvats.load': {},
+                 'dvats.utils': {},
+                 'dvats.visualization': {}}}
dvats_xai/.ipynb_checkpoints/_nbdev-checkpoint.py ADDED
@@ -0,0 +1,39 @@
+ # AUTOGENERATED BY NBDEV! DO NOT EDIT!
+
+ __all__ = ["index", "modules", "custom_doc_links", "git_url"]
+
+ index = {"check_compatibility": "dr.ipynb",
+          "get_UMAP_prjs": "dr.ipynb",
+          "get_PCA_prjs": "dr.ipynb",
+          "get_TSNE_prjs": "dr.ipynb",
+          "DCAE_torch": "encoder.ipynb",
+          "ENCODER_EMBS_MODULE_NAME": "encoder.ipynb",
+          "get_enc_embs": "encoder.ipynb",
+          "TSArtifact": "load.ipynb",
+          "wandb.apis.public.Artifact.to_df": "load.ipynb",
+          "wandb.apis.public.Artifact.to_tsartifact": "load.ipynb",
+          "infer_or_inject_freq": "load.ipynb",
+          "generate_TS_df": "utils.ipynb",
+          "normalize_columns": "utils.ipynb",
+          "remove_constant_columns": "utils.ipynb",
+          "ReferenceArtifact": "utils.ipynb",
+          "wandb.apis.public.Artifact.to_obj": "utils.ipynb",
+          "PrintLayer": "utils.ipynb",
+          "Learner.export_and_get": "utils.ipynb",
+          "get_wandb_artifacts": "utils.ipynb",
+          "get_pickle_artifact": "utils.ipynb",
+          "plot_TS": "visualization.ipynb",
+          "plot_validation_ts_ae": "visualization.ipynb",
+          "plot_mask": "visualization.ipynb"}
+
+ modules = ["dr.py",
+            "encoder.py",
+            "load.py",
+            "utils.py",
+            "visualization.py"]
+
+ doc_url = "https://vrodriguezf.github.io/tchub/"
+
+ git_url = "https://gitlab.geist.re/pml/x_timecluster_extension/tree/master/"
+
+ def custom_doc_links(name): return None
dvats_xai/.ipynb_checkpoints/all-checkpoint.py ADDED
@@ -0,0 +1,8 @@
+ import dvats
+ from .imports import *
+ from .load import *
+ from .utils import *
+ from .dr import *
+ from .encoder import *
+ from .visualization import *
+ from .xai import *
dvats_xai/.ipynb_checkpoints/dr-checkpoint.py ADDED
@@ -0,0 +1,166 @@
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/dr.ipynb.
+
+ # %% auto 0
+ __all__ = ['get_gpu_memory', 'color_for_percentage', 'create_bar', 'gpu_memory_status', 'check_compatibility', 'get_UMAP_prjs',
+            'get_PCA_prjs', 'get_TSNE_prjs', 'cluster_score']
+
+ # %% ../nbs/dr.ipynb 2
+ import subprocess
+ def get_gpu_memory(device = 0):
+     total_memory = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.total", "--format=csv,noheader,nounits", "--id=" + str(device)])
+     total_memory = int(total_memory.decode().split('\n')[0])
+     used_memory = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.used", "--format=csv,noheader,nounits", "--id=" + str(device)])
+     used_memory = int(used_memory.decode().split('\n')[0])
+
+     percentage = round((used_memory / total_memory) * 100)
+     return used_memory, total_memory, percentage
+
+ def color_for_percentage(percentage):
+     if percentage < 20:
+         return "\033[90m"  # Gray
+     elif percentage < 40:
+         return "\033[94m"  # Blue
+     elif percentage < 60:
+         return "\033[92m"  # Green
+     elif percentage < 80:
+         return "\033[93m"  # Orange
+     else:
+         return "\033[91m"  # Red
+
+ def create_bar(percentage, color_code, length=20):
+     filled_length = int(length * percentage // 100)
+     bar = "█" * filled_length + "-" * (length - filled_length)
+     return color_code + bar + "\033[0m"  # Apply color and reset after bar
+
+ def gpu_memory_status(device=0):
+     used, total, percentage = get_gpu_memory(device)
+     color_code = color_for_percentage(percentage)
+     bar = create_bar(percentage, color_code)
+     print(f"Used mem: {used}")
+     print(f"Total mem: {total}")
+     print(f"Memory Usage: [{bar}] {color_code}{percentage}%\033[0m")
+
+ # %% ../nbs/dr.ipynb 4
+ import umap
+ import cudf
+ import cuml
+ import pandas as pd
+ import numpy as np
+ from fastcore.all import *
+ from .imports import *
+ from .load import TSArtifact
+
+ # %% ../nbs/dr.ipynb 5
+ def check_compatibility(dr_ar:TSArtifact, enc_ar:TSArtifact):
+     "Function to check that the artifact used by the encoder model and the artifact that is \
+     going to be passed through the DR are compatible"
+     try:
+         # Check that both artifacts have the same variables
+         chk_vars = dr_ar.metadata['TS']['vars'] == enc_ar.metadata['TS']['vars']
+         # Check that both artifacts have the same freq
+         chk_freq = dr_ar.metadata['TS']['freq'] == enc_ar.metadata['TS']['freq']
+         # Check that the dr artifact is not normalized (non-normalized data does not have the 'normalization' key)
+         chk_norm = dr_ar.metadata['TS'].get('normalization') is None
+         # Check that the dr artifact has no missing values
+         chk_miss = dr_ar.metadata['TS']['has_missing_values'] == "False"
+         # Check all logical vars.
+         if chk_vars and chk_freq and chk_norm and chk_miss:
+             print("Artifacts are compatible.")
+         else:
+             raise Exception
+     except Exception as e:
+         print("Artifacts are not compatible.")
+         raise e
+     return None
+
+ # %% ../nbs/dr.ipynb 7
+ # Comment this part out once 4_seconds is debugged
+ import hashlib
+
+ # %% ../nbs/dr.ipynb 8
+ import warnings
+ import sys
+ from numba.core.errors import NumbaPerformanceWarning
+ @delegates(cuml.UMAP)
+ def get_UMAP_prjs(
+     input_data,
+     cpu=True,
+     print_flag = False,
+     check_memory_usage = True,
+     **kwargs
+ ):
+     "Compute the projections of `input_data` using UMAP, with a configuration contained in `**kwargs`."
+     if print_flag:
+         print("--> get_UMAP_prjs")
+         print("kwargs: ", kwargs)
+         sys.stdout.flush()
+     ####
+     checksum = hashlib.md5(input_data.tobytes()).hexdigest()
+     print(checksum)
+     ####
+
+     if check_memory_usage: gpu_memory_status()
+
+     warnings.filterwarnings("ignore", category=NumbaPerformanceWarning)  # silence NumbaPerformanceWarning
+
+     #reducer = umap.UMAP(**kwargs) if cpu else cuml.UMAP(**kwargs)
+     if cpu:
+         print("-- umap.UMAP --", cpu)
+         sys.stdout.flush()
+         reducer = umap.UMAP(**kwargs)
+     else:
+         print("-- cuml.UMAP --", cpu)
+         sys.stdout.flush()
+         if 'random_state' in kwargs:
+             kwargs['random_state'] = np.uint64(kwargs['random_state'])
+         reducer = cuml.UMAP(**kwargs)
+
+     if print_flag:
+         print("------- reducer --------")
+         print(reducer)
+         print(reducer.get_params())
+         print("------- reducer --------")
+         sys.stdout.flush()
+
+     projections = reducer.fit_transform(input_data)
+
+     if check_memory_usage: gpu_memory_status()
+     if print_flag:
+         checksum = hashlib.md5(projections.tobytes()).hexdigest()
+         print("prjs checksum ", checksum)
+         print("get_UMAP_prjs -->")
+         sys.stdout.flush()
+     return projections
+
+ # %% ../nbs/dr.ipynb 13
+ @delegates(cuml.PCA)
+ def get_PCA_prjs(X, cpu=False, **kwargs):
+     r"""
+     Computes PCA projections of X
+     """
+     if cpu:
+         raise NotImplementedError
+     else:
+         reducer = cuml.PCA(**kwargs)
+     projections = reducer.fit_transform(X)
+     return projections
+
+ # %% ../nbs/dr.ipynb 15
+ @delegates(cuml.TSNE)
+ def get_TSNE_prjs(X, cpu=False, **kwargs):
+     r"""
+     Computes TSNE projections of X
+     """
+     if cpu:
+         raise NotImplementedError
+     else:
+         reducer = cuml.TSNE(**kwargs)
+     projections = reducer.fit_transform(X)
+     return projections
+
+ # %% ../nbs/dr.ipynb 18
+ from sklearn.metrics import silhouette_score
+ def cluster_score(prjs, clusters_labels, print_flag):
+     score = silhouette_score(prjs, clusters_labels)
+     if print_flag: print("Silhouette_score:", score)
+     return score
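
A minimal usage sketch of the `get_UMAP_prjs` helper exported above. It assumes the library is importable as `dvats` (the import name used inside these files, even though the directory in this commit is `dvats_xai`) and that the module-level RAPIDS imports (`cudf`, `cuml`) plus `umap-learn` are installed; the array shape and UMAP parameters are illustrative, not taken from the repository.

# Project placeholder embeddings to 2-D on the CPU path of get_UMAP_prjs.
# check_memory_usage=False skips the nvidia-smi call so this also runs on CPU-only hosts.
import numpy as np
from dvats.dr import get_UMAP_prjs

embs = np.random.rand(500, 32).astype(np.float32)   # stand-in for encoder embeddings
prjs = get_UMAP_prjs(embs, cpu=True, check_memory_usage=False,
                     n_neighbors=15, min_dist=0.1, random_state=1234)
print(prjs.shape)   # (500, 2) with UMAP's default n_components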
dvats_xai/.ipynb_checkpoints/encoder-checkpoint.py ADDED
@@ -0,0 +1,153 @@
+ # -*- coding: utf-8 -*-
+ """encoder.ipynb
+
+ Automatically generated.
+
+ Original file is located at:
+     /home/macu/work/nbs/encoder.ipynb
+ """
+
+ #default_exp encoder
+
+ #hide
+ %load_ext autoreload
+ %autoreload 2
+
+ #export
+ import pandas as pd
+ import numpy as np
+ from fastcore.all import *
+ from tsai.callback.MVP import *
+ from tsai.imports import *
+ from tsai.models.InceptionTimePlus import InceptionTimePlus
+ from tsai.models.explainability import get_acts_and_grads
+ from tsai.models.layers import *
+ from tsai.data.validation import combine_split_data
+
+ #hide
+ from tsai.all import *
+
+ #export
+ class DCAE_torch(Module):
+     def __init__(self, c_in, seq_len, delta, nfs=[64, 32, 12], kss=[10, 5, 5],
+                  pool_szs=[2,2,3], output_fsz=10):
+         """
+         Create a Deep Convolutional Autoencoder for multivariate time series of `d` dimensions,
+         sliced with a window size of `w`. The parameter `delta` sets the number of latent features that will be
+         contained in the Dense layer of the network. The number of feature
+         maps (filters), the filter size and the pool size can also be adjusted.
+         """
+         assert all_equal([len(x) for x in [nfs, kss, pool_szs]], np.repeat(len(nfs), 3)), \
+             'nfs, kss, and pool_szs must have the same length'
+         assert np.prod(pool_szs) == nfs[-1], \
+             'The number of filters in the last conv layer must be equal to the product of pool sizes'
+         assert seq_len % np.prod(pool_szs) == 0, \
+             'The product of pool sizes must be a divisor of the window size'
+         layers = []
+         for i in range_of(kss):
+             layers += [Conv1d(ni=nfs[i-1] if i>0 else c_in, nf=nfs[i], ks=kss[i]),
+                        nn.MaxPool1d(kernel_size=pool_szs[i])]
+         self.downsample = nn.Sequential(*layers)
+         self.bottleneck = nn.Sequential(OrderedDict([
+             ('flatten', nn.Flatten()),
+             ('latent_in', nn.Linear(seq_len, delta)),
+             ('latent_out', nn.Linear(delta, seq_len)),
+             ('reshape', Reshape(nfs[-1], seq_len // np.prod(pool_szs)))
+         ]))
+         layers = []
+         for i in reversed(range_of(kss)):
+             layers += [Conv1d(ni=nfs[i+1] if i != (len(nfs)-1) else nfs[-1],
+                               nf=nfs[i], ks=kss[i]),
+                        nn.Upsample(scale_factor=pool_szs[i])]
+         layers += [Conv1d(ni=nfs[0], nf=c_in, kernel_size=output_fsz)]
+         self.upsample = nn.Sequential(*layers)
+
+     def forward(self, x):
+         x = self.downsample(x)
+         x = self.bottleneck(x)
+         x = self.upsample(x)
+         return x
+
+ #hide
+ foo = torch.rand(3, 1, 48)
+ m = DCAE_torch(c_in=foo.shape[1], seq_len=foo.shape[2], delta=12)
+ m(foo).shape
+
+ #export
+ ENCODER_EMBS_MODULE_NAME = {
+     InceptionTimePlus: 'backbone',  # for mvp based models
+     DCAE_torch: 'bottleneck.latent_in'
+ }
+
+ #export
+ def get_enc_embs(X, enc_learn, module=None, cpu=False, average_seq_dim=True, to_numpy=True):
+     """
+     Get the embeddings of X from an encoder, passed in `enc_learn` as a fastai
+     learner. By default, the embeddings are obtained from the last layer
+     before the model head, although any layer can be passed to `module`.
+     Input
+     - `cpu`: Whether to do the model inference on CPU or GPU (GPU recommended)
+     - `average_seq_dim`: Whether to aggregate the embeddings in the sequence dimension
+     - `to_numpy`: Whether to return the result as a numpy array (if false returns a tensor)
+     """
+     if cpu:
+         print("--> Get enc embs CPU")
+         enc_learn.dls.cpu()
+         enc_learn.cpu()
+     else:
+         print("--> Use CUDA | Get enc embs GPU")
+         enc_learn.dls.cuda()
+         enc_learn.cuda()
+         print("devices: ", enc_learn.dls.device, enc_learn.model.device)
+         print("Use CUDA -->")
+     if enc_learn.dls.bs == 0: enc_learn.dls.bs = 64
+     print("--> Get enc embs bs: ", enc_learn.dls.bs)
+     aux_dl = enc_learn.dls.valid.new_dl(X=X)
+     aux_dl.bs = enc_learn.dls.bs if enc_learn.dls.bs > 0 else 64
+     module = nested_attr(enc_learn.model,
+                          ENCODER_EMBS_MODULE_NAME[type(enc_learn.model)]) \
+              if module is None else module
+     embs = [get_acts_and_grads(model=enc_learn.model,
+                                modules=module,
+                                x=xb[0], cpu=cpu)[0] for xb in aux_dl]
+     embs = to_concat(embs)
+     if embs.ndim == 3 and average_seq_dim: embs = embs.mean(axis=2)
+     if to_numpy: embs = embs.numpy() if cpu else embs.cpu().numpy()
+     return embs
+
+ #hide
+ import wandb
+ from dvats.utils import *
+ wandb_api = wandb.Api()
+ enc_artifact = wandb_api.artifact('deepvats/mvp:latest')
+ enc_learner = enc_artifact.to_obj()
+ X = torch.rand(9, 1, 48)
+
+ #hide
+ #slow
+ #%%time
+ embs = get_enc_embs(X, enc_learner, cpu=True)
+ test_eq(embs.shape[0], X.shape[0])
+ embs.shape, embs.__class__
+
+ #hide
+ %%time
+ embs = get_enc_embs(X, enc_learner, cpu=False, to_numpy=False)
+ test_eq(embs.shape[0], X.shape[0])
+ embs.shape, embs.__class__, embs.device
+
+ #hide
+ %%time
+ embs = get_enc_embs(X, enc_learner, cpu=False, to_numpy=True)
+ test_eq(embs.shape[0], X.shape[0])
+ embs.shape, embs.__class__
+
+ #hide
+
+ #from nbdev.export import notebook2script
+
+ #notebook2script()
+
+ #from tsai import nb2py
+ #nb2py
+ #beep(1)
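
The `ENCODER_EMBS_MODULE_NAME` table above maps a model class to the dotted path of the layer whose activations become the embeddings, and `get_enc_embs` resolves that path with fastcore's `nested_attr`. Below is a self-contained sketch of the same lookup using only `torch` and `functools.reduce`; the module layout is illustrative, not the real DCAE.

import torch.nn as nn
from functools import reduce

# Stand-in for a DCAE_torch-style model that has a bottleneck.latent_in layer
model = nn.Module()
model.bottleneck = nn.Sequential()
model.bottleneck.latent_in = nn.Linear(48, 12)

def resolve(module, dotted_path):
    # Equivalent of nested_attr for this purpose: walk the dotted path one attribute at a time
    return reduce(getattr, dotted_path.split('.'), module)

print(resolve(model, 'bottleneck.latent_in'))   # Linear(in_features=48, out_features=12, bias=True)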
dvats_xai/.ipynb_checkpoints/imports-checkpoint.py ADDED
@@ -0,0 +1,24 @@
+ from IPython.display import Audio, display, HTML, Javascript, clear_output  # from tsai
+ import importlib
+ import numpy as np
+ import time
+ import sys
+
+ ##
+ # Constants
+ ##
+ WANDB_ARTIFACTS_DIR = 'data/wandb_artifacts'
+
+ # General purpose functions
+ def beep(inp=1, duration=.1, n=1):
+     rate = 10000
+     mult = 1.6 * inp if inp else .08
+     wave = np.sin(mult*np.arange(rate*duration))
+     for i in range(n):
+         display(Audio(wave, rate=10000, autoplay=True))
+         time.sleep(duration / .1)
+
+ def m_reload(package_name):
+     for k, v in sys.modules.items():
+         if k.startswith(package_name):
+             importlib.reload(v)
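
`m_reload` above re-imports every loaded submodule of a package, which is handy after editing library code from a notebook. A small stdlib-only sketch of the same idea; the `list(...)` copy is an addition here so that reloading cannot mutate `sys.modules` while it is being iterated.

import importlib
import sys

def m_reload(package_name):
    # Reload every already-imported module whose name starts with package_name
    for k, v in list(sys.modules.items()):
        if k.startswith(package_name):
            importlib.reload(v)

import json
m_reload('json')   # harmless stdlib demo; in deepvats this would be m_reload('dvats')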
dvats_xai/.ipynb_checkpoints/load-checkpoint.py ADDED
@@ -0,0 +1,166 @@
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/load.ipynb.
+
+ # %% auto 0
+ __all__ = ['TSArtifact', 'infer_or_inject_freq']
+
+ # %% ../nbs/load.ipynb 2
+ import pandas as pd
+ import numpy as np
+ from fastcore.all import *
+ import wandb
+ from datetime import datetime, timedelta
+ from .imports import *
+ from .utils import *
+ import pickle
+ import pyarrow.feather as ft
+
+ # %% ../nbs/load.ipynb 7
+ class TSArtifact(wandb.Artifact):
+
+     default_storage_path = Path(Path.home()/'data/wandb_artifacts/')
+     date_format = '%Y-%m-%d %H:%M:%S'  # TODO add milliseconds
+     handle_missing_values_techniques = {
+         'linear_interpolation': lambda df : df.interpolate(method='linear', limit_direction='both'),
+         'overall_mean': lambda df : df.fillna(df.mean()),
+         'overall_median': lambda df : df.fillna(df.median()),
+         'backward_fill' : lambda df : df.fillna(method='bfill'),
+         'forward_fill' : lambda df : df.fillna(method='ffill')
+     }
+
+     "Class that represents a wandb artifact containing time series data. sd stands for start_date \
+     and ed for end_date. Both should be pd.Timestamps"
+
+     @delegates(wandb.Artifact.__init__)
+     def __init__(self, name, sd:pd.Timestamp, ed:pd.Timestamp, **kwargs):
+         super().__init__(type='dataset', name=name, **kwargs)
+         self.sd = sd
+         self.ed = ed
+         if self.metadata is None:
+             self.metadata = dict()
+         self.metadata['TS'] = dict(sd = self.sd.strftime(self.date_format),
+                                    ed = self.ed.strftime(self.date_format))
+
+
+     @classmethod
+     def from_daily_csv_files(cls, root_path, fread=pd.read_csv, start_date=None, end_date=None, metadata=None, **kwargs):
+
+         "Create a wandb artifact of type `dataset`, containing the CSV files from `start_date` \
+         to `end_date`. Dates must be passed as `datetime.datetime` objects. If a `wandb_run` is \
+         defined, the created artifact will be logged to that run, using the longwall name as \
+         artifact name, and the date range as version."
+
+         return None
+
+
+     @classmethod
+     @delegates(__init__)
+     def from_df(cls, df:pd.DataFrame, name:str, path:str=None, sd:pd.Timestamp=None, ed:pd.Timestamp=None,
+                 normalize:bool=False, missing_values_technique:str=None, resampling_freq:str=None, **kwargs):
+
+         """
+         Create a TSArtifact of type `dataset`, using the DataFrame `df` samples from \
+         `sd` (start date) to `ed` (end date). Dates must be passed as `datetime.datetime` \
+         objects. The transformed DataFrame is stored as a pickle file in the path `path` \
+         and its reference is added to the artifact entries. Additionally, the dataset can \
+         be normalized (see `normalize` argument) or transformed using missing values \
+         handling techniques (see `missing_values_technique` argument) or resampling (see \
+         `resampling_freq` argument).
+
+         Arguments:
+             df: (DataFrame) The dataframe you want to convert into an artifact.
+             name: (str) The artifact name.
+             path: (str, optional) The path where the file, containing the new transformed \
+                 dataframe, is saved. Default None.
+             sd: (pd.Timestamp, optional) Start date. By default, the first index of `df` is taken.
+             ed: (pd.Timestamp, optional) End date. By default, the last index of `df` is taken.
+             normalize: (bool, optional) If the dataset values should be normalized. Default \
+                 False.
+             missing_values_technique: (str, optional) The technique used to handle missing \
+                 values. Options: "linear_interpolation", "overall_mean", "overall_median" or \
+                 None. Default None.
+             resampling_freq: (str, optional) The offset string or object representing \
+                 frequency conversion for time series resampling. Default None.
+
+         Returns:
+             TSArtifact object.
+         """
+         sd = df.index[0] if sd is None else sd
+         ed = df.index[-1] if ed is None else ed
+         obj = cls(name, sd=sd, ed=ed, **kwargs)
+         df = df.query('@obj.sd <= index <= @obj.ed')
+         obj.metadata['TS']['created'] = 'from-df'
+         obj.metadata['TS']['n_vars'] = df.columns.__len__()
+
+         # Handle Missing Values
+         df = obj.handle_missing_values_techniques[missing_values_technique](df) if missing_values_technique is not None else df
+         obj.metadata['TS']['handle_missing_values_technique'] = missing_values_technique.__str__()
+         obj.metadata['TS']['has_missing_values'] = np.any(df.isna().values).__str__()
+
+         # Indexing and Resampling
+         if resampling_freq: df = df.resample(resampling_freq).mean()
+         obj.metadata['TS']['n_samples'] = len(df)
+         obj.metadata['TS']['freq'] = str(df.index.freq)
+
+         # Time Series Variables
+         obj.metadata['TS']['vars'] = list(df.columns)
+
+         # Normalization - Save the previous means and stds
+         if normalize:
+             obj.metadata['TS']['normalization'] = dict(means = df.describe().loc['mean'].to_dict(),
+                                                        stds = df.describe().loc['std'].to_dict())
+             df = normalize_columns(df)
+
+         # Hash and save
+         hash_code = str(pd.util.hash_pandas_object(df).sum())  # str(hash(df.values.tobytes()))
+         path = obj.default_storage_path/f'{hash_code}' if path is None else Path(path)/f'{hash_code}.feather'
+         ft.write_feather(df, path)
+         obj.metadata['TS']['hash'] = hash_code
+         obj.add_file(str(path))
+
+         return obj
+
+ # %% ../nbs/load.ipynb 11
+ @patch
+ def to_df(self:wandb.apis.public.Artifact):
+     "Download the files of a saved wandb artifact and process them as a single dataframe. The artifact must \
+     come from a call to `run.use_artifact` with a proper wandb run."
+     # The way we have to ensure that the argument comes from a TS artifact is the metadata
+     if self.metadata.get('TS') is None:
+         print(f'ERROR:{self} does not come from a logged TSArtifact')
+         return None
+     dir = Path(self.download())
+     if self.metadata['TS']['created'] == 'from-df':
+         # Call read_pickle with the single file from dir
+         #return pd.read_pickle(dir.ls()[0])
+         return ft.read_feather(dir.ls()[0])
+     else:
+         print("ERROR: Only the from_df method is supported yet")
+
+ # %% ../nbs/load.ipynb 13
+ @patch
+ def to_tsartifact(self:wandb.apis.public.Artifact):
+     "Cast an artifact as a TS artifact. The artifact must have been created from one of the \
+     class creation methods of the class `TSArtifact`. This is useful to go back to a TSArtifact \
+     after downloading an artifact through the wandb API"
+     return TSArtifact(name=self.digest,  #TODO change this
+                       sd=pd.to_datetime(self.metadata['TS']['sd'], format=TSArtifact.date_format),
+                       ed=pd.to_datetime(self.metadata['TS']['ed'], format=TSArtifact.date_format),
+                       description=self.description,
+                       metadata=self.metadata)
+
+ # %% ../nbs/load.ipynb 15
+ @delegates(pd.to_datetime)
+ def infer_or_inject_freq(df, injected_freq='1s', start_date=None, **kwargs):
+     """
+     Infer the index frequency. If there is no proper time index, create fake timestamps,
+     keeping the desired `injected_freq`. If that is None, set a default one of 1 second.
+     start_date: the first date of the index (int or string).
+     """
+     inferred_freq = pd.infer_freq(df.index)
+     if inferred_freq == 'N':
+         timedelta = pd.to_timedelta(injected_freq)
+         df.index = pd.to_datetime(ifnone(start_date, 0), **kwargs) + timedelta*df.index
+         df.index.freq = pd.infer_freq(df.index)
+     else:
+         df.index.freq = inferred_freq
+     return df
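
`TSArtifact.from_df` picks its gap-filling function from the `handle_missing_values_techniques` table defined above. A plain-pandas sketch of two of those strategies, with no wandb run required; the data values are made up.

import numpy as np
import pandas as pd

df = pd.DataFrame({'a': [1.0, np.nan, 3.0], 'b': [np.nan, 2.0, np.nan]})
techniques = {
    'linear_interpolation': lambda d: d.interpolate(method='linear', limit_direction='both'),
    'forward_fill':         lambda d: d.fillna(method='ffill'),
}
print(techniques['linear_interpolation'](df))   # interior gaps interpolated, edges filled from the nearest value
print(techniques['forward_fill'](df))           # 'b' keeps its leading NaN: nothing earlier to fill forward from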
dvats_xai/.ipynb_checkpoints/utils-checkpoint.py ADDED
@@ -0,0 +1,134 @@
+ # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/utils.ipynb (unless otherwise specified).
+
+ __all__ = ['generate_TS_df', 'normalize_columns', 'remove_constant_columns', 'ReferenceArtifact', 'PrintLayer',
+            'get_wandb_artifacts', 'get_pickle_artifact']
+
+ # Cell
+ from .imports import *
+ from fastcore.all import *
+ import wandb
+ import pickle
+ import pandas as pd
+ import numpy as np
+ #import tensorflow as tf
+ import torch.nn as nn
+ from fastai.basics import *
+
+ # Cell
+ def generate_TS_df(rows, cols):
+     "Generates a dataframe containing a multivariate time series, where each column \
+     represents a variable and each row a time point (sample). The timestamp is in the \
+     index of the dataframe, and it is created with an even spacing of 1 second between samples"
+     index = np.arange(pd.Timestamp.now(),
+                       pd.Timestamp.now() + pd.Timedelta(rows-1, 'seconds'),
+                       pd.Timedelta(1, 'seconds'))
+     data = np.random.randn(len(index), cols)
+     return pd.DataFrame(data, index=index)
+
+ # Cell
+ def normalize_columns(df:pd.DataFrame):
+     "Normalize columns from `df` to have 0 mean and 1 standard deviation"
+     mean = df.mean()
+     std = df.std() + 1e-7
+     return (df-mean)/std
+
+ # Cell
+ def remove_constant_columns(df:pd.DataFrame):
+     return df.loc[:, (df != df.iloc[0]).any()]
+
+ # Cell
+ class ReferenceArtifact(wandb.Artifact):
+     default_storage_path = Path('data/wandb_artifacts/')  # * this path is relative to Path.home()
+     "This class is meant to create an artifact with a single reference to an object \
+     passed as argument in the constructor. The object will be pickled, hashed and stored \
+     in a specified folder."
+     @delegates(wandb.Artifact.__init__)
+     def __init__(self, obj, name, type='object', folder=None, **kwargs):
+         super().__init__(type=type, name=name, **kwargs)
+         # pickle dumps the object and then hash it
+         hash_code = str(hash(pickle.dumps(obj)))
+         folder = Path(ifnone(folder, Path.home()/self.default_storage_path))
+         with open(f'{folder}/{hash_code}', 'wb') as f:
+             pickle.dump(obj, f)
+         self.add_reference(f'file://{folder}/{hash_code}')
+         if self.metadata is None:
+             self.metadata = dict()
+         self.metadata['ref'] = dict()
+         self.metadata['ref']['hash'] = hash_code
+         self.metadata['ref']['type'] = str(obj.__class__)
+
+ # Cell
+ @patch
+ def to_obj(self:wandb.apis.public.Artifact):
+     """Download the files of a saved ReferenceArtifact and get the referenced object. The artifact must \
+     come from a call to `run.use_artifact` with a proper wandb run."""
+     if self.metadata.get('ref') is None:
+         print(f'ERROR:{self} does not come from a saved ReferenceArtifact')
+         return None
+     original_path = ReferenceArtifact.default_storage_path/self.metadata['ref']['hash']
+     path = original_path if original_path.exists() else Path(self.download()).ls()[0]
+     with open(path, 'rb') as f:
+         obj = pickle.load(f)
+     return obj
+
+ # Cell
+ import torch.nn as nn
+ class PrintLayer(nn.Module):
+     def __init__(self):
+         super(PrintLayer, self).__init__()
+
+     def forward(self, x):
+         # Do your print / debug stuff here
+         print(x.shape)
+         return x
+
+ # Cell
+ @patch
+ def export_and_get(self:Learner, keep_exported_file=False):
+     """
+     Export the learner into an auxiliary file, load it and return it back.
+     """
+     aux_path = Path('aux.pkl')
+     self.export(fname='aux.pkl')
+     aux_learn = load_learner('aux.pkl')
+     if not keep_exported_file: aux_path.unlink()
+     return aux_learn
+
+ # Cell
+ def get_wandb_artifacts(project_path, type=None, name=None, last_version=True):
+     """
+     Get the artifacts logged in a wandb project.
+     Input:
+     - `project_path` (str): entity/project_name
+     - `type` (str): whether to return only one type of artifacts
+     - `name` (str): Leave None to have all artifact names
+     - `last_version`: whether to return only the last version of each artifact or not
+
+     Output: List of artifacts
+     """
+     public_api = wandb.Api()
+     if type is not None:
+         types = [public_api.artifact_type(type, project_path)]
+     else:
+         types = public_api.artifact_types(project_path)
+
+     res = L()
+     for kind in types:
+         for collection in kind.collections():
+             if name is None or name == collection.name:
+                 versions = public_api.artifact_versions(
+                     kind.type,
+                     "/".join([kind.entity, kind.project, collection.name]),
+                     per_page=1,
+                 )
+                 if last_version: res += next(versions)
+                 else: res += L(versions)
+     return list(res)
+
+ # Cell
+ def get_pickle_artifact(filename):
+
+     with open(filename, "rb") as f:
+         df = pickle.load(f)
+
+     return df
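
A short sketch chaining the dataframe helpers defined above. It assumes the library is importable as `dvats` and that its import-time dependencies (wandb, fastai, torch) are installed; the sizes are arbitrary.

from dvats.utils import generate_TS_df, normalize_columns, remove_constant_columns

df = generate_TS_df(rows=60, cols=3)     # synthetic multivariate series, 1-second spacing
df['const'] = 1.0                        # add a constant column so there is something to drop
df = remove_constant_columns(df)         # drops 'const'
df = normalize_columns(df)               # zero mean, unit std per column
print(df.shape)
print(df.mean().round(3).to_list())      # approximately [0.0, 0.0, 0.0]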
dvats_xai/__init__.py ADDED
@@ -0,0 +1 @@
+ __version__ = "0.0.1"
dvats_xai/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (147 Bytes).
dvats_xai/__pycache__/all.cpython-310.pyc ADDED
Binary file (273 Bytes).
dvats_xai/__pycache__/dr.cpython-310.pyc ADDED
Binary file (4.12 kB).
dvats_xai/__pycache__/encoder.cpython-310.pyc ADDED
Binary file (10.5 kB).
dvats_xai/__pycache__/imports.cpython-310.pyc ADDED
Binary file (940 Bytes).
dvats_xai/__pycache__/load.cpython-310.pyc ADDED
Binary file (7.04 kB).
dvats_xai/__pycache__/utils.cpython-310.pyc ADDED
Binary file (7.84 kB).
dvats_xai/__pycache__/visualization.cpython-310.pyc ADDED
Binary file (2.22 kB).
dvats_xai/__pycache__/xai.cpython-310.pyc ADDED
Binary file (26.1 kB).
dvats_xai/_modidx.py ADDED
@@ -0,0 +1,105 @@
+ # Autogenerated by nbdev
+
+ d = { 'settings': { 'branch': 'master',
+                     'doc_baseurl': '/dvats/',
+                     'doc_host': 'https://vrodriguezf.github.io',
+                     'git_url': 'https://github.com/vrodriguezf/deepvats',
+                     'lib_path': 'dvats'},
+       'syms': { 'dvats.all': {},
+                 'dvats.dr': { 'dvats.dr.check_compatibility': ('dr.html#check_compatibility', 'dvats/dr.py'),
+                               'dvats.dr.cluster_score': ('dr.html#cluster_score', 'dvats/dr.py'),
+                               'dvats.dr.color_for_percentage': ('dr.html#color_for_percentage', 'dvats/dr.py'),
+                               'dvats.dr.create_bar': ('dr.html#create_bar', 'dvats/dr.py'),
+                               'dvats.dr.get_PCA_prjs': ('dr.html#get_pca_prjs', 'dvats/dr.py'),
+                               'dvats.dr.get_TSNE_prjs': ('dr.html#get_tsne_prjs', 'dvats/dr.py'),
+                               'dvats.dr.get_UMAP_prjs': ('dr.html#get_umap_prjs', 'dvats/dr.py'),
+                               'dvats.dr.get_gpu_memory': ('dr.html#get_gpu_memory', 'dvats/dr.py'),
+                               'dvats.dr.gpu_memory_status': ('dr.html#gpu_memory_status', 'dvats/dr.py')},
+                 'dvats.encoder': { 'dvats.encoder.DCAE_torch': ('encoder.html#dcae_torch', 'dvats/encoder.py'),
+                                    'dvats.encoder.DCAE_torch.__init__': ('encoder.html#__init__', 'dvats/encoder.py'),
+                                    'dvats.encoder.DCAE_torch.forward': ('encoder.html#forward', 'dvats/encoder.py'),
+                                    'dvats.encoder.color_for_percentage': ('encoder.html#color_for_percentage', 'dvats/encoder.py'),
+                                    'dvats.encoder.create_bar': ('encoder.html#create_bar', 'dvats/encoder.py'),
+                                    'dvats.encoder.get_enc_embs': ('encoder.html#get_enc_embs', 'dvats/encoder.py'),
+                                    'dvats.encoder.get_enc_embs_set_stride_set_batch_size': ( 'encoder.html#get_enc_embs_set_stride_set_batch_size',
+                                                                                              'dvats/encoder.py'),
+                                    'dvats.encoder.get_gpu_memory_': ('encoder.html#get_gpu_memory_', 'dvats/encoder.py'),
+                                    'dvats.encoder.gpu_memory_status_': ('encoder.html#gpu_memory_status_', 'dvats/encoder.py')},
+                 'dvats.imports': {},
+                 'dvats.load': { 'dvats.load.TSArtifact': ('load.html#tsartifact', 'dvats/load.py'),
+                                 'dvats.load.TSArtifact.__init__': ('load.html#__init__', 'dvats/load.py'),
+                                 'dvats.load.TSArtifact.from_daily_csv_files': ('load.html#from_daily_csv_files', 'dvats/load.py'),
+                                 'dvats.load.TSArtifact.from_df': ('load.html#from_df', 'dvats/load.py'),
+                                 'dvats.load.infer_or_inject_freq': ('load.html#infer_or_inject_freq', 'dvats/load.py'),
+                                 'dvats.load.wandb.apis.public.Artifact.to_df': ('load.html#wandb.apis.public.artifact.to_df', 'dvats/load.py'),
+                                 'dvats.load.wandb.apis.public.Artifact.to_tsartifact': ( 'load.html#wandb.apis.public.artifact.to_tsartifact',
+                                                                                          'dvats/load.py')},
+                 'dvats.utils': { 'dvats.utils.Learner.export_and_get': ('utils.html#learner.export_and_get', 'dvats/utils.py'),
+                                  'dvats.utils.PrintLayer': ('utils.html#printlayer', 'dvats/utils.py'),
+                                  'dvats.utils.PrintLayer.__init__': ('utils.html#__init__', 'dvats/utils.py'),
+                                  'dvats.utils.PrintLayer.forward': ('utils.html#forward', 'dvats/utils.py'),
+                                  'dvats.utils.ReferenceArtifact': ('utils.html#referenceartifact', 'dvats/utils.py'),
+                                  'dvats.utils.ReferenceArtifact.__init__': ('utils.html#__init__', 'dvats/utils.py'),
+                                  'dvats.utils.exec_with_and_feather_k_output': ('utils.html#exec_with_and_feather_k_output', 'dvats/utils.py'),
+                                  'dvats.utils.exec_with_feather': ('utils.html#exec_with_feather', 'dvats/utils.py'),
+                                  'dvats.utils.exec_with_feather_k_output': ('utils.html#exec_with_feather_k_output', 'dvats/utils.py'),
+                                  'dvats.utils.generate_TS_df': ('utils.html#generate_ts_df', 'dvats/utils.py'),
+                                  'dvats.utils.get_pickle_artifact': ('utils.html#get_pickle_artifact', 'dvats/utils.py'),
+                                  'dvats.utils.get_wandb_artifacts': ('utils.html#get_wandb_artifacts', 'dvats/utils.py'),
+                                  'dvats.utils.learner_module_leaves': ('utils.html#learner_module_leaves', 'dvats/utils.py'),
+                                  'dvats.utils.learner_module_leaves_subtables': ( 'utils.html#learner_module_leaves_subtables',
+                                                                                   'dvats/utils.py'),
+                                  'dvats.utils.normalize_columns': ('utils.html#normalize_columns', 'dvats/utils.py'),
+                                  'dvats.utils.py_function': ('utils.html#py_function', 'dvats/utils.py'),
+                                  'dvats.utils.remove_constant_columns': ('utils.html#remove_constant_columns', 'dvats/utils.py'),
+                                  'dvats.utils.wandb.apis.public.Artifact.to_obj': ( 'utils.html#wandb.apis.public.artifact.to_obj',
+                                                                                     'dvats/utils.py')},
+                 'dvats.visualization': { 'dvats.visualization.plot_TS': ('visualization.html#plot_ts', 'dvats/visualization.py'),
+                                          'dvats.visualization.plot_mask': ('visualization.html#plot_mask', 'dvats/visualization.py'),
+                                          'dvats.visualization.plot_validation_ts_ae': ( 'visualization.html#plot_validation_ts_ae',
+                                                                                          'dvats/visualization.py')},
+                 'dvats.xai': { 'dvats.xai.InteractiveAnomalyPlot': ('xai.html#interactiveanomalyplot', 'dvats/xai.py'),
+                                'dvats.xai.InteractiveAnomalyPlot.__init__': ('xai.html#__init__', 'dvats/xai.py'),
+                                'dvats.xai.InteractiveAnomalyPlot.plot_projections_clusters_interactive': ( 'xai.html#plot_projections_clusters_interactive',
+                                                                                                            'dvats/xai.py'),
+                                'dvats.xai.InteractiveTSPlot': ('xai.html#interactivetsplot', 'dvats/xai.py'),
+                                'dvats.xai.InteractiveTSPlot.__init__': ('xai.html#__init__', 'dvats/xai.py'),
+                                'dvats.xai.add_movement_buttons': ('xai.html#add_movement_buttons', 'dvats/xai.py'),
+                                'dvats.xai.add_selected_features': ('xai.html#add_selected_features', 'dvats/xai.py'),
+                                'dvats.xai.add_windows': ('xai.html#add_windows', 'dvats/xai.py'),
+                                'dvats.xai.anomaly_score': ('xai.html#anomaly_score', 'dvats/xai.py'),
+                                'dvats.xai.calculate_cluster_stats': ('xai.html#calculate_cluster_stats', 'dvats/xai.py'),
+                                'dvats.xai.delta_x_bigger': ('xai.html#delta_x_bigger', 'dvats/xai.py'),
+                                'dvats.xai.delta_x_lower': ('xai.html#delta_x_lower', 'dvats/xai.py'),
+                                'dvats.xai.delta_y_bigger': ('xai.html#delta_y_bigger', 'dvats/xai.py'),
+                                'dvats.xai.delta_y_lower': ('xai.html#delta_y_lower', 'dvats/xai.py'),
+                                'dvats.xai.detector': ('xai.html#detector', 'dvats/xai.py'),
+                                'dvats.xai.get_anomalies': ('xai.html#get_anomalies', 'dvats/xai.py'),
+                                'dvats.xai.get_anomaly_styles': ('xai.html#get_anomaly_styles', 'dvats/xai.py'),
+                                'dvats.xai.get_dataset': ('xai.html#get_dataset', 'dvats/xai.py'),
+                                'dvats.xai.get_dateformat': ('xai.html#get_dateformat', 'dvats/xai.py'),
+                                'dvats.xai.get_df_selected': ('xai.html#get_df_selected', 'dvats/xai.py'),
+                                'dvats.xai.get_embeddings': ('xai.html#get_embeddings', 'dvats/xai.py'),
+                                'dvats.xai.get_prjs': ('xai.html#get_prjs', 'dvats/xai.py'),
+                                'dvats.xai.initial_plot': ('xai.html#initial_plot', 'dvats/xai.py'),
+                                'dvats.xai.merge_overlapping_windows': ('xai.html#merge_overlapping_windows', 'dvats/xai.py'),
+                                'dvats.xai.move_down': ('xai.html#move_down', 'dvats/xai.py'),
+                                'dvats.xai.move_left': ('xai.html#move_left', 'dvats/xai.py'),
+                                'dvats.xai.move_right': ('xai.html#move_right', 'dvats/xai.py'),
+                                'dvats.xai.move_up': ('xai.html#move_up', 'dvats/xai.py'),
+                                'dvats.xai.plot_anomaly_scores_distribution': ('xai.html#plot_anomaly_scores_distribution', 'dvats/xai.py'),
+                                'dvats.xai.plot_clusters_with_anomalies': ('xai.html#plot_clusters_with_anomalies', 'dvats/xai.py'),
+                                'dvats.xai.plot_clusters_with_anomalies_interactive_plot': ( 'xai.html#plot_clusters_with_anomalies_interactive_plot',
+                                                                                             'dvats/xai.py'),
+                                'dvats.xai.plot_initial_config': ('xai.html#plot_initial_config', 'dvats/xai.py'),
+                                'dvats.xai.plot_projections': ('xai.html#plot_projections', 'dvats/xai.py'),
+                                'dvats.xai.plot_projections_clusters': ('xai.html#plot_projections_clusters', 'dvats/xai.py'),
+                                'dvats.xai.plot_save': ('xai.html#plot_save', 'dvats/xai.py'),
+                                'dvats.xai.set_features_buttons': ('xai.html#set_features_buttons', 'dvats/xai.py'),
+                                'dvats.xai.setup_boxes': ('xai.html#setup_boxes', 'dvats/xai.py'),
+                                'dvats.xai.setup_style': ('xai.html#setup_style', 'dvats/xai.py'),
+                                'dvats.xai.shift_datetime': ('xai.html#shift_datetime', 'dvats/xai.py'),
+                                'dvats.xai.show': ('xai.html#show', 'dvats/xai.py'),
+                                'dvats.xai.toggle_trace': ('xai.html#toggle_trace', 'dvats/xai.py'),
+                                'dvats.xai.umap_parameters': ('xai.html#umap_parameters', 'dvats/xai.py'),
+                                'dvats.xai.update_plot': ('xai.html#update_plot', 'dvats/xai.py')}}}
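
The `d` dictionary above is nbdev's symbol index: `settings` holds the documentation-site configuration and `syms` maps each exported symbol to its documentation anchor and source file. A sketch of how a lookup works, using only nested dict access on this module; the final URL join is illustrative.

from dvats_xai._modidx import d

doc_anchor, src_file = d['syms']['dvats.dr']['dvats.dr.get_UMAP_prjs']
print(doc_anchor, src_file)              # dr.html#get_umap_prjs dvats/dr.py
print(d['settings']['doc_host'] + d['settings']['doc_baseurl'] + doc_anchor)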
dvats_xai/_nbdev.py ADDED
@@ -0,0 +1,39 @@
+ # AUTOGENERATED BY NBDEV! DO NOT EDIT!
+
+ __all__ = ["index", "modules", "custom_doc_links", "git_url"]
+
+ index = {"check_compatibility": "dr.ipynb",
+          "get_UMAP_prjs": "dr.ipynb",
+          "get_PCA_prjs": "dr.ipynb",
+          "get_TSNE_prjs": "dr.ipynb",
+          "DCAE_torch": "encoder.ipynb",
+          "ENCODER_EMBS_MODULE_NAME": "encoder.ipynb",
+          "get_enc_embs": "encoder.ipynb",
+          "TSArtifact": "load.ipynb",
+          "wandb.apis.public.Artifact.to_df": "load.ipynb",
+          "wandb.apis.public.Artifact.to_tsartifact": "load.ipynb",
+          "infer_or_inject_freq": "load.ipynb",
+          "generate_TS_df": "utils.ipynb",
+          "normalize_columns": "utils.ipynb",
+          "remove_constant_columns": "utils.ipynb",
+          "ReferenceArtifact": "utils.ipynb",
+          "wandb.apis.public.Artifact.to_obj": "utils.ipynb",
+          "PrintLayer": "utils.ipynb",
+          "Learner.export_and_get": "utils.ipynb",
+          "get_wandb_artifacts": "utils.ipynb",
+          "get_pickle_artifact": "utils.ipynb",
+          "plot_TS": "visualization.ipynb",
+          "plot_validation_ts_ae": "visualization.ipynb",
+          "plot_mask": "visualization.ipynb"}
+
+ modules = ["dr.py",
+            "encoder.py",
+            "load.py",
+            "utils.py",
+            "visualization.py"]
+
+ doc_url = "https://vrodriguezf.github.io/tchub/"
+
+ git_url = "https://gitlab.geist.re/pml/x_timecluster_extension/tree/master/"
+
+ def custom_doc_links(name): return None
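
`_nbdev.py` is the older (nbdev v1) counterpart of `_modidx.py`: a flat `index` from symbol name to the notebook that defines it, plus the list of exported modules. A minimal lookup sketch, assuming `dvats_xai` is on the Python path.

from dvats_xai._nbdev import index, modules, custom_doc_links

print(index["get_UMAP_prjs"])             # 'dr.ipynb' -- the notebook that defines the symbol
print(modules)                            # the generated .py files
print(custom_doc_links("get_UMAP_prjs"))  # None: no custom documentation links are defined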
dvats_xai/all.py ADDED
@@ -0,0 +1,8 @@
+ import dvats
+ from .imports import *
+ from .load import *
+ from .utils import *
+ from .dr import *
+ from .encoder import *
+ from .visualization import *
+ from .xai import *
dvats_xai/dr.py ADDED
@@ -0,0 +1,166 @@
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/dr.ipynb.
+
+ # %% auto 0
+ __all__ = ['get_gpu_memory', 'color_for_percentage', 'create_bar', 'gpu_memory_status', 'check_compatibility', 'get_UMAP_prjs',
+            'get_PCA_prjs', 'get_TSNE_prjs', 'cluster_score']
+
+ # %% ../nbs/dr.ipynb 2
+ import subprocess
+ def get_gpu_memory(device = 0):
+     total_memory = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.total", "--format=csv,noheader,nounits", "--id=" + str(device)])
+     total_memory = int(total_memory.decode().split('\n')[0])
+     used_memory = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.used", "--format=csv,noheader,nounits", "--id=" + str(device)])
+     used_memory = int(used_memory.decode().split('\n')[0])
+
+     percentage = round((used_memory / total_memory) * 100)
+     return used_memory, total_memory, percentage
+
+ def color_for_percentage(percentage):
+     if percentage < 20:
+         return "\033[90m"  # Gray
+     elif percentage < 40:
+         return "\033[94m"  # Blue
+     elif percentage < 60:
+         return "\033[92m"  # Green
+     elif percentage < 80:
+         return "\033[93m"  # Orange
+     else:
+         return "\033[91m"  # Red
+
+ def create_bar(percentage, color_code, length=20):
+     filled_length = int(length * percentage // 100)
+     bar = "█" * filled_length + "-" * (length - filled_length)
+     return color_code + bar + "\033[0m"  # Apply color and reset after bar
+
+ def gpu_memory_status(device=0):
+     used, total, percentage = get_gpu_memory(device)
+     color_code = color_for_percentage(percentage)
+     bar = create_bar(percentage, color_code)
+     print(f"GPU | Used mem: {used}")
+     print(f"GPU | Total mem: {total}")
+     print(f"GPU | Memory Usage: [{bar}] {color_code}{percentage}%\033[0m")
+
+ # %% ../nbs/dr.ipynb 4
+ import umap
+ import cudf
+ import cuml
+ import pandas as pd
+ import numpy as np
+ from fastcore.all import *
+ from .imports import *
+ from .load import TSArtifact
+
+ # %% ../nbs/dr.ipynb 5
+ def check_compatibility(dr_ar:TSArtifact, enc_ar:TSArtifact):
+     "Function to check that the artifact used by the encoder model and the artifact that is \
+     going to be passed through the DR are compatible"
+     try:
+         # Check that both artifacts have the same variables
+         chk_vars = dr_ar.metadata['TS']['vars'] == enc_ar.metadata['TS']['vars']
+         # Check that both artifacts have the same freq
+         chk_freq = dr_ar.metadata['TS']['freq'] == enc_ar.metadata['TS']['freq']
+         # Check that the dr artifact is not normalized (non-normalized data does not have the 'normalization' key)
+         chk_norm = dr_ar.metadata['TS'].get('normalization') is None
+         # Check that the dr artifact has no missing values
+         chk_miss = dr_ar.metadata['TS']['has_missing_values'] == "False"
+         # Check all logical vars.
+         if chk_vars and chk_freq and chk_norm and chk_miss:
+             print("Artifacts are compatible.")
+         else:
+             raise Exception
+     except Exception as e:
+         print("Artifacts are not compatible.")
+         raise e
+     return None
+
+ # %% ../nbs/dr.ipynb 7
+ # Comment this part out once 4_seconds is debugged
+ import hashlib
+
+ # %% ../nbs/dr.ipynb 8
+ import warnings
+ import sys
+ from numba.core.errors import NumbaPerformanceWarning
+ @delegates(cuml.UMAP)
+ def get_UMAP_prjs(
+     input_data,
+     cpu=True,
+     print_flag = False,
+     check_memory_usage = True,
+     **kwargs
+ ):
+     "Compute the projections of `input_data` using UMAP, with a configuration contained in `**kwargs`."
+     if print_flag:
+         print("--> get_UMAP_prjs")
+         print("kwargs: ", kwargs)
+         sys.stdout.flush()
+     ####
+     checksum = hashlib.md5(input_data.tobytes()).hexdigest()
+     print(checksum)
+     ####
+
+     if check_memory_usage: gpu_memory_status()
+
+     warnings.filterwarnings("ignore", category=NumbaPerformanceWarning)  # silence NumbaPerformanceWarning
+
+     #reducer = umap.UMAP(**kwargs) if cpu else cuml.UMAP(**kwargs)
+     if cpu:
+         print("-- umap.UMAP --", cpu)
+         sys.stdout.flush()
+         reducer = umap.UMAP(**kwargs)
+     else:
+         print("-- cuml.UMAP --", cpu)
+         sys.stdout.flush()
+         if 'random_state' in kwargs:
+             kwargs['random_state'] = np.uint64(kwargs['random_state'])
+         reducer = cuml.UMAP(**kwargs)
+
+     if print_flag:
+         print("------- reducer --------")
+         print(reducer)
+         print(reducer.get_params())
+         print("------- reducer --------")
+         sys.stdout.flush()
+
+     projections = reducer.fit_transform(input_data)
+
+     if check_memory_usage: gpu_memory_status()
+     if print_flag:
+         checksum = hashlib.md5(projections.tobytes()).hexdigest()
+         print("prjs checksum ", checksum)
+         print("get_UMAP_prjs -->")
+         sys.stdout.flush()
+     return projections
+
+ # %% ../nbs/dr.ipynb 13
+ @delegates(cuml.PCA)
+ def get_PCA_prjs(X, cpu=False, **kwargs):
+     r"""
+     Computes PCA projections of X
+     """
+     if cpu:
+         raise NotImplementedError
+     else:
+         reducer = cuml.PCA(**kwargs)
+     projections = reducer.fit_transform(X)
+     return projections
+
+ # %% ../nbs/dr.ipynb 15
+ @delegates(cuml.TSNE)
+ def get_TSNE_prjs(X, cpu=False, **kwargs):
+     r"""
+     Computes TSNE projections of X
+     """
+     if cpu:
+         raise NotImplementedError
+     else:
+         reducer = cuml.TSNE(**kwargs)
+     projections = reducer.fit_transform(X)
+     return projections
+
+ # %% ../nbs/dr.ipynb 18
+ from sklearn.metrics import silhouette_score
+ def cluster_score(prjs, clusters_labels, print_flag):
+     score = silhouette_score(prjs, clusters_labels)
+     if print_flag: print("Silhouette_score:", score)
+     return score
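
`cluster_score` above is a thin wrapper over scikit-learn's `silhouette_score`. The sketch below reproduces its body with scikit-learn only, so it runs without the RAPIDS imports (`cudf`, `cuml`) that `dvats.dr` pulls in at module level; the blobs are synthetic.

import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Two well-separated synthetic blobs playing the role of UMAP projections
prjs = np.vstack([np.random.randn(100, 2) + 5, np.random.randn(100, 2) - 5])
labels = KMeans(n_clusters=2, n_init=10, random_state=0).fit_predict(prjs)

score = silhouette_score(prjs, labels)    # the same call cluster_score makes
print("Silhouette_score:", score)         # close to 1.0 for well-separated clusters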
dvats_xai/encoder.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/encoder.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['ENCODER_EMBS_MODULE_NAME', 'get_gpu_memory_', 'color_for_percentage', 'create_bar', 'gpu_memory_status_',
5
+ 'DCAE_torch', 'get_enc_embs', 'get_enc_embs_set_stride_set_batch_size']
6
+
7
+ # %% ../nbs/encoder.ipynb 2
8
+ import subprocess
9
+ def get_gpu_memory_(device = 0):
10
+ total_memory = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.total", "--format=csv,noheader,nounits", "--id=" + str(device)])
11
+ total_memory = int(total_memory.decode().split('\n')[0])
12
+ used_memory = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.used", "--format=csv,noheader,nounits", "--id=" + str(device)])
13
+ used_memory = int(used_memory.decode().split('\n')[0])
14
+
15
+ percentage = round((used_memory / total_memory) * 100)
16
+ return used_memory, total_memory, percentage
17
+
18
+ def color_for_percentage(percentage):
19
+ if percentage < 20:
20
+ return "\033[90m" # Gray
21
+ elif percentage < 40:
22
+ return "\033[94m" # Blue
23
+ elif percentage < 60:
24
+ return "\033[92m" # Green
25
+ elif percentage < 80:
26
+ return "\033[93m" # Orange
27
+ else:
28
+ return "\033[91m" # Red
29
+
30
+ def create_bar(percentage, color_code, length=20):
31
+ filled_length = int(length * percentage // 100)
32
+ bar = "█" * filled_length + "-" * (length - filled_length)
33
+ return color_code + bar + "\033[0m" # Apply color and reset after bar
34
+
35
+ def gpu_memory_status_(device=0):
36
+ used, total, percentage = get_gpu_memory_(device)
37
+ color_code = color_for_percentage(percentage)
38
+ bar = create_bar(percentage, color_code)
39
+ print(f"GPU | Used mem: {used}")
40
+ print(f"GPU | Used mem: {total}")
41
+ print(f"GPU | Memory Usage: [{bar}] {color_code}{percentage}%\033[0m")
42
+
43
+
44
+ # %% ../nbs/encoder.ipynb 4
45
+ import pandas as pd
46
+ import numpy as np
47
+ from fastcore.all import *
48
+ from tsai.callback.MVP import *
49
+ from tsai.imports import *
50
+ from tsai.models.InceptionTimePlus import InceptionTimePlus
51
+ from tsai.models.explainability import get_acts_and_grads
52
+ from tsai.models.layers import *
53
+ from tsai.data.validation import combine_split_data
54
+ import time
55
+
56
+ # %% ../nbs/encoder.ipynb 7
57
+ class DCAE_torch(Module):
58
+ def __init__(self, c_in, seq_len, delta, nfs=[64, 32, 12], kss=[10, 5, 5],
59
+ pool_szs=[2,2,3], output_fsz=10):
60
+ """
61
+ Create a Deep Convolutional Autoencoder for multivariate time series of `d` dimensions,
62
+ sliced with a window size of `w`. The parameter `delta` sets the number of latent features that will be
63
+ contained in the Dense layer of the network. The the number of features
64
+ maps (filters), the filter size and the pool size can also be adjusted."
65
+ """
66
+ assert all_equal([len(x) for x in [nfs, kss, pool_szs]], np.repeat(len(nfs), 3)), \
67
+ 'nfs, kss, and pool_szs must have the same length'
68
+ assert np.prod(pool_szs) == nfs[-1], \
69
+ 'The number of filters in the last conv layer must be equal to the product of pool sizes'
70
+ assert seq_len % np.prod(pool_szs) == 0, \
71
+ 'The product of pool sizes must be a divisor of the window size'
72
+ layers = []
73
+ for i in range_of(kss):
74
+ layers += [Conv1d(ni=nfs[i-1] if i>0 else c_in, nf=nfs[i], ks=kss[i]),
75
+ nn.MaxPool1d(kernel_size=pool_szs[i])]
76
+ self.downsample = nn.Sequential(*layers)
77
+ self.bottleneck = nn.Sequential(OrderedDict([
78
+ ('flatten', nn.Flatten()),
79
+ ('latent_in', nn.Linear(seq_len, delta)),
80
+ ('latent_out', nn.Linear(delta, seq_len)),
81
+ ('reshape', Reshape(nfs[-1], seq_len // np.prod(pool_szs)))
82
+ ]))
83
+ layers = []
84
+ for i in reversed(range_of(kss)):
85
+ layers += [Conv1d(ni=nfs[i+1] if i != (len(nfs)-1) else nfs[-1],
86
+ nf=nfs[i], ks=kss[i]),
87
+ nn.Upsample(scale_factor=pool_szs[i])]
88
+ layers += [Conv1d(ni=nfs[0], nf=c_in, kernel_size=output_fsz)]
89
+ self.upsample = nn.Sequential(*layers)
90
+
91
+
92
+ def forward(self, x):
93
+ x = self.downsample(x)
94
+ x = self.bottleneck(x)
95
+ x = self.upsample(x)
96
+ return x
97
+
98
+ # %% ../nbs/encoder.ipynb 10
99
+ ENCODER_EMBS_MODULE_NAME = {
100
+ InceptionTimePlus: 'backbone', # for mvp based models
101
+ DCAE_torch: 'bottleneck.latent_in'
102
+ }
103
+
104
+ # %% ../nbs/encoder.ipynb 12
105
+ def get_enc_embs(X, enc_learn, module=None, cpu=False, average_seq_dim=True, to_numpy=True):
106
+ """
107
+ Get the embeddings of X from an encoder, passed in `enc_learn as a fastai
108
+ learner. By default, the embeddings are obtained from the last layer
109
+ before the model head, although any layer can be passed to `model`.
110
+ Input
111
+ - `cpu`: Whether to do the model inference in cpu of gpu (GPU recommended)
112
+ - `average_seq_dim`: Whether to aggregate the embeddings in the sequence dimensions
113
+ - `to_numpy`: Whether to return the result as a numpy array (if false returns a tensor)
114
+ """
115
+ print("--> Check CUDA")
116
+ if cpu:
117
+ print("--> Get enc embs CPU")
118
+ enc_learn.dls.cpu()
119
+ enc_learn.cpu()
120
+ else:
121
+ print("--> Ensure empty cache")
122
+ torch.cuda.empty_cache()
123
+ print("--> Use CUDA |Get enc embs GPU ")
124
+ enc_learn.dls.cuda()
125
+ enc_learn.cuda()
126
+ if torch.cuda.is_available():
127
+ print("CUDA está disponible")
128
+ print("Dispositivo CUDA actual: ", torch.cuda.current_device())
129
+ print("Nombre del dispositivo CUDA actual: ", torch.cuda.get_device_name(torch.cuda.current_device()))
130
+
131
+ else:
132
+ print("CUDA no está disponible ")
133
+ print("Use CUDA -->")
134
+ if enc_learn.dls.bs == 0: enc_learn.dls.bs = 64
135
+
136
+ print("--> Set dataset from X (enc_learn does not contain dls)")
137
+ aux_dl = enc_learn.dls.valid.new_dl(X=X)
138
+ aux_dl.bs = enc_learn.dls.bs if enc_learn.dls.bs>0 else 64
139
+ print("--> Get module")
140
+ module = nested_attr(enc_learn.model,ENCODER_EMBS_MODULE_NAME[type(enc_learn.model)]) if module is None else module
141
+
142
+ print("--> Get enc embs bs: ", aux_dl.bs)
143
+ embs = [
144
+ get_acts_and_grads(
145
+ model=enc_learn.model,
146
+ modules=module,
147
+ x=xb[0],
148
+ cpu=cpu
149
+ )[0]
150
+ for xb in aux_dl
151
+ ]
152
+ print("--> Concat")
153
+ if not cpu:
154
+ total_emb_size = sum([emb.element_size() * emb.nelement() for emb in embs])
155
+ free_memory = torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated()
156
+ if (total_emb_size < free_memory):
157
+ print("Fit in GPU")
158
+ embs=[emb.cuda() for emb in embs]
159
+ else:
160
+ print("Dont fit in GPU --> Go to CPU")
161
+ embs=[emb.cpu() for emb in embs]
162
+ embs = to_concat(embs)
163
+ print("--> reduce")
164
+ if embs.ndim == 3 and average_seq_dim: embs = embs.mean(axis=2)
165
+ print("--> 2 numpy")
166
+ if to_numpy: embs = embs.numpy() if cpu else embs.cpu().numpy()
167
+ return embs
168
+
169
+ # %% ../nbs/encoder.ipynb 13
170
+ def get_enc_embs_set_stride_set_batch_size(
171
+ X, enc_learn, stride, batch_size, module=None, cpu=False, average_seq_dim=True, to_numpy=True,
172
+ print_flag = False, time_flag=False, chunk_size = 0, check_memory_usage = False
173
+ ):
174
+ """
175
+ Get the embeddings of X from an encoder, passed in `enc_learn as a fastai
176
+ learner. By default, the embeddings are obtained from the last layer
177
+ before the model head, although any layer can be passed to `model`.
178
+ Input
179
+ - `cpu`: Whether to do the model inference in cpu of gpu (GPU recommended)
180
+ - `average_seq_dim`: Whether to aggregate the embeddings in the sequence dimensions
181
+ - `to_numpy`: Whether to return the result as a numpy array (if false returns a tensor)
182
+ """
183
+ if time_flag:
184
+ t_start = time.time()
185
+ if print_flag:
186
+ print("--> get_enc_embs_set_stride_set_batch_size")
187
+ if check_memory_usage: gpu_memory_status_()
188
+ #print("get_enc_embs_set_stride_set_batch_size | Check versions")
189
+ #import sys
190
+ #print("get_enc_embs_set_stride_set_batch_size | Check versions | Python version", sys.version)
191
+ #print("get_enc_embs_set_stride_set_batch_size | Check versions | PyTorch version", torch.__version__)
192
+ #print("get_enc_embs_set_stride_set_batch_size | Check versions | CUDA version", torch.version.cuda)
193
+ #print("get_enc_embs_set_stride_set_batch_size | Apply stride & batch size")
194
+
195
+ X = X[::stride]
196
+ enc_learn.dls.bs = batch_size
197
+
198
+ if (print_flag): print("get_enc_embs_set_stride_set_batch_size | Check CUDA | X ~ ", X.shape[0])
199
+ if cpu:
200
+ if (print_flag): print("get_enc_embs_set_stride_set_batch_size | Get enc embs CPU")
201
+ enc_learn.dls.cpu()
202
+ enc_learn.cpu()
203
+ else:
204
+ if torch.cuda.is_available():
205
+ if (print_flag):
206
+ print("get_enc_embs_set_stride_set_batch_size | CUDA device id:", torch.cuda.current_device())
207
+ print("get_enc_embs_set_stride_set_batch_size | CUDA device name: ", torch.cuda.get_device_name(torch.cuda.current_device()))
208
+ print("get_enc_embs_set_stride_set_batch_size | Ensure empty cache & move 2 GPU")
209
+ torch.cuda.empty_cache()
210
+ enc_learn.dls.cuda()
211
+ enc_learn.cuda()
212
+ else:
213
+ if (print_flag): print("get_enc_embs_set_stride_set_batch_size | No cuda available. Set CPU = true")
214
+ cpu = True
215
+
216
+ if enc_learn.dls.bs is None or enc_learn.dls.bs == 0: enc_learn.dls.bs = 64
217
+
218
+ if (print_flag): print("get_enc_embs_set_stride_set_batch_size | Set dataset from X (enc_learn does not contain dls)")
219
+ aux_dl = enc_learn.dls.valid.new_dl(X=X)
220
+ aux_dl.bs = enc_learn.dls.bs if enc_learn.dls.bs>0 else 64
221
+ if (print_flag): print("get_enc_embs_set_stride_set_batch_size | Get module")
222
+ module = nested_attr(enc_learn.model,ENCODER_EMBS_MODULE_NAME[type(enc_learn.model)]) if module is None else module
223
+
224
+ if (print_flag):
225
+ #print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | module ", module)
226
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | aux_dl len", len(aux_dl))
227
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | aux_dl.batch_len ", len(next(iter(aux_dl))))
228
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | aux_dl.bs ", aux_dl.bs)
229
+ if (not cpu):
230
+ total = torch.cuda.get_device_properties(torch.cuda.current_device()).total_memory
231
+ used = torch.cuda.memory_allocated(torch.cuda.current_device())
232
+ reserved = torch.cuda.memory_reserved(torch.cuda.current_device())
233
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | total_mem ", total)
234
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | used_mem ", used)
235
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | reserved_mem ", reserved)
236
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | available_mem ", total-reserved)
237
+ sys.stdout.flush()
238
+
239
+ if (cpu or ( chunk_size == 0 )):
240
+ embs = [
241
+ get_acts_and_grads(
242
+ model=enc_learn.model,
243
+ modules=module,
244
+ x=xb[0],
245
+ cpu=cpu
246
+ )[0]
247
+ for xb in aux_dl
248
+ ]
249
+ if not cpu: embs=[emb.cpu() for emb in embs]
250
+ else:
251
+ embs = []
252
+ total_chunks=max(1,round(len(X)/chunk_size))
253
+ if print_flag: print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | aux_dl len | " + str(len(X)) + " chunk size: " + str(chunk_size) + " => " + str(total_chunks) + " chunks")
254
+ for i in range(0, total_chunks):
255
+ if print_flag:
256
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | Chunk [ " + str(i) + "/"+str(total_chunks)+"] => " + str(round(i*100/total_chunks)) + "%")
257
+ sys.stdout.flush()
258
+ chunk = [batch for (n, batch) in enumerate(aux_dl) if (chunk_size*i <= n and chunk_size*(i+1) > n) ]
259
+ chunk_embs = [
260
+ get_acts_and_grads(
261
+ model=enc_learn.model,
262
+ modules=module,
263
+ x=xb[0],
264
+ cpu=cpu
265
+ )[0]
266
+ for xb in chunk
267
+ ]
268
+ # Move the chunk's embeddings to the CPU
269
+ chunk_embs = [emb.cpu() for emb in chunk_embs]
270
+ embs.extend(chunk_embs)
271
+ torch.cuda.empty_cache()
272
+ if print_flag:
273
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | 100%")
274
+ sys.stdout.flush()
275
+
276
+ if print_flag: print("get_enc_embs_set_stride_set_batch_size | concat embeddings")
277
+
278
+ embs = to_concat(embs)
279
+
280
+ if print_flag: print("get_enc_embs_set_stride_set_batch_size | Reduce")
281
+
282
+ if embs.ndim == 3 and average_seq_dim: embs = embs.mean(axis=2)
283
+
284
+ if print_flag: print("get_enc_embs_set_stride_set_batch_size | Convert to numpy")
285
+
286
+ if to_numpy:
287
+ if cpu or chunk_size > 0:
288
+ embs = embs.numpy()
289
+ else:
290
+ embs = embs.cpu().numpy()
291
+ torch.cuda.empty_cache()
292
+ if time_flag:
293
+ t = time.time()-t_start
294
+ if print_flag:
295
+ print("get_enc_embs_set_stride_set_batch_size " + str(t) + " seconds -->")
296
+ else:
297
+ print("get_enc_embs_set_stride_set_batch_size " + str(t) + " seconds")
298
+ if check_memory_usage: gpu_memory_status_()
299
+ if print_flag:
300
+ print("get_enc_embs_set_stride_set_batch_size -->")
301
+ return embs
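The snippet below is a usage sketch, not part of the commit: it assumes the function above is importable (written here as dvats_xai.encoder) and that a trained fastai learner with a valid `dls` is available; the toy input shape is only illustrative.

# Hypothetical call to get_enc_embs_set_stride_set_batch_size (illustration only).
import numpy as np
from dvats_xai.encoder import get_enc_embs_set_stride_set_batch_size  # assumed module path

def encode_windows(enc_learn, X, stride=2, batch_size=64):
    "Encode the sliding windows in X with a trained fastai learner (hypothetical helper)."
    return get_enc_embs_set_stride_set_batch_size(
        X, enc_learn,
        stride=stride,          # keep every `stride`-th window before encoding
        batch_size=batch_size,  # injected into enc_learn.dls.bs
        cpu=False,              # use the GPU when available; the function falls back to CPU
        chunk_size=0,           # 0 = encode all batches in a single pass
        print_flag=True, time_flag=True,
    )

# embs = encode_windows(enc_learn, np.random.randn(256, 3, 48).astype(np.float32))
# embs.shape -> (n_kept_windows, emb_dim) after averaging over the sequence dimension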
dvats_xai/imports.py ADDED
@@ -0,0 +1,24 @@
1
+ from IPython.display import Audio, display, HTML, Javascript, clear_output # from tsai
2
+ import importlib
3
+ import numpy as np
4
+ import time
5
+ import sys
6
+
7
+ ##
8
+ # Constants
9
+ ##
10
+ WANDB_ARTIFACTS_DIR = 'data/wandb_artifacts'
11
+
12
+ # General purpose functions
13
+ def beep(inp=1, duration=.1, n=1):
14
+ rate = 10000
15
+ mult = 1.6 * inp if inp else .08
16
+ wave = np.sin(mult*np.arange(rate*duration))
17
+ for i in range(n):
18
+ display(Audio(wave, rate=10000, autoplay=True))
19
+ time.sleep(duration / .1)
20
+
21
+ def m_reload(package_name):
22
+ for k,v in sys.modules.items():
23
+ if k.startswith(package_name):
24
+ importlib.reload(v)
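A minimal sketch of how these notebook helpers might be used (hypothetical session; assumes the package is importable as dvats_xai):

# Hypothetical notebook usage of the helpers in dvats_xai/imports.py.
from dvats_xai.imports import beep, m_reload

# Reload every already-imported submodule whose name starts with "dvats_xai",
# so local edits are picked up without restarting the kernel.
m_reload("dvats_xai")

# Play a short confirmation tone (uses IPython.display.Audio, so Jupyter only).
beep(inp=1, duration=0.2, n=1)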
dvats_xai/load.py ADDED
@@ -0,0 +1,168 @@
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/load.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['TSArtifact', 'infer_or_inject_freq']
5
+
6
+ # %% ../nbs/load.ipynb 2
7
+ import pandas as pd
8
+ import numpy as np
9
+ from fastcore.all import *
10
+ import wandb
11
+ from datetime import datetime, timedelta
12
+ from .imports import *
13
+ from .utils import *
14
+ import pickle
15
+ import pyarrow.feather as ft
16
+
17
+ # %% ../nbs/load.ipynb 7
18
+ class TSArtifact(wandb.Artifact):
19
+
20
+ default_storage_path = Path(Path.home()/'data/wandb_artifacts/')
21
+ date_format = '%Y-%m-%d %H:%M:%S' # TODO add milliseconds
22
+ handle_missing_values_techniques = {
23
+ 'linear_interpolation': lambda df : df.interpolate(method='linear', limit_direction='both'),
24
+ 'overall_mean': lambda df : df.fillna(df.mean()),
25
+ 'overall_median': lambda df : df.fillna(df.median()),
26
+ 'backward_fill' : lambda df : df.fillna(method='bfill'),
27
+ 'forward_fill' : lambda df : df.fillna(method='ffill')
28
+ }
29
+
30
+ "Class that represents a wandb artifact containing time series data. sd stands for start_date \
31
+ and ed for end_date. Both should be pd.Timestamps"
32
+
33
+ @delegates(wandb.Artifact.__init__)
34
+ def __init__(self, name, sd:pd.Timestamp, ed:pd.Timestamp, **kwargs):
35
+ super().__init__(type='dataset', name=name, **kwargs)
36
+ self.sd = sd
37
+ self.ed = ed
38
+ if self.metadata is None:
39
+ self.metadata = dict()
40
+ self.metadata['TS'] = dict(sd = self.sd.strftime(self.date_format),
41
+ ed = self.ed.strftime(self.date_format))
42
+
43
+
44
+ @classmethod
45
+ def from_daily_csv_files(cls, root_path, fread=pd.read_csv, start_date=None, end_date=None, metadata=None, **kwargs):
46
+
47
+ "Create a wandb artifact of type `dataset`, containing the CSV files from `start_date` \
48
+ to `end_date`. Dates must be passed as `datetime.datetime` objects. If a `wandb_run` is \
49
+ defined, the created artifact will be logged to that run, using the longwall name as \
50
+ artifact name, and the date range as version."
51
+
52
+ return None
53
+
54
+
55
+ @classmethod
56
+ @delegates(__init__)
57
+ def from_df(cls, df:pd.DataFrame, name:str, path:str=None, sd:pd.Timestamp=None, ed:pd.Timestamp=None,
58
+ normalize:bool=False, missing_values_technique:str=None, resampling_freq:str=None, **kwargs):
59
+
60
+ """
61
+ Create a TSArtifact of type `dataset`, using the DataFrame `df` samples from \
62
+ `sd` (start date) to `ed` (end date). Dates must be passed as `datetime.datetime` \
63
+ objects. The transformed DataFrame is stored as a pickle file in the path `path` \
64
+ and its reference is added to the artifact entries. Additionally, the dataset can \
65
+ be normalized (see `normalize` argument) or transformed using missing values \
66
+ handling techniques (see `missing_values_technique` argument) or resampling (see \
67
+ `resampling_freq` argument).
68
+
69
+ Arguments:
70
+ df: (DataFrame) The dataframe you want to convert into an artifact.
71
+ name: (str) The artifact name.
72
+ path: (str, optional) The path where the file, containing the new transformed \
73
+ dataframe, is saved. Default None.
74
+ sd: (sd, optional) Start date. By default, the first index of `df` is taken.
75
+ ed: (ed, optional) End date. By default, the last index of `df` is taken.
76
+ normalize: (bool, optional) If the dataset values should be normalized. Default\
77
+ False.
78
+ missing_values_technique: (str, optional) The technique used to handle missing \
79
+ values. Options: "linear_interpolation", "overall_mean", "overall_median", "backward_fill", "forward_fill" or \
80
+ None. Default None.
81
+ resampling_freq: (str, optional) The offset string or object representing \
82
+ frequency conversion for time series resampling. Default None.
83
+
84
+ Returns:
85
+ TSArtifact object.
86
+ """
87
+ sd = df.index[0] if sd is None else sd
88
+ ed = df.index[-1] if ed is None else ed
89
+ obj = cls(name, sd=sd, ed=ed, **kwargs)
90
+ df = df.query('@obj.sd <= index <= @obj.ed')
91
+ obj.metadata['TS']['created'] = 'from-df'
92
+ obj.metadata['TS']['n_vars'] = df.columns.__len__()
93
+
94
+ # Handle Missing Values
95
+ df = obj.handle_missing_values_techniques[missing_values_technique](df) if missing_values_technique is not None else df
96
+ obj.metadata['TS']['handle_missing_values_technique'] = missing_values_technique.__str__()
97
+ obj.metadata['TS']['has_missing_values'] = np.any(df.isna().values).__str__()
98
+
99
+ # Indexing and Resampling
100
+ if resampling_freq: df = df.resample(resampling_freq).mean()
101
+ obj.metadata['TS']['n_samples'] = len(df)
102
+ obj.metadata['TS']['freq'] = str(df.index.freq)
103
+
104
+ # Time Series Variables
105
+ obj.metadata['TS']['vars'] = list(df.columns)
106
+
107
+ # Normalization - Save the previous means and stds
108
+ if normalize:
109
+ obj.metadata['TS']['normalization'] = dict(means = df.describe().loc['mean'].to_dict(),
110
+ stds = df.describe().loc['std'].to_dict())
111
+ df = normalize_columns(df)
112
+
113
+ # Hash and save
114
+ hash_code = str(pd.util.hash_pandas_object(df).sum()) # str(hash(df.values.tobytes()))
115
+ path = obj.default_storage_path/f'{hash_code}' if path is None else Path(path)/f'{hash_code}'
116
+ print("About to write df to ", path)
117
+ ft.write_feather(df, path, compression = 'lz4')
118
+ #feather.write_dataframe
119
+ obj.metadata['TS']['hash'] = hash_code
120
+ obj.add_file(str(path))
121
+
122
+ return obj
123
+
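A sketch of how `from_df` might be called; it is not part of the commit, the project name is a placeholder, and it assumes a wandb login plus the default storage folder (~/data/wandb_artifacts/) existing on disk.

# Hypothetical example: log a toy multivariate series as a TSArtifact.
import wandb
from dvats_xai.load import TSArtifact           # assumed module path
from dvats_xai.utils import generate_TS_df

df = generate_TS_df(rows=100, cols=3)           # toy series with a 1-second index
run = wandb.init(project="dvats-demo")          # placeholder project name
artifact = TSArtifact.from_df(
    df, name="toy-series",
    missing_values_technique="linear_interpolation",
    resampling_freq="5s",                        # downsample by averaging 5-second bins
    normalize=True,                              # store means/stds in the metadata
)
run.log_artifact(artifact)
run.finish()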
124
+ # %% ../nbs/load.ipynb 14
125
+ @patch
126
+ def to_df(self:wandb.apis.public.Artifact):
127
+ "Download the files of a saved wandb artifact and process them as a single dataframe. The artifact must \
128
+ come from a call to `run.use_artifact` with a proper wandb run."
129
+ # The way we have to ensure that the argument comes from a TS arfitact is the metadata
130
+ if self.metadata.get('TS') is None:
131
+ print(f'ERROR:{self} does not come from a logged TSArtifact')
132
+ return None
133
+ dir = Path(self.download())
134
+ if self.metadata['TS']['created'] == 'from-df':
135
+ # Call read_pickle with the single file from dir
136
+ #return pd.read_pickle(dir.ls()[0])
137
+ return ft.read_feather(dir.ls()[0])
138
+ else:
139
+ print("ERROR: Only from_df method is allowed yet")
140
+
141
+ # %% ../nbs/load.ipynb 16
142
+ @patch
143
+ def to_tsartifact(self:wandb.apis.public.Artifact):
144
+ "Cast an artifact as a TS artifact. The artifact must have been created from one of the \
145
+ class creation methods of the class `TSArtifact`. This is useful to go back to a TSArtifact \
146
+ after downloading an artifact through the wand API"
147
+ return TSArtifact(name=self.digest, #TODO change this
148
+ sd=pd.to_datetime(self.metadata['TS']['sd'], format=TSArtifact.date_format),
149
+ ed=pd.to_datetime(self.metadata['TS']['ed'], format=TSArtifact.date_format),
150
+ description=self.description,
151
+ metadata=self.metadata)
152
+
153
+ # %% ../nbs/load.ipynb 18
154
+ @delegates(pd.to_datetime)
155
+ def infer_or_inject_freq(df, injected_freq='1s', start_date=None, **kwargs):
156
+ """
157
+ Infer index frequency. If there's not a proper time index, create fake timestamps,
158
+ keeping the desired `injected_freq`. If that is None, set a default one of 1 second.
159
+ start_date: the first date of the index (int or string).
160
+ """
161
+ inferred_freq = pd.infer_freq(df.index)
162
+ if inferred_freq == 'N':
163
+ timedelta = pd.to_timedelta(injected_freq)
164
+ df.index = pd.to_datetime(ifnone(start_date, 0), **kwargs) + timedelta*df.index
165
+ df.index.freq = pd.infer_freq(df.index)
166
+ else:
167
+ df.index.freq = inferred_freq
168
+ return df
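For illustration, a call to `infer_or_inject_freq` on a frame without a time index (not part of the commit; it assumes pandas reports the integer index as nanosecond frequency 'N', which is what the function checks for):

# Hypothetical example: inject a fake 1-second timestamp index.
import numpy as np
import pandas as pd
from dvats_xai.load import infer_or_inject_freq  # assumed module path

df = pd.DataFrame({"x": np.random.randn(5)})     # plain RangeIndex, no timestamps
df = infer_or_inject_freq(df, injected_freq="1s", start_date="2024-01-01")
print(df.index)                                  # DatetimeIndex starting 2024-01-01 with a 1-second step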
dvats_xai/utils.py ADDED
@@ -0,0 +1,245 @@
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/utils.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['generate_TS_df', 'normalize_columns', 'remove_constant_columns', 'ReferenceArtifact', 'PrintLayer',
5
+ 'get_wandb_artifacts', 'get_pickle_artifact', 'exec_with_feather', 'py_function',
6
+ 'exec_with_feather_k_output', 'exec_with_and_feather_k_output', 'learner_module_leaves',
7
+ 'learner_module_leaves_subtables']
8
+
9
+ # %% ../nbs/utils.ipynb 3
10
+ from .imports import *
11
+ from fastcore.all import *
12
+ import wandb
13
+ import pickle
14
+ import pandas as pd
15
+ import numpy as np
16
+ #import tensorflow as tf
17
+ import torch.nn as nn
18
+ from fastai.basics import *
19
+
20
+ # %% ../nbs/utils.ipynb 5
21
+ def generate_TS_df(rows, cols):
22
+ "Generates a dataframe containing a multivariate time series, where each column \
23
+ represents a variable and each row a time point (sample). The timestamp is in the \
24
+ index of the dataframe, and it is created with a even space of 1 second between samples"
25
+ index = np.arange(pd.Timestamp.now(),
26
+ pd.Timestamp.now() + pd.Timedelta(rows-1, 'seconds'),
27
+ pd.Timedelta(1, 'seconds'))
28
+ data = np.random.randn(len(index), cols)
29
+ return pd.DataFrame(data, index=index)
30
+
31
+ # %% ../nbs/utils.ipynb 10
32
+ def normalize_columns(df:pd.DataFrame):
33
+ "Normalize columns from `df` to have 0 mean and 1 standard deviation"
34
+ mean = df.mean()
35
+ std = df.std() + 1e-7
36
+ return (df-mean)/std
37
+
38
+ # %% ../nbs/utils.ipynb 16
39
+ def remove_constant_columns(df:pd.DataFrame):
40
+ return df.loc[:, (df != df.iloc[0]).any()]
41
+
42
+ # %% ../nbs/utils.ipynb 21
43
+ class ReferenceArtifact(wandb.Artifact):
44
+ default_storage_path = Path('data/wandb_artifacts/') # * this path is relative to Path.home()
45
+ "This class is meant to create an artifact with a single reference to an object \
46
+ passed as argument in the contructor. The object will be pickled, hashed and stored \
47
+ in a specified folder."
48
+ @delegates(wandb.Artifact.__init__)
49
+ def __init__(self, obj, name, type='object', folder=None, **kwargs):
50
+ super().__init__(type=type, name=name, **kwargs)
51
+ # pickle dumps the object and then hash it
52
+ hash_code = str(hash(pickle.dumps(obj)))
53
+ folder = Path(ifnone(folder, Path.home()/self.default_storage_path))
54
+ with open(f'{folder}/{hash_code}', 'wb') as f:
55
+ pickle.dump(obj, f)
56
+ self.add_reference(f'file://{folder}/{hash_code}')
57
+ if self.metadata is None:
58
+ self.metadata = dict()
59
+ self.metadata['ref'] = dict()
60
+ self.metadata['ref']['hash'] = hash_code
61
+ self.metadata['ref']['type'] = str(obj.__class__)
62
+
63
+ # %% ../nbs/utils.ipynb 24
64
+ @patch
65
+ def to_obj(self:wandb.apis.public.Artifact):
66
+ """Download the files of a saved ReferenceArtifact and get the referenced object. The artifact must \
67
+ come from a call to `run.use_artifact` with a proper wandb run."""
68
+ if self.metadata.get('ref') is None:
69
+ print(f'ERROR:{self} does not come from a saved ReferenceArtifact')
70
+ return None
71
+ original_path = ReferenceArtifact.default_storage_path/self.metadata['ref']['hash']
72
+ path = original_path if original_path.exists() else Path(self.download()).ls()[0]
73
+ with open(path, 'rb') as f:
74
+ obj = pickle.load(f)
75
+ return obj
76
+
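For orientation, a sketch of the intended round trip between `ReferenceArtifact` and the patched `to_obj` (not part of the commit; placeholder project name, and it assumes a wandb login and that ~/data/wandb_artifacts/ exists):

# Hypothetical round trip: pickle an object behind a ReferenceArtifact, then recover it.
import numpy as np
import wandb
from dvats_xai.utils import ReferenceArtifact    # assumed module path

with wandb.init(project="dvats-demo") as run:    # placeholder project
    prjs = np.random.rand(100, 2)                # any picklable object works
    run.log_artifact(ReferenceArtifact(prjs, name="toy-projections"))

with wandb.init(project="dvats-demo") as run:
    art = run.use_artifact("toy-projections:latest")
    prjs_back = art.to_obj()                     # to_obj is patched onto the public Artifact class
    print(prjs_back.shape)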
77
+ # %% ../nbs/utils.ipynb 33
78
+ import torch.nn as nn
79
+ class PrintLayer(nn.Module):
80
+ def __init__(self):
81
+ super(PrintLayer, self).__init__()
82
+
83
+ def forward(self, x):
84
+ # Do your print / debug stuff here
85
+ print(x.shape)
86
+ return x
87
+
88
+ # %% ../nbs/utils.ipynb 34
89
+ @patch
90
+ def export_and_get(self:Learner, keep_exported_file=False):
91
+ """
92
+ Export the learner into an auxiliary file, load it and return it back.
93
+ """
94
+ aux_path = Path('aux.pkl')
95
+ self.export(fname='aux.pkl')
96
+ aux_learn = load_learner('aux.pkl')
97
+ if not keep_exported_file: aux_path.unlink()
98
+ return aux_learn
99
+
100
+ # %% ../nbs/utils.ipynb 35
101
+ def get_wandb_artifacts(project_path, type=None, name=None, last_version=True):
102
+ """
103
+ Get the artifacts logged in a wandb project.
104
+ Input:
105
+ - `project_path` (str): entity/project_name
106
+ - `type` (str): whether to return only one type of artifacts
107
+ - `name` (str): Leave none to have all artifact names
108
+ - `last_version`: whether to return only the last version of each artifact or not
109
+
110
+ Output: List of artifacts
111
+ """
112
+ public_api = wandb.Api()
113
+ if type is not None:
114
+ types = [public_api.artifact_type(type, project_path)]
115
+ else:
116
+ types = public_api.artifact_types(project_path)
117
+
118
+ res = L()
119
+ for kind in types:
120
+ for collection in kind.collections():
121
+ if name is None or name == collection.name:
122
+ versions = public_api.artifact_versions(
123
+ kind.type,
124
+ "/".join([kind.entity, kind.project, collection.name]),
125
+ per_page=1,
126
+ )
127
+ if last_version: res += next(versions)
128
+ else: res += L(versions)
129
+ return list(res)
130
+
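An illustrative call to `get_wandb_artifacts` (entity and project are placeholders, not from the repo):

# Hypothetical listing of the newest 'dataset' artifact of every collection in a project.
from dvats_xai.utils import get_wandb_artifacts  # assumed module path

arts = get_wandb_artifacts(
    "my-entity/my-project",   # placeholder entity/project path
    type="dataset",           # restrict to a single artifact type
    name=None,                # None = every collection of that type
    last_version=True,        # keep only the most recent version per collection
)
for art in arts:
    print(art.name, art.version)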
131
+ # %% ../nbs/utils.ipynb 39
132
+ def get_pickle_artifact(filename):
133
+
134
+ with open(filename, "rb") as f:
135
+ df = pickle.load(f)
136
+
137
+ return df
138
+
139
+ # %% ../nbs/utils.ipynb 41
140
+ import pyarrow.feather as ft
141
+ import pickle
142
+
143
+ # %% ../nbs/utils.ipynb 42
144
+ def exec_with_feather(function, path = None, print_flag = False, *args, **kwargs):
145
+ result = None
146
+ if not (path is None):
147
+ if print_flag: print("--> Exec with feather | reading input from ", path)
148
+ input = ft.read_feather(path)
149
+ if print_flag: print("--> Exec with feather | Apply function ", path)
150
+ result = function(input, *args, **kwargs)
151
+ if print_flag: print("Exec with feather --> ", path)
152
+ return result
153
+
154
+ # %% ../nbs/utils.ipynb 43
155
+ def py_function(module_name, function_name, print_flag = False):
156
+ try:
157
+ function = getattr(__import__('__main__'), function_name)
158
+ except:
159
+ module = __import__(module_name, fromlist=[''])
160
+ function = getattr(module, function_name)
161
+ print("py function: ", function_name, ": ", function)
162
+ return function
163
+
164
+ # %% ../nbs/utils.ipynb 46
165
+ import time
166
+ def exec_with_feather_k_output(function_name, module_name = "main", path = None, k_output = 0, print_flag = False, time_flag = False, *args, **kwargs):
167
+ result = None
168
+ function = py_function(module_name, function_name, print_flag)
169
+ if time_flag: t_start = time.time()
170
+ if not (path is None):
171
+ if print_flag: print("--> Exec with feather | reading input from ", path)
172
+ input = ft.read_feather(path)
173
+ if print_flag: print("--> Exec with feather | Apply function ", path)
174
+ result = function(input, *args, **kwargs)[k_output]
175
+ if time_flag:
176
+ t_end = time.time()
177
+ print("Exec with feather | time: ", t_end-t_start)
178
+ if print_flag: print("Exec with feather --> ", path)
179
+ return result
180
+
181
+ # %% ../nbs/utils.ipynb 48
182
+ def exec_with_and_feather_k_output(function_name, module_name = "main", path_input = None, path_output = None, k_output = 0, print_flag = False, time_flag = False, *args, **kwargs):
183
+ result = None
184
+ function = py_function(module_name, function_name, print_flag)
185
+ if time_flag: t_start = time.time()
186
+ if not (path_input is None):
187
+ if print_flag: print("--> Exec with feather | reading input from ", path_input)
188
+ input = ft.read_feather(path_input)
189
+ if print_flag:
190
+ print("--> Exec with feather | Apply function ", function_name, "input type: ", type(input))
191
+
192
+ result = function(input, *args, **kwargs)[k_output]
193
+ ft.write_feather(result, path_output, compression = 'lz4')
194
+ if time_flag:
195
+ t_end = time.time()
196
+ print("Exec with feather | time: ", t_end-t_start)
197
+ if print_flag: print("Exec with feather --> ", path_output)
198
+ return path_output
199
+
200
+ # %% ../nbs/utils.ipynb 52
201
+ def learner_module_leaves(learner):
202
+ modules = list(learner.modules())[0] # Get the root module
203
+ rows = []
204
+
205
+ def find_leave_modules(module, path=[]):
206
+ for name, sub_module in module.named_children():
207
+ current_path = path + [f"{type(sub_module).__name__}"]
208
+ if not list(sub_module.children()):
209
+ leave_name = ' -> '.join(current_path)
210
+ leave_params = str(sub_module).strip()
211
+ rows.append([
212
+ leave_name,
213
+ f"{type(sub_module).__name__}",
214
+ name,
215
+ leave_params
216
+ ]
217
+ )
218
+
219
+ find_leave_modules(sub_module, current_path)
220
+
221
+ find_leave_modules(modules)
222
+
223
+ df = pd.DataFrame(rows, columns=['Path', 'Module_type', 'Module_name', 'Module'])
224
+ return df
225
+
226
+ # %% ../nbs/utils.ipynb 56
227
+ def learner_module_leaves_subtables(learner, print_flag = False):
228
+ df = pd.DataFrame(columns=['Path', 'Module_type', 'Module_name', 'Module'])
229
+ md = learner_module_leaves(learner).drop(
230
+ 'Path', axis = 1
231
+ ).sort_values(
232
+ by = 'Module_type'
233
+ )
234
+ if print_flag: print("The layers are of this types:")
235
+
236
+ md_types = pd.DataFrame(md['Module_type'].drop_duplicates())
237
+ if print_flag:
238
+ display(md_types)
239
+ print("And they are called with this parameters:")
240
+
241
+ md_modules = pd.DataFrame(md['Module'].drop_duplicates())
242
+
243
+ if print_flag: display(md_modules)
244
+
245
+ return md_types, md_modules
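A short sketch of the two inspection helpers above (not part of the commit; `enc_learn` stands for any fastai Learner and is assumed to exist when the helper is called):

# Hypothetical inspection of the leaf modules of a trained learner.
from dvats_xai.utils import learner_module_leaves, learner_module_leaves_subtables

def summarize_model(enc_learn):
    "Return the per-type and per-parameter subtables after printing a preview (sketch)."
    leaves = learner_module_leaves(enc_learn)
    print(leaves[["Module_type", "Module_name"]].head())
    types_df, modules_df = learner_module_leaves_subtables(enc_learn, print_flag=False)
    return types_df, modules_df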
dvats_xai/visualization.py ADDED
@@ -0,0 +1,63 @@
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/visualization.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['plot_TS', 'plot_validation_ts_ae', 'plot_mask']
5
+
6
+ # %% ../nbs/visualization.ipynb 3
7
+ from fastcore.all import *
8
+ import pandas as pd
9
+ import numpy as np
10
+ import matplotlib.pyplot as plt
11
+ import torch
12
+
13
+ # %% ../nbs/visualization.ipynb 6
14
+ @delegates(pd.DataFrame.plot)
15
+ def plot_TS(df:pd.core.frame.DataFrame, **kwargs):
16
+ df.plot(subplots=True, **kwargs)
17
+ plt.show()
18
+
19
+ # %% ../nbs/visualization.ipynb 8
20
+ def plot_validation_ts_ae(prediction:np.array, original:np.array, title_str = "Validation plot", fig_size = (15,15), anchor = (-0.01, 0.89), window_num = 0, return_fig=True, title_pos = 0.9):
21
+ # Create the figure
22
+ fig = plt.figure(figsize=(fig_size[0],fig_size[1]))
23
+ # Create the subplot axes
24
+ axes = fig.subplots(nrows=original.shape[2], ncols=1)
25
+ # We iterate over the sensor data and plot both the original and the prediction
26
+ for i,ax in zip(range(original.shape[2]),fig.axes):
27
+ ax.plot(original[window_num,:,i], label='Original Data')
28
+ ax.plot(prediction[window_num,:,i], label='Prediction')
29
+ # Handle the legend configuration and position
30
+ lines, labels = fig.axes[-1].get_legend_handles_labels()
31
+ fig.legend(lines, labels,loc='upper left', ncol=2)
32
+ # Write the plot title (and position it closer to the top of the graph)
33
+ fig.suptitle(title_str, y = title_pos)
34
+ # Tight results:
35
+ fig.tight_layout()
36
+ # Returns
37
+ if return_fig:
38
+ return fig
39
+ fig
40
+ return None
41
+
42
+ # %% ../nbs/visualization.ipynb 12
43
+ def plot_mask(mask, i=0, fig_size=(10,10), title_str="Mask", return_fig=False):
44
+ """
45
+ Plot the mask passed as argument. The mask is a 3D boolean tensor. The first
46
+ dimension is the window number (or item index), the second is the variable, and the third is the time step.
47
+ Input:
48
+ mask: 3D boolean tensor
49
+ i: index of the window to plot
50
+ fig_size: size of the figure
51
+ title_str: title of the plot
52
+ return_fig: if True, returns the figure
53
+ Output:
54
+ if return_fig is True, returns the figure, otherwise, it does not return anything
55
+ """
56
+ plt.figure(figsize=fig_size)
57
+ plt.pcolormesh(mask[i], cmap='cool')
58
+ plt.title(f'{title_str} {i}, mean: {mask[0].float().mean().item():.3f}')
59
+ if return_fig:
60
+ return plt.gcf()
61
+ else:
62
+ plt.show()
63
+ return None
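A hypothetical exercise of the plotting helpers above on random tensors (not part of the commit; shapes follow the docstrings: predictions/originals are (window, time step, variable) and masks are (window, variable, time step)):

# Toy data only, for illustration.
import numpy as np
import torch
from dvats_xai.visualization import plot_validation_ts_ae, plot_mask  # assumed module path

original = np.random.randn(4, 48, 3)                  # (windows, time steps, variables)
prediction = original + 0.1 * np.random.randn(4, 48, 3)
fig = plot_validation_ts_ae(prediction, original, window_num=0, fig_size=(12, 8))

mask = torch.rand(4, 3, 48) > 0.5                     # (windows, variables, time steps)
plot_mask(mask, i=1, fig_size=(8, 4))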
dvats_xai/xai.py ADDED
File without changes
r_shiny_app/artifacts/toy:v0/2078634713863647172 ADDED
Binary file (19.2 kB).
 
r_shiny_app/global.R CHANGED
@@ -32,7 +32,10 @@ torch <- reticulate::import("torch")
32
  #options(shiny.trace = TRUE)
33
  if(torch$cuda$is_available()){
34
  print(paste0("CUDA AVAILABLE. Num devices: ", torch$cuda$device_count()))
35
- torch$cuda$set_device(as.integer(0))
 
 
 
36
  #torch$cuda$set_device(as.integer(1))
37
  #torch$cuda$set_device(as.integer(2))
38
  #print(torch$cuda$memory_summary())
 
32
  #options(shiny.trace = TRUE)
33
  if(torch$cuda$is_available()){
34
  print(paste0("CUDA AVAILABLE. Num devices: ", torch$cuda$device_count()))
35
+ device=as.integer(0)
36
+ print(paste0("Set up device ordinal "), device)
37
+ torch$cuda$set_device(device)
38
+ print(paste0("Device setted up"))
39
  #torch$cuda$set_device(as.integer(1))
40
  #torch$cuda$set_device(as.integer(2))
41
  #print(torch$cuda$memory_summary())
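For reference, a Python-side sketch of what the R change above asks torch to do through reticulate (device ordinal 0 is the deployment assumption carried over from global.R; this is not part of the commit):

# Sketch: pin torch to CUDA device 0 when available, mirroring r_shiny_app/global.R.
import torch

if torch.cuda.is_available():
    print("CUDA AVAILABLE. Num devices:", torch.cuda.device_count())
    device = 0                                  # same ordinal selected in global.R
    torch.cuda.set_device(device)
    print("Device set up:", torch.cuda.get_device_name(device))
else:
    print("No CUDA device found; running on CPU.")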