misantamaria committed on
Commit 7399708 · 1 Parent(s): c12b4db

bring dvats & requirements & entrypoint
dvats/.ipynb_checkpoints/__init__-checkpoint.py ADDED
@@ -0,0 +1 @@
+ __version__ = "0.0.1"
dvats/.ipynb_checkpoints/__init__.py ADDED
File without changes
dvats/.ipynb_checkpoints/_modidx-checkpoint.py ADDED
@@ -0,0 +1,14 @@
+ # Autogenerated by nbdev
+
+ d = { 'settings': { 'branch': 'master',
+                     'doc_baseurl': '/dvats/',
+                     'doc_host': 'https://vrodriguezf.github.io',
+                     'git_url': 'https://github.com/vrodriguezf/deepvats',
+                     'lib_path': 'dvats'},
+       'syms': { 'dvats.all': {},
+                 'dvats.dr': {},
+                 'dvats.encoder': {},
+                 'dvats.imports': {},
+                 'dvats.load': {},
+                 'dvats.utils': {},
+                 'dvats.visualization': {}}}
dvats/.ipynb_checkpoints/_nbdev-checkpoint.py ADDED
@@ -0,0 +1,39 @@
+ # AUTOGENERATED BY NBDEV! DO NOT EDIT!
+
+ __all__ = ["index", "modules", "custom_doc_links", "git_url"]
+
+ index = {"check_compatibility": "dr.ipynb",
+          "get_UMAP_prjs": "dr.ipynb",
+          "get_PCA_prjs": "dr.ipynb",
+          "get_TSNE_prjs": "dr.ipynb",
+          "DCAE_torch": "encoder.ipynb",
+          "ENCODER_EMBS_MODULE_NAME": "encoder.ipynb",
+          "get_enc_embs": "encoder.ipynb",
+          "TSArtifact": "load.ipynb",
+          "wandb.apis.public.Artifact.to_df": "load.ipynb",
+          "wandb.apis.public.Artifact.to_tsartifact": "load.ipynb",
+          "infer_or_inject_freq": "load.ipynb",
+          "generate_TS_df": "utils.ipynb",
+          "normalize_columns": "utils.ipynb",
+          "remove_constant_columns": "utils.ipynb",
+          "ReferenceArtifact": "utils.ipynb",
+          "wandb.apis.public.Artifact.to_obj": "utils.ipynb",
+          "PrintLayer": "utils.ipynb",
+          "Learner.export_and_get": "utils.ipynb",
+          "get_wandb_artifacts": "utils.ipynb",
+          "get_pickle_artifact": "utils.ipynb",
+          "plot_TS": "visualization.ipynb",
+          "plot_validation_ts_ae": "visualization.ipynb",
+          "plot_mask": "visualization.ipynb"}
+
+ modules = ["dr.py",
+            "encoder.py",
+            "load.py",
+            "utils.py",
+            "visualization.py"]
+
+ doc_url = "https://vrodriguezf.github.io/tchub/"
+
+ git_url = "https://gitlab.geist.re/pml/x_timecluster_extension/tree/master/"
+
+ def custom_doc_links(name): return None
dvats/.ipynb_checkpoints/all-checkpoint.py ADDED
@@ -0,0 +1,8 @@
+ import dvats
+ from .imports import *
+ from .load import *
+ from .utils import *
+ from .dr import *
+ from .encoder import *
+ from .visualization import *
+ from .xai import *
dvats/.ipynb_checkpoints/dr-checkpoint.py ADDED
@@ -0,0 +1,166 @@
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/dr.ipynb.
+
+ # %% auto 0
+ __all__ = ['get_gpu_memory', 'color_for_percentage', 'create_bar', 'gpu_memory_status', 'check_compatibility', 'get_UMAP_prjs',
+            'get_PCA_prjs', 'get_TSNE_prjs', 'cluster_score']
+
+ # %% ../nbs/dr.ipynb 2
+ import subprocess
+ def get_gpu_memory(device = 0):
+     total_memory = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.total", "--format=csv,noheader,nounits", "--id=" + str(device)])
+     total_memory = int(total_memory.decode().split('\n')[0])
+     used_memory = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.used", "--format=csv,noheader,nounits", "--id=" + str(device)])
+     used_memory = int(used_memory.decode().split('\n')[0])
+
+     percentage = round((used_memory / total_memory) * 100)
+     return used_memory, total_memory, percentage
+
+ def color_for_percentage(percentage):
+     if percentage < 20:
+         return "\033[90m" # Gray
+     elif percentage < 40:
+         return "\033[94m" # Blue
+     elif percentage < 60:
+         return "\033[92m" # Green
+     elif percentage < 80:
+         return "\033[93m" # Orange
+     else:
+         return "\033[91m" # Red
+
+ def create_bar(percentage, color_code, length=20):
+     filled_length = int(length * percentage // 100)
+     bar = "█" * filled_length + "-" * (length - filled_length)
+     return color_code + bar + "\033[0m" # Apply color and reset after bar
+
+ def gpu_memory_status(device=0):
+     used, total, percentage = get_gpu_memory(device)
+     color_code = color_for_percentage(percentage)
+     bar = create_bar(percentage, color_code)
+     print(f"Used mem: {used}")
+     print(f"Total mem: {total}")
+     print(f"Memory Usage: [{bar}] {color_code}{percentage}%\033[0m")
+
+ # %% ../nbs/dr.ipynb 4
+ import umap
+ import cudf
+ import cuml
+ import pandas as pd
+ import numpy as np
+ from fastcore.all import *
+ from .imports import *
+ from .load import TSArtifact
+
+ # %% ../nbs/dr.ipynb 5
+ def check_compatibility(dr_ar:TSArtifact, enc_ar:TSArtifact):
+     "Function to check that the artifact used by the encoder model and the artifact that is \
+     going to be passed through the DR are compatible"
+     try:
+         # Check that both artifacts have the same variables
+         chk_vars = dr_ar.metadata['TS']['vars'] == enc_ar.metadata['TS']['vars']
+         # Check that both artifacts have the same freq
+         chk_freq = dr_ar.metadata['TS']['freq'] == enc_ar.metadata['TS']['freq']
+         # Check that the dr artifact is not normalized (non-normalized data does not have the key 'normalization')
+         chk_norm = dr_ar.metadata['TS'].get('normalization') is None
+         # Check that the dr artifact has no missing values
+         chk_miss = dr_ar.metadata['TS']['has_missing_values'] == "False"
+         # Check all logical vars.
+         if chk_vars and chk_freq and chk_norm and chk_miss:
+             print("Artifacts are compatible.")
+         else:
+             raise Exception
+     except Exception as e:
+         print("Artifacts are not compatible.")
+         raise e
+     return None
+
+ # %% ../nbs/dr.ipynb 7
+ # Comment this part after 4_seconds debugged
+ import hashlib
+
+ # %% ../nbs/dr.ipynb 8
+ import warnings
+ import sys
+ from numba.core.errors import NumbaPerformanceWarning
+ @delegates(cuml.UMAP)
+ def get_UMAP_prjs(
+     input_data,
+     cpu=True,
+     print_flag = False,
+     check_memory_usage = True,
+     **kwargs
+ ):
+     "Compute the projections of `input_data` using UMAP, with a configuration contained in `**kwargs`."
+     if print_flag:
+         print("--> get_UMAP_prjs")
+         print("kwargs: ", kwargs)
+         sys.stdout.flush()
+     ####
+     checksum = hashlib.md5(input_data.tobytes()).hexdigest()
+     print(checksum)
+     ####
+
+     if check_memory_usage: gpu_memory_status()
+
+     warnings.filterwarnings("ignore", category=NumbaPerformanceWarning) # silence NumbaPerformanceWarning
+
+     #reducer = umap.UMAP(**kwargs) if cpu else cuml.UMAP(**kwargs)
+     if cpu:
+         print("-- umap.UMAP --", cpu)
+         sys.stdout.flush()
+         reducer = umap.UMAP(**kwargs)
+     else:
+         print("-- cuml.UMAP --", cpu)
+         sys.stdout.flush()
+         if 'random_state' in kwargs:
+             kwargs['random_state'] = np.uint64(kwargs['random_state'])
+         reducer = cuml.UMAP(**kwargs)
+
+     if print_flag:
+         print("------- reducer --------")
+         print(reducer)
+         print(reducer.get_params())
+         print("------- reducer --------")
+         sys.stdout.flush()
+
+     projections = reducer.fit_transform(input_data)
+
+     if check_memory_usage: gpu_memory_status()
+     if print_flag:
+         checksum = hashlib.md5(projections.tobytes()).hexdigest()
+         print("prjs checksum ", checksum)
+         print("get_UMAP_prjs -->")
+         sys.stdout.flush()
+     return projections
+
+ # %% ../nbs/dr.ipynb 13
+ @delegates(cuml.PCA)
+ def get_PCA_prjs(X, cpu=False, **kwargs):
+     r"""
+     Computes PCA projections of X
+     """
+     if cpu:
+         raise NotImplementedError
+     else:
+         reducer = cuml.PCA(**kwargs)
+     projections = reducer.fit_transform(X)
+     return projections
+
+ # %% ../nbs/dr.ipynb 15
+ @delegates(cuml.TSNE)
+ def get_TSNE_prjs(X, cpu=False, **kwargs):
+     r"""
+     Computes TSNE projections of X
+     """
+     if cpu:
+         raise NotImplementedError
+     else:
+         reducer = cuml.TSNE(**kwargs)
+     projections = reducer.fit_transform(X)
+     return projections
+
+ # %% ../nbs/dr.ipynb 18
+ from sklearn.metrics import silhouette_score
+ def cluster_score(prjs, clusters_labels, print_flag):
+     score = silhouette_score(prjs, clusters_labels)
+     if print_flag: print("Silhouette_score:", score)
+     return score
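Illustrative usage of the dimensionality-reduction helpers above (a minimal sketch, not part of the commit; the sample array and the UMAP parameters are made up for the example, and `check_memory_usage=False` simply skips the nvidia-smi call on machines without a GPU):

    import numpy as np
    from dvats.dr import get_UMAP_prjs

    embs = np.random.rand(500, 32).astype(np.float32)   # stand-in for encoder embeddings
    prjs = get_UMAP_prjs(embs, cpu=True, print_flag=False, check_memory_usage=False,
                         n_neighbors=15, min_dist=0.1)   # extra kwargs are delegated to UMAP
    print(prjs.shape)                                    # (500, 2) with UMAP's default n_components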
dvats/.ipynb_checkpoints/encoder-checkpoint.py ADDED
@@ -0,0 +1,153 @@
+ # -*- coding: utf-8 -*-
+ """encoder.ipynb
+
+ Automatically generated.
+
+ Original file is located at:
+     /home/macu/work/nbs/encoder.ipynb
+ """
+
+ #default_exp encoder
+
+ #hide
+ %load_ext autoreload
+ %autoreload 2
+
+ #export
+ import pandas as pd
+ import numpy as np
+ from fastcore.all import *
+ from tsai.callback.MVP import *
+ from tsai.imports import *
+ from tsai.models.InceptionTimePlus import InceptionTimePlus
+ from tsai.models.explainability import get_acts_and_grads
+ from tsai.models.layers import *
+ from tsai.data.validation import combine_split_data
+
+ #hide
+ from tsai.all import *
+
+ #export
+ class DCAE_torch(Module):
+     def __init__(self, c_in, seq_len, delta, nfs=[64, 32, 12], kss=[10, 5, 5],
+                  pool_szs=[2,2,3], output_fsz=10):
+         """
+         Create a Deep Convolutional Autoencoder for multivariate time series of `d` dimensions,
+         sliced with a window size of `w`. The parameter `delta` sets the number of latent features that will be
+         contained in the Dense layer of the network. The number of feature
+         maps (filters), the filter size and the pool size can also be adjusted.
+         """
+         assert all_equal([len(x) for x in [nfs, kss, pool_szs]], np.repeat(len(nfs), 3)), \
+             'nfs, kss, and pool_szs must have the same length'
+         assert np.prod(pool_szs) == nfs[-1], \
+             'The number of filters in the last conv layer must be equal to the product of pool sizes'
+         assert seq_len % np.prod(pool_szs) == 0, \
+             'The product of pool sizes must be a divisor of the window size'
+         layers = []
+         for i in range_of(kss):
+             layers += [Conv1d(ni=nfs[i-1] if i>0 else c_in, nf=nfs[i], ks=kss[i]),
+                        nn.MaxPool1d(kernel_size=pool_szs[i])]
+         self.downsample = nn.Sequential(*layers)
+         self.bottleneck = nn.Sequential(OrderedDict([
+             ('flatten', nn.Flatten()),
+             ('latent_in', nn.Linear(seq_len, delta)),
+             ('latent_out', nn.Linear(delta, seq_len)),
+             ('reshape', Reshape(nfs[-1], seq_len // np.prod(pool_szs)))
+         ]))
+         layers = []
+         for i in reversed(range_of(kss)):
+             layers += [Conv1d(ni=nfs[i+1] if i != (len(nfs)-1) else nfs[-1],
+                               nf=nfs[i], ks=kss[i]),
+                        nn.Upsample(scale_factor=pool_szs[i])]
+         layers += [Conv1d(ni=nfs[0], nf=c_in, kernel_size=output_fsz)]
+         self.upsample = nn.Sequential(*layers)
+
+     def forward(self, x):
+         x = self.downsample(x)
+         x = self.bottleneck(x)
+         x = self.upsample(x)
+         return x
+
+ #hide
+ foo = torch.rand(3, 1, 48)
+ m = DCAE_torch(c_in=foo.shape[1], seq_len=foo.shape[2], delta=12)
+ m(foo).shape
+
+ #export
+ ENCODER_EMBS_MODULE_NAME = {
+     InceptionTimePlus: 'backbone', # for mvp based models
+     DCAE_torch: 'bottleneck.latent_in'
+ }
+
+ #export
+ def get_enc_embs(X, enc_learn, module=None, cpu=False, average_seq_dim=True, to_numpy=True):
+     """
+     Get the embeddings of X from an encoder, passed in `enc_learn` as a fastai
+     learner. By default, the embeddings are obtained from the last layer
+     before the model head, although any layer can be passed to `module`.
+     Input
+     - `cpu`: Whether to do the model inference on CPU or GPU (GPU recommended)
+     - `average_seq_dim`: Whether to aggregate the embeddings in the sequence dimensions
+     - `to_numpy`: Whether to return the result as a numpy array (if false returns a tensor)
+     """
+     if cpu:
+         print("--> Get enc embs CPU")
+         enc_learn.dls.cpu()
+         enc_learn.cpu()
+     else:
+         print("--> Use CUDA |Get enc embs GPU")
+         enc_learn.dls.cuda()
+         enc_learn.cuda()
+         print("devices: ", enc_learn.dls.device, enc_learn.model.device)
+         print("Use CUDA -->")
+     if enc_learn.dls.bs == 0: enc_learn.dls.bs = 64
+     print("--> Get enc embs bs: ", enc_learn.dls.bs)
+     aux_dl = enc_learn.dls.valid.new_dl(X=X)
+     aux_dl.bs = enc_learn.dls.bs if enc_learn.dls.bs>0 else 64
+     module = nested_attr(enc_learn.model,
+                          ENCODER_EMBS_MODULE_NAME[type(enc_learn.model)]) \
+              if module is None else module
+     embs = [get_acts_and_grads(model=enc_learn.model,
+                                modules=module,
+                                x=xb[0], cpu=cpu)[0] for xb in aux_dl]
+     embs = to_concat(embs)
+     if embs.ndim == 3 and average_seq_dim: embs = embs.mean(axis=2)
+     if to_numpy: embs = embs.numpy() if cpu else embs.cpu().numpy()
+     return embs
+
+ #hide
+ import wandb
+ from dvats.utils import *
+ wandb_api = wandb.Api()
+ enc_artifact = wandb_api.artifact('deepvats/mvp:latest')
+ enc_learner = enc_artifact.to_obj()
+ X = torch.rand(9, 1, 48)
+
+ #hide
+ #slow
+ #%%time
+ embs = get_enc_embs(X, enc_learner, cpu=True)
+ test_eq(embs.shape[0], X.shape[0])
+ embs.shape, embs.__class__
+
+ #hide
+ %%time
+ embs = get_enc_embs(X, enc_learner, cpu=False, to_numpy=False)
+ test_eq(embs.shape[0], X.shape[0])
+ embs.shape, embs.__class__, embs.device
+
+ #hide
+ %%time
+ embs = get_enc_embs(X, enc_learner, cpu=False, to_numpy=True)
+ test_eq(embs.shape[0], X.shape[0])
+ embs.shape, embs.__class__
+
+ #hide
+
+ #from nbdev.export import notebook2script
+
+ #notebook2script()
+
+ #from tsai import nb2py
+ #nb2py
+ #beep(1)
dvats/.ipynb_checkpoints/imports-checkpoint.py ADDED
@@ -0,0 +1,24 @@
+ from IPython.display import Audio, display, HTML, Javascript, clear_output # from tsai
+ import importlib
+ import numpy as np
+ import time
+ import sys
+
+ ##
+ # Constants
+ ##
+ WANDB_ARTIFACTS_DIR = 'data/wandb_artifacts'
+
+ # General purpose functions
+ def beep(inp=1, duration=.1, n=1):
+     rate = 10000
+     mult = 1.6 * inp if inp else .08
+     wave = np.sin(mult*np.arange(rate*duration))
+     for i in range(n):
+         display(Audio(wave, rate=10000, autoplay=True))
+         time.sleep(duration / .1)
+
+ def m_reload(package_name):
+     for k,v in sys.modules.items():
+         if k.startswith(package_name):
+             importlib.reload(v)
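These helpers are intended for interactive notebook sessions; a quick hedged example of how they might be called (illustrative only, not part of the commit):

    from dvats.imports import beep, m_reload

    m_reload('dvats')   # re-import every already-loaded dvats submodule after editing the source
    beep(1, n=2)        # play two short notification tones in the notebook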
dvats/.ipynb_checkpoints/load-checkpoint.py ADDED
@@ -0,0 +1,166 @@
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/load.ipynb.
+
+ # %% auto 0
+ __all__ = ['TSArtifact', 'infer_or_inject_freq']
+
+ # %% ../nbs/load.ipynb 2
+ import pandas as pd
+ import numpy as np
+ from fastcore.all import *
+ import wandb
+ from datetime import datetime, timedelta
+ from .imports import *
+ from .utils import *
+ import pickle
+ import pyarrow.feather as ft
+
+ # %% ../nbs/load.ipynb 7
+ class TSArtifact(wandb.Artifact):
+
+     default_storage_path = Path(Path.home()/'data/wandb_artifacts/')
+     date_format = '%Y-%m-%d %H:%M:%S' # TODO add milliseconds
+     handle_missing_values_techniques = {
+         'linear_interpolation': lambda df : df.interpolate(method='linear', limit_direction='both'),
+         'overall_mean': lambda df : df.fillna(df.mean()),
+         'overall_median': lambda df : df.fillna(df.median()),
+         'backward_fill' : lambda df : df.fillna(method='bfill'),
+         'forward_fill' : lambda df : df.fillna(method='ffill')
+     }
+
+     "Class that represents a wandb artifact containing time series data. sd stands for start_date \
+     and ed for end_date. Both should be pd.Timestamps"
+
+     @delegates(wandb.Artifact.__init__)
+     def __init__(self, name, sd:pd.Timestamp, ed:pd.Timestamp, **kwargs):
+         super().__init__(type='dataset', name=name, **kwargs)
+         self.sd = sd
+         self.ed = ed
+         if self.metadata is None:
+             self.metadata = dict()
+         self.metadata['TS'] = dict(sd = self.sd.strftime(self.date_format),
+                                    ed = self.ed.strftime(self.date_format))
+
+
+     @classmethod
+     def from_daily_csv_files(cls, root_path, fread=pd.read_csv, start_date=None, end_date=None, metadata=None, **kwargs):
+
+         "Create a wandb artifact of type `dataset`, containing the CSV files from `start_date` \
+         to `end_date`. Dates must be passed as `datetime.datetime` objects. If a `wandb_run` is \
+         defined, the created artifact will be logged to that run, using the longwall name as \
+         artifact name, and the date range as version."
+
+         return None
+
+
+     @classmethod
+     @delegates(__init__)
+     def from_df(cls, df:pd.DataFrame, name:str, path:str=None, sd:pd.Timestamp=None, ed:pd.Timestamp=None,
+                 normalize:bool=False, missing_values_technique:str=None, resampling_freq:str=None, **kwargs):
+
+         """
+         Create a TSArtifact of type `dataset`, using the DataFrame `df` samples from \
+         `sd` (start date) to `ed` (end date). Dates must be passed as `datetime.datetime` \
+         objects. The transformed DataFrame is stored as a pickle file in the path `path` \
+         and its reference is added to the artifact entries. Additionally, the dataset can \
+         be normalized (see `normalize` argument) or transformed using missing values \
+         handling techniques (see `missing_values_technique` argument) or resampling (see \
+         `resampling_freq` argument).
+
+         Arguments:
+             df: (DataFrame) The dataframe you want to convert into an artifact.
+             name: (str) The artifact name.
+             path: (str, optional) The path where the file, containing the new transformed \
+                 dataframe, is saved. Default None.
+             sd: (sd, optional) Start date. By default, the first index of `df` is taken.
+             ed: (ed, optional) End date. By default, the last index of `df` is taken.
+             normalize: (bool, optional) If the dataset values should be normalized. Default\
+                 False.
+             missing_values_technique: (str, optional) The technique used to handle missing \
+                 values. Options: "linear_interpolation", "overall_mean", "overall_median" or \
+                 None. Default None.
+             resampling_freq: (str, optional) The offset string or object representing \
+                 frequency conversion for time series resampling. Default None.
+
+         Returns:
+             TSArtifact object.
+         """
+         sd = df.index[0] if sd is None else sd
+         ed = df.index[-1] if ed is None else ed
+         obj = cls(name, sd=sd, ed=ed, **kwargs)
+         df = df.query('@obj.sd <= index <= @obj.ed')
+         obj.metadata['TS']['created'] = 'from-df'
+         obj.metadata['TS']['n_vars'] = df.columns.__len__()
+
+         # Handle Missing Values
+         df = obj.handle_missing_values_techniques[missing_values_technique](df) if missing_values_technique is not None else df
+         obj.metadata['TS']['handle_missing_values_technique'] = missing_values_technique.__str__()
+         obj.metadata['TS']['has_missing_values'] = np.any(df.isna().values).__str__()
+
+         # Indexing and Resampling
+         if resampling_freq: df = df.resample(resampling_freq).mean()
+         obj.metadata['TS']['n_samples'] = len(df)
+         obj.metadata['TS']['freq'] = str(df.index.freq)
+
+         # Time Series Variables
+         obj.metadata['TS']['vars'] = list(df.columns)
+
+         # Normalization - Save the previous means and stds
+         if normalize:
+             obj.metadata['TS']['normalization'] = dict(means = df.describe().loc['mean'].to_dict(),
+                                                        stds = df.describe().loc['std'].to_dict())
+             df = normalize_columns(df)
+
+         # Hash and save
+         hash_code = str(pd.util.hash_pandas_object(df).sum()) # str(hash(df.values.tobytes()))
+         path = obj.default_storage_path/f'{hash_code}' if path is None else Path(path)/f'{hash_code}.feather'
+         ft.write_feather(df, path)
+         obj.metadata['TS']['hash'] = hash_code
+         obj.add_file(str(path))
+
+         return obj
+
+ # %% ../nbs/load.ipynb 11
+ @patch
+ def to_df(self:wandb.apis.public.Artifact):
+     "Download the files of a saved wandb artifact and process them as a single dataframe. The artifact must \
+     come from a call to `run.use_artifact` with a proper wandb run."
+     # The way we have to ensure that the argument comes from a TS artifact is the metadata
+     if self.metadata.get('TS') is None:
+         print(f'ERROR:{self} does not come from a logged TSArtifact')
+         return None
+     dir = Path(self.download())
+     if self.metadata['TS']['created'] == 'from-df':
+         # Call read_pickle with the single file from dir
+         #return pd.read_pickle(dir.ls()[0])
+         return ft.read_feather(dir.ls()[0])
+     else:
+         print("ERROR: Only artifacts created with from_df are supported yet")
+
+ # %% ../nbs/load.ipynb 13
+ @patch
+ def to_tsartifact(self:wandb.apis.public.Artifact):
+     "Cast an artifact as a TS artifact. The artifact must have been created from one of the \
+     class creation methods of the class `TSArtifact`. This is useful to go back to a TSArtifact \
+     after downloading an artifact through the wandb API"
+     return TSArtifact(name=self.digest, #TODO change this
+                       sd=pd.to_datetime(self.metadata['TS']['sd'], format=TSArtifact.date_format),
+                       ed=pd.to_datetime(self.metadata['TS']['ed'], format=TSArtifact.date_format),
+                       description=self.description,
+                       metadata=self.metadata)
+
+ # %% ../nbs/load.ipynb 15
+ @delegates(pd.to_datetime)
+ def infer_or_inject_freq(df, injected_freq='1s', start_date=None, **kwargs):
+     """
+     Infer index frequency. If there's not a proper time index, create fake timestamps,
+     keeping the desired `injected_freq`. If that is None, set a default one of 1 second.
+     start_date: the first date of the index (int or string).
+     """
+     inferred_freq = pd.infer_freq(df.index)
+     if inferred_freq == 'N':
+         timedelta = pd.to_timedelta(injected_freq)
+         df.index = pd.to_datetime(ifnone(start_date, 0), **kwargs) + timedelta*df.index
+         df.index.freq = pd.infer_freq(df.index)
+     else:
+         df.index.freq = inferred_freq
+     return df
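A minimal sketch of how TSArtifact.from_df and infer_or_inject_freq above fit together (not part of the commit; the project name is a placeholder and the default storage directory ~/data/wandb_artifacts/ is assumed to exist):

    import wandb
    from dvats.load import TSArtifact, infer_or_inject_freq
    from dvats.utils import generate_TS_df

    df = infer_or_inject_freq(generate_TS_df(100, 3), injected_freq='1s')
    artifact = TSArtifact.from_df(df, name='example-ts',
                                  missing_values_technique='linear_interpolation',
                                  normalize=True, resampling_freq='5s')
    with wandb.init(project='deepvats-example') as run:   # hypothetical project name
        run.log_artifact(artifact)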
dvats/.ipynb_checkpoints/utils-checkpoint.py ADDED
@@ -0,0 +1,134 @@
+ # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/utils.ipynb (unless otherwise specified).
+
+ __all__ = ['generate_TS_df', 'normalize_columns', 'remove_constant_columns', 'ReferenceArtifact', 'PrintLayer',
+            'get_wandb_artifacts', 'get_pickle_artifact']
+
+ # Cell
+ from .imports import *
+ from fastcore.all import *
+ import wandb
+ import pickle
+ import pandas as pd
+ import numpy as np
+ #import tensorflow as tf
+ import torch.nn as nn
+ from fastai.basics import *
+
+ # Cell
+ def generate_TS_df(rows, cols):
+     "Generates a dataframe containing a multivariate time series, where each column \
+     represents a variable and each row a time point (sample). The timestamp is in the \
+     index of the dataframe, and it is created with an even spacing of 1 second between samples"
+     index = np.arange(pd.Timestamp.now(),
+                       pd.Timestamp.now() + pd.Timedelta(rows-1, 'seconds'),
+                       pd.Timedelta(1, 'seconds'))
+     data = np.random.randn(len(index), cols)
+     return pd.DataFrame(data, index=index)
+
+ # Cell
+ def normalize_columns(df:pd.DataFrame):
+     "Normalize columns from `df` to have 0 mean and 1 standard deviation"
+     mean = df.mean()
+     std = df.std() + 1e-7
+     return (df-mean)/std
+
+ # Cell
+ def remove_constant_columns(df:pd.DataFrame):
+     return df.loc[:, (df != df.iloc[0]).any()]
+
+ # Cell
+ class ReferenceArtifact(wandb.Artifact):
+     default_storage_path = Path('data/wandb_artifacts/') # * this path is relative to Path.home()
+     "This class is meant to create an artifact with a single reference to an object \
+     passed as argument in the constructor. The object will be pickled, hashed and stored \
+     in a specified folder."
+     @delegates(wandb.Artifact.__init__)
+     def __init__(self, obj, name, type='object', folder=None, **kwargs):
+         super().__init__(type=type, name=name, **kwargs)
+         # pickle dumps the object and then hash it
+         hash_code = str(hash(pickle.dumps(obj)))
+         folder = Path(ifnone(folder, Path.home()/self.default_storage_path))
+         with open(f'{folder}/{hash_code}', 'wb') as f:
+             pickle.dump(obj, f)
+         self.add_reference(f'file://{folder}/{hash_code}')
+         if self.metadata is None:
+             self.metadata = dict()
+         self.metadata['ref'] = dict()
+         self.metadata['ref']['hash'] = hash_code
+         self.metadata['ref']['type'] = str(obj.__class__)
+
+ # Cell
+ @patch
+ def to_obj(self:wandb.apis.public.Artifact):
+     """Download the files of a saved ReferenceArtifact and get the referenced object. The artifact must \
+     come from a call to `run.use_artifact` with a proper wandb run."""
+     if self.metadata.get('ref') is None:
+         print(f'ERROR:{self} does not come from a saved ReferenceArtifact')
+         return None
+     original_path = ReferenceArtifact.default_storage_path/self.metadata['ref']['hash']
+     path = original_path if original_path.exists() else Path(self.download()).ls()[0]
+     with open(path, 'rb') as f:
+         obj = pickle.load(f)
+     return obj
+
+ # Cell
+ import torch.nn as nn
+ class PrintLayer(nn.Module):
+     def __init__(self):
+         super(PrintLayer, self).__init__()
+
+     def forward(self, x):
+         # Do your print / debug stuff here
+         print(x.shape)
+         return x
+
+ # Cell
+ @patch
+ def export_and_get(self:Learner, keep_exported_file=False):
+     """
+     Export the learner into an auxiliary file, load it and return it back.
+     """
+     aux_path = Path('aux.pkl')
+     self.export(fname='aux.pkl')
+     aux_learn = load_learner('aux.pkl')
+     if not keep_exported_file: aux_path.unlink()
+     return aux_learn
+
+ # Cell
+ def get_wandb_artifacts(project_path, type=None, name=None, last_version=True):
+     """
+     Get the artifacts logged in a wandb project.
+     Input:
+     - `project_path` (str): entity/project_name
+     - `type` (str): whether to return only one type of artifacts
+     - `name` (str): Leave none to have all artifact names
+     - `last_version`: whether to return only the last version of each artifact or not
+
+     Output: List of artifacts
+     """
+     public_api = wandb.Api()
+     if type is not None:
+         types = [public_api.artifact_type(type, project_path)]
+     else:
+         types = public_api.artifact_types(project_path)
+
+     res = L()
+     for kind in types:
+         for collection in kind.collections():
+             if name is None or name == collection.name:
+                 versions = public_api.artifact_versions(
+                     kind.type,
+                     "/".join([kind.entity, kind.project, collection.name]),
+                     per_page=1,
+                 )
+                 if last_version: res += next(versions)
+                 else: res += L(versions)
+     return list(res)
+
+ # Cell
+ def get_pickle_artifact(filename):
+
+     with open(filename, "rb") as f:
+         df = pickle.load(f)
+
+     return df
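A quick sketch of the ReferenceArtifact round trip defined above (illustrative only; the project name is a placeholder and ~/data/wandb_artifacts/ is assumed to exist):

    import wandb
    import numpy as np
    from dvats.utils import ReferenceArtifact   # importing dvats.utils also registers the patched to_obj

    obj = np.arange(10)
    with wandb.init(project='deepvats-example') as run:          # hypothetical project
        run.log_artifact(ReferenceArtifact(obj, name='example-object'))

    # Later, in another run: use_artifact + to_obj restores the pickled object
    with wandb.init(project='deepvats-example') as run:
        restored = run.use_artifact('example-object:latest').to_obj()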
dvats/__init__.py ADDED
@@ -0,0 +1 @@
+ __version__ = "0.0.1"
dvats/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (147 Bytes).
dvats/__pycache__/all.cpython-310.pyc ADDED
Binary file (273 Bytes).
dvats/__pycache__/dr.cpython-310.pyc ADDED
Binary file (4.12 kB).
dvats/__pycache__/encoder.cpython-310.pyc ADDED
Binary file (10.5 kB).
dvats/__pycache__/imports.cpython-310.pyc ADDED
Binary file (940 Bytes).
dvats/__pycache__/load.cpython-310.pyc ADDED
Binary file (7.04 kB).
dvats/__pycache__/utils.cpython-310.pyc ADDED
Binary file (7.84 kB).
dvats/__pycache__/visualization.cpython-310.pyc ADDED
Binary file (2.22 kB).
dvats/__pycache__/xai.cpython-310.pyc ADDED
Binary file (26.1 kB).
dvats/_modidx.py ADDED
@@ -0,0 +1,105 @@
+ # Autogenerated by nbdev
+
+ d = { 'settings': { 'branch': 'master',
+                     'doc_baseurl': '/dvats/',
+                     'doc_host': 'https://vrodriguezf.github.io',
+                     'git_url': 'https://github.com/vrodriguezf/deepvats',
+                     'lib_path': 'dvats'},
+       'syms': { 'dvats.all': {},
+                 'dvats.dr': { 'dvats.dr.check_compatibility': ('dr.html#check_compatibility', 'dvats/dr.py'),
+                               'dvats.dr.cluster_score': ('dr.html#cluster_score', 'dvats/dr.py'),
+                               'dvats.dr.color_for_percentage': ('dr.html#color_for_percentage', 'dvats/dr.py'),
+                               'dvats.dr.create_bar': ('dr.html#create_bar', 'dvats/dr.py'),
+                               'dvats.dr.get_PCA_prjs': ('dr.html#get_pca_prjs', 'dvats/dr.py'),
+                               'dvats.dr.get_TSNE_prjs': ('dr.html#get_tsne_prjs', 'dvats/dr.py'),
+                               'dvats.dr.get_UMAP_prjs': ('dr.html#get_umap_prjs', 'dvats/dr.py'),
+                               'dvats.dr.get_gpu_memory': ('dr.html#get_gpu_memory', 'dvats/dr.py'),
+                               'dvats.dr.gpu_memory_status': ('dr.html#gpu_memory_status', 'dvats/dr.py')},
+                 'dvats.encoder': { 'dvats.encoder.DCAE_torch': ('encoder.html#dcae_torch', 'dvats/encoder.py'),
+                                    'dvats.encoder.DCAE_torch.__init__': ('encoder.html#__init__', 'dvats/encoder.py'),
+                                    'dvats.encoder.DCAE_torch.forward': ('encoder.html#forward', 'dvats/encoder.py'),
+                                    'dvats.encoder.color_for_percentage': ('encoder.html#color_for_percentage', 'dvats/encoder.py'),
+                                    'dvats.encoder.create_bar': ('encoder.html#create_bar', 'dvats/encoder.py'),
+                                    'dvats.encoder.get_enc_embs': ('encoder.html#get_enc_embs', 'dvats/encoder.py'),
+                                    'dvats.encoder.get_enc_embs_set_stride_set_batch_size': ( 'encoder.html#get_enc_embs_set_stride_set_batch_size',
+                                                                                              'dvats/encoder.py'),
+                                    'dvats.encoder.get_gpu_memory_': ('encoder.html#get_gpu_memory_', 'dvats/encoder.py'),
+                                    'dvats.encoder.gpu_memory_status_': ('encoder.html#gpu_memory_status_', 'dvats/encoder.py')},
+                 'dvats.imports': {},
+                 'dvats.load': { 'dvats.load.TSArtifact': ('load.html#tsartifact', 'dvats/load.py'),
+                                 'dvats.load.TSArtifact.__init__': ('load.html#__init__', 'dvats/load.py'),
+                                 'dvats.load.TSArtifact.from_daily_csv_files': ('load.html#from_daily_csv_files', 'dvats/load.py'),
+                                 'dvats.load.TSArtifact.from_df': ('load.html#from_df', 'dvats/load.py'),
+                                 'dvats.load.infer_or_inject_freq': ('load.html#infer_or_inject_freq', 'dvats/load.py'),
+                                 'dvats.load.wandb.apis.public.Artifact.to_df': ('load.html#wandb.apis.public.artifact.to_df', 'dvats/load.py'),
+                                 'dvats.load.wandb.apis.public.Artifact.to_tsartifact': ( 'load.html#wandb.apis.public.artifact.to_tsartifact',
+                                                                                          'dvats/load.py')},
+                 'dvats.utils': { 'dvats.utils.Learner.export_and_get': ('utils.html#learner.export_and_get', 'dvats/utils.py'),
+                                  'dvats.utils.PrintLayer': ('utils.html#printlayer', 'dvats/utils.py'),
+                                  'dvats.utils.PrintLayer.__init__': ('utils.html#__init__', 'dvats/utils.py'),
+                                  'dvats.utils.PrintLayer.forward': ('utils.html#forward', 'dvats/utils.py'),
+                                  'dvats.utils.ReferenceArtifact': ('utils.html#referenceartifact', 'dvats/utils.py'),
+                                  'dvats.utils.ReferenceArtifact.__init__': ('utils.html#__init__', 'dvats/utils.py'),
+                                  'dvats.utils.exec_with_and_feather_k_output': ('utils.html#exec_with_and_feather_k_output', 'dvats/utils.py'),
+                                  'dvats.utils.exec_with_feather': ('utils.html#exec_with_feather', 'dvats/utils.py'),
+                                  'dvats.utils.exec_with_feather_k_output': ('utils.html#exec_with_feather_k_output', 'dvats/utils.py'),
+                                  'dvats.utils.generate_TS_df': ('utils.html#generate_ts_df', 'dvats/utils.py'),
+                                  'dvats.utils.get_pickle_artifact': ('utils.html#get_pickle_artifact', 'dvats/utils.py'),
+                                  'dvats.utils.get_wandb_artifacts': ('utils.html#get_wandb_artifacts', 'dvats/utils.py'),
+                                  'dvats.utils.learner_module_leaves': ('utils.html#learner_module_leaves', 'dvats/utils.py'),
+                                  'dvats.utils.learner_module_leaves_subtables': ( 'utils.html#learner_module_leaves_subtables',
+                                                                                   'dvats/utils.py'),
+                                  'dvats.utils.normalize_columns': ('utils.html#normalize_columns', 'dvats/utils.py'),
+                                  'dvats.utils.py_function': ('utils.html#py_function', 'dvats/utils.py'),
+                                  'dvats.utils.remove_constant_columns': ('utils.html#remove_constant_columns', 'dvats/utils.py'),
+                                  'dvats.utils.wandb.apis.public.Artifact.to_obj': ( 'utils.html#wandb.apis.public.artifact.to_obj',
+                                                                                     'dvats/utils.py')},
+                 'dvats.visualization': { 'dvats.visualization.plot_TS': ('visualization.html#plot_ts', 'dvats/visualization.py'),
+                                          'dvats.visualization.plot_mask': ('visualization.html#plot_mask', 'dvats/visualization.py'),
+                                          'dvats.visualization.plot_validation_ts_ae': ( 'visualization.html#plot_validation_ts_ae',
+                                                                                          'dvats/visualization.py')},
+                 'dvats.xai': { 'dvats.xai.InteractiveAnomalyPlot': ('xai.html#interactiveanomalyplot', 'dvats/xai.py'),
+                                'dvats.xai.InteractiveAnomalyPlot.__init__': ('xai.html#__init__', 'dvats/xai.py'),
+                                'dvats.xai.InteractiveAnomalyPlot.plot_projections_clusters_interactive': ( 'xai.html#plot_projections_clusters_interactive',
+                                                                                                            'dvats/xai.py'),
+                                'dvats.xai.InteractiveTSPlot': ('xai.html#interactivetsplot', 'dvats/xai.py'),
+                                'dvats.xai.InteractiveTSPlot.__init__': ('xai.html#__init__', 'dvats/xai.py'),
+                                'dvats.xai.add_movement_buttons': ('xai.html#add_movement_buttons', 'dvats/xai.py'),
+                                'dvats.xai.add_selected_features': ('xai.html#add_selected_features', 'dvats/xai.py'),
+                                'dvats.xai.add_windows': ('xai.html#add_windows', 'dvats/xai.py'),
+                                'dvats.xai.anomaly_score': ('xai.html#anomaly_score', 'dvats/xai.py'),
+                                'dvats.xai.calculate_cluster_stats': ('xai.html#calculate_cluster_stats', 'dvats/xai.py'),
+                                'dvats.xai.delta_x_bigger': ('xai.html#delta_x_bigger', 'dvats/xai.py'),
+                                'dvats.xai.delta_x_lower': ('xai.html#delta_x_lower', 'dvats/xai.py'),
+                                'dvats.xai.delta_y_bigger': ('xai.html#delta_y_bigger', 'dvats/xai.py'),
+                                'dvats.xai.delta_y_lower': ('xai.html#delta_y_lower', 'dvats/xai.py'),
+                                'dvats.xai.detector': ('xai.html#detector', 'dvats/xai.py'),
+                                'dvats.xai.get_anomalies': ('xai.html#get_anomalies', 'dvats/xai.py'),
+                                'dvats.xai.get_anomaly_styles': ('xai.html#get_anomaly_styles', 'dvats/xai.py'),
+                                'dvats.xai.get_dataset': ('xai.html#get_dataset', 'dvats/xai.py'),
+                                'dvats.xai.get_dateformat': ('xai.html#get_dateformat', 'dvats/xai.py'),
+                                'dvats.xai.get_df_selected': ('xai.html#get_df_selected', 'dvats/xai.py'),
+                                'dvats.xai.get_embeddings': ('xai.html#get_embeddings', 'dvats/xai.py'),
+                                'dvats.xai.get_prjs': ('xai.html#get_prjs', 'dvats/xai.py'),
+                                'dvats.xai.initial_plot': ('xai.html#initial_plot', 'dvats/xai.py'),
+                                'dvats.xai.merge_overlapping_windows': ('xai.html#merge_overlapping_windows', 'dvats/xai.py'),
+                                'dvats.xai.move_down': ('xai.html#move_down', 'dvats/xai.py'),
+                                'dvats.xai.move_left': ('xai.html#move_left', 'dvats/xai.py'),
+                                'dvats.xai.move_right': ('xai.html#move_right', 'dvats/xai.py'),
+                                'dvats.xai.move_up': ('xai.html#move_up', 'dvats/xai.py'),
+                                'dvats.xai.plot_anomaly_scores_distribution': ('xai.html#plot_anomaly_scores_distribution', 'dvats/xai.py'),
+                                'dvats.xai.plot_clusters_with_anomalies': ('xai.html#plot_clusters_with_anomalies', 'dvats/xai.py'),
+                                'dvats.xai.plot_clusters_with_anomalies_interactive_plot': ( 'xai.html#plot_clusters_with_anomalies_interactive_plot',
+                                                                                             'dvats/xai.py'),
+                                'dvats.xai.plot_initial_config': ('xai.html#plot_initial_config', 'dvats/xai.py'),
+                                'dvats.xai.plot_projections': ('xai.html#plot_projections', 'dvats/xai.py'),
+                                'dvats.xai.plot_projections_clusters': ('xai.html#plot_projections_clusters', 'dvats/xai.py'),
+                                'dvats.xai.plot_save': ('xai.html#plot_save', 'dvats/xai.py'),
+                                'dvats.xai.set_features_buttons': ('xai.html#set_features_buttons', 'dvats/xai.py'),
+                                'dvats.xai.setup_boxes': ('xai.html#setup_boxes', 'dvats/xai.py'),
+                                'dvats.xai.setup_style': ('xai.html#setup_style', 'dvats/xai.py'),
+                                'dvats.xai.shift_datetime': ('xai.html#shift_datetime', 'dvats/xai.py'),
+                                'dvats.xai.show': ('xai.html#show', 'dvats/xai.py'),
+                                'dvats.xai.toggle_trace': ('xai.html#toggle_trace', 'dvats/xai.py'),
+                                'dvats.xai.umap_parameters': ('xai.html#umap_parameters', 'dvats/xai.py'),
+                                'dvats.xai.update_plot': ('xai.html#update_plot', 'dvats/xai.py')}}}
dvats/_nbdev.py ADDED
@@ -0,0 +1,39 @@
+ # AUTOGENERATED BY NBDEV! DO NOT EDIT!
+
+ __all__ = ["index", "modules", "custom_doc_links", "git_url"]
+
+ index = {"check_compatibility": "dr.ipynb",
+          "get_UMAP_prjs": "dr.ipynb",
+          "get_PCA_prjs": "dr.ipynb",
+          "get_TSNE_prjs": "dr.ipynb",
+          "DCAE_torch": "encoder.ipynb",
+          "ENCODER_EMBS_MODULE_NAME": "encoder.ipynb",
+          "get_enc_embs": "encoder.ipynb",
+          "TSArtifact": "load.ipynb",
+          "wandb.apis.public.Artifact.to_df": "load.ipynb",
+          "wandb.apis.public.Artifact.to_tsartifact": "load.ipynb",
+          "infer_or_inject_freq": "load.ipynb",
+          "generate_TS_df": "utils.ipynb",
+          "normalize_columns": "utils.ipynb",
+          "remove_constant_columns": "utils.ipynb",
+          "ReferenceArtifact": "utils.ipynb",
+          "wandb.apis.public.Artifact.to_obj": "utils.ipynb",
+          "PrintLayer": "utils.ipynb",
+          "Learner.export_and_get": "utils.ipynb",
+          "get_wandb_artifacts": "utils.ipynb",
+          "get_pickle_artifact": "utils.ipynb",
+          "plot_TS": "visualization.ipynb",
+          "plot_validation_ts_ae": "visualization.ipynb",
+          "plot_mask": "visualization.ipynb"}
+
+ modules = ["dr.py",
+            "encoder.py",
+            "load.py",
+            "utils.py",
+            "visualization.py"]
+
+ doc_url = "https://vrodriguezf.github.io/tchub/"
+
+ git_url = "https://gitlab.geist.re/pml/x_timecluster_extension/tree/master/"
+
+ def custom_doc_links(name): return None
dvats/all.py ADDED
@@ -0,0 +1,8 @@
+ import dvats
+ from .imports import *
+ from .load import *
+ from .utils import *
+ from .dr import *
+ from .encoder import *
+ from .visualization import *
+ from .xai import *
dvats/dr.py ADDED
@@ -0,0 +1,166 @@
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/dr.ipynb.
+
+ # %% auto 0
+ __all__ = ['get_gpu_memory', 'color_for_percentage', 'create_bar', 'gpu_memory_status', 'check_compatibility', 'get_UMAP_prjs',
+            'get_PCA_prjs', 'get_TSNE_prjs', 'cluster_score']
+
+ # %% ../nbs/dr.ipynb 2
+ import subprocess
+ def get_gpu_memory(device = 0):
+     total_memory = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.total", "--format=csv,noheader,nounits", "--id=" + str(device)])
+     total_memory = int(total_memory.decode().split('\n')[0])
+     used_memory = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.used", "--format=csv,noheader,nounits", "--id=" + str(device)])
+     used_memory = int(used_memory.decode().split('\n')[0])
+
+     percentage = round((used_memory / total_memory) * 100)
+     return used_memory, total_memory, percentage
+
+ def color_for_percentage(percentage):
+     if percentage < 20:
+         return "\033[90m" # Gray
+     elif percentage < 40:
+         return "\033[94m" # Blue
+     elif percentage < 60:
+         return "\033[92m" # Green
+     elif percentage < 80:
+         return "\033[93m" # Orange
+     else:
+         return "\033[91m" # Red
+
+ def create_bar(percentage, color_code, length=20):
+     filled_length = int(length * percentage // 100)
+     bar = "█" * filled_length + "-" * (length - filled_length)
+     return color_code + bar + "\033[0m" # Apply color and reset after bar
+
+ def gpu_memory_status(device=0):
+     used, total, percentage = get_gpu_memory(device)
+     color_code = color_for_percentage(percentage)
+     bar = create_bar(percentage, color_code)
+     print(f"GPU | Used mem: {used}")
+     print(f"GPU | Total mem: {total}")
+     print(f"GPU | Memory Usage: [{bar}] {color_code}{percentage}%\033[0m")
+
+ # %% ../nbs/dr.ipynb 4
+ import umap
+ import cudf
+ import cuml
+ import pandas as pd
+ import numpy as np
+ from fastcore.all import *
+ from .imports import *
+ from .load import TSArtifact
+
+ # %% ../nbs/dr.ipynb 5
+ def check_compatibility(dr_ar:TSArtifact, enc_ar:TSArtifact):
+     "Function to check that the artifact used by the encoder model and the artifact that is \
+     going to be passed through the DR are compatible"
+     try:
+         # Check that both artifacts have the same variables
+         chk_vars = dr_ar.metadata['TS']['vars'] == enc_ar.metadata['TS']['vars']
+         # Check that both artifacts have the same freq
+         chk_freq = dr_ar.metadata['TS']['freq'] == enc_ar.metadata['TS']['freq']
+         # Check that the dr artifact is not normalized (non-normalized data does not have the key 'normalization')
+         chk_norm = dr_ar.metadata['TS'].get('normalization') is None
+         # Check that the dr artifact has no missing values
+         chk_miss = dr_ar.metadata['TS']['has_missing_values'] == "False"
+         # Check all logical vars.
+         if chk_vars and chk_freq and chk_norm and chk_miss:
+             print("Artifacts are compatible.")
+         else:
+             raise Exception
+     except Exception as e:
+         print("Artifacts are not compatible.")
+         raise e
+     return None
+
+ # %% ../nbs/dr.ipynb 7
+ # Comment this part after 4_seconds debugged
+ import hashlib
+
+ # %% ../nbs/dr.ipynb 8
+ import warnings
+ import sys
+ from numba.core.errors import NumbaPerformanceWarning
+ @delegates(cuml.UMAP)
+ def get_UMAP_prjs(
+     input_data,
+     cpu=True,
+     print_flag = False,
+     check_memory_usage = True,
+     **kwargs
+ ):
+     "Compute the projections of `input_data` using UMAP, with a configuration contained in `**kwargs`."
+     if print_flag:
+         print("--> get_UMAP_prjs")
+         print("kwargs: ", kwargs)
+         sys.stdout.flush()
+     ####
+     checksum = hashlib.md5(input_data.tobytes()).hexdigest()
+     print(checksum)
+     ####
+
+     if check_memory_usage: gpu_memory_status()
+
+     warnings.filterwarnings("ignore", category=NumbaPerformanceWarning) # silence NumbaPerformanceWarning
+
+     #reducer = umap.UMAP(**kwargs) if cpu else cuml.UMAP(**kwargs)
+     if cpu:
+         print("-- umap.UMAP --", cpu)
+         sys.stdout.flush()
+         reducer = umap.UMAP(**kwargs)
+     else:
+         print("-- cuml.UMAP --", cpu)
+         sys.stdout.flush()
+         if 'random_state' in kwargs:
+             kwargs['random_state'] = np.uint64(kwargs['random_state'])
+         reducer = cuml.UMAP(**kwargs)
+
+     if print_flag:
+         print("------- reducer --------")
+         print(reducer)
+         print(reducer.get_params())
+         print("------- reducer --------")
+         sys.stdout.flush()
+
+     projections = reducer.fit_transform(input_data)
+
+     if check_memory_usage: gpu_memory_status()
+     if print_flag:
+         checksum = hashlib.md5(projections.tobytes()).hexdigest()
+         print("prjs checksum ", checksum)
+         print("get_UMAP_prjs -->")
+         sys.stdout.flush()
+     return projections
+
+ # %% ../nbs/dr.ipynb 13
+ @delegates(cuml.PCA)
+ def get_PCA_prjs(X, cpu=False, **kwargs):
+     r"""
+     Computes PCA projections of X
+     """
+     if cpu:
+         raise NotImplementedError
+     else:
+         reducer = cuml.PCA(**kwargs)
+     projections = reducer.fit_transform(X)
+     return projections
+
+ # %% ../nbs/dr.ipynb 15
+ @delegates(cuml.TSNE)
+ def get_TSNE_prjs(X, cpu=False, **kwargs):
+     r"""
+     Computes TSNE projections of X
+     """
+     if cpu:
+         raise NotImplementedError
+     else:
+         reducer = cuml.TSNE(**kwargs)
+     projections = reducer.fit_transform(X)
+     return projections
+
+ # %% ../nbs/dr.ipynb 18
+ from sklearn.metrics import silhouette_score
+ def cluster_score(prjs, clusters_labels, print_flag):
+     score = silhouette_score(prjs, clusters_labels)
+     if print_flag: print("Silhouette_score:", score)
+     return score
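For context, a hedged sketch of how cluster_score above is typically applied to the projections (illustrative only; the clustering step uses scikit-learn's KMeans here as an assumption, not something this commit prescribes):

    import numpy as np
    from sklearn.cluster import KMeans
    from dvats.dr import cluster_score

    prjs = np.random.rand(300, 2)                          # stand-in for UMAP projections
    labels = KMeans(n_clusters=3, n_init=10).fit_predict(prjs)
    score = cluster_score(prjs, labels, print_flag=True)   # silhouette score in [-1, 1]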
dvats/encoder.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/encoder.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['ENCODER_EMBS_MODULE_NAME', 'get_gpu_memory_', 'color_for_percentage', 'create_bar', 'gpu_memory_status_',
5
+ 'DCAE_torch', 'get_enc_embs', 'get_enc_embs_set_stride_set_batch_size']
6
+
7
+ # %% ../nbs/encoder.ipynb 2
8
+ import subprocess
9
+ def get_gpu_memory_(device = 0):
10
+ total_memory = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.total", "--format=csv,noheader,nounits", "--id=" + str(device)])
11
+ total_memory = int(total_memory.decode().split('\n')[0])
12
+ used_memory = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.used", "--format=csv,noheader,nounits", "--id=" + str(device)])
13
+ used_memory = int(used_memory.decode().split('\n')[0])
14
+
15
+ percentage = round((used_memory / total_memory) * 100)
16
+ return used_memory, total_memory, percentage
17
+
18
+ def color_for_percentage(percentage):
19
+ if percentage < 20:
20
+ return "\033[90m" # Gray
21
+ elif percentage < 40:
22
+ return "\033[94m" # Blue
23
+ elif percentage < 60:
24
+ return "\033[92m" # Green
25
+ elif percentage < 80:
26
+ return "\033[93m" # Orange
27
+ else:
28
+ return "\033[91m" # Red
29
+
30
+ def create_bar(percentage, color_code, length=20):
31
+ filled_length = int(length * percentage // 100)
32
+ bar = "█" * filled_length + "-" * (length - filled_length)
33
+ return color_code + bar + "\033[0m" # Apply color and reset after bar
34
+
35
+ def gpu_memory_status_(device=0):
36
+ used, total, percentage = get_gpu_memory_(device)
37
+ color_code = color_for_percentage(percentage)
38
+ bar = create_bar(percentage, color_code)
39
+ print(f"GPU | Used mem: {used}")
40
+ print(f"GPU | Used mem: {total}")
41
+ print(f"GPU | Memory Usage: [{bar}] {color_code}{percentage}%\033[0m")
42
+
43
+
44
+ # %% ../nbs/encoder.ipynb 4
45
+ import pandas as pd
46
+ import numpy as np
47
+ from fastcore.all import *
48
+ from tsai.callback.MVP import *
49
+ from tsai.imports import *
50
+ from tsai.models.InceptionTimePlus import InceptionTimePlus
51
+ from tsai.models.explainability import get_acts_and_grads
52
+ from tsai.models.layers import *
53
+ from tsai.data.validation import combine_split_data
54
+ import time
55
+
56
+ # %% ../nbs/encoder.ipynb 7
57
+ class DCAE_torch(Module):
58
+ def __init__(self, c_in, seq_len, delta, nfs=[64, 32, 12], kss=[10, 5, 5],
59
+ pool_szs=[2,2,3], output_fsz=10):
60
+ """
61
+ Create a Deep Convolutional Autoencoder for multivariate time series of `d` dimensions,
62
+ sliced with a window size of `w`. The parameter `delta` sets the number of latent features that will be
63
+ contained in the Dense layer of the network. The the number of features
64
+ maps (filters), the filter size and the pool size can also be adjusted."
65
+ """
66
+ assert all_equal([len(x) for x in [nfs, kss, pool_szs]], np.repeat(len(nfs), 3)), \
67
+ 'nfs, kss, and pool_szs must have the same length'
68
+ assert np.prod(pool_szs) == nfs[-1], \
69
+ 'The number of filters in the last conv layer must be equal to the product of pool sizes'
70
+ assert seq_len % np.prod(pool_szs) == 0, \
71
+ 'The product of pool sizes must be a divisor of the window size'
72
+ layers = []
73
+ for i in range_of(kss):
74
+ layers += [Conv1d(ni=nfs[i-1] if i>0 else c_in, nf=nfs[i], ks=kss[i]),
75
+ nn.MaxPool1d(kernel_size=pool_szs[i])]
76
+ self.downsample = nn.Sequential(*layers)
77
+ self.bottleneck = nn.Sequential(OrderedDict([
78
+ ('flatten', nn.Flatten()),
79
+ ('latent_in', nn.Linear(seq_len, delta)),
80
+ ('latent_out', nn.Linear(delta, seq_len)),
81
+ ('reshape', Reshape(nfs[-1], seq_len // np.prod(pool_szs)))
82
+ ]))
83
+ layers = []
84
+ for i in reversed(range_of(kss)):
85
+ layers += [Conv1d(ni=nfs[i+1] if i != (len(nfs)-1) else nfs[-1],
86
+ nf=nfs[i], ks=kss[i]),
87
+ nn.Upsample(scale_factor=pool_szs[i])]
88
+ layers += [Conv1d(ni=nfs[0], nf=c_in, kernel_size=output_fsz)]
89
+ self.upsample = nn.Sequential(*layers)
90
+
91
+
92
+ def forward(self, x):
93
+ x = self.downsample(x)
94
+ x = self.bottleneck(x)
95
+ x = self.upsample(x)
96
+ return x
97
+
98
+ # %% ../nbs/encoder.ipynb 10
99
+ ENCODER_EMBS_MODULE_NAME = {
100
+ InceptionTimePlus: 'backbone', # for mvp based models
101
+ DCAE_torch: 'bottleneck.latent_in'
102
+ }
103
+
104
+ # %% ../nbs/encoder.ipynb 12
105
+ def get_enc_embs(X, enc_learn, module=None, cpu=False, average_seq_dim=True, to_numpy=True):
106
+ """
107
+ Get the embeddings of X from an encoder, passed in `enc_learn as a fastai
108
+ learner. By default, the embeddings are obtained from the last layer
109
+ before the model head, although any layer can be passed to `model`.
110
+ Input
111
+ - `cpu`: Whether to do the model inference in cpu of gpu (GPU recommended)
112
+ - `average_seq_dim`: Whether to aggregate the embeddings in the sequence dimensions
113
+ - `to_numpy`: Whether to return the result as a numpy array (if false returns a tensor)
114
+ """
115
+ print("--> Check CUDA")
116
+ if cpu:
117
+ print("--> Get enc embs CPU")
118
+ enc_learn.dls.cpu()
119
+ enc_learn.cpu()
120
+ else:
121
+ print("--> Ensure empty cache")
122
+ torch.cuda.empty_cache()
123
+ print("--> Use CUDA |Get enc embs GPU ")
124
+ enc_learn.dls.cuda()
125
+ enc_learn.cuda()
126
+ if torch.cuda.is_available():
127
+ print("CUDA está disponible")
128
+ print("Dispositivo CUDA actual: ", torch.cuda.current_device())
129
+ print("Nombre del dispositivo CUDA actual: ", torch.cuda.get_device_name(torch.cuda.current_device()))
130
+
131
+ else:
132
+ print("CUDA no está disponible ")
133
+ print("Use CUDA -->")
134
+ if enc_learn.dls.bs == 0: enc_learn.dls.bs = 64
135
+
136
+ print("--> Set dataset from X (enc_learn does not contain dls)")
137
+ aux_dl = enc_learn.dls.valid.new_dl(X=X)
138
+ aux_dl.bs = enc_learn.dls.bs if enc_learn.dls.bs>0 else 64
139
+ print("--> Get module")
140
+ module = nested_attr(enc_learn.model,ENCODER_EMBS_MODULE_NAME[type(enc_learn.model)]) if module is None else module
141
+
142
+ print("--> Get enc embs bs: ", aux_dl.bs)
143
+ embs = [
144
+ get_acts_and_grads(
145
+ model=enc_learn.model,
146
+ modules=module,
147
+ x=xb[0],
148
+ cpu=cpu
149
+ )[0]
150
+ for xb in aux_dl
151
+ ]
152
+ print("--> Concat")
153
+ if not cpu:
154
+ total_emb_size = sum([emb.element_size() * emb.nelement() for emb in embs])
155
+ free_memory = torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated()
156
+ if (total_emb_size < free_memory):
157
+ print("Fit in GPU")
158
+ embs=[emb.cuda() for emb in embs]
159
+ else:
160
+ print("Dont fit in GPU --> Go to CPU")
161
+ embs=[emb.cpu() for emb in embs]
162
+ embs = to_concat(embs)
163
+ print("--> reduce")
164
+ if embs.ndim == 3 and average_seq_dim: embs = embs.mean(axis=2)
165
+ print("--> 2 numpy")
166
+ if to_numpy: embs = embs.numpy() if cpu else embs.cpu().numpy()
167
+ return embs
168
+
169
+ # %% ../nbs/encoder.ipynb 13
170
+ def get_enc_embs_set_stride_set_batch_size(
171
+ X, enc_learn, stride, batch_size, module=None, cpu=False, average_seq_dim=True, to_numpy=True,
172
+ print_flag = False, time_flag=False, chunk_size = 0, check_memory_usage = False
173
+ ):
174
+ """
175
+ Get the embeddings of X from an encoder, passed in `enc_learn as a fastai
176
+ learner. By default, the embeddings are obtained from the last layer
177
+ before the model head, although any layer can be passed to `model`.
178
+ Input
179
+ - `cpu`: Whether to do the model inference in cpu of gpu (GPU recommended)
180
+ - `average_seq_dim`: Whether to aggregate the embeddings in the sequence dimensions
181
+ - `to_numpy`: Whether to return the result as a numpy array (if false returns a tensor)
182
+ """
183
+ if time_flag:
184
+ t_start = time.time()
185
+ if print_flag:
186
+ print("--> get_enc_embs_set_stride_set_batch_size")
187
+ if check_memory_usage: gpu_memory_status_()
188
+ #print("get_enc_embs_set_stride_set_batch_size | Check versions")
189
+ #import sys
190
+ #print("get_enc_embs_set_stride_set_batch_size | Check versions | Python version", sys.version)
191
+ #print("get_enc_embs_set_stride_set_batch_size | Check versions | PyTorch version", torch.__version__)
192
+ #print("get_enc_embs_set_stride_set_batch_size | Check versions | CUDA version", torch.version.cuda)
193
+ #print("get_enc_embs_set_stride_set_batch_size | Apply stride & batch size")
194
+
195
+ X = X[::stride]
196
+ enc_learn.dls.bs = batch_size
197
+
198
+ if (print_flag): print("get_enc_embs_set_stride_set_batch_size | Check CUDA | X ~ ", X.shape[0])
199
+ if cpu:
200
+ if (print_flag): print("get_enc_embs_set_stride_set_batch_size | Get enc embs CPU")
201
+ enc_learn.dls.cpu()
202
+ enc_learn.cpu()
203
+ else:
204
+ if torch.cuda.is_available():
205
+ if (print_flag):
206
+ print("get_enc_embs_set_stride_set_batch_size | CUDA device id:", torch.cuda.current_device())
207
+ print("get_enc_embs_set_stride_set_batch_size | CUDA device name: ", torch.cuda.get_device_name(torch.cuda.current_device()))
208
+ print("get_enc_embs_set_stride_set_batch_size | Ensure empty cache & move 2 GPU")
209
+ torch.cuda.empty_cache()
210
+ enc_learn.dls.cuda()
211
+ enc_learn.cuda()
212
+ else:
213
+ if (print_flag): print("get_enc_embs_set_stride_set_batch_size | No cuda available. Set CPU = true")
214
+ cpu = True
215
+
216
+ if enc_learn.dls.bs is None or enc_learn.dls.bs == 0: enc_learn.dls.bs = 64
217
+
218
+ if (print_flag): print("get_enc_embs_set_stride_set_batch_size | Set dataset from X (enc_learn does not contain dls)")
219
+ aux_dl = enc_learn.dls.valid.new_dl(X=X)
220
+ aux_dl.bs = enc_learn.dls.bs if enc_learn.dls.bs>0 else 64
221
+ if (print_flag): print("get_enc_embs_set_stride_set_batch_size | Get module")
222
+ module = nested_attr(enc_learn.model,ENCODER_EMBS_MODULE_NAME[type(enc_learn.model)]) if module is None else module
223
+
224
+ if (print_flag):
225
+ #print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | module ", module)
226
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | aux_dl len", len(aux_dl))
227
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | aux_dl.batch_len ", len(next(iter(aux_dl))))
228
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | aux_dl.bs ", aux_dl.bs)
229
+ if (not cpu):
230
+ total = torch.cuda.get_device_properties(device).total_memory
231
+ used = torch.cuda.memory_allocated(torch.cuda.current_device())
232
+ reserved = torch.cuda.memory_reserved(torch.cuda.current_device())
233
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | total_mem ", total)
234
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | used_mem ", used)
235
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | reserved_mem ", reserved)
236
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | available_mem ", total-reserved)
237
+ sys.stdout.flush()
238
+
239
+ if (cpu or ( chunk_size == 0 )):
240
+ embs = [
241
+ get_acts_and_grads(
242
+ model=enc_learn.model,
243
+ modules=module,
244
+ x=xb[0],
245
+ cpu=cpu
246
+ )[0]
247
+ for xb in aux_dl
248
+ ]
249
+ if not cpu: embs=[emb.cpu() for emb in embs]
250
+ else:
251
+ embs = []
252
+ total_chunks=max(1,round(len(X)/chunk_size))
253
+ if print_flag: print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | aux_dl len | " + str(len(X)) + " chunk size: " + str(chunk_size) + " => " + str(total_chunks) + " chunks")
254
+ for i in range(0, total_chunks):
255
+ if print_flag:
256
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | Chunk [ " + str(i) + "/"+str(total_chunks)+"] => " + str(round(i*100/total_chunks)) + "%")
257
+ sys.stdout.flush()
258
+ chunk = [batch for (n, batch) in enumerate(aux_dl) if (chunk_size*i <= n and chunk_size*(i+1) > n) ]
259
+ chunk_embs = [
260
+ get_acts_and_grads(
261
+ model=enc_learn.model,
262
+ modules=module,
263
+ x=xb[0],
264
+ cpu=cpu
265
+ )[0]
266
+ for xb in chunk
267
+ ]
268
+ # Move the chunk embeddings to the CPU
269
+ chunk_embs = [emb.cpu() for emb in chunk_embs]
270
+ embs.extend(chunk_embs)
271
+ torch.cuda.empty_cache()
272
+ if print_flag:
273
+ print("get_enc_embs_set_stride_set_batch_size | Get acts and grads | 100%")
274
+ sys.stdout.flush()
275
+
276
+ if print_flag: print("get_enc_embs_set_stride_set_batch_size | concat embeddings")
277
+
278
+ embs = to_concat(embs)
279
+
280
+ if print_flag: print("get_enc_embs_set_stride_set_batch_size | Reduce")
281
+
282
+ if embs.ndim == 3 and average_seq_dim: embs = embs.mean(axis=2)
283
+
284
+ if print_flag: print("get_enc_embs_set_stride_set_batch_size | Convert to numpy")
285
+
286
+ if to_numpy:
287
+ if cpu or chunk_size > 0:
288
+ embs = embs.numpy()
289
+ else:
290
+ embs = embs.cpu().numpy()
291
+ torch.cuda.empty_cache()
292
+ if time_flag:
293
+ t = time.time()-t_start
294
+ if print_flag:
295
+ print("get_enc_embs_set_stride_set_batch_size " + str(t) + " seconds -->")
296
+ else:
297
+ print("get_enc_embs_set_stride_set_batch_size " + str(t) + " seconds")
298
+ if check_memory_usage: gpu_memory_status_()
299
+ if print_flag:
300
+ print("get_enc_embs_set_stride_set_batch_size -->")
301
+ return embs
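Usage note (editorial, not part of the diff): a minimal sketch of how `get_enc_embs_set_stride_set_batch_size` is meant to be called. It assumes `enc_learn` is a previously trained tsai/fastai encoder learner (for example restored from a wandb `learner` artifact); the window shape, stride and batch size below are illustrative only.

import numpy as np
from dvats.encoder import get_enc_embs_set_stride_set_batch_size

# X holds sliding windows with shape (n_windows, n_vars, window_length); synthetic here
X = np.random.randn(256, 3, 48).astype(np.float32)

embs = get_enc_embs_set_stride_set_batch_size(
    X,
    enc_learn,            # assumed: trained encoder learner with a valid DataLoaders
    stride=2,             # keep every second window
    batch_size=64,
    cpu=True,             # safe default when no GPU is available
    average_seq_dim=True,
    to_numpy=True,
    chunk_size=0          # 0 = run the whole auxiliary dataloader in one pass
)
print(embs.shape)         # roughly (n_windows // stride, embedding_dim)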
dvats/imports.py ADDED
@@ -0,0 +1,24 @@
1
+ from IPython.display import Audio, display, HTML, Javascript, clear_output # from tsai
2
+ import importlib
3
+ import numpy as np
4
+ import time
5
+ import sys
6
+
7
+ ##
8
+ # Constants
9
+ ##
10
+ WANDB_ARTIFACTS_DIR = 'data/wandb_artifacts'
11
+
12
+ # General purpose functions
13
+ def beep(inp=1, duration=.1, n=1):
14
+ rate = 10000
15
+ mult = 1.6 * inp if inp else .08
16
+ wave = np.sin(mult*np.arange(rate*duration))
17
+ for i in range(n):
18
+ display(Audio(wave, rate=10000, autoplay=True))
19
+ time.sleep(duration / .1)
20
+
21
+ def m_reload(package_name):
22
+ for k,v in sys.modules.items():
23
+ if k.startswith(package_name):
24
+ importlib.reload(v)
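Usage note (editorial): the two helpers above are small notebook conveniences. A minimal sketch, assuming it runs inside Jupyter (the tone relies on IPython.display.Audio):

from dvats.imports import m_reload, beep

m_reload("dvats")   # reload every already-imported dvats.* submodule after editing the source
beep(n=2)           # play two short confirmation tones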
dvats/load.py ADDED
@@ -0,0 +1,168 @@
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/load.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['TSArtifact', 'infer_or_inject_freq']
5
+
6
+ # %% ../nbs/load.ipynb 2
7
+ import pandas as pd
8
+ import numpy as np
9
+ from fastcore.all import *
10
+ import wandb
11
+ from datetime import datetime, timedelta
12
+ from .imports import *
13
+ from .utils import *
14
+ import pickle
15
+ import pyarrow.feather as ft
16
+
17
+ # %% ../nbs/load.ipynb 7
18
+ class TSArtifact(wandb.Artifact):
19
+
20
+ default_storage_path = Path(Path.home()/'data/wandb_artifacts/')
21
+ date_format = '%Y-%m-%d %H:%M:%S' # TODO add milliseconds
22
+ handle_missing_values_techniques = {
23
+ 'linear_interpolation': lambda df : df.interpolate(method='linear', limit_direction='both'),
24
+ 'overall_mean': lambda df : df.fillna(df.mean()),
25
+ 'overall_median': lambda df : df.fillna(df.median()),
26
+ 'backward_fill' : lambda df : df.fillna(method='bfill'),
27
+ 'forward_fill' : lambda df : df.fillna(method='ffill')
28
+ }
29
+
30
+ "Class that represents a wandb artifact containing time series data. sd stands for start_date \
31
+ and ed for end_date. Both should be pd.Timestamps"
32
+
33
+ @delegates(wandb.Artifact.__init__)
34
+ def __init__(self, name, sd:pd.Timestamp, ed:pd.Timestamp, **kwargs):
35
+ super().__init__(type='dataset', name=name, **kwargs)
36
+ self.sd = sd
37
+ self.ed = ed
38
+ if self.metadata is None:
39
+ self.metadata = dict()
40
+ self.metadata['TS'] = dict(sd = self.sd.strftime(self.date_format),
41
+ ed = self.ed.strftime(self.date_format))
42
+
43
+
44
+ @classmethod
45
+ def from_daily_csv_files(cls, root_path, fread=pd.read_csv, start_date=None, end_date=None, metadata=None, **kwargs):
46
+
47
+ "Create a wandb artifact of type `dataset`, containing the CSV files from `start_date` \
48
+ to `end_date`. Dates must be passed as `datetime.datetime` objects. If a `wandb_run` is \
49
+ defined, the created artifact will be logged to that run, using the longwall name as \
50
+ artifact name, and the date range as version."
51
+
52
+ return None
53
+
54
+
55
+ @classmethod
56
+ @delegates(__init__)
57
+ def from_df(cls, df:pd.DataFrame, name:str, path:str=None, sd:pd.Timestamp=None, ed:pd.Timestamp=None,
58
+ normalize:bool=False, missing_values_technique:str=None, resampling_freq:str=None, **kwargs):
59
+
60
+ """
61
+ Create a TSArtifact of type `dataset`, using the DataFrame `df` samples from \
62
+ `sd` (start date) to `ed` (end date). Dates must be passed as `datetime.datetime` \
63
+ objects. The transformed DataFrame is stored as a feather file in the path `path` \
64
+ and its reference is added to the artifact entries. Additionally, the dataset can \
65
+ be normalized (see `normalize` argument) or transformed using missing values \
66
+ handling techniques (see `missing_values_technique` argument) or resampling (see \
67
+ `resampling_freq` argument).
68
+
69
+ Arguments:
70
+ df: (DataFrame) The dataframe you want to convert into an artifact.
71
+ name: (str) The artifact name.
72
+ path: (str, optional) The path where the file, containing the new transformed \
73
+ dataframe, is saved. Default None.
74
+ sd: (pd.Timestamp, optional) Start date. By default, the first index of `df` is taken.
75
+ ed: (pd.Timestamp, optional) End date. By default, the last index of `df` is taken.
76
+ normalize: (bool, optional) If the dataset values should be normalized. Default\
77
+ False.
78
+ missing_values_technique: (str, optional) The technique used to handle missing \
79
+ values. Options: "linear_iterpolation", "overall_mean", "overall_median" or \
80
+ None. Default None.
81
+ resampling_freq: (str, optional) The offset string or object representing \
82
+ frequency conversion for time series resampling. Default None.
83
+
84
+ Returns:
85
+ TSArtifact object.
86
+ """
87
+ sd = df.index[0] if sd is None else sd
88
+ ed = df.index[-1] if ed is None else ed
89
+ obj = cls(name, sd=sd, ed=ed, **kwargs)
90
+ df = df.query('@obj.sd <= index <= @obj.ed')
91
+ obj.metadata['TS']['created'] = 'from-df'
92
+ obj.metadata['TS']['n_vars'] = df.columns.__len__()
93
+
94
+ # Handle Missing Values
95
+ df = obj.handle_missing_values_techniques[missing_values_technique](df) if missing_values_technique is not None else df
96
+ obj.metadata['TS']['handle_missing_values_technique'] = missing_values_technique.__str__()
97
+ obj.metadata['TS']['has_missing_values'] = np.any(df.isna().values).__str__()
98
+
99
+ # Indexing and Resampling
100
+ if resampling_freq: df = df.resample(resampling_freq).mean()
101
+ obj.metadata['TS']['n_samples'] = len(df)
102
+ obj.metadata['TS']['freq'] = str(df.index.freq)
103
+
104
+ # Time Series Variables
105
+ obj.metadata['TS']['vars'] = list(df.columns)
106
+
107
+ # Normalization - Save the previous means and stds
108
+ if normalize:
109
+ obj.metadata['TS']['normalization'] = dict(means = df.describe().loc['mean'].to_dict(),
110
+ stds = df.describe().loc['std'].to_dict())
111
+ df = normalize_columns(df)
112
+
113
+ # Hash and save
114
+ hash_code = str(pd.util.hash_pandas_object(df).sum()) # str(hash(df.values.tobytes()))
115
+ path = obj.default_storage_path/f'{hash_code}' if path is None else Path(path)/f'{hash_code}'
116
+ print("About to write df to ", path)
117
+ ft.write_feather(df, path, compression = 'lz4')
118
+ #feather.write_dataframe
119
+ obj.metadata['TS']['hash'] = hash_code
120
+ obj.add_file(str(path))
121
+
122
+ return obj
123
+
124
+ # %% ../nbs/load.ipynb 14
125
+ @patch
126
+ def to_df(self:wandb.apis.public.Artifact):
127
+ "Download the files of a saved wandb artifact and process them as a single dataframe. The artifact must \
128
+ come from a call to `run.use_artifact` with a proper wandb run."
129
+ # The way we have to ensure that the argument comes from a TSArtifact is the metadata
130
+ if self.metadata.get('TS') is None:
131
+ print(f'ERROR:{self} does not come from a logged TSArtifact')
132
+ return None
133
+ dir = Path(self.download())
134
+ if self.metadata['TS']['created'] == 'from-df':
135
+ # Call read_pickle with the single file from dir
136
+ #return pd.read_pickle(dir.ls()[0])
137
+ return ft.read_feather(dir.ls()[0])
138
+ else:
139
+ print("ERROR: Only from_df method is allowed yet")
140
+
141
+ # %% ../nbs/load.ipynb 16
142
+ @patch
143
+ def to_tsartifact(self:wandb.apis.public.Artifact):
144
+ "Cast an artifact as a TS artifact. The artifact must have been created from one of the \
145
+ class creation methods of the class `TSArtifact`. This is useful to go back to a TSArtifact \
146
+ after downloading an artifact through the wand API"
147
+ return TSArtifact(name=self.digest, #TODO change this
148
+ sd=pd.to_datetime(self.metadata['TS']['sd'], format=TSArtifact.date_format),
149
+ ed=pd.to_datetime(self.metadata['TS']['ed'], format=TSArtifact.date_format),
150
+ description=self.description,
151
+ metadata=self.metadata)
152
+
153
+ # %% ../nbs/load.ipynb 18
154
+ @delegates(pd.to_datetime)
155
+ def infer_or_inject_freq(df, injected_freq='1s', start_date=None, **kwargs):
156
+ """
157
+ Infer index frequency. If there's not a proper time index, create fake timestamps,
158
+ keeping the desired `injected_freq`. If that is None, set a default one of 1 second.
159
+ start_date: the first date of the index (int or string).
160
+ """
161
+ inferred_freq = pd.infer_freq(df.index)
162
+ if inferred_freq == 'N':
163
+ timedelta = pd.to_timedelta(injected_freq)
164
+ df.index = pd.to_datetime(ifnone(start_date, 0), **kwargs) + timedelta*df.index
165
+ df.index.freq = pd.infer_freq(df.index)
166
+ else:
167
+ df.index.freq = inferred_freq
168
+ return df
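Usage note (editorial): a hedged sketch of the intended load.py workflow, from a raw DataFrame to a logged TSArtifact. The wandb project name is a placeholder and the default storage folder (~/data/wandb_artifacts) is assumed to exist.

import wandb
from dvats.load import TSArtifact, infer_or_inject_freq
from dvats.utils import generate_TS_df

df = generate_TS_df(100, 3)                         # synthetic multivariate series
df = infer_or_inject_freq(df, injected_freq='1s')   # make sure the index has a frequency

artifact = TSArtifact.from_df(
    df,
    name='toy-dataset',
    missing_values_technique='linear_interpolation',
    resampling_freq='5s',
    normalize=True
)
with wandb.init(project='my-project') as run:       # placeholder project
    run.log_artifact(artifact)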
dvats/utils.py ADDED
@@ -0,0 +1,245 @@
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/utils.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['generate_TS_df', 'normalize_columns', 'remove_constant_columns', 'ReferenceArtifact', 'PrintLayer',
5
+ 'get_wandb_artifacts', 'get_pickle_artifact', 'exec_with_feather', 'py_function',
6
+ 'exec_with_feather_k_output', 'exec_with_and_feather_k_output', 'learner_module_leaves',
7
+ 'learner_module_leaves_subtables']
8
+
9
+ # %% ../nbs/utils.ipynb 3
10
+ from .imports import *
11
+ from fastcore.all import *
12
+ import wandb
13
+ import pickle
14
+ import pandas as pd
15
+ import numpy as np
16
+ #import tensorflow as tf
17
+ import torch.nn as nn
18
+ from fastai.basics import *
19
+
20
+ # %% ../nbs/utils.ipynb 5
21
+ def generate_TS_df(rows, cols):
22
+ "Generates a dataframe containing a multivariate time series, where each column \
23
+ represents a variable and each row a time point (sample). The timestamp is in the \
24
+ index of the dataframe, and it is created with an even spacing of 1 second between samples
25
+ index = np.arange(pd.Timestamp.now(),
26
+ pd.Timestamp.now() + pd.Timedelta(rows-1, 'seconds'),
27
+ pd.Timedelta(1, 'seconds'))
28
+ data = np.random.randn(len(index), cols)
29
+ return pd.DataFrame(data, index=index)
30
+
31
+ # %% ../nbs/utils.ipynb 10
32
+ def normalize_columns(df:pd.DataFrame):
33
+ "Normalize columns from `df` to have 0 mean and 1 standard deviation"
34
+ mean = df.mean()
35
+ std = df.std() + 1e-7
36
+ return (df-mean)/std
37
+
38
+ # %% ../nbs/utils.ipynb 16
39
+ def remove_constant_columns(df:pd.DataFrame):
40
+ return df.loc[:, (df != df.iloc[0]).any()]
41
+
42
+ # %% ../nbs/utils.ipynb 21
43
+ class ReferenceArtifact(wandb.Artifact):
44
+ default_storage_path = Path('data/wandb_artifacts/') # * this path is relative to Path.home()
45
+ "This class is meant to create an artifact with a single reference to an object \
46
+ passed as argument in the constructor. The object will be pickled, hashed and stored
47
+ in a specified folder."
48
+ @delegates(wandb.Artifact.__init__)
49
+ def __init__(self, obj, name, type='object', folder=None, **kwargs):
50
+ super().__init__(type=type, name=name, **kwargs)
51
+ # pickle dumps the object and then hash it
52
+ hash_code = str(hash(pickle.dumps(obj)))
53
+ folder = Path(ifnone(folder, Path.home()/self.default_storage_path))
54
+ with open(f'{folder}/{hash_code}', 'wb') as f:
55
+ pickle.dump(obj, f)
56
+ self.add_reference(f'file://{folder}/{hash_code}')
57
+ if self.metadata is None:
58
+ self.metadata = dict()
59
+ self.metadata['ref'] = dict()
60
+ self.metadata['ref']['hash'] = hash_code
61
+ self.metadata['ref']['type'] = str(obj.__class__)
62
+
63
+ # %% ../nbs/utils.ipynb 24
64
+ @patch
65
+ def to_obj(self:wandb.apis.public.Artifact):
66
+ """Download the files of a saved ReferenceArtifact and get the referenced object. The artifact must \
67
+ come from a call to `run.use_artifact` with a proper wandb run."""
68
+ if self.metadata.get('ref') is None:
69
+ print(f'ERROR:{self} does not come from a saved ReferenceArtifact')
70
+ return None
71
+ original_path = ReferenceArtifact.default_storage_path/self.metadata['ref']['hash']
72
+ path = original_path if original_path.exists() else Path(self.download()).ls()[0]
73
+ with open(path, 'rb') as f:
74
+ obj = pickle.load(f)
75
+ return obj
76
+
77
+ # %% ../nbs/utils.ipynb 33
78
+ import torch.nn as nn
79
+ class PrintLayer(nn.Module):
80
+ def __init__(self):
81
+ super(PrintLayer, self).__init__()
82
+
83
+ def forward(self, x):
84
+ # Do your print / debug stuff here
85
+ print(x.shape)
86
+ return x
87
+
88
+ # %% ../nbs/utils.ipynb 34
89
+ @patch
90
+ def export_and_get(self:Learner, keep_exported_file=False):
91
+ """
92
+ Export the learner into an auxiliary file, load it and return it back.
93
+ """
94
+ aux_path = Path('aux.pkl')
95
+ self.export(fname='aux.pkl')
96
+ aux_learn = load_learner('aux.pkl')
97
+ if not keep_exported_file: aux_path.unlink()
98
+ return aux_learn
99
+
100
+ # %% ../nbs/utils.ipynb 35
101
+ def get_wandb_artifacts(project_path, type=None, name=None, last_version=True):
102
+ """
103
+ Get the artifacts logged in a wandb project.
104
+ Input:
105
+ - `project_path` (str): entity/project_name
106
+ - `type` (str): if given, return only artifacts of this type
107
+ - `name` (str): leave None to return artifacts with any name
108
+ - `last_version`: whether to return only the last version of each artifact or not
109
+
110
+ Output: List of artifacts
111
+ """
112
+ public_api = wandb.Api()
113
+ if type is not None:
114
+ types = [public_api.artifact_type(type, project_path)]
115
+ else:
116
+ types = public_api.artifact_types(project_path)
117
+
118
+ res = L()
119
+ for kind in types:
120
+ for collection in kind.collections():
121
+ if name is None or name == collection.name:
122
+ versions = public_api.artifact_versions(
123
+ kind.type,
124
+ "/".join([kind.entity, kind.project, collection.name]),
125
+ per_page=1,
126
+ )
127
+ if last_version: res += next(versions)
128
+ else: res += L(versions)
129
+ return list(res)
130
+
131
+ # %% ../nbs/utils.ipynb 39
132
+ def get_pickle_artifact(filename):
133
+
134
+ with open(filename, "rb") as f:
135
+ df = pickle.load(f)
136
+
137
+ return df
138
+
139
+ # %% ../nbs/utils.ipynb 41
140
+ import pyarrow.feather as ft
141
+ import pickle
142
+
143
+ # %% ../nbs/utils.ipynb 42
144
+ def exec_with_feather(function, path = None, print_flag = False, *args, **kwargs):
145
+ result = None
146
+ if not (path is None):
147
+ if print_flag: print("--> Exec with feather | reading input from ", path)
148
+ input = ft.read_feather(path)
149
+ if print_flag: print("--> Exec with feather | Apply function ", path)
150
+ result = function(input, *args, **kwargs)
151
+ if print_flag: print("Exec with feather --> ", path)
152
+ return result
153
+
154
+ # %% ../nbs/utils.ipynb 43
155
+ def py_function(module_name, function_name, print_flag = False):
156
+ try:
157
+ function = getattr(__import__('__main__'), function_name)
158
+ except:
159
+ module = __import__(module_name, fromlist=[''])
160
+ function = getattr(module, function_name)
161
+ print("py function: ", function_name, ": ", function)
162
+ return function
163
+
164
+ # %% ../nbs/utils.ipynb 46
165
+ import time
166
+ def exec_with_feather_k_output(function_name, module_name = "main", path = None, k_output = 0, print_flag = False, time_flag = False, *args, **kwargs):
167
+ result = None
168
+ function = py_function(module_name, function_name, print_flag)
169
+ if time_flag: t_start = time.time()
170
+ if not (path is None):
171
+ if print_flag: print("--> Exec with feather | reading input from ", path)
172
+ input = ft.read_feather(path)
173
+ if print_flag: print("--> Exec with feather | Apply function ", path)
174
+ result = function(input, *args, **kwargs)[k_output]
175
+ if time_flag:
176
+ t_end = time.time()
177
+ print("Exec with feather | time: ", t_end-t_start)
178
+ if print_flag: print("Exec with feather --> ", path)
179
+ return result
180
+
181
+ # %% ../nbs/utils.ipynb 48
182
+ def exec_with_and_feather_k_output(function_name, module_name = "main", path_input = None, path_output = None, k_output = 0, print_flag = False, time_flag = False, *args, **kwargs):
183
+ result = None
184
+ function = py_function(module_name, function_name, print_flag)
185
+ if time_flag: t_start = time.time()
186
+ if not (path_input is None):
187
+ if print_flag: print("--> Exec with feather | reading input from ", path_input)
188
+ input = ft.read_feather(path_input)
189
+ if print_flag:
190
+ print("--> Exec with feather | Apply function ", function_name, "input type: ", type(input))
191
+
192
+ result = function(input, *args, **kwargs)[k_output]
193
+ ft.write_feather(result, path_output, compression = 'lz4')
194
+ if time_flag:
195
+ t_end = time.time()
196
+ print("Exec with feather | time: ", t_end-t_start)
197
+ if print_flag: print("Exec with feather --> ", path_output)
198
+ return path_output
199
+
200
+ # %% ../nbs/utils.ipynb 52
201
+ def learner_module_leaves(learner):
202
+ modules = list(learner.modules())[0] # Get the root module
203
+ rows = []
204
+
205
+ def find_leave_modules(module, path=[]):
206
+ for name, sub_module in module.named_children():
207
+ current_path = path + [f"{type(sub_module).__name__}"]
208
+ if not list(sub_module.children()):
209
+ leave_name = ' -> '.join(current_path)
210
+ leave_params = str(sub_module).strip()
211
+ rows.append([
212
+ leave_name,
213
+ f"{type(sub_module).__name__}",
214
+ name,
215
+ leave_params
216
+ ]
217
+ )
218
+
219
+ find_leave_modules(sub_module, current_path)
220
+
221
+ find_leave_modules(modules)
222
+
223
+ df = pd.DataFrame(rows, columns=['Path', 'Module_type', 'Module_name', 'Module'])
224
+ return df
225
+
226
+ # %% ../nbs/utils.ipynb 56
227
+ def learner_module_leaves_subtables(learner, print_flag = False):
228
+ df = pd.DataFrame(columns=['Path', 'Module_type', 'Module_name', 'Module'])
229
+ md = learner_module_leaves(learner).drop(
230
+ 'Path', axis = 1
231
+ ).sort_values(
232
+ by = 'Module_type'
233
+ )
234
+ if print_flag: print("The layers are of this types:")
235
+
236
+ md_types = pd.DataFrame(md['Module_type'].drop_duplicates())
237
+ if print_flag:
238
+ display(md_types)
239
+ print("And they are called with this parameters:")
240
+
241
+ md_modules = pd.DataFrame(md['Module'].drop_duplicates())
242
+
243
+ if print_flag: display(md_modules)
244
+
245
+ return md_types, md_modules
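Usage note (editorial): a hedged sketch of the ReferenceArtifact round trip defined above. It assumes ~/data/wandb_artifacts exists and that the artifact is later consumed through `run.use_artifact`, which is what makes the patched `to_obj` applicable; project and artifact names are placeholders.

import numpy as np
import wandb
from dvats.utils import ReferenceArtifact

obj = np.random.rand(10, 2)

with wandb.init(project='my-project') as run:             # placeholder project
    art = ReferenceArtifact(obj, name='toy-object')
    run.log_artifact(art)

# Later, in a different run:
with wandb.init(project='my-project') as run:
    restored = run.use_artifact('toy-object:latest').to_obj()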
dvats/visualization.py ADDED
@@ -0,0 +1,63 @@
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/visualization.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['plot_TS', 'plot_validation_ts_ae', 'plot_mask']
5
+
6
+ # %% ../nbs/visualization.ipynb 3
7
+ from fastcore.all import *
8
+ import pandas as pd
9
+ import numpy as np
10
+ import matplotlib.pyplot as plt
11
+ import torch
12
+
13
+ # %% ../nbs/visualization.ipynb 6
14
+ @delegates(pd.DataFrame.plot)
15
+ def plot_TS(df:pd.core.frame.DataFrame, **kwargs):
16
+ df.plot(subplots=True, **kwargs)
17
+ plt.show()
18
+
19
+ # %% ../nbs/visualization.ipynb 8
20
+ def plot_validation_ts_ae(prediction:np.array, original:np.array, title_str = "Validation plot", fig_size = (15,15), anchor = (-0.01, 0.89), window_num = 0, return_fig=True, title_pos = 0.9):
21
+ # Create the figure
22
+ fig = plt.figure(figsize=(fig_size[0],fig_size[1]))
23
+ # Create the subplot axes
24
+ axes = fig.subplots(nrows=original.shape[2], ncols=1)
25
+ # We iterate over the sensor data and plot both the original and the prediction
26
+ for i,ax in zip(range(original.shape[2]),fig.axes):
27
+ ax.plot(original[window_num,:,i], label='Original Data')
28
+ ax.plot(prediction[window_num,:,i], label='Prediction')
29
+ # Handle the legend configuration and position
30
+ lines, labels = fig.axes[-1].get_legend_handles_labels()
31
+ fig.legend(lines, labels,loc='upper left', ncol=2)
32
+ # Write the plot title (and position it closer to the top of the graph)
33
+ fig.suptitle(title_str, y = title_pos)
34
+ # Tight results:
35
+ fig.tight_layout()
36
+ # Returns
37
+ if return_fig:
38
+ return fig
39
+ plt.show()
40
+ return None
41
+
42
+ # %% ../nbs/visualization.ipynb 12
43
+ def plot_mask(mask, i=0, fig_size=(10,10), title_str="Mask", return_fig=False):
44
+ """
45
+ Plot the mask passed as argument. The mask is a 3D boolean tensor. The first
46
+ dimension is the window number (or item index), the second is the variable, and the third is the time step.
47
+ Input:
48
+ mask: 3D boolean tensor
49
+ i: index of the window to plot
50
+ fig_size: size of the figure
51
+ title_str: title of the plot
52
+ return_fig: if True, returns the figure
53
+ Output:
54
+ if return_fig is True, returns the figure, otherwise, it does not return anything
55
+ """
56
+ plt.figure(figsize=fig_size)
57
+ plt.pcolormesh(mask[i], cmap='cool')
58
+ plt.title(f'{title_str} {i}, mean: {mask[i].float().mean().item():.3f}')
59
+ if return_fig:
60
+ return plt.gcf()
61
+ else:
62
+ plt.show()
63
+ return None
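Usage note (editorial): a small sketch of the plotting helpers above on synthetic data; figure sizes, shapes and the random mask are illustrative.

import torch
from dvats.utils import generate_TS_df
from dvats.visualization import plot_TS, plot_mask

df = generate_TS_df(200, 2)
plot_TS(df, figsize=(10, 4))               # one subplot per variable

mask = torch.rand(4, 2, 50) > 0.5          # (windows, variables, time steps)
plot_mask(mask, i=0, fig_size=(6, 3))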
dvats/xai.py ADDED
@@ -0,0 +1,964 @@
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/xai.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['get_embeddings', 'get_dataset', 'umap_parameters', 'get_prjs', 'plot_projections', 'plot_projections_clusters',
5
+ 'calculate_cluster_stats', 'anomaly_score', 'detector', 'plot_anomaly_scores_distribution',
6
+ 'plot_clusters_with_anomalies', 'update_plot', 'plot_clusters_with_anomalies_interactive_plot',
7
+ 'get_df_selected', 'shift_datetime', 'get_dateformat', 'get_anomalies', 'get_anomaly_styles',
8
+ 'InteractiveAnomalyPlot', 'plot_save', 'plot_initial_config', 'merge_overlapping_windows',
9
+ 'InteractiveTSPlot', 'add_selected_features', 'add_windows', 'setup_style', 'toggle_trace',
10
+ 'set_features_buttons', 'move_left', 'move_right', 'move_down', 'move_up', 'delta_x_bigger',
11
+ 'delta_y_bigger', 'delta_x_lower', 'delta_y_lower', 'add_movement_buttons', 'setup_boxes', 'initial_plot',
12
+ 'show']
13
+
14
+ # %% ../nbs/xai.ipynb 1
15
+ #Weight & Biases
16
+ import wandb
17
+
18
+ #Yaml
19
+ from yaml import load, FullLoader
20
+
21
+ #Embeddings
22
+ from .all import *
23
+ from tsai.data.preparation import prepare_forecasting_data
24
+ from tsai.data.validation import get_forecasting_splits
25
+ from fastcore.all import *
26
+
27
+ #Dimensionality reduction
28
+ from tsai.imports import *
29
+
30
+ #Clustering
31
+ import hdbscan
32
+ import time
33
+ from .dr import get_PCA_prjs, get_UMAP_prjs, get_TSNE_prjs
34
+
35
+ import seaborn as sns
36
+ import matplotlib.pyplot as plt
37
+ import pandas as pd
38
+ import ipywidgets as widgets
39
+ from IPython.display import display
40
+ from functools import partial
41
+
42
+ from IPython.display import display, clear_output, HTML as IPHTML
43
+ from ipywidgets import Button, Output, VBox, HBox, HTML, Layout, FloatSlider
44
+
45
+ import plotly.graph_objs as go
46
+ import plotly.offline as py
47
+ import plotly.io as pio
48
+ #! pip install kaleido
49
+ import kaleido
50
+
51
+
52
+ # %% ../nbs/xai.ipynb 4
53
+ def get_embeddings(config_lrp, run_lrp, api, print_flag = False):
54
+ artifacts_gettr = run_lrp.use_artifact if config_lrp.use_wandb else api.artifact
55
+ emb_artifact = artifacts_gettr(config_lrp.emb_artifact, type='embeddings')
56
+ if print_flag: print(emb_artifact.name)
57
+ emb_config = emb_artifact.logged_by().config
58
+ return emb_artifact.to_obj(), emb_artifact, emb_config
59
+
60
+ # %% ../nbs/xai.ipynb 5
61
+ def get_dataset(
62
+ config_lrp,
63
+ config_emb,
64
+ config_dr,
65
+ run_lrp,
66
+ api,
67
+ print_flag = False
68
+ ):
69
+ # Botch to use artifacts offline
70
+ artifacts_gettr = run_lrp.use_artifact if config_lrp.use_wandb else api.artifact
71
+ enc_artifact = artifacts_gettr(config_emb['enc_artifact'], type='learner')
72
+ if print_flag: print (enc_artifact.name)
73
+ ## TODO: This only works when you run it twice! Why?
74
+ try:
75
+ enc_learner = enc_artifact.to_obj()
76
+ except:
77
+ enc_learner = enc_artifact.to_obj()
78
+
79
+ ## Restore artifact
80
+ enc_logger = enc_artifact.logged_by()
81
+ enc_artifact_train = artifacts_gettr(enc_logger.config['train_artifact'], type='dataset')
82
+ #cfg_.show_attrdict(enc_logger.config)
83
+ if enc_logger.config['valid_artifact'] is not None:
84
+ enc_artifact_valid = artifacts_gettr(enc_logger.config['valid_artifact'], type='dataset')
85
+ if print_flag: print("enc_artifact_valid:", enc_artifact_valid.name)
86
+ if print_flag: print("enc_artifact_train: ", enc_artifact_train.name)
87
+
88
+ if config_dr['dr_artifact'] is not None:
89
+ print("Is not none")
90
+ dr_artifact = artifacts_gettr(config_dr['enc_artifact'])
91
+ else:
92
+ dr_artifact = enc_artifact_train
93
+ if print_flag: print("DR artifact train: ", dr_artifact.name)
94
+ if print_flag: print("--> DR artifact name", dr_artifact.name)
95
+ dr_artifact
96
+ df = dr_artifact.to_df()
97
+ if print_flag: print("--> DR After to df", df.shape)
98
+ if print_flag: display(df.head())
99
+ return df, dr_artifact, enc_artifact, enc_learner
100
+
101
+ # %% ../nbs/xai.ipynb 6
102
+ def umap_parameters(config_dr, config):
103
+ umap_params_cpu = {
104
+ 'n_neighbors' : config_dr.n_neighbors,
105
+ 'min_dist' : config_dr.min_dist,
106
+ 'random_state': np.uint64(822569775),
107
+ 'metric': config_dr.metric,
108
+ #'a': 1.5769434601962196,
109
+ #'b': 0.8950608779914887,
110
+ #'metric_kwds': {'p': 2}, # should not be needed, just in case
111
+ #'output_metric': 'euclidean',
112
+ 'verbose': 4,
113
+ #'n_epochs': 200
114
+ }
115
+ umap_params_gpu = {
116
+ 'n_neighbors' : config_dr.n_neighbors,
117
+ 'min_dist' : config_dr.min_dist,
118
+ 'random_state': np.uint64(1234),
119
+ 'metric': config_dr.metric,
120
+ 'a': 1.5769434601962196,
121
+ 'b': 0.8950608779914887,
122
+ 'target_metric': 'euclidean',
123
+ 'target_n_neighbors': config_dr.n_neighbors,
124
+ 'verbose': 4, #6, #CUML_LEVEL_TRACE
125
+ 'n_epochs': 200*3*2,
126
+ 'init': 'random',
127
+ 'hash_input': True
128
+ }
129
+ if config_dr.cpu_flag:
130
+ umap_params = umap_params_cpu
131
+ else:
132
+ umap_params = umap_params_gpu
133
+ return umap_params
134
+
135
+ # %% ../nbs/xai.ipynb 7
136
+ def get_prjs(embs_no_nan, config_dr, config, print_flag = False):
137
+ umap_params = umap_parameters(config_dr, config)
138
+ prjs_pca = get_PCA_prjs(
139
+ X = embs_no_nan,
140
+ cpu = False,
141
+ print_flag = print_flag,
142
+ **umap_params
143
+ )
144
+ if print_flag:
145
+ print(prjs_pca.shape)
146
+ prjs_umap = get_UMAP_prjs(
147
+ input_data = prjs_pca,
148
+ cpu = config_dr.cpu_flag, #config_dr.cpu,
149
+ print_flag = print_flag,
150
+ **umap_params
151
+ )
152
+ if print_flag: print(prjs_umap.shape)
153
+ return prjs_umap
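Usage note (editorial): a hedged sketch of calling `get_prjs` directly. `config_dr` only needs the attributes read by `umap_parameters`; the SimpleNamespace below is a stand-in for the wandb config object, the embedding matrix is synthetic, and the GPU-backed PCA is assumed to be available because `get_PCA_prjs` is called with cpu=False.

from types import SimpleNamespace
import numpy as np

config_dr = SimpleNamespace(n_neighbors=15, min_dist=0.1, metric='euclidean', cpu_flag=True)
embs_no_nan = np.random.rand(500, 32)      # stand-in for encoder embeddings without NaNs
prjs_umap = get_prjs(embs_no_nan, config_dr, config=None, print_flag=False)
print(prjs_umap.shape)                     # expected: (500, 2)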
154
+
155
+ # %% ../nbs/xai.ipynb 9
156
+ def plot_projections(prjs, umap_params, fig_size = (25,25)):
157
+ "Plot 2D projections thorugh a connected scatter plot"
158
+ df_prjs = pd.DataFrame(prjs, columns = ['x1', 'x2'])
159
+ fig = plt.figure(figsize=(fig_size[0],fig_size[1]))
160
+ ax = fig.add_subplot(111)
161
+ ax.scatter(df_prjs['x1'], df_prjs['x2'], marker='o', facecolors='none', edgecolors='b', alpha=0.1)
162
+ ax.plot(df_prjs['x1'], df_prjs['x2'], alpha=0.5, picker=1)
163
+ plt.title('DR params - n_neighbors:{:d} min_dist:{:f}'.format(
164
+ umap_params['n_neighbors'],umap_params['min_dist']))
165
+ return ax
166
+
167
+ # %% ../nbs/xai.ipynb 10
168
+ def plot_projections_clusters(prjs, clusters_labels, umap_params, fig_size = (25,25)):
169
+ "Plot 2D projections thorugh a connected scatter plot"
170
+ df_prjs = pd.DataFrame(prjs, columns = ['x1', 'x2'])
171
+ df_prjs['cluster'] = clusters_labels
172
+
173
+ fig = plt.figure(figsize=(fig_size[0],fig_size[1]))
174
+ ax = fig.add_subplot(111)
175
+
176
+ # Create a scatter plot for each cluster with different colors
177
+ unique_labels = df_prjs['cluster'].unique()
178
+ print(unique_labels)
179
+ for label in unique_labels:
180
+ cluster_data = df_prjs[df_prjs['cluster'] == label]
181
+ ax.scatter(cluster_data['x1'], cluster_data['x2'], label=f'Cluster {label}')
182
+ #ax.scatter(df_prjs['x1'], df_prjs['x2'], marker='o', facecolors='none', edgecolors='b', alpha=0.1)
183
+
184
+ #ax.plot(df_prjs['x1'], df_prjs['x2'], alpha=0.5, picker=1)
185
+ plt.title('DR params - n_neighbors:{:d} min_dist:{:f}'.format(
186
+ umap_params['n_neighbors'],umap_params['min_dist']))
187
+ return ax
188
+
189
+ # %% ../nbs/xai.ipynb 11
190
+ def calculate_cluster_stats(data, labels):
191
+ """Computes the media and the standard deviation for every cluster."""
192
+ cluster_stats = {}
193
+ for label in np.unique(labels):
194
+ #members = data[labels == label]
195
+ members = data
196
+ mean = np.mean(members, axis = 0)
197
+ std = np.std(members, axis = 0)
198
+ cluster_stats[label] = (mean, std)
199
+ return cluster_stats
200
+
201
+ # %% ../nbs/xai.ipynb 12
202
+ def anomaly_score(point, cluster_stats, label):
203
+ """Computes an anomaly score for each point."""
204
+ mean, std = cluster_stats[label]
205
+ return np.linalg.norm((point - mean) / std)
206
+
207
+ # %% ../nbs/xai.ipynb 13
208
+ def detector(data, labels):
209
+ """Anomaly detection function."""
210
+ cluster_stats = calculate_cluster_stats(data, labels)
211
+ scores = []
212
+ for point, label in zip(data, labels):
213
+ score = anomaly_score(point, cluster_stats, label)
214
+ scores.append(score)
215
+ return np.array(scores)
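Usage note (editorial): a hedged sketch of the anomaly-scoring pipeline built from the three functions above, assuming `prjs_umap` holds the 2D projections returned by `get_prjs`; the HDBSCAN parameters and the quantile threshold are illustrative.

import hdbscan
import numpy as np

clusterer = hdbscan.HDBSCAN(min_cluster_size=15)
labels = clusterer.fit_predict(prjs_umap)      # assumed: (n, 2) UMAP projections
scores = detector(prjs_umap, labels)
threshold = np.quantile(scores, 0.99)          # flag roughly the top 1% as anomalies
anomaly_idx = np.where(scores > threshold)[0]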
216
+
217
+ # %% ../nbs/xai.ipynb 15
218
+ def plot_anomaly_scores_distribution(anomaly_scores):
219
+ "Plot the distribution of anomaly scores to check for normality"
220
+ plt.figure(figsize=(10, 6))
221
+ sns.histplot(anomaly_scores, kde=True, bins=30)
222
+ plt.title("Distribución de Anomaly Scores")
223
+ plt.xlabel("Anomaly Score")
224
+ plt.ylabel("Frecuencia")
225
+ plt.show()
226
+
227
+ # %% ../nbs/xai.ipynb 16
228
+ def plot_clusters_with_anomalies(prjs, clusters_labels, anomaly_scores, threshold, fig_size=(25, 25)):
229
+ "Plot 2D projections of clusters and superimpose anomalies"
230
+ df_prjs = pd.DataFrame(prjs, columns=['x1', 'x2'])
231
+ df_prjs['cluster'] = clusters_labels
232
+ df_prjs['anomaly'] = anomaly_scores > threshold
233
+
234
+ fig = plt.figure(figsize=(fig_size[0], fig_size[1]))
235
+ ax = fig.add_subplot(111)
236
+
237
+ # Plot each cluster with different colors
238
+ unique_labels = df_prjs['cluster'].unique()
239
+ for label in unique_labels:
240
+ cluster_data = df_prjs[df_prjs['cluster'] == label]
241
+ ax.scatter(cluster_data['x1'], cluster_data['x2'], label=f'Cluster {label}', alpha=0.7)
242
+
243
+ # Superimpose anomalies
244
+ anomalies = df_prjs[df_prjs['anomaly']]
245
+ ax.scatter(anomalies['x1'], anomalies['x2'], color='red', label='Anomalies', edgecolor='k', s=50)
246
+
247
+ plt.title('Clusters and anomalies')
248
+ plt.legend()
249
+ plt.show()
250
+
251
+ def update_plot(threshold, prjs_umap, clusters_labels, anomaly_scores, fig_size):
252
+ plot_clusters_with_anomalies(prjs_umap, clusters_labels, anomaly_scores, threshold, fig_size)
253
+
254
+ def plot_clusters_with_anomalies_interactive_plot(threshold, prjs_umap, clusters_labels, anomaly_scores, fig_size):
255
+ threshold_slider = widgets.FloatSlider(value=threshold, min=0.001, max=3, step=0.001, description='Threshold')
256
+ interactive_plot = widgets.interactive(update_plot, threshold = threshold_slider,
257
+ prjs_umap = widgets.fixed(prjs_umap),
258
+ clusters_labels = widgets.fixed(clusters_labels),
259
+ anomaly_scores = widgets.fixed(anomaly_scores),
260
+ fig_size = widgets.fixed((25,25)))
261
+ display(interactive_plot)
262
+
263
+
264
+ # %% ../nbs/xai.ipynb 18
265
+ import plotly.express as px
266
+ from datetime import timedelta
267
+
268
+ # %% ../nbs/xai.ipynb 19
269
+ def get_df_selected(df, selected_indices, w, stride = 1): # careful with stride
270
+ '''Links back the selected points to the original dataframe and returns the associated windows indices'''
271
+ n_windows = len(selected_indices)
272
+ window_ranges = [(id*stride, (id*stride)+w) for id in selected_indices]
273
+ #window_ranges = [(id*w, (id+1)*w+1) for id in selected_indices]
274
+ #window_ranges = [(id*stride, (id*stride)+w) for id in selected_indices]
275
+ #print(window_ranges)
276
+ valores_tramos = [df.iloc[inicio:fin+1] for inicio, fin in window_ranges]
277
+ df_selected = pd.concat(valores_tramos, ignore_index=False)
278
+ return window_ranges, n_windows, df_selected
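Usage note (editorial): a hedged sketch of mapping a lasso selection back to the raw series. `df` is the original time-series DataFrame and the indices would normally come from `InteractiveAnomalyPlot.selected_indices`; window length, stride and indices are illustrative.

window_ranges, n_windows, df_selected = get_df_selected(
    df, selected_indices=[3, 7, 8], w=48, stride=1
)
# merge_overlapping_windows (defined further down in this module) can then deduplicate
# overlapping ranges before plotting them with InteractiveTSPlot
window_ranges = merge_overlapping_windows(window_ranges)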
279
+
280
+ # %% ../nbs/xai.ipynb 20
281
+ def shift_datetime(dt, seconds, sign, dateformat="%Y-%m-%d %H:%M:%S.%f", print_flag = False):
282
+ """
283
+ This function gets a datetime dt, a number of seconds,
284
+ a sign and moves the date such number of seconds to the future
285
+ if sign is '+' and to the past if sing is '-'.
286
+ """
287
+
288
+ if print_flag: print(dateformat)
289
+ dateformat2= "%Y-%m-%d %H:%M:%S.%f"
290
+ dateformat3 = "%Y-%m-%d"
291
+ ok = False
292
+
293
+ try:
294
+ if print_flag: print("dt ", dt, "seconds", seconds, "sign", sign)
295
+ new_dt = datetime.strptime(dt, dateformat)
296
+ if print_flag: print("ndt", new_dt)
297
+ ok = True
298
+ except ValueError as e:
299
+ if print_flag:
300
+ print("Error: ", e)
301
+
302
+ if (not ok):
303
+ try:
304
+ if print_flag: print("Parsing alternative dataformat", dt, "seconds", seconds, "sign", sign, dateformat2)
305
+ new_dt = datetime.strptime(dt, dateformat3)
306
+ if print_flag: print("2ndt", new_dt)
307
+ except ValueError as e:
308
+ print("Error: ", e)
309
+ if print_flag: print(new_dt)
310
+ try:
311
+
312
+ if new_dt.hour == 0 and new_dt.minute == 0 and new_dt.second == 0:
313
+ if print_flag: "Aqui"
314
+ new_dt = new_dt.replace(hour=0, minute=0, second=0, microsecond=0)
315
+ if print_flag: print(new_dt)
316
+
317
+ if print_flag: print("ndt", new_dt)
318
+
319
+ if (sign == '+'):
320
+ if print_flag: print("Aqui")
321
+ new_dt = new_dt + timedelta(seconds = seconds)
322
+ if print_flag: print(new_dt)
323
+ else:
324
+ if print_flag: print(sign, type(dt))
325
+ new_dt = new_dt - timedelta(seconds = seconds)
326
+ if print_flag: print(new_dt)
327
+ if new_dt.hour == 0 and new_dt.minute == 0 and new_dt.second == 0:
328
+ if print_flag: print("replacing")
329
+ new_dt = new_dt.replace(hour=0, minute=0, second=0, microsecond=0)
330
+
331
+ new_dt_str = new_dt.strftime(dateformat2)
332
+ if print_flag: print("new dt ", new_dt)
333
+ except ValueError as e:
334
+ if print_flag: print("Aqui3")
335
+ shift_datetime(dt, 0, sign, dateformat = "%Y-%m-%d", print_flag = False)
336
+ return str(e)
337
+ return new_dt_str
338
+
339
+
340
+
341
+ # %% ../nbs/xai.ipynb 21
342
+ def get_dateformat(text_date):
343
+ dateformat1 = "%Y-%m-%d %H:%M:%S"
344
+ dateformat2 = "%Y-%m-%d %H:%M:%S.%f"
345
+ dateformat3 = "%Y-%m-%d"
346
+ dateformat = ""
347
+ parts = text_date.split()
348
+
349
+ if len(parts) == 2:
350
+ time_parts = parts[1].split(':')
351
+ if len(time_parts) == 3:
352
+ sec_parts = time_parts[2].split('.')
353
+ if len(sec_parts) == 2:
354
+ dateformat = dateformat2
355
+ else:
356
+ dateformat = dateformat1
357
+ else:
358
+ dateformat = "unknown format 1"
359
+ elif len(parts) == 1:
360
+ dateformat = dateformat3
361
+ else:
362
+ dateformat = "unknown format 2"
363
+
364
+ return dateformat
365
+
366
+ # %% ../nbs/xai.ipynb 23
367
+ def get_anomalies(df, threshold, flag):
368
+ df['anomaly'] = [ (score > threshold) and flag for score in df['anomaly_score']]
369
+
370
+ def get_anomaly_styles(df, threshold, anomaly_scores, flag = False, print_flag = False):
371
+ if print_flag: print("Threshold: ", threshold)
372
+ if print_flag: print("Flag", flag)
373
+ if print_flag: print("df ~", df.shape)
374
+ df['anomaly'] = [ (score > threshold) and flag for score in df['anomaly_score'] ]
375
+ if print_flag: print(df)
376
+ get_anomalies(df, threshold, flag)
377
+ anomalies = df[df['anomaly']]
378
+ if flag:
379
+ df['anomaly'] = [
380
+ (score > threshold) and flag
381
+ for score in anomaly_scores
382
+ ]
383
+ symbols = [
384
+ 'x' if is_anomaly else 'circle'
385
+ for is_anomaly in df['anomaly']
386
+ ]
387
+ line_colors = [
388
+ 'black'
389
+ if (is_anomaly and flag) else 'rgba(0,0,0,0)'
390
+ for is_anomaly in df['anomaly']
391
+ ]
392
+ else:
393
+ symbols = ['circle' for _ in df['x1']]
394
+ line_colors = ['rgba(0,0,0,0)' for _ in df['x1']]
395
+ if print_flag: print(anomalies)
396
+ return symbols, line_colors
397
+ ### Example of use
398
+ #prjs_df = pd.DataFrame(prjs_umap, columns = ['x1', 'x2'])
399
+ #prjs_df['anomaly_score'] = anomaly_scores
400
+ #s, l = get_anomaly_styles(prjs_df, 1, True)
401
+
402
+ # %% ../nbs/xai.ipynb 24
403
+ class InteractiveAnomalyPlot():
404
+ def __init__(
405
+ self, selected_indices = [],
406
+ threshold = 0.15,
407
+ anomaly_flag = False,
408
+ path = "../imgs", w = 0
409
+ ):
410
+ self.selected_indices = selected_indices
411
+ self.selected_indices_tmp = selected_indices
412
+ self.threshold = threshold
413
+ self.threshold_ = threshold
414
+ self.anomaly_flag = anomaly_flag
415
+ self.w = w
416
+ self.name = f"w={self.w}"
417
+ self.path = f"{path}{self.name}.png"
418
+ self.interaction_enabled = True
419
+
420
+
421
+ def plot_projections_clusters_interactive(
422
+ self, prjs, cluster_labels, umap_params, anomaly_scores=[], fig_size=(7,7), print_flag = False
423
+ ):
424
+ self.selected_indices_tmp = self.selected_indices
425
+ py.init_notebook_mode()
426
+
427
+ prjs_df, cluster_colors = plot_initial_config(prjs, cluster_labels, anomaly_scores)
428
+ legend_items = [widgets.HTML(f'<b>Cluster {cluster}:</b> <span style="color:{color};">■</span>')
429
+ for cluster, color in cluster_colors.items()]
430
+ legend = widgets.VBox(legend_items)
431
+
432
+ marker_colors = prjs_df['cluster'].map(cluster_colors)
433
+
434
+ symbols, line_colors = get_anomaly_styles(prjs_df, self.threshold_, anomaly_scores, self.anomaly_flag, print_flag)
435
+
436
+ fig = go.FigureWidget(
437
+ [
438
+ go.Scatter(
439
+ x=prjs_df['x1'], y=prjs_df['x2'],
440
+ mode="markers",
441
+ marker= {
442
+ 'color': marker_colors,
443
+ 'line': { 'color': line_colors, 'width': 1 },
444
+ 'symbol': symbols
445
+ },
446
+ text = prjs_df.index
447
+ )
448
+ ]
449
+ )
450
+
451
+ line_trace = go.Scatter(
452
+ x=prjs_df['x1'],
453
+ y=prjs_df['x2'],
454
+ mode="lines",
455
+ line=dict(color='rgba(128, 128, 128, 0.5)', width=1)#,
456
+ #showlegend=False # you can configure whether this line shows up in the legend
457
+ )
458
+
459
+ fig.add_trace(line_trace)
460
+
461
+ sca = fig.data[0]
462
+
463
+ fig.update_layout(
464
+ dragmode='lasso',
465
+ width=700,
466
+ height=500,
467
+ title={
468
+ 'text': '<span style="font-weight:bold">DR params - n_neighbors:{:d} min_dist:{:f}</span>'.format(
469
+ umap_params['n_neighbors'], umap_params['min_dist']),
470
+ 'y':0.98,
471
+ 'x':0.5,
472
+ 'xanchor': 'center',
473
+ 'yanchor': 'top'
474
+ },
475
+ plot_bgcolor='white',
476
+ paper_bgcolor='#f0f0f0',
477
+ xaxis=dict(gridcolor='lightgray', zerolinecolor='black', title = 'x'),
478
+ yaxis=dict(gridcolor='lightgray', zerolinecolor='black', title = 'y'),
479
+ margin=dict(l=10, r=20, t=30, b=10)
480
+
481
+
482
+ )
483
+
484
+ output_tmp = Output()
485
+ output_button = Output()
486
+ output_anomaly = Output()
487
+ output_threshold = Output()
488
+ output_width = Output()
489
+
490
+ def select_action(trace, points, selector):
491
+ self.selected_indices_tmp = points.point_inds
492
+ with output_tmp:
493
+ output_tmp.clear_output(wait=True)
494
+ if print_flag: print("Selected indices tmp:", self.selected_indices_tmp)
495
+
496
+ def button_action(b):
497
+ self.selected_indices = self.selected_indices_tmp
498
+ with output_button:
499
+ output_button.clear_output(wait = True)
500
+ if print_flag: print("Selected indices:", self.selected_indices)
501
+
502
+
503
+ def update_anomalies():
504
+ if print_flag: print("About to update anomalies")
505
+
506
+ symbols, line_colors = get_anomaly_styles(prjs_df, self.threshold_, anomaly_scores, self.anomaly_flag, print_flag)
507
+
508
+ if print_flag: print("Anomaly styles got")
509
+
510
+ with fig.batch_update():
511
+ fig.data[0].marker.symbol = symbols
512
+ fig.data[0].marker.line.color = line_colors
513
+ if print_flag: print("Anomalies updated")
514
+ if print_flag: print("Threshold: ", self.threshold_)
515
+ if print_flag: print("Scores: ", anomaly_scores)
516
+
517
+
518
+ def anomaly_action(b):
519
+ with output_anomaly: # changed from output_flag to output_anomaly
520
+ output_anomaly.clear_output(wait=True)
521
+ if print_fllag: print("Negate anomaly flag")
522
+ self.anomaly_flag = not self.anomaly_flag
523
+ if print_flag: print("Show anomalies:", self.anomaly_flag)
524
+ update_anomalies()
525
+
526
+ sca.on_selection(select_action)
527
+ layout = widgets.Layout(width='auto', height='40px')
528
+ button = Button(
529
+ description="Update selected_indices",
530
+ style = {'button_color': 'lightblue'},
531
+ display = 'flex',
532
+ flex_row = 'column',
533
+ align_items = 'stretch',
534
+ layout = layout
535
+ )
536
+ anomaly_button = Button(
537
+ description = "Show anomalies",
538
+ style = {'button_color': 'lightgray'},
539
+ display = 'flex',
540
+ flex_row = 'column',
541
+ align_items = 'stretch',
542
+ layout = layout
543
+ )
544
+
545
+ button.on_click(button_action)
546
+ anomaly_button.on_click(anomaly_action)
547
+
548
+ ##### Reactivity buttons
549
+ pause_button = Button(
550
+ description = "Pause interactiveness",
551
+ style = {'button_color': 'pink'},
552
+ display = 'flex',
553
+ flex_row = 'column',
554
+ align_items = 'stretch',
555
+ layout = layout
556
+ )
557
+ resume_button = Button(
558
+ description = "Resume interactiveness",
559
+ style = {'button_color': 'lightgreen'},
560
+ display = 'flex',
561
+ flex_row = 'column',
562
+ align_items = 'stretch',
563
+ layout = layout
564
+ )
565
+
566
+
567
+ threshold_slider = FloatSlider(
568
+ value=self.threshold_,
569
+ min=0.0,
570
+ max=float(np.ceil(self.threshold+5)),
571
+ step=0.0001,
572
+ description='Anomaly threshold:',
573
+ continuous_update=False
574
+ )
575
+
576
+ def pause_interaction(b):
577
+ self.interaction_enabled = False
578
+ fig.update_layout(dragmode='pan')
579
+
580
+ def resume_interaction(b):
581
+ self.interaction_enabled = True
582
+ fig.update_layout(dragmode='lasso')
583
+
584
+
585
+ def update_threshold(change):
586
+ with output_threshold:
587
+ output_threshold.clear_output(wait = True)
588
+ if print_flag: print("Update threshold")
589
+ self.threshold_ = change.new
590
+ if print_flag: print("Update anomalies threshold = ", self.threshold_)
591
+ update_anomalies()
592
+
593
+ #### Width
594
+ width_slider = FloatSlider(
595
+ value = 0.5,
596
+ min = 0.0,
597
+ max = 1.0,
598
+ step = 0.0001,
599
+ description = 'Line width:',
600
+ continuous_update = False
601
+ )
602
+
603
+ def update_width(change):
604
+ with output_width:
605
+ try:
606
+ output_width.clear_output(wait = True)
607
+ if print_flag:
608
+ print("Change line width")
609
+ print("Trace to update:", fig.data[1])
610
+ with fig.batch_update():
611
+ fig.data[1].line.width = change.new # update the line width
612
+ if print_flag: print("ChangeD line width")
613
+ except Exception as e:
614
+ print("Error updating line width:", e)
615
+
616
+
617
+
618
+ pause_button.on_click(pause_interaction)
619
+ resume_button.on_click(resume_interaction)
620
+
621
+ threshold_slider.observe(update_threshold, 'value')
622
+
623
+ ####
624
+ width_slider.observe(update_width, names = 'value')
625
+
626
+ #####
627
+ space = HTML("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;")
628
+
629
+ vbox = VBox((output_tmp, output_button, output_anomaly, output_threshold, fig))
630
+ hbox = HBox((space, button, space, pause_button, space, resume_button, anomaly_button))
631
+
632
+ # Centrar las dos cajas horizontalmente en el VBox
633
+
634
+ box_layout = widgets.Layout(display='flex',
635
+ flex_flow='column',
636
+ align_items='center',
637
+ width='100%')
638
+
639
+ if self.anomaly_flag:
640
+ box = VBox((hbox,threshold_slider,width_slider, output_width, vbox), layout = box_layout)
641
+ else:
642
+ box = VBox((hbox, width_slider, output_width, vbox), layout = box_layout)
643
+ box.add_class("layout")
644
+ plot_save(fig, self.w)
645
+
646
+ display(box)
647
+
648
+
649
+ # %% ../nbs/xai.ipynb 25
650
+ def plot_save(fig, w):
651
+ image_bytes = pio.to_image(fig, format='png')
652
+ with open(f"../imgs/w={w}.png", 'wb') as f:
653
+ f.write(image_bytes)
654
+
655
+
656
+ # %% ../nbs/xai.ipynb 26
657
+ def plot_initial_config(prjs, cluster_labels, anomaly_scores):
658
+ prjs_df = pd.DataFrame(prjs, columns = ['x1', 'x2'])
659
+ prjs_df['cluster'] = cluster_labels
660
+ prjs_df['anomaly_score'] = anomaly_scores
661
+
662
+ cluster_colors_df = pd.DataFrame({'cluster': cluster_labels}).drop_duplicates()
663
+ cluster_colors_df['color'] = px.colors.qualitative.Set1[:len(cluster_colors_df)]
664
+ cluster_colors = dict(zip(cluster_colors_df['cluster'], cluster_colors_df['color']))
665
+ return prjs_df, cluster_colors
666
+
667
+ # %% ../nbs/xai.ipynb 27
668
+ def merge_overlapping_windows(windows):
669
+ if not windows:
670
+ return []
671
+
672
+ # Order
673
+ sorted_windows = sorted(windows, key=lambda x: x[0])
674
+
675
+ merged_windows = [sorted_windows[0]]
676
+
677
+ for window in sorted_windows[1:]:
678
+ if window[0] <= merged_windows[-1][1]:
679
+ # Merge!
680
+ merged_windows[-1] = (merged_windows[-1][0], max(window[1], merged_windows[-1][1]))
681
+ else:
682
+ merged_windows.append(window)
683
+
684
+ return merged_windows
685
+
686
+ # %% ../nbs/xai.ipynb 29
687
+ class InteractiveTSPlot:
688
+ def __init__(
689
+ self,
690
+ df,
691
+ selected_indices,
692
+ meaningful_features_subset_ids,
693
+ w,
694
+ stride=1,
695
+ print_flag=False,
696
+ num_points=10000,
697
+ dateformat='%Y-%m-%d %H:%M:%S',
698
+ delta_x = 10,
699
+ delta_y = 0.1
700
+ ):
701
+ self.df = df
702
+ self.selected_indices = selected_indices
703
+ self.meaningful_features_subset_ids = meaningful_features_subset_ids
704
+ self.w = w
705
+ self.stride = stride
706
+ self.print_flag = print_flag
707
+ self.num_points = num_points
708
+ self.dateformat = dateformat
709
+ self.fig = go.FigureWidget()
710
+ self.buttons = []
711
+ self.print_flag = print_flag
712
+
713
+ self.delta_x = delta_x
714
+ self.delta_y = delta_y
715
+
716
+ self.window_ranges, self.n_windows, self.df_selected = get_df_selected(
717
+ self.df, self.selected_indices, self.w, self.stride
718
+ )
719
+ # Ensure the smallest possible number of windows to plot (like in the R Shiny App)
720
+ self.window_ranges = merge_overlapping_windows(self.window_ranges)
721
+
722
+ # num_points does not work well yet...
723
+ #num_points = min(df_selected.shape[0], num_points)
724
+
725
+ if self.print_flag:
726
+ print("windows: ", self.n_windows, self.window_ranges)
727
+ print("selected id: ", self.df_selected.index)
728
+ print("points: ", self.num_points)
729
+
730
+ self.df.index = self.df.index.astype(str)
731
+ self.fig = go.FigureWidget()
732
+ self.colors = [
733
+ f'rgb({np.random.randint(0, 256)}, {np.random.randint(0, 256)}, {np.random.randint(0, 256)})'
734
+ for _ in range(self.n_windows)
735
+ ]
736
+
737
+ ##############################
738
+ # Outputs for debug printing #
739
+ ##############################
740
+ self.output_windows = Output()
741
+ self.output_move = Output()
742
+ self.output_delta_x = Output()
743
+ self.output_delta_y = Output()
744
+
745
+
746
+
747
+
748
+
749
+ # %% ../nbs/xai.ipynb 30
750
+ def add_selected_features(self: InteractiveTSPlot):
751
+ # Add features time series
752
+ for feature_id in self.df.columns:
753
+ feature_pos = self.df.columns.get_loc(feature_id)
754
+ trace = go.Scatter(
755
+ #x=df.index[:num_points],
756
+ #y=df[feature_id][:num_points],
757
+ x = self.df.index,
758
+ y = self.df[feature_id],
759
+ mode='lines',
760
+ name=feature_id,
761
+ visible=feature_pos in self.meaningful_features_subset_ids,
762
+ text=self.df.index
763
+ #text=[f'{i}-{val}' for i, val in enumerate(df.index)]
764
+ )
765
+ self.fig.add_trace(trace)
766
+
767
+ InteractiveTSPlot.add_selected_features = add_selected_features
768
+
769
+ # %% ../nbs/xai.ipynb 31
770
+ def add_windows(self: InteractiveTSPlot):
771
+ for i, (start, end) in enumerate(self.window_ranges):
772
+ self.fig.add_shape(
773
+ type="rect",
774
+ x0=self.df.index[start],
775
+ x1=self.df.index[end],
776
+ y0= 0,
777
+ y1= 1,
778
+ yref = "paper",
779
+ fillcolor=self.colors[i], #"LightSalmon",
780
+ opacity=0.25,
781
+ layer="below",
782
+ line=dict(color=self.colors[i], width=1),
783
+ name = f"w_{i}"
784
+ )
785
+ with self.output_windows:
786
+ print("w[" + str( self.selected_indices[i] )+ "]="+str(self.df.index[start])+", "+str(self.df.index[end])+")")
787
+
788
+ InteractiveTSPlot.add_windows = add_windows
789
+
790
+ # %% ../nbs/xai.ipynb 32
791
+ def setup_style(self: InteractiveTSPlot):
792
+ self.fig.update_layout(
793
+ title='Time Series with time window plot',
794
+ xaxis_title='Datetime',
795
+ yaxis_title='Value',
796
+ legend_title='Variables',
797
+ margin=dict(l=10, r=10, t=30, b=10),
798
+ xaxis=dict(
799
+ tickformat = '%d-' + self.dateformat,
800
+ #tickvals=list(range(len(df.index))),
801
+ #ticktext = [f'{i}-{val}' for i, val in enumerate(df.index)]
802
+ #grid_color = 'lightgray', zerolinecolor='black', title = 'x'
803
+ ),
804
+ #yaxis = dict(grid_color = 'lightgray', zerolinecolor='black', title = 'y'),
805
+ #plot_color = 'white',
806
+ paper_bgcolor='#f0f0f0'
807
+ )
808
+ self.fig.update_yaxes(fixedrange=True)
809
+
810
+ InteractiveTSPlot.setup_style = setup_style
811
+
812
+ # %% ../nbs/xai.ipynb 34
813
+ def toggle_trace(self : InteractiveTSPlot, button : Button):
814
+ idx = button.description
815
+ trace = self.fig.data[self.df.columns.get_loc(idx)]
816
+ trace.visible = not trace.visible
817
+
818
+ InteractiveTSPlot.toggle_trace = toggle_trace
819
+
820
+ # %% ../nbs/xai.ipynb 35
821
+ def set_features_buttons(self):
822
+ self.buttons = [
823
+ Button(
824
+ description=str(feature_id),
825
+ button_style='success' if self.df.columns.get_loc(feature_id) in self.meaningful_features_subset_ids else ''
826
+ )
827
+ for feature_id in self.df.columns
828
+ ]
829
+ for button in self.buttons:
830
+ button.on_click(self.toggle_trace)
831
+ InteractiveTSPlot.set_features_buttons = set_features_buttons
832
+
833
+ # %% ../nbs/xai.ipynb 36
834
+ def move_left(self : InteractiveTSPlot, button : Button):
835
+ with self.output_move:
836
+ self.output_move.clear_output(wait=True)
837
+ start_date, end_date = self.fig.layout.xaxis.range
838
+ new_start_date = shift_datetime(start_date, self.delta_x, '-', self.dateformat, self.print_flag)
839
+ new_end_date = shift_datetime(end_date, self.delta_x, '-', self.dateformat, self.print_flag)
840
+ with self.fig.batch_update():
841
+ self.fig.layout.xaxis.range = [new_start_date, new_end_date]
842
+
843
+ def move_right(self : InteractiveTSPlot, button : Button):
844
+ self.output_move.clear_output(wait=True)
845
+ with self.output_move:
846
+ start_date, end_date = self.fig.layout.xaxis.range
847
+ new_start_date = shift_datetime(start_date, self.delta_x, '+', self.dateformat, self.print_flag)
848
+ new_end_date = shift_datetime(end_date, self.delta_x, '+', self.dateformat, self.print_flag)
849
+ with self.fig.batch_update():
850
+ self.fig.layout.xaxis.range = [new_start_date, new_end_date]
851
+
852
+ def move_down(self: InteractiveTSPlot, button : Button):
853
+ with self.output_move:
854
+ self.output_move.clear_output(wait=True)
855
+ start_y, end_y = self.fig.layout.yaxis.range
856
+ with self.fig.batch_update():
857
+ self.fig.layout.yaxis.range = [start_y-self.delta_y, end_y-self.delta_y]
858
+ def move_up(self: InteractiveTSPlot, button : Button):
859
+ with self.output_move:
860
+ self.output_move.clear_output(wait=True)
861
+ start_y, end_y = self.fig.layout.yaxis.range
862
+ with self.fig.batch_update():
863
+ self.fig.layout.yaxis.range = [start_y+self.delta_y, end_y+self.delta_y]
864
+
865
+ InteractiveTSPlot.move_left = move_left
866
+ InteractiveTSPlot.move_right = move_right
867
+ InteractiveTSPlot.move_down = move_down
868
+ InteractiveTSPlot.move_up = move_up
869
+
870
+ # %% ../nbs/xai.ipynb 37
871
+ def delta_x_bigger(self: InteractiveTSPlot):
872
+ with self.output_delta_x:
873
+ self.output_delta_x.clear_output(wait = True)
874
+ if self.print_flag: print("Delta before", self.delta_x)
875
+ self.delta_x *= 10
876
+ if self.print_flag: print("delta_x:", self.delta_x)
877
+
878
+ def delta_y_bigger(self: InteractiveTSPlot):
879
+ with self.output_delta_y:
880
+ self.output_delta_y.clear_output(wait = True)
881
+ if self.print_flag: print("Delta before", self.delta_y)
882
+ self.delta_y *= 10
883
+ if self.print_flag: print("delta_y:", self.delta_y)
884
+
885
+ def delta_x_lower(self:InteractiveTSPlot):
886
+ with self.output_delta_x:
887
+ self.output_delta_x.clear_output(wait = True)
888
+ if self.print_flag: print("Delta before", self.delta_x)
889
+ self.delta_x /= 10
890
+ if self.print_flag: print("delta_x:", self.delta_x)
891
+
892
+ def delta_y_lower(self:InteractiveTSPlot):
893
+ with self.output_delta_y:
894
+ self.output_delta_y.clear_output(wait = True)
895
+ print("Delta before", self.delta_y)
896
+ self.delta_y = self.delta_y * 10
897
+ print("delta_y:", self.delta_y)
898
+ InteractiveTSPlot.delta_x_bigger = delta_x_bigger
899
+ InteractiveTSPlot.delta_y_bigger = delta_y_bigger
900
+ InteractiveTSPlot.delta_x_lower = delta_x_lower
901
+ InteractiveTSPlot.delta_y_lower = delta_y_lower
902
+
903
+ # %% ../nbs/xai.ipynb 38
904
+ def add_movement_buttons(self: InteractiveTSPlot):
905
+ self.button_left = Button(description="←")
906
+ self.button_right = Button(description="→")
907
+ self.button_up = Button(description="↑")
908
+ self.button_down = Button(description="↓")
909
+
910
+ self.button_step_x_up = Button(description="dx ↑")
911
+ self.button_step_x_down = Button(description="dx ↓")
912
+ self.button_step_y_up = Button(description="dy ↑")
913
+ self.button_step_y_down = Button(description="dy ↓")
914
+
915
+
916
+ # TODO: Fix step-size adjustment for panning: the output widget is not shown and the value is not actually updated.
917
+ self.button_step_x_up.on_click(self.delta_x_bigger)
918
+ self.button_step_x_down.on_click(self.delta_x_lower)
919
+ self.button_step_y_up.on_click(self.delta_y_bigger)
920
+ self.button_step_y_down.on_click(self.delta_y_lower)
921
+
922
+ self.button_left.on_click(self.move_left)
923
+ self.button_right.on_click(self.move_right)
924
+ self.button_up.on_click(self.move_up)
925
+ self.button_down.on_click(self.move_down)
926
+
927
+ InteractiveTSPlot.add_movement_buttons = add_movement_buttons
928
+
929
+ # %% ../nbs/xai.ipynb 40
930
+ def setup_boxes(self: InteractiveTSPlot):
931
+ self.steps_x = VBox([self.button_step_x_up, self.button_step_x_down])
932
+ self.steps_y = VBox([self.button_step_y_up, self.button_step_y_down])
933
+ arrow_buttons = HBox([self.button_left, self.button_right, self.button_up, self.button_down, self.steps_x, self.steps_y])
934
+ hbox_layout = widgets.Layout(display='flex', flex_flow='row wrap', align_items='flex-start')
935
+ hbox = HBox(self.buttons, layout=hbox_layout)
936
+ box_layout = widgets.Layout(
937
+ display='flex',
938
+ flex_flow='column',
939
+ align_items='center',
940
+ width='100%'
941
+ )
942
+ if self.print_flag:
943
+ self.box = VBox([hbox, arrow_buttons, self.output_move, self.output_delta_x, self.output_delta_y, self.fig, self.output_windows], layout=box_layout)
944
+ else:
945
+ self.box = VBox([hbox, arrow_buttons, self.fig, self.output_windows], layout=box_layout)
946
+
947
+ InteractiveTSPlot.setup_boxes = setup_boxes
948
+
949
+
950
+ # %% ../nbs/xai.ipynb 41
951
+ def initial_plot(self: InteractiveTSPlot):
952
+ self.add_selected_features()
953
+ self.add_windows()
954
+ self.setup_style()
955
+ self.set_features_buttons()
956
+ self.add_movement_buttons()
957
+ self.setup_boxes()
958
+ InteractiveTSPlot.initial_plot = initial_plot
959
+
960
+ # %% ../nbs/xai.ipynb 42
961
+ def show(self : InteractiveTSPlot):
962
+ self.initial_plot()
963
+ display(self.box)
964
+ InteractiveTSPlot.show = show
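
For orientation, a minimal usage sketch of the widget assembled above. The constructor arguments shown here (df, window_ranges) are assumptions inferred from the attributes these methods use, not the exact signature defined earlier in dvats/xai.py; column names and window positions are hypothetical.

import pandas as pd
from dvats.xai import InteractiveTSPlot

# Hypothetical multivariate time series with a datetime index
df = pd.DataFrame(
    {"feat_0": range(100), "feat_1": range(100)},
    index=pd.date_range("2021-01-01", periods=100, freq="H"),
)

# Hypothetical arguments: two highlighted windows given as positional index ranges
plot = InteractiveTSPlot(df=df, window_ranges=[(0, 10), (50, 60)])
plot.show()  # builds traces, window shapes and buttons, then displays the widget box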
entrypoint-rstudio.sh ADDED
@@ -0,0 +1,25 @@
1
+ #!/bin/bash --login
2
+ set -e
3
+
4
+ echo ". ${HOME}/miniconda3/etc/profile.d/conda.sh" >> ${HOME}/.bashrc
5
+ # Make bash automatically activate the conda environment
6
+ echo "conda activate ${ENV_PREFIX}" >> ~/.bashrc
7
+ #echo "export WANDB_ENTITY=${WANDB_ENTITY:-default}" >> ${HOME}/.bashrc
8
+ # echo "WANDB_ENTITY=${WANDB_ENTITY:-default}" >> ${HOME}/.Renviron
9
+
10
+ # Define an array of environment variable names from the ENV_VARS Compose variable
11
+ IFS=',' read -ra ENV_VAR_NAMES <<< "$ENV_VARS"
12
+
13
+ echo "ENV_VAR_NAMES=${ENV_VAR_NAMES[@]}"
14
+
15
+ # Loop through the array of environment variable names and set the variables
16
+ for ENV_VAR_NAME in "${ENV_VAR_NAMES[@]}"; do
17
+ ENV_VAR_VALUE="${!ENV_VAR_NAME:-default}"
18
+ echo "$ENV_VAR_NAME=$ENV_VAR_VALUE" >> ${HOME}/.Renviron
19
+ done
20
+
21
+ ulimit -s 16384
22
+
23
+ # start rstudio server
24
+ /init
25
+ exec "$@"
requirements.txt CHANGED
@@ -1,3 +1 @@
1
- gradio==3.23.0
2
- uvicorn
3
- fastapi
 
1
+